From 31c771ab443352741ecc3710d54a91890a68ee79 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:21 -0600 Subject: [PATCH 1/6] builtin/repo: update stats for each object When walking reachable objects in the repository, `count_objects()` processes a set of objects and updates the `struct object_stats`. In preparation for more granular statistics being collected, update the `struct object_stats` for each individual object instead. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 53 +++++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 0ea045abc1..c7c9f0f497 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -558,8 +558,6 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, { struct count_objects_data *data = cb_data; struct object_stats *stats = data->stats; - size_t inflated_total = 0; - size_t disk_total = 0; size_t object_count; for (size_t i = 0; i < oids->nr; i++) { @@ -575,33 +573,30 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, OBJECT_INFO_QUICK) < 0) continue; - inflated_total += inflated; - disk_total += disk; - } - - switch (type) { - case OBJ_TAG: - stats->type_counts.tags += oids->nr; - stats->inflated_sizes.tags += inflated_total; - stats->disk_sizes.tags += disk_total; - break; - case OBJ_COMMIT: - stats->type_counts.commits += oids->nr; - stats->inflated_sizes.commits += inflated_total; - stats->disk_sizes.commits += disk_total; - break; - case OBJ_TREE: - stats->type_counts.trees += oids->nr; - stats->inflated_sizes.trees += inflated_total; - stats->disk_sizes.trees += disk_total; - break; - case OBJ_BLOB: - stats->type_counts.blobs += oids->nr; - stats->inflated_sizes.blobs += inflated_total; - stats->disk_sizes.blobs += disk_total; - break; - default: - BUG("invalid object type"); + switch (type) { + case OBJ_TAG: + stats->type_counts.tags++; + 
stats->inflated_sizes.tags += inflated; + stats->disk_sizes.tags += disk; + break; + case OBJ_COMMIT: + stats->type_counts.commits++; + stats->inflated_sizes.commits += inflated; + stats->disk_sizes.commits += disk; + break; + case OBJ_TREE: + stats->type_counts.trees++; + stats->inflated_sizes.trees += inflated; + stats->disk_sizes.trees += disk; + break; + case OBJ_BLOB: + stats->type_counts.blobs++; + stats->inflated_sizes.blobs += inflated; + stats->disk_sizes.blobs += disk; + break; + default: + BUG("invalid object type"); + } } object_count = get_total_object_values(&stats->type_counts); From fa1752792711e7383376cf232eb72aac77d726d7 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:22 -0600 Subject: [PATCH 2/6] builtin/repo: add helper for printing keyvalue output The machine-parsable formats for the git-repo(1) "structure" subcommand print output in keyvalue pairs. Introduce the helper function `print_keyvalue()` to remove some code duplication and improve readability. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 71 +++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index c7c9f0f497..782194cf4c 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -446,44 +446,51 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +static inline void print_keyvalue(const char *key, char key_delim, size_t value, + char value_delim) +{ + printf("%s%c%" PRIuMAX "%c", key, key_delim, (uintmax_t)value, + value_delim); +} + static void structure_keyvalue_print(struct repo_structure *stats, char key_delim, char value_delim) { - printf("references.branches.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.branches, value_delim); - printf("references.tags.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.tags, value_delim); - printf("references.remotes.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.remotes, value_delim); - printf("references.others.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.others, value_delim); + print_keyvalue("references.branches.count", key_delim, + stats->refs.branches, value_delim); + print_keyvalue("references.tags.count", key_delim, + stats->refs.tags, value_delim); + print_keyvalue("references.remotes.count", key_delim, + stats->refs.remotes, value_delim); + print_keyvalue("references.others.count", key_delim, + stats->refs.others, value_delim); - printf("objects.commits.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.commits, value_delim); - printf("objects.trees.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.trees, value_delim); - printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.blobs, value_delim); - printf("objects.tags.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.tags, 
value_delim); + print_keyvalue("objects.commits.count", key_delim, + stats->objects.type_counts.commits, value_delim); + print_keyvalue("objects.trees.count", key_delim, + stats->objects.type_counts.trees, value_delim); + print_keyvalue("objects.blobs.count", key_delim, + stats->objects.type_counts.blobs, value_delim); + print_keyvalue("objects.tags.count", key_delim, + stats->objects.type_counts.tags, value_delim); - printf("objects.commits.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.commits, value_delim); - printf("objects.trees.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.trees, value_delim); - printf("objects.blobs.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.blobs, value_delim); - printf("objects.tags.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.tags, value_delim); + print_keyvalue("objects.commits.inflated_size", key_delim, + stats->objects.inflated_sizes.commits, value_delim); + print_keyvalue("objects.trees.inflated_size", key_delim, + stats->objects.inflated_sizes.trees, value_delim); + print_keyvalue("objects.blobs.inflated_size", key_delim, + stats->objects.inflated_sizes.blobs, value_delim); + print_keyvalue("objects.tags.inflated_size", key_delim, + stats->objects.inflated_sizes.tags, value_delim); - printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.commits, value_delim); - printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.trees, value_delim); - printf("objects.blobs.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.blobs, value_delim); - printf("objects.tags.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.tags, value_delim); + print_keyvalue("objects.commits.disk_size", key_delim, + stats->objects.disk_sizes.commits, value_delim); + 
+ print_keyvalue("objects.trees.disk_size", key_delim, + stats->objects.disk_sizes.trees, value_delim); + print_keyvalue("objects.blobs.disk_size", key_delim, + stats->objects.disk_sizes.blobs, value_delim); + print_keyvalue("objects.tags.disk_size", key_delim, + stats->objects.disk_sizes.tags, value_delim); fflush(stdout); } From e33ac9cc9e819f9de8ffe25c165393514cc61b12 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:23 -0600 Subject: [PATCH 3/6] builtin/repo: collect largest inflated objects The "structure" output for git-repo(1) shows the total inflated and disk sizes of reachable objects in the repository, but doesn't show the size of the largest individual objects. Since an individual object may be a large contributor to the overall repository size, it is useful for users to know the maximum size of individual objects. While iterating across objects, record the size and OID of the largest objects encountered for each object type to provide as output. Note that the default "table" output format only displays size information and not the corresponding OID. In a subsequent commit, the table format is updated to add table annotations that mention the OID. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 1 + builtin/repo.c | 63 +++++++++++++++++++++++++++++++++++++ t/t1901-repo-structure.sh | 28 +++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 7d70270dfa..e812e59158 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -52,6 +52,7 @@ supported: * Reachable object counts categorized by type * Total inflated size of reachable objects by type * Total disk size of reachable objects by type +* Largest reachable objects in the repository by type + The output format can be chosen through the flag `--format`.
Three formats are supported: diff --git a/builtin/repo.c b/builtin/repo.c index 782194cf4c..59d5cb2551 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "environment.h" +#include "hash.h" #include "hex.h" #include "odb.h" #include "parse-options.h" @@ -197,6 +198,18 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, return print_fields(argc, argv, repo, format); } +struct object_data { + struct object_id oid; + size_t value; +}; + +struct largest_objects { + struct object_data tag_size; + struct object_data commit_size; + struct object_data tree_size; + struct object_data blob_size; +}; + struct ref_stats { size_t branches; size_t remotes; @@ -215,6 +228,7 @@ struct object_stats { struct object_values type_counts; struct object_values inflated_sizes; struct object_values disk_sizes; + struct largest_objects largest; }; struct repo_structure { @@ -371,6 +385,21 @@ static void stats_table_setup_structure(struct stats_table *table, " * %s", _("Blobs")); stats_table_size_addf(table, objects->disk_sizes.tags, " * %s", _("Tags")); + + stats_table_addf(table, ""); + stats_table_addf(table, "* %s", _("Largest objects")); + stats_table_addf(table, " * %s", _("Commits")); + stats_table_size_addf(table, objects->largest.commit_size.value, + " * %s", _("Maximum size")); + stats_table_addf(table, " * %s", _("Trees")); + stats_table_size_addf(table, objects->largest.tree_size.value, + " * %s", _("Maximum size")); + stats_table_addf(table, " * %s", _("Blobs")); + stats_table_size_addf(table, objects->largest.blob_size.value, + " * %s", _("Maximum size")); + stats_table_addf(table, " * %s", _("Tags")); + stats_table_size_addf(table, objects->largest.tag_size.value, + " * %s", _("Maximum size")); } static void stats_table_print_structure(const struct stats_table *table) @@ -453,6 +482,14 @@ static inline void print_keyvalue(const char *key, char key_delim, size_t value, value_delim); } +static void 
print_object_data(const char *key, char key_delim, + struct object_data *data, char value_delim) +{ + print_keyvalue(key, key_delim, data->value, value_delim); + printf("%s_oid%c%s%c", key, key_delim, oid_to_hex(&data->oid), + value_delim); +} + static void structure_keyvalue_print(struct repo_structure *stats, char key_delim, char value_delim) { @@ -492,6 +529,15 @@ static void structure_keyvalue_print(struct repo_structure *stats, print_keyvalue("objects.tags.disk_size", key_delim, stats->objects.disk_sizes.tags, value_delim); + print_object_data("objects.commits.max_size", key_delim, + &stats->objects.largest.commit_size, value_delim); + print_object_data("objects.trees.max_size", key_delim, + &stats->objects.largest.tree_size, value_delim); + print_object_data("objects.blobs.max_size", key_delim, + &stats->objects.largest.blob_size, value_delim); + print_object_data("objects.tags.max_size", key_delim, + &stats->objects.largest.tag_size, value_delim); + fflush(stdout); } @@ -560,6 +606,15 @@ struct count_objects_data { struct progress *progress; }; +static void check_largest(struct object_data *data, struct object_id *oid, + size_t value) +{ + if (value > data->value || is_null_oid(&data->oid)) { + oidcpy(&data->oid, oid); + data->value = value; + } +} + static int count_objects(const char *path UNUSED, struct oid_array *oids, enum object_type type, void *cb_data) { @@ -585,21 +640,29 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, stats->type_counts.tags++; stats->inflated_sizes.tags += inflated; stats->disk_sizes.tags += disk; + check_largest(&stats->largest.tag_size, &oids->oid[i], + inflated); break; case OBJ_COMMIT: stats->type_counts.commits++; stats->inflated_sizes.commits += inflated; stats->disk_sizes.commits += disk; + check_largest(&stats->largest.commit_size, &oids->oid[i], + inflated); break; case OBJ_TREE: stats->type_counts.trees++; stats->inflated_sizes.trees += inflated; stats->disk_sizes.trees += disk; + 
check_largest(&stats->largest.tree_size, &oids->oid[i], + inflated); break; case OBJ_BLOB: stats->type_counts.blobs++; stats->inflated_sizes.blobs += inflated; stats->disk_sizes.blobs += disk; + check_largest(&stats->largest.blob_size, &oids->oid[i], + inflated); break; default: BUG("invalid object type"); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 17ff164b05..1999f325d0 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -52,6 +52,16 @@ test_expect_success 'empty repository' ' | * Trees | 0 B | | * Blobs | 0 B | | * Tags | 0 B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size | 0 B | + | * Trees | | + | * Maximum size | 0 B | + | * Blobs | | + | * Maximum size | 0 B | + | * Tags | | + | * Maximum size | 0 B | EOF git repo structure >out 2>err && @@ -104,6 +114,16 @@ test_expect_success SHA1 'repository with references and objects' ' | * Trees | $(object_type_disk_usage tree true) | | * Blobs | $(object_type_disk_usage blob true) | | * Tags | $(object_type_disk_usage tag) B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size | 223 B | + | * Trees | | + | * Maximum size | 32.29 KiB | + | * Blobs | | + | * Maximum size | 13 B | + | * Tags | | + | * Maximum size | 132 B | EOF git repo structure >out 2>err && @@ -138,6 +158,14 @@ test_expect_success SHA1 'keyvalue and nul format' ' objects.trees.disk_size=$(object_type_disk_usage tree) objects.blobs.disk_size=$(object_type_disk_usage blob) objects.tags.disk_size=$(object_type_disk_usage tag) + objects.commits.max_size=221 + objects.commits.max_size_oid=de3508174b5c2ace6993da67cae9be9069e2df39 + objects.trees.max_size=1335 + objects.trees.max_size_oid=09931deea9d81ec21300d3e13c74412f32eacec5 + objects.blobs.max_size=11 + objects.blobs.max_size_oid=eaeeedced46482bd4281fda5a5f05ce24854151f + objects.tags.max_size=132 + objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c EOF git repo structure --format=keyvalue 
>out 2>err && From e00bb8c76e18357da3a2098cdac2a3c2c312c17d Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:24 -0600 Subject: [PATCH 4/6] builtin/repo: add OID annotations to table output The "structure" output for git-repo(1) does not show the corresponding OIDs for the largest objects in its "table" output. Update the output to include a list of OID annotations with an index to the corresponding row in the table. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 78 +++++++++++++++++--- t/t1901-repo-structure.sh | 145 ++++++++++++++++++++------------------ 2 files changed, 143 insertions(+), 80 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 59d5cb2551..ea7f5acd3e 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -238,6 +238,7 @@ struct repo_structure { struct stats_table { struct string_list rows; + struct string_list annotations; int name_col_width; int value_col_width; @@ -250,6 +251,8 @@ struct stats_table { struct stats_table_entry { char *value; const char *unit; + size_t index; + struct object_id *oid; }; static void stats_table_vaddf(struct stats_table *table, @@ -272,6 +275,12 @@ static void stats_table_vaddf(struct stats_table *table, table->name_col_width = name_width; if (!entry) return; + if (entry->oid) { + entry->index = table->annotations.nr + 1; + strbuf_addf(&buf, "[%" PRIuMAX "] %s", (uintmax_t)entry->index, + oid_to_hex(entry->oid)); + string_list_append_nodup(&table->annotations, strbuf_detach(&buf, NULL)); + } if (entry->value) { int value_width = utf8_strwidth(entry->value); if (value_width > table->value_col_width) @@ -282,6 +291,8 @@ static void stats_table_vaddf(struct stats_table *table, if (unit_width > table->unit_col_width) table->unit_col_width = unit_width; } + + strbuf_release(&buf); } static void stats_table_addf(struct stats_table *table, const char *format, ...) 
@@ -321,6 +332,27 @@ static void stats_table_size_addf(struct stats_table *table, size_t value, va_end(ap); } +static void stats_table_object_size_addf(struct stats_table *table, + struct object_id *oid, size_t value, + const char *format, ...) +{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + humanise_bytes(value, &entry->value, &entry->unit, HUMANISE_COMPACT); + + /* + * A NULL OID should not have a table annotation. + */ + if (!is_null_oid(oid)) + entry->oid = oid; + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + static inline size_t get_total_reference_count(struct ref_stats *stats) { return stats->branches + stats->remotes + stats->tags + stats->others; @@ -389,19 +421,29 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_addf(table, ""); stats_table_addf(table, "* %s", _("Largest objects")); stats_table_addf(table, " * %s", _("Commits")); - stats_table_size_addf(table, objects->largest.commit_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.commit_size.oid, + objects->largest.commit_size.value, + " * %s", _("Maximum size")); stats_table_addf(table, " * %s", _("Trees")); - stats_table_size_addf(table, objects->largest.tree_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.tree_size.oid, + objects->largest.tree_size.value, + " * %s", _("Maximum size")); stats_table_addf(table, " * %s", _("Blobs")); - stats_table_size_addf(table, objects->largest.blob_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.blob_size.oid, + objects->largest.blob_size.value, + " * %s", _("Maximum size")); stats_table_addf(table, " * %s", _("Tags")); - stats_table_size_addf(table, objects->largest.tag_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.tag_size.oid, + 
objects->largest.tag_size.value, + " * %s", _("Maximum size")); } +#define INDEX_WIDTH 4 + static void stats_table_print_structure(const struct stats_table *table) { const char *name_col_title = _("Repository structure"); @@ -420,7 +462,8 @@ static void stats_table_print_structure(const struct stats_table *table) value_col_width = title_value_width - unit_col_width; strbuf_addstr(&buf, "| "); - strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, name_col_title); + strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width + INDEX_WIDTH, + name_col_title); strbuf_addstr(&buf, " | "); strbuf_utf8_align(&buf, ALIGN_LEFT, value_col_width + unit_col_width + 1, value_col_title); @@ -428,7 +471,7 @@ static void stats_table_print_structure(const struct stats_table *table) printf("%s\n", buf.buf); printf("| "); - for (int i = 0; i < name_col_width; i++) + for (int i = 0; i < name_col_width + INDEX_WIDTH; i++) putchar('-'); printf(" | "); for (int i = 0; i < value_col_width + unit_col_width + 1; i++) @@ -450,6 +493,13 @@ static void stats_table_print_structure(const struct stats_table *table) strbuf_reset(&buf); strbuf_addstr(&buf, "| "); strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, item->string); + + if (entry && entry->oid) + strbuf_addf(&buf, " [%" PRIuMAX "]", + (uintmax_t)entry->index); + else + strbuf_addchars(&buf, ' ', INDEX_WIDTH); + strbuf_addstr(&buf, " | "); strbuf_utf8_align(&buf, ALIGN_RIGHT, value_col_width, value); strbuf_addch(&buf, ' '); @@ -458,6 +508,12 @@ static void stats_table_print_structure(const struct stats_table *table) printf("%s\n", buf.buf); } + if (table->annotations.nr) { + printf("\n"); + for_each_string_list_item(item, &table->annotations) + printf("%s\n", item->string); + } + strbuf_release(&buf); } @@ -473,6 +529,7 @@ static void stats_table_clear(struct stats_table *table) } string_list_clear(&table->rows, 1); + string_list_clear(&table->annotations, 1); } static inline void print_keyvalue(const char *key, char key_delim, size_t value, @@ 
-702,6 +759,7 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, { struct stats_table table = { .rows = STRING_LIST_INIT_DUP, + .annotations = STRING_LIST_INIT_DUP, }; enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 1999f325d0..918af7269f 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -27,41 +27,41 @@ test_expect_success 'empty repository' ' ( cd repo && cat >expect <<-\EOF && - | Repository structure | Value | - | -------------------- | ------ | - | * References | | - | * Count | 0 | - | * Branches | 0 | - | * Tags | 0 | - | * Remotes | 0 | - | * Others | 0 | - | | | - | * Reachable objects | | - | * Count | 0 | - | * Commits | 0 | - | * Trees | 0 | - | * Blobs | 0 | - | * Tags | 0 | - | * Inflated size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | * Disk size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size | 0 B | - | * Trees | | - | * Maximum size | 0 B | - | * Blobs | | - | * Maximum size | 0 B | - | * Tags | | - | * Maximum size | 0 B | + | Repository structure | Value | + | ------------------------ | ------ | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | + | * Inflated size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | + | * Disk size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size | 0 B | + | * Trees | | + | * Maximum size | 0 B | + | * Blobs | | + | * Maximum size | 0 B | + | * Tags | | + | * 
Maximum size | 0 B | EOF git repo structure >out 2>err && @@ -89,41 +89,46 @@ test_expect_success SHA1 'repository with references and objects' ' # git-rev-list(1) --disk-usage=human option printing the full # "byte/bytes" unit string instead of just "B". cat >expect <<-EOF && - | Repository structure | Value | - | -------------------- | ---------- | - | * References | | - | * Count | 4 | - | * Branches | 1 | - | * Tags | 1 | - | * Remotes | 1 | - | * Others | 1 | - | | | - | * Reachable objects | | - | * Count | 3.02 k | - | * Commits | 1.01 k | - | * Trees | 1.01 k | - | * Blobs | 1.01 k | - | * Tags | 1 | - | * Inflated size | 16.03 MiB | - | * Commits | 217.92 KiB | - | * Trees | 15.81 MiB | - | * Blobs | 11.68 KiB | - | * Tags | 132 B | - | * Disk size | $(object_type_disk_usage all true) | - | * Commits | $(object_type_disk_usage commit true) | - | * Trees | $(object_type_disk_usage tree true) | - | * Blobs | $(object_type_disk_usage blob true) | - | * Tags | $(object_type_disk_usage tag) B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size | 223 B | - | * Trees | | - | * Maximum size | 32.29 KiB | - | * Blobs | | - | * Maximum size | 13 B | - | * Tags | | - | * Maximum size | 132 B | + | Repository structure | Value | + | ------------------------ | ---------- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 3.02 k | + | * Commits | 1.01 k | + | * Trees | 1.01 k | + | * Blobs | 1.01 k | + | * Tags | 1 | + | * Inflated size | 16.03 MiB | + | * Commits | 217.92 KiB | + | * Trees | 15.81 MiB | + | * Blobs | 11.68 KiB | + | * Tags | 132 B | + | * Disk size | $(object_type_disk_usage all true) | + | * Commits | $(object_type_disk_usage commit true) | + | * Trees | $(object_type_disk_usage tree true) | + | * Blobs | $(object_type_disk_usage blob true) | + | * Tags | $(object_type_disk_usage tag) B | + | | | + | * Largest 
objects | | + | * Commits | | + | * Maximum size [1] | 223 B | + | * Trees | | + | * Maximum size [2] | 32.29 KiB | + | * Blobs | | + | * Maximum size [3] | 13 B | + | * Tags | | + | * Maximum size [4] | 132 B | + + [1] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a + [2] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c + [3] 97d808e45116bf02103490294d3d46dad7a2ac62 + [4] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 EOF git repo structure >out 2>err && From 18952a1ef1a14d2fca19638118dc2eea1e24d671 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:25 -0600 Subject: [PATCH 5/6] builtin/repo: find commit with most parents Complex merge events may produce an octopus merge where the resulting merge commit has more than two parents. While iterating through objects in the repository for git-repo-structure, identify the commit with the most parents and display it in the output. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 45 ++++++++++++ t/t1901-repo-structure.sh | 151 ++++++++++++++++++++------------------ 2 files changed, 123 insertions(+), 73 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index ea7f5acd3e..047f5e098d 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -1,6 +1,7 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" +#include "commit.h" #include "environment.h" #include "hash.h" #include "hex.h" @@ -208,6 +209,8 @@ struct largest_objects { struct object_data commit_size; struct object_data tree_size; struct object_data blob_size; + + struct object_data parent_count; }; struct ref_stats { @@ -318,6 +321,27 @@ static void stats_table_count_addf(struct stats_table *table, size_t value, va_end(ap); } +static void stats_table_object_count_addf(struct stats_table *table, + struct object_id *oid, size_t value, + const char *format, ...) 
+{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + humanise_count(value, &entry->value, &entry->unit); + + /* + * A NULL OID should not have a table annotation. + */ + if (!is_null_oid(oid)) + entry->oid = oid; + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + static void stats_table_size_addf(struct stats_table *table, size_t value, const char *format, ...) { @@ -425,6 +449,10 @@ static void stats_table_setup_structure(struct stats_table *table, &objects->largest.commit_size.oid, objects->largest.commit_size.value, " * %s", _("Maximum size")); + stats_table_object_count_addf(table, + &objects->largest.parent_count.oid, + objects->largest.parent_count.value, + " * %s", _("Maximum parents")); stats_table_addf(table, " * %s", _("Trees")); stats_table_object_size_addf(table, &objects->largest.tree_size.oid, @@ -595,6 +623,9 @@ static void structure_keyvalue_print(struct repo_structure *stats, print_object_data("objects.tags.max_size", key_delim, &stats->objects.largest.tag_size, value_delim); + print_object_data("objects.commits.max_parents", key_delim, + &stats->objects.largest.parent_count, value_delim); + fflush(stdout); } @@ -682,16 +713,24 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, for (size_t i = 0; i < oids->nr; i++) { struct object_info oi = OBJECT_INFO_INIT; unsigned long inflated; + struct commit *commit; + struct object *obj; + void *content; off_t disk; + int eaten; oi.sizep = &inflated; oi.disk_sizep = &disk; + oi.contentp = &content; if (odb_read_object_info_extended(data->odb, &oids->oid[i], &oi, OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) < 0) continue; + obj = parse_object_buffer(the_repository, &oids->oid[i], type, + inflated, content, &eaten); + switch (type) { case OBJ_TAG: stats->type_counts.tags++; @@ -701,11 +740,14 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, inflated); break; case OBJ_COMMIT: + 
commit = object_as_type(obj, OBJ_COMMIT, 0); stats->type_counts.commits++; stats->inflated_sizes.commits += inflated; stats->disk_sizes.commits += disk; check_largest(&stats->largest.commit_size, &oids->oid[i], inflated); + check_largest(&stats->largest.parent_count, &oids->oid[i], + commit_list_count(commit->parents)); break; case OBJ_TREE: stats->type_counts.trees++; @@ -724,6 +766,9 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, default: BUG("invalid object type"); } + + if (!eaten) + free(content); } object_count = get_total_object_values(&stats->type_counts); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 918af7269f..d003d64a8e 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -27,41 +27,42 @@ test_expect_success 'empty repository' ' ( cd repo && cat >expect <<-\EOF && - | Repository structure | Value | - | ------------------------ | ------ | - | * References | | - | * Count | 0 | - | * Branches | 0 | - | * Tags | 0 | - | * Remotes | 0 | - | * Others | 0 | - | | | - | * Reachable objects | | - | * Count | 0 | - | * Commits | 0 | - | * Trees | 0 | - | * Blobs | 0 | - | * Tags | 0 | - | * Inflated size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | * Disk size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size | 0 B | - | * Trees | | - | * Maximum size | 0 B | - | * Blobs | | - | * Maximum size | 0 B | - | * Tags | | - | * Maximum size | 0 B | + | Repository structure | Value | + | ------------------------- | ------ | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | + | * Inflated size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * 
Blobs | 0 B | + | * Tags | 0 B | + | * Disk size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size | 0 B | + | * Maximum parents | 0 | + | * Trees | | + | * Maximum size | 0 B | + | * Blobs | | + | * Maximum size | 0 B | + | * Tags | | + | * Maximum size | 0 B | EOF git repo structure >out 2>err && @@ -89,46 +90,48 @@ test_expect_success SHA1 'repository with references and objects' ' # git-rev-list(1) --disk-usage=human option printing the full # "byte/bytes" unit string instead of just "B". cat >expect <<-EOF && - | Repository structure | Value | - | ------------------------ | ---------- | - | * References | | - | * Count | 4 | - | * Branches | 1 | - | * Tags | 1 | - | * Remotes | 1 | - | * Others | 1 | - | | | - | * Reachable objects | | - | * Count | 3.02 k | - | * Commits | 1.01 k | - | * Trees | 1.01 k | - | * Blobs | 1.01 k | - | * Tags | 1 | - | * Inflated size | 16.03 MiB | - | * Commits | 217.92 KiB | - | * Trees | 15.81 MiB | - | * Blobs | 11.68 KiB | - | * Tags | 132 B | - | * Disk size | $(object_type_disk_usage all true) | - | * Commits | $(object_type_disk_usage commit true) | - | * Trees | $(object_type_disk_usage tree true) | - | * Blobs | $(object_type_disk_usage blob true) | - | * Tags | $(object_type_disk_usage tag) B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size [1] | 223 B | - | * Trees | | - | * Maximum size [2] | 32.29 KiB | - | * Blobs | | - | * Maximum size [3] | 13 B | - | * Tags | | - | * Maximum size [4] | 132 B | + | Repository structure | Value | + | ------------------------- | ---------- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 3.02 k | + | * Commits | 1.01 k | + | * Trees | 1.01 k | + | * Blobs | 1.01 k | + | * Tags | 1 | + | * Inflated size | 16.03 MiB | + | * Commits | 
217.92 KiB | + | * Trees | 15.81 MiB | + | * Blobs | 11.68 KiB | + | * Tags | 132 B | + | * Disk size | $(object_type_disk_usage all true) | + | * Commits | $(object_type_disk_usage commit true) | + | * Trees | $(object_type_disk_usage tree true) | + | * Blobs | $(object_type_disk_usage blob true) | + | * Tags | $(object_type_disk_usage tag) B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size [1] | 223 B | + | * Maximum parents [2] | 1 | + | * Trees | | + | * Maximum size [3] | 32.29 KiB | + | * Blobs | | + | * Maximum size [4] | 13 B | + | * Tags | | + | * Maximum size [5] | 132 B | [1] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a - [2] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c - [3] 97d808e45116bf02103490294d3d46dad7a2ac62 - [4] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 + [2] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a + [3] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c + [4] 97d808e45116bf02103490294d3d46dad7a2ac62 + [5] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 EOF git repo structure >out 2>err && @@ -171,6 +174,8 @@ test_expect_success SHA1 'keyvalue and nul format' ' objects.blobs.max_size_oid=eaeeedced46482bd4281fda5a5f05ce24854151f objects.tags.max_size=132 objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c + objects.commits.max_parents=1 + objects.commits.max_parents_oid=de3508174b5c2ace6993da67cae9be9069e2df39 EOF git repo structure --format=keyvalue >out 2>err && From 42e69594113d647f53d65440f2ede554570b9f40 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:26 -0600 Subject: [PATCH 6/6] builtin/repo: find tree with most entries The size of a tree object usually corresponds with the number of entries it has. While iterating through objects in the repository for git-repo-structure, identify the tree with the most entries and display it in the output. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 25 +++++++++++++++++++++++++ t/t1901-repo-structure.sh | 13 +++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 047f5e098d..e726bb858c 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -16,6 +16,8 @@ #include "strbuf.h" #include "string-list.h" #include "shallow.h" +#include "tree.h" +#include "tree-walk.h" #include "utf8.h" static const char *const repo_usage[] = { @@ -211,6 +213,7 @@ struct largest_objects { struct object_data blob_size; struct object_data parent_count; + struct object_data tree_entries; }; struct ref_stats { @@ -458,6 +461,10 @@ static void stats_table_setup_structure(struct stats_table *table, &objects->largest.tree_size.oid, objects->largest.tree_size.value, " * %s", _("Maximum size")); + stats_table_object_count_addf(table, + &objects->largest.tree_entries.oid, + objects->largest.tree_entries.value, + " * %s", _("Maximum entries")); stats_table_addf(table, " * %s", _("Blobs")); stats_table_object_size_addf(table, &objects->largest.blob_size.oid, @@ -625,6 +632,8 @@ static void structure_keyvalue_print(struct repo_structure *stats, print_object_data("objects.commits.max_parents", key_delim, &stats->objects.largest.parent_count, value_delim); + print_object_data("objects.trees.max_entries", key_delim, + &stats->objects.largest.tree_entries, value_delim); fflush(stdout); } @@ -703,6 +712,20 @@ static void check_largest(struct object_data *data, struct object_id *oid, } } +static size_t count_tree_entries(struct object *obj) +{ + struct tree *t = object_as_type(obj, OBJ_TREE, 0); + struct name_entry entry; + struct tree_desc desc; + size_t count = 0; + + init_tree_desc(&desc, &t->object.oid, t->buffer, t->size); + while (tree_entry(&desc, &entry)) + count++; + + return count; +} + static int count_objects(const char *path UNUSED, struct oid_array *oids, enum object_type type, void *cb_data) { @@ -755,6 
+778,8 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, stats->disk_sizes.trees += disk; check_largest(&stats->largest.tree_size, &oids->oid[i], inflated); + check_largest(&stats->largest.tree_entries, &oids->oid[i], + count_tree_entries(obj)); break; case OBJ_BLOB: stats->type_counts.blobs++; diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index d003d64a8e..12ed67e846 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -59,6 +59,7 @@ test_expect_success 'empty repository' ' | * Maximum parents | 0 | | * Trees | | | * Maximum size | 0 B | + | * Maximum entries | 0 | | * Blobs | | | * Maximum size | 0 B | | * Tags | | @@ -122,16 +123,18 @@ test_expect_success SHA1 'repository with references and objects' ' | * Maximum parents [2] | 1 | | * Trees | | | * Maximum size [3] | 32.29 KiB | + | * Maximum entries [4] | 1.01 k | | * Blobs | | - | * Maximum size [4] | 13 B | + | * Maximum size [5] | 13 B | | * Tags | | - | * Maximum size [5] | 132 B | + | * Maximum size [6] | 132 B | [1] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a [2] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a [3] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c - [4] 97d808e45116bf02103490294d3d46dad7a2ac62 - [5] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 + [4] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c + [5] 97d808e45116bf02103490294d3d46dad7a2ac62 + [6] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 EOF git repo structure >out 2>err && @@ -176,6 +179,8 @@ test_expect_success SHA1 'keyvalue and nul format' ' objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c objects.commits.max_parents=1 objects.commits.max_parents_oid=de3508174b5c2ace6993da67cae9be9069e2df39 + objects.trees.max_entries=42 + objects.trees.max_entries_oid=09931deea9d81ec21300d3e13c74412f32eacec5 EOF git repo structure --format=keyvalue >out 2>err &&