Merge branch 'jt/repo-structure-extrema' into jch

"git repo structure" command learns to report maximum values on
various aspects of objects it inspects.

* jt/repo-structure-extrema:
  builtin/repo: find tree with most entries
  builtin/repo: find commit with most parents
  builtin/repo: add OID annotations to table output
  builtin/repo: collect largest inflated objects
  builtin/repo: add helper for printing keyvalue output
  builtin/repo: update stats for each object
This commit is contained in:
Junio C Hamano
2026-03-03 11:08:30 -08:00
3 changed files with 349 additions and 112 deletions

View File

@@ -64,6 +64,7 @@ supported:
* Reachable object counts categorized by type
* Total inflated size of reachable objects by type
* Total disk size of reachable objects by type
* Largest reachable objects in the repository by type
+
The output format can be chosen through the flag `--format`. Three formats are
supported:

View File

@@ -1,7 +1,9 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "builtin.h"
#include "commit.h"
#include "environment.h"
#include "hash.h"
#include "hex.h"
#include "odb.h"
#include "parse-options.h"
@@ -14,6 +16,8 @@
#include "strbuf.h"
#include "string-list.h"
#include "shallow.h"
#include "tree.h"
#include "tree-walk.h"
#include "utf8.h"
static const char *const repo_usage[] = {
@@ -230,6 +234,21 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix,
return print_fields(argc, argv, repo, format);
}
struct object_data {
struct object_id oid;
size_t value;
};
struct largest_objects {
struct object_data tag_size;
struct object_data commit_size;
struct object_data tree_size;
struct object_data blob_size;
struct object_data parent_count;
struct object_data tree_entries;
};
struct ref_stats {
size_t branches;
size_t remotes;
@@ -248,6 +267,7 @@ struct object_stats {
struct object_values type_counts;
struct object_values inflated_sizes;
struct object_values disk_sizes;
struct largest_objects largest;
};
struct repo_structure {
@@ -257,6 +277,7 @@ struct repo_structure {
struct stats_table {
struct string_list rows;
struct string_list annotations;
int name_col_width;
int value_col_width;
@@ -269,6 +290,8 @@ struct stats_table {
struct stats_table_entry {
char *value;
const char *unit;
size_t index;
struct object_id *oid;
};
static void stats_table_vaddf(struct stats_table *table,
@@ -291,6 +314,12 @@ static void stats_table_vaddf(struct stats_table *table,
table->name_col_width = name_width;
if (!entry)
return;
if (entry->oid) {
entry->index = table->annotations.nr + 1;
strbuf_addf(&buf, "[%" PRIuMAX "] %s", (uintmax_t)entry->index,
oid_to_hex(entry->oid));
string_list_append_nodup(&table->annotations, strbuf_detach(&buf, NULL));
}
if (entry->value) {
int value_width = utf8_strwidth(entry->value);
if (value_width > table->value_col_width)
@@ -301,6 +330,8 @@ static void stats_table_vaddf(struct stats_table *table,
if (unit_width > table->unit_col_width)
table->unit_col_width = unit_width;
}
strbuf_release(&buf);
}
static void stats_table_addf(struct stats_table *table, const char *format, ...)
@@ -326,6 +357,27 @@ static void stats_table_count_addf(struct stats_table *table, size_t value,
va_end(ap);
}
static void stats_table_object_count_addf(struct stats_table *table,
struct object_id *oid, size_t value,
const char *format, ...)
{
struct stats_table_entry *entry;
va_list ap;
CALLOC_ARRAY(entry, 1);
humanise_count(value, &entry->value, &entry->unit);
/*
* A NULL OID should not have a table annotation.
*/
if (!is_null_oid(oid))
entry->oid = oid;
va_start(ap, format);
stats_table_vaddf(table, entry, format, ap);
va_end(ap);
}
static void stats_table_size_addf(struct stats_table *table, size_t value,
const char *format, ...)
{
@@ -340,6 +392,27 @@ static void stats_table_size_addf(struct stats_table *table, size_t value,
va_end(ap);
}
static void stats_table_object_size_addf(struct stats_table *table,
struct object_id *oid, size_t value,
const char *format, ...)
{
struct stats_table_entry *entry;
va_list ap;
CALLOC_ARRAY(entry, 1);
humanise_bytes(value, &entry->value, &entry->unit, HUMANISE_COMPACT);
/*
* A NULL OID should not have a table annotation.
*/
if (!is_null_oid(oid))
entry->oid = oid;
va_start(ap, format);
stats_table_vaddf(table, entry, format, ap);
va_end(ap);
}
static inline size_t get_total_reference_count(struct ref_stats *stats)
{
return stats->branches + stats->remotes + stats->tags + stats->others;
@@ -404,8 +477,41 @@ static void stats_table_setup_structure(struct stats_table *table,
" * %s", _("Blobs"));
stats_table_size_addf(table, objects->disk_sizes.tags,
" * %s", _("Tags"));
stats_table_addf(table, "");
stats_table_addf(table, "* %s", _("Largest objects"));
stats_table_addf(table, " * %s", _("Commits"));
stats_table_object_size_addf(table,
&objects->largest.commit_size.oid,
objects->largest.commit_size.value,
" * %s", _("Maximum size"));
stats_table_object_count_addf(table,
&objects->largest.parent_count.oid,
objects->largest.parent_count.value,
" * %s", _("Maximum parents"));
stats_table_addf(table, " * %s", _("Trees"));
stats_table_object_size_addf(table,
&objects->largest.tree_size.oid,
objects->largest.tree_size.value,
" * %s", _("Maximum size"));
stats_table_object_count_addf(table,
&objects->largest.tree_entries.oid,
objects->largest.tree_entries.value,
" * %s", _("Maximum entries"));
stats_table_addf(table, " * %s", _("Blobs"));
stats_table_object_size_addf(table,
&objects->largest.blob_size.oid,
objects->largest.blob_size.value,
" * %s", _("Maximum size"));
stats_table_addf(table, " * %s", _("Tags"));
stats_table_object_size_addf(table,
&objects->largest.tag_size.oid,
objects->largest.tag_size.value,
" * %s", _("Maximum size"));
}
#define INDEX_WIDTH 4
static void stats_table_print_structure(const struct stats_table *table)
{
const char *name_col_title = _("Repository structure");
@@ -424,7 +530,8 @@ static void stats_table_print_structure(const struct stats_table *table)
value_col_width = title_value_width - unit_col_width;
strbuf_addstr(&buf, "| ");
strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, name_col_title);
strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width + INDEX_WIDTH,
name_col_title);
strbuf_addstr(&buf, " | ");
strbuf_utf8_align(&buf, ALIGN_LEFT,
value_col_width + unit_col_width + 1, value_col_title);
@@ -432,7 +539,7 @@ static void stats_table_print_structure(const struct stats_table *table)
printf("%s\n", buf.buf);
printf("| ");
for (int i = 0; i < name_col_width; i++)
for (int i = 0; i < name_col_width + INDEX_WIDTH; i++)
putchar('-');
printf(" | ");
for (int i = 0; i < value_col_width + unit_col_width + 1; i++)
@@ -453,6 +560,13 @@ static void stats_table_print_structure(const struct stats_table *table)
strbuf_reset(&buf);
strbuf_addstr(&buf, "| ");
strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, item->string);
if (entry && entry->oid)
strbuf_addf(&buf, " [%" PRIuMAX "]",
(uintmax_t)entry->index);
else
strbuf_addchars(&buf, ' ', INDEX_WIDTH);
strbuf_addstr(&buf, " | ");
strbuf_utf8_align(&buf, ALIGN_RIGHT, value_col_width, value);
strbuf_addch(&buf, ' ');
@@ -461,6 +575,12 @@ static void stats_table_print_structure(const struct stats_table *table)
printf("%s\n", buf.buf);
}
if (table->annotations.nr) {
printf("\n");
for_each_string_list_item(item, &table->annotations)
printf("%s\n", item->string);
}
strbuf_release(&buf);
}
@@ -476,46 +596,76 @@ static void stats_table_clear(struct stats_table *table)
}
string_list_clear(&table->rows, 1);
string_list_clear(&table->annotations, 1);
}
static inline void print_keyvalue(const char *key, char key_delim, size_t value,
char value_delim)
{
printf("%s%c%" PRIuMAX "%c", key, key_delim, (uintmax_t)value,
value_delim);
}
static void print_object_data(const char *key, char key_delim,
struct object_data *data, char value_delim)
{
print_keyvalue(key, key_delim, data->value, value_delim);
printf("%s_oid%c%s%c", key, key_delim, oid_to_hex(&data->oid),
value_delim);
}
static void structure_keyvalue_print(struct repo_structure *stats,
char key_delim, char value_delim)
{
printf("references.branches.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.branches, value_delim);
printf("references.tags.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.tags, value_delim);
printf("references.remotes.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.remotes, value_delim);
printf("references.others.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.others, value_delim);
print_keyvalue("references.branches.count", key_delim,
stats->refs.branches, value_delim);
print_keyvalue("references.tags.count", key_delim,
stats->refs.tags, value_delim);
print_keyvalue("references.remotes.count", key_delim,
stats->refs.remotes, value_delim);
print_keyvalue("references.others.count", key_delim,
stats->refs.others, value_delim);
printf("objects.commits.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.type_counts.commits, value_delim);
printf("objects.trees.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.type_counts.trees, value_delim);
printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.type_counts.blobs, value_delim);
printf("objects.tags.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.type_counts.tags, value_delim);
print_keyvalue("objects.commits.count", key_delim,
stats->objects.type_counts.commits, value_delim);
print_keyvalue("objects.trees.count", key_delim,
stats->objects.type_counts.trees, value_delim);
print_keyvalue("objects.blobs.count", key_delim,
stats->objects.type_counts.blobs, value_delim);
print_keyvalue("objects.tags.count", key_delim,
stats->objects.type_counts.tags, value_delim);
printf("objects.commits.inflated_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.inflated_sizes.commits, value_delim);
printf("objects.trees.inflated_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.inflated_sizes.trees, value_delim);
printf("objects.blobs.inflated_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.inflated_sizes.blobs, value_delim);
printf("objects.tags.inflated_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.inflated_sizes.tags, value_delim);
print_keyvalue("objects.commits.inflated_size", key_delim,
stats->objects.inflated_sizes.commits, value_delim);
print_keyvalue("objects.trees.inflated_size", key_delim,
stats->objects.inflated_sizes.trees, value_delim);
print_keyvalue("objects.blobs.inflated_size", key_delim,
stats->objects.inflated_sizes.blobs, value_delim);
print_keyvalue("objects.tags.inflated_size", key_delim,
stats->objects.inflated_sizes.tags, value_delim);
printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.disk_sizes.commits, value_delim);
printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.disk_sizes.trees, value_delim);
printf("objects.blobs.disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.disk_sizes.blobs, value_delim);
printf("objects.tags.disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.disk_sizes.tags, value_delim);
print_keyvalue("objects.commits.disk_size", key_delim,
stats->objects.disk_sizes.commits, value_delim);
print_keyvalue("objects.trees.disk_size", key_delim,
stats->objects.disk_sizes.trees, value_delim);
print_keyvalue("objects.blobs.disk_size", key_delim,
stats->objects.disk_sizes.blobs, value_delim);
print_keyvalue("objects.tags.disk_size", key_delim,
stats->objects.disk_sizes.tags, value_delim);
print_object_data("objects.commits.max_size", key_delim,
&stats->objects.largest.commit_size, value_delim);
print_object_data("objects.trees.max_size", key_delim,
&stats->objects.largest.tree_size, value_delim);
print_object_data("objects.blobs.max_size", key_delim,
&stats->objects.largest.blob_size, value_delim);
print_object_data("objects.tags.max_size", key_delim,
&stats->objects.largest.tag_size, value_delim);
print_object_data("objects.commits.max_parents", key_delim,
&stats->objects.largest.parent_count, value_delim);
print_object_data("objects.trees.max_entries", key_delim,
&stats->objects.largest.tree_entries, value_delim);
fflush(stdout);
}
@@ -585,55 +735,97 @@ struct count_objects_data {
struct progress *progress;
};
static void check_largest(struct object_data *data, struct object_id *oid,
size_t value)
{
if (value > data->value || is_null_oid(&data->oid)) {
oidcpy(&data->oid, oid);
data->value = value;
}
}
static size_t count_tree_entries(struct object *obj)
{
struct tree *t = object_as_type(obj, OBJ_TREE, 0);
struct name_entry entry;
struct tree_desc desc;
size_t count = 0;
init_tree_desc(&desc, &t->object.oid, t->buffer, t->size);
while (tree_entry(&desc, &entry))
count++;
return count;
}
static int count_objects(const char *path UNUSED, struct oid_array *oids,
enum object_type type, void *cb_data)
{
struct count_objects_data *data = cb_data;
struct object_stats *stats = data->stats;
size_t inflated_total = 0;
size_t disk_total = 0;
size_t object_count;
for (size_t i = 0; i < oids->nr; i++) {
struct object_info oi = OBJECT_INFO_INIT;
unsigned long inflated;
struct commit *commit;
struct object *obj;
void *content;
off_t disk;
int eaten;
oi.sizep = &inflated;
oi.disk_sizep = &disk;
oi.contentp = &content;
if (odb_read_object_info_extended(data->odb, &oids->oid[i], &oi,
OBJECT_INFO_SKIP_FETCH_OBJECT |
OBJECT_INFO_QUICK) < 0)
continue;
inflated_total += inflated;
disk_total += disk;
}
obj = parse_object_buffer(the_repository, &oids->oid[i], type,
inflated, content, &eaten);
switch (type) {
case OBJ_TAG:
stats->type_counts.tags += oids->nr;
stats->inflated_sizes.tags += inflated_total;
stats->disk_sizes.tags += disk_total;
break;
case OBJ_COMMIT:
stats->type_counts.commits += oids->nr;
stats->inflated_sizes.commits += inflated_total;
stats->disk_sizes.commits += disk_total;
break;
case OBJ_TREE:
stats->type_counts.trees += oids->nr;
stats->inflated_sizes.trees += inflated_total;
stats->disk_sizes.trees += disk_total;
break;
case OBJ_BLOB:
stats->type_counts.blobs += oids->nr;
stats->inflated_sizes.blobs += inflated_total;
stats->disk_sizes.blobs += disk_total;
break;
default:
BUG("invalid object type");
switch (type) {
case OBJ_TAG:
stats->type_counts.tags++;
stats->inflated_sizes.tags += inflated;
stats->disk_sizes.tags += disk;
check_largest(&stats->largest.tag_size, &oids->oid[i],
inflated);
break;
case OBJ_COMMIT:
commit = object_as_type(obj, OBJ_COMMIT, 0);
stats->type_counts.commits++;
stats->inflated_sizes.commits += inflated;
stats->disk_sizes.commits += disk;
check_largest(&stats->largest.commit_size, &oids->oid[i],
inflated);
check_largest(&stats->largest.parent_count, &oids->oid[i],
commit_list_count(commit->parents));
break;
case OBJ_TREE:
stats->type_counts.trees++;
stats->inflated_sizes.trees += inflated;
stats->disk_sizes.trees += disk;
check_largest(&stats->largest.tree_size, &oids->oid[i],
inflated);
check_largest(&stats->largest.tree_entries, &oids->oid[i],
count_tree_entries(obj));
break;
case OBJ_BLOB:
stats->type_counts.blobs++;
stats->inflated_sizes.blobs += inflated;
stats->disk_sizes.blobs += disk;
check_largest(&stats->largest.blob_size, &oids->oid[i],
inflated);
break;
default:
BUG("invalid object type");
}
if (!eaten)
free(content);
}
object_count = get_total_object_values(&stats->type_counts);
@@ -669,6 +861,7 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix,
{
struct stats_table table = {
.rows = STRING_LIST_INIT_DUP,
.annotations = STRING_LIST_INIT_DUP,
};
enum output_format format = FORMAT_TABLE;
struct repo_structure stats = { 0 };

View File

@@ -27,31 +27,43 @@ test_expect_success 'empty repository' '
(
cd repo &&
cat >expect <<-\EOF &&
| Repository structure | Value |
| -------------------- | ------ |
| * References | |
| * Count | 0 |
| * Branches | 0 |
| * Tags | 0 |
| * Remotes | 0 |
| * Others | 0 |
| | |
| * Reachable objects | |
| * Count | 0 |
| * Commits | 0 |
| * Trees | 0 |
| * Blobs | 0 |
| * Tags | 0 |
| * Inflated size | 0 B |
| * Commits | 0 B |
| * Trees | 0 B |
| * Blobs | 0 B |
| * Tags | 0 B |
| * Disk size | 0 B |
| * Commits | 0 B |
| * Trees | 0 B |
| * Blobs | 0 B |
| * Tags | 0 B |
| Repository structure | Value |
| ------------------------- | ------ |
| * References | |
| * Count | 0 |
| * Branches | 0 |
| * Tags | 0 |
| * Remotes | 0 |
| * Others | 0 |
| | |
| * Reachable objects | |
| * Count | 0 |
| * Commits | 0 |
| * Trees | 0 |
| * Blobs | 0 |
| * Tags | 0 |
| * Inflated size | 0 B |
| * Commits | 0 B |
| * Trees | 0 B |
| * Blobs | 0 B |
| * Tags | 0 B |
| * Disk size | 0 B |
| * Commits | 0 B |
| * Trees | 0 B |
| * Blobs | 0 B |
| * Tags | 0 B |
| | |
| * Largest objects | |
| * Commits | |
| * Maximum size | 0 B |
| * Maximum parents | 0 |
| * Trees | |
| * Maximum size | 0 B |
| * Maximum entries | 0 |
| * Blobs | |
| * Maximum size | 0 B |
| * Tags | |
| * Maximum size | 0 B |
EOF
git repo structure >out 2>err &&
@@ -79,31 +91,50 @@ test_expect_success SHA1 'repository with references and objects' '
# git-rev-list(1) --disk-usage=human option printing the full
# "byte/bytes" unit string instead of just "B".
cat >expect <<-EOF &&
| Repository structure | Value |
| -------------------- | ---------- |
| * References | |
| * Count | 4 |
| * Branches | 1 |
| * Tags | 1 |
| * Remotes | 1 |
| * Others | 1 |
| | |
| * Reachable objects | |
| * Count | 3.02 k |
| * Commits | 1.01 k |
| * Trees | 1.01 k |
| * Blobs | 1.01 k |
| * Tags | 1 |
| * Inflated size | 16.03 MiB |
| * Commits | 217.92 KiB |
| * Trees | 15.81 MiB |
| * Blobs | 11.68 KiB |
| * Tags | 132 B |
| * Disk size | $(object_type_disk_usage all true) |
| * Commits | $(object_type_disk_usage commit true) |
| * Trees | $(object_type_disk_usage tree true) |
| * Blobs | $(object_type_disk_usage blob true) |
| * Tags | $(object_type_disk_usage tag) B |
| Repository structure | Value |
| ------------------------- | ---------- |
| * References | |
| * Count | 4 |
| * Branches | 1 |
| * Tags | 1 |
| * Remotes | 1 |
| * Others | 1 |
| | |
| * Reachable objects | |
| * Count | 3.02 k |
| * Commits | 1.01 k |
| * Trees | 1.01 k |
| * Blobs | 1.01 k |
| * Tags | 1 |
| * Inflated size | 16.03 MiB |
| * Commits | 217.92 KiB |
| * Trees | 15.81 MiB |
| * Blobs | 11.68 KiB |
| * Tags | 132 B |
| * Disk size | $(object_type_disk_usage all true) |
| * Commits | $(object_type_disk_usage commit true) |
| * Trees | $(object_type_disk_usage tree true) |
| * Blobs | $(object_type_disk_usage blob true) |
| * Tags | $(object_type_disk_usage tag) B |
| | |
| * Largest objects | |
| * Commits | |
| * Maximum size [1] | 223 B |
| * Maximum parents [2] | 1 |
| * Trees | |
| * Maximum size [3] | 32.29 KiB |
| * Maximum entries [4] | 1.01 k |
| * Blobs | |
| * Maximum size [5] | 13 B |
| * Tags | |
| * Maximum size [6] | 132 B |
[1] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a
[2] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a
[3] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c
[4] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c
[5] 97d808e45116bf02103490294d3d46dad7a2ac62
[6] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2
EOF
git repo structure >out 2>err &&
@@ -138,6 +169,18 @@ test_expect_success SHA1 'lines and nul format' '
objects.trees.disk_size=$(object_type_disk_usage tree)
objects.blobs.disk_size=$(object_type_disk_usage blob)
objects.tags.disk_size=$(object_type_disk_usage tag)
objects.commits.max_size=221
objects.commits.max_size_oid=de3508174b5c2ace6993da67cae9be9069e2df39
objects.trees.max_size=1335
objects.trees.max_size_oid=09931deea9d81ec21300d3e13c74412f32eacec5
objects.blobs.max_size=11
objects.blobs.max_size_oid=eaeeedced46482bd4281fda5a5f05ce24854151f
objects.tags.max_size=132
objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c
objects.commits.max_parents=1
objects.commits.max_parents_oid=de3508174b5c2ace6993da67cae9be9069e2df39
objects.trees.max_entries=42
objects.trees.max_entries_oid=09931deea9d81ec21300d3e13c74412f32eacec5
EOF
git repo structure --format=lines >out 2>err &&