From 5b21c20953af5129929092292799af0e2ab0ff78 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 10 Mar 2026 16:18:26 +0100 Subject: [PATCH] odb: introduce generic object counting Similar to the preceding commit, introduce counting of objects on the object database level, replacing the logic that we have in `repo_approximate_object_count()`. Note that the function knows to cache the object count. It's unclear whether this cache is really required as we shouldn't have that many cases where we count objects repeatedly. But to be on the safe side the caching mechanism is retained, with the only excepting being that we also have to use the passed flags as caching key. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 6 +++++- commit-graph.c | 3 ++- object-name.c | 6 +++++- odb.c | 37 ++++++++++++++++++++++++++++++++++++- odb.h | 17 +++++++++++++++-- packfile.c | 27 --------------------------- packfile.h | 6 ------ 7 files changed, 63 insertions(+), 39 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 3a64d28da8..cb9ca89a97 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -574,9 +574,13 @@ static uint64_t total_ram(void) static uint64_t estimate_repack_memory(struct gc_config *cfg, struct packed_git *pack) { - unsigned long nr_objects = repo_approximate_object_count(the_repository); + unsigned long nr_objects; size_t os_cache, heap; + if (odb_count_objects(the_repository->objects, + ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0) + return 0; + if (!pack || !nr_objects) return 0; diff --git a/commit-graph.c b/commit-graph.c index f8e24145a5..c030003330 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2607,7 +2607,8 @@ int write_commit_graph(struct odb_source *source, replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE; } - ctx.approx_nr_objects = repo_approximate_object_count(r); + if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &ctx.approx_nr_objects) < 0) + ctx.approx_nr_objects = 0; if (ctx.append && g) { for (i = 0; i < g->num_commits; i++) { diff --git a/object-name.c b/object-name.c index 7b14c3bf9b..e5adec4c9d 100644 --- a/object-name.c +++ b/object-name.c @@ -837,7 +837,11 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, const unsigned hexsz = algo->hexsz; if (len < 0) { - unsigned long count = repo_approximate_object_count(r); + unsigned long count; + + if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + /* * Add one because the MSB only tells us the highest bit set, * not including the value of all the _other_ bits (so "15" diff --git a/odb.c b/odb.c index 84a31084d3..350e23f3c0 100644 --- a/odb.c +++ b/odb.c @@ -917,6 +917,41 @@ int odb_for_each_object(struct object_database *odb, return 0; } +int odb_count_objects(struct object_database *odb, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + struct odb_source *source; + unsigned long count = 0; + int ret; + + if (odb->object_count_valid && odb->object_count_flags == flags) { + *out = odb->object_count; + return 0; + } + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) { + unsigned long c; + + ret = odb_source_count_objects(source, flags, &c); + if (ret < 0) + goto out; + + count += c; + } + + odb->object_count = count; + odb->object_count_valid = 1; + odb->object_count_flags = flags; + + *out = count; + ret = 0; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { @@ -1030,7 +1065,7 @@ void odb_reprepare(struct object_database *o) for (source = o->sources; source; source = source->next) odb_source_reprepare(source); - o->approximate_object_count_valid = 0; + o->object_count_valid = 0; obj_read_unlock(); } diff --git a/odb.h b/odb.h index e6057477f6..7b004f1cf4 100644 --- a/odb.h +++ b/odb.h @@ -112,8 +112,9 @@ struct object_database { * These two fields are not meant for direct access. Use * repo_approximate_object_count() instead. */ - unsigned long approximate_object_count; - unsigned approximate_object_count_valid : 1; + unsigned long object_count; + unsigned object_count_flags; + unsigned object_count_valid : 1; /* * Submodule source paths that will be added as additional sources to @@ -509,6 +510,18 @@ enum odb_count_objects_flags { ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0), }; +/* + * Count the number of objects in the given object database. This object count + * may double-count objects that are stored in multiple backends, or which are + * stored multiple times in a single backend. + * + * Returns 0 on success, a negative error code otherwise. The number of objects + * will be assigned to the `out` pointer on success. + */ +int odb_count_objects(struct object_database *odb, + enum odb_count_objects_flags flags, + unsigned long *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/packfile.c b/packfile.c index 8ee462303a..d4de9f3ffe 100644 --- a/packfile.c +++ b/packfile.c @@ -1132,33 +1132,6 @@ out: return ret; } -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) -{ - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - unsigned long c; - - if (!odb_source_count_objects(source, ODB_COUNT_OBJECTS_APPROXIMATE, &c)) - count += c; - } - - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; - } - return r->objects->approximate_object_count; -} - unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { diff --git a/packfile.h b/packfile.h index 74b6bc58c5..a16ec3950d 100644 --- a/packfile.h +++ b/packfile.h @@ -375,12 +375,6 @@ int packfile_store_for_each_object(struct packfile_store *store, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -/* - * Give a rough count of objects in the repository. This sacrifices accuracy - * for speed. - */ -unsigned long repo_approximate_object_count(struct repository *r); - void pack_report(struct repository *repo); /*