odb: introduce generic object counting

Similar to the preceding commit, introduce counting of objects on the
object database level, replacing the logic that we have in
`repo_approximate_object_count()`.

Note that the function knows to cache the object count. It's unclear
whether this cache is really required as we shouldn't have that many
cases where we count objects repeatedly. But to be on the safe side the
caching mechanism is retained, with the only exception being that we
also have to use the passed flags as caching key.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Patrick Steinhardt
2026-03-10 16:18:26 +01:00
committed by Junio C Hamano
parent 2f2492f7ee
commit 5b21c20953
7 changed files with 63 additions and 39 deletions

View File

@@ -574,9 +574,13 @@ static uint64_t total_ram(void)
static uint64_t estimate_repack_memory(struct gc_config *cfg,
struct packed_git *pack)
{
unsigned long nr_objects = repo_approximate_object_count(the_repository);
unsigned long nr_objects;
size_t os_cache, heap;
if (odb_count_objects(the_repository->objects,
ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0)
return 0;
if (!pack || !nr_objects)
return 0;

View File

@@ -2607,7 +2607,8 @@ int write_commit_graph(struct odb_source *source,
replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
}
ctx.approx_nr_objects = repo_approximate_object_count(r);
if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &ctx.approx_nr_objects) < 0)
ctx.approx_nr_objects = 0;
if (ctx.append && g) {
for (i = 0; i < g->num_commits; i++) {

View File

@@ -837,7 +837,11 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
const unsigned hexsz = algo->hexsz;
if (len < 0) {
unsigned long count = repo_approximate_object_count(r);
unsigned long count;
if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
count = 0;
/*
* Add one because the MSB only tells us the highest bit set,
* not including the value of all the _other_ bits (so "15"

37
odb.c
View File

@@ -917,6 +917,41 @@ int odb_for_each_object(struct object_database *odb,
return 0;
}
/*
 * Sum up the object counts reported by every source of `odb` and hand the
 * total back via `out`. The total is cached in `odb` together with the flags
 * it was computed with, and a cached total is only reused when the caller
 * passes those same flags again.
 *
 * Returns 0 on success. If a source reports a negative error code, that code
 * is returned as-is and neither `out` nor the cache is touched.
 */
int odb_count_objects(struct object_database *odb,
		      enum odb_count_objects_flags flags,
		      unsigned long *out)
{
	unsigned long total = 0;
	struct odb_source *src;

	/* Fast path: a previous call with identical flags already counted. */
	if (odb->object_count_valid && odb->object_count_flags == flags) {
		*out = odb->object_count;
		return 0;
	}

	odb_prepare_alternates(odb);

	src = odb->sources;
	while (src) {
		unsigned long nr;
		int err = odb_source_count_objects(src, flags, &nr);

		if (err < 0)
			return err;

		total += nr;
		src = src->next;
	}

	/* Remember the result, keyed by the flags that produced it. */
	odb->object_count_flags = flags;
	odb->object_count = total;
	odb->object_count_valid = 1;

	*out = total;
	return 0;
}
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
@@ -1030,7 +1065,7 @@ void odb_reprepare(struct object_database *o)
for (source = o->sources; source; source = source->next)
odb_source_reprepare(source);
o->approximate_object_count_valid = 0;
o->object_count_valid = 0;
obj_read_unlock();
}

17
odb.h
View File

@@ -112,8 +112,9 @@ struct object_database {
* These fields are not meant for direct access. Use
* odb_count_objects() instead.
*/
unsigned long approximate_object_count;
unsigned approximate_object_count_valid : 1;
unsigned long object_count;
unsigned object_count_flags;
unsigned object_count_valid : 1;
/*
* Submodule source paths that will be added as additional sources to
@@ -509,6 +510,18 @@ enum odb_count_objects_flags {
ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0),
};
/*
 * Count the number of objects in the given object database. This object count
 * may double-count objects that are stored in multiple backends, or which are
 * stored multiple times in a single backend.
 *
 * The `flags` are forwarded to every object source that is being counted.
 * The computed count is cached in the object database and reused by later
 * calls that pass the same `flags`.
 *
 * Returns 0 on success, a negative error code otherwise. The number of objects
 * will be assigned to the `out` pointer on success.
 */
int odb_count_objects(struct object_database *odb,
enum odb_count_objects_flags flags,
unsigned long *out);
enum {
/*
* By default, `odb_write_object()` does not actually write anything

View File

@@ -1132,33 +1132,6 @@ out:
return ret;
}
/*
* Give a fast, rough count of the number of objects in the repository. This
* ignores loose objects completely. If you have a lot of them, then either
* you should repack because your performance will be awful, or they are
* all unreachable objects about to be pruned, in which case they're not really
* interesting as a measure of repo size in the first place.
*/
unsigned long repo_approximate_object_count(struct repository *r)
{
if (!r->objects->approximate_object_count_valid) {
struct odb_source *source;
unsigned long count = 0;
odb_prepare_alternates(r->objects);
for (source = r->objects->sources; source; source = source->next) {
unsigned long c;
if (!odb_source_count_objects(source, ODB_COUNT_OBJECTS_APPROXIMATE, &c))
count += c;
}
r->objects->approximate_object_count = count;
r->objects->approximate_object_count_valid = 1;
}
return r->objects->approximate_object_count;
}
unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{

View File

@@ -375,12 +375,6 @@ int packfile_store_for_each_object(struct packfile_store *store,
#define PACKDIR_FILE_GARBAGE 4
extern void (*report_garbage)(unsigned seen_bits, const char *path);
/*
* Give a rough count of objects in the repository. This sacrifices accuracy
* for speed.
*/
unsigned long repo_approximate_object_count(struct repository *r);
void pack_report(struct repository *repo);
/*