odb: introduce generic object counting

Similar to the preceding commit, introduce counting of objects on the object database level, replacing the logic that we have in `repo_approximate_object_count()`. Note that the function knows to cache the object count. It's unclear whether this cache is really required as we shouldn't have that many cases where we count objects repeatedly. But to be on the safe side the caching mechanism is retained, with the only exception being that we also have to use the passed flags as the caching key. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2026-03-12 01:49:49 +01:00 · 2026-03-10 16:18:26 +01:00
parent 2f2492f7ee
commit 5b21c20953
7 changed files with 63 additions and 39 deletions
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -574,9 +574,13 @@ static uint64_t total_ram(void)
 static uint64_t estimate_repack_memory(struct gc_config *cfg,
 				       struct packed_git *pack)
 {
-	unsigned long nr_objects = repo_approximate_object_count(the_repository);
+	unsigned long nr_objects;
 	size_t os_cache, heap;

+	if (odb_count_objects(the_repository->objects,
+			      ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0)
+		return 0;
+
 	if (!pack || !nr_objects)
 		return 0;

--- a/commit-graph.c
+++ b/commit-graph.c
@@ -2607,7 +2607,8 @@ int write_commit_graph(struct odb_source *source,
 			replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
 	}

-	ctx.approx_nr_objects = repo_approximate_object_count(r);
+	if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &ctx.approx_nr_objects) < 0)
+		ctx.approx_nr_objects = 0;

 	if (ctx.append && g) {
 		for (i = 0; i < g->num_commits; i++) {
--- a/object-name.c
+++ b/object-name.c
@@ -837,7 +837,11 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
 	const unsigned hexsz = algo->hexsz;

 	if (len < 0) {
-		unsigned long count = repo_approximate_object_count(r);
+		unsigned long count;
+
+		if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
+			count = 0;
+
 		/*
 		 * Add one because the MSB only tells us the highest bit set,
 		 * not including the value of all the _other_ bits (so "15"
--- a/odb.c
+++ b/odb.c
@@ -917,6 +917,41 @@ int odb_for_each_object(struct object_database *odb,
 	return 0;
 }

+int odb_count_objects(struct object_database *odb,
+		      enum odb_count_objects_flags flags,
+		      unsigned long *out)
+{
+	struct odb_source *source;
+	unsigned long count = 0;
+	int ret;
+
+	if (odb->object_count_valid && odb->object_count_flags == flags) {
+		*out = odb->object_count;
+		return 0;
+	}
+
+	odb_prepare_alternates(odb);
+	for (source = odb->sources; source; source = source->next) {
+		unsigned long c;
+
+		ret = odb_source_count_objects(source, flags, &c);
+		if (ret < 0)
+			goto out;
+
+		count += c;
+	}
+
+	odb->object_count = count;
+	odb->object_count_valid = 1;
+	odb->object_count_flags = flags;
+
+	*out = count;
+	ret = 0;
+
+out:
+	return ret;
+}
+
 void odb_assert_oid_type(struct object_database *odb,
 			 const struct object_id *oid, enum object_type expect)
 {
@@ -1030,7 +1065,7 @@ void odb_reprepare(struct object_database *o)
 	for (source = o->sources; source; source = source->next)
 		odb_source_reprepare(source);

-	o->approximate_object_count_valid = 0;
+	o->object_count_valid = 0;

 	obj_read_unlock();
 }
--- a/odb.h
+++ b/odb.h
@@ -112,8 +112,9 @@ struct object_database {
 	 * These two fields are not meant for direct access. Use
 	 * repo_approximate_object_count() instead.
 	 */
-	unsigned long approximate_object_count;
-	unsigned approximate_object_count_valid : 1;
+	unsigned long object_count;
+	unsigned object_count_flags;
+	unsigned object_count_valid : 1;

 	/*
 	 * Submodule source paths that will be added as additional sources to
@@ -509,6 +510,18 @@ enum odb_count_objects_flags {
 	ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0),
 };

+/*
+ * Count the number of objects in the given object database. Objects stored in
+ * multiple backends, or several times in one backend, may be double-counted.
+ * The last result is cached along with its flags; `odb_reprepare()` resets it.
+ *
+ * Returns 0 on success, a negative error code otherwise. The number of objects
+ * is assigned to `out` on success only; on error `out` is left untouched.
+ */
+int odb_count_objects(struct object_database *odb,
+		      enum odb_count_objects_flags flags,
+		      unsigned long *out);
+
 enum {
 	/*
 	 * By default, `odb_write_object()` does not actually write anything
--- a/packfile.c
+++ b/packfile.c
@@ -1132,33 +1132,6 @@ out:
 	return ret;
 }

-/*
- * Give a fast, rough count of the number of objects in the repository. This
- * ignores loose objects completely. If you have a lot of them, then either
- * you should repack because your performance will be awful, or they are
- * all unreachable objects about to be pruned, in which case they're not really
- * interesting as a measure of repo size in the first place.
- */
-unsigned long repo_approximate_object_count(struct repository *r)
-{
-	if (!r->objects->approximate_object_count_valid) {
-		struct odb_source *source;
-		unsigned long count = 0;
-
-		odb_prepare_alternates(r->objects);
-		for (source = r->objects->sources; source; source = source->next) {
-			unsigned long c;
-
-			if (!odb_source_count_objects(source, ODB_COUNT_OBJECTS_APPROXIMATE, &c))
-				count += c;
-		}
-
-		r->objects->approximate_object_count = count;
-		r->objects->approximate_object_count_valid = 1;
-	}
-	return r->objects->approximate_object_count;
-}
-
 unsigned long unpack_object_header_buffer(const unsigned char *buf,
 		unsigned long len, enum object_type *type, unsigned long *sizep)
 {
--- a/packfile.h
+++ b/packfile.h
@@ -375,12 +375,6 @@ int packfile_store_for_each_object(struct packfile_store *store,
 #define PACKDIR_FILE_GARBAGE 4
 extern void (*report_garbage)(unsigned seen_bits, const char *path);

-/*
- * Give a rough count of objects in the repository. This sacrifices accuracy
- * for speed.
- */
-unsigned long repo_approximate_object_count(struct repository *r);
-
 void pack_report(struct repository *repo);

 /*