diff --git a/builtin/gc.c b/builtin/gc.c index fb329c2cff..a08c7554cb 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -467,37 +467,18 @@ out: static int too_many_loose_objects(int limit) { /* - * Quickly check if a "gc" is needed, by estimating how - * many loose objects there are. Because SHA-1 is evenly - * distributed, we can check only one and get a reasonable - * estimate. + * This is weird, but stems from legacy behaviour: the GC auto + * threshold was always essentially interpreted as if it was rounded up + * to the next multiple of 256, so we retain this behaviour for now. */ - DIR *dir; - struct dirent *ent; - int auto_threshold; - int num_loose = 0; - int needed = 0; - const unsigned hexsz_loose = the_hash_algo->hexsz - 2; - char *path; + int auto_threshold = DIV_ROUND_UP(limit, 256) * 256; + unsigned long loose_count; - path = repo_git_path(the_repository, "objects/17"); - dir = opendir(path); - free(path); - if (!dir) + if (odb_source_loose_approximate_object_count(the_repository->objects->sources, + &loose_count) < 0) return 0; - auto_threshold = DIV_ROUND_UP(limit, 256); - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || - ent->d_name[hexsz_loose] != '\0') - continue; - if (++num_loose > auto_threshold) { - needed = 1; - break; - } - } - closedir(dir); - return needed; + return loose_count > auto_threshold; } static struct packed_git *find_base_packs(struct string_list *packs, diff --git a/object-file.c b/object-file.c index a3ff7f586c..da67e3c9ff 100644 --- a/object-file.c +++ b/object-file.c @@ -1868,6 +1868,47 @@ int odb_source_loose_for_each_object(struct odb_source *source, NULL, NULL, &data); } +int odb_source_loose_approximate_object_count(struct odb_source *source, + unsigned long *out) +{ + const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; + unsigned long count = 0; + struct dirent *ent; + char *path = NULL; + DIR *dir = NULL; + int ret; + + path = xstrfmt("%s/17", 
source->path); + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + *out = 0; + ret = 0; + goto out; + } + + ret = error_errno("cannot open object shard '%s'", path); + goto out; + } + + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != hexsz || + ent->d_name[hexsz] != '\0') + continue; + count++; + } + + *out = count * 256; + ret = 0; + +out: + if (dir) + closedir(dir); + free(path); + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index ff6da65296..b870ea9fa8 100644 --- a/object-file.h +++ b/object-file.h @@ -139,6 +139,19 @@ int odb_source_loose_for_each_object(struct odb_source *source, void *cb_data, unsigned flags); +/* + * Count the number of loose objects in this source. + * + * The object count is approximated by opening a single sharding directory for + * loose objects and scanning its contents. The result is then extrapolated by + * 256. This should generally work as a reasonable estimate given that the + * object hash is supposed to be indistinguishable from random. + * + * Returns 0 on success, a negative error code otherwise. + */ +int odb_source_loose_approximate_object_count(struct odb_source *source, + unsigned long *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object