From 5946a564cddc0bf471f27ae4c3fe205441e3ef65 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 5 Mar 2026 15:19:49 +0100 Subject: [PATCH] odb/source: make `read_object_info()` function pluggable Introduce a new callback function in `struct odb_source` to make the function pluggable. Note that this function is a bit less straightforward to convert compared to the other functions. The reason here is that the logic to read an object is: 1. We try to read the object. If it exists we return it. 2. If the object does not exist we reprepare the object database source. 3. We then try reading the object info a second time in case the reprepare caused it to appear. The second read is only supposed to happen for the packfile store though, as reading loose objects is not impacted by repreparing the object database. Ideally, we'd just move this whole logic into the ODB source. But that's not easily possible because we try to avoid the reprepare unless really required, which is only the case once we have found out that no other ODB source contains the object either. So the logic spans across multiple ODB sources, and consequently we cannot move it into an individual source. Instead, introduce a new flag `OBJECT_INFO_SECOND_READ` that tells the backend that we already tried to look up the object once, and that this time around the ODB source should try to find any new objects that may have surfaced due to an on-disk change. With this flag, the "files" backend can trivially skip trying to re-read the object as a loose object. Furthermore, as we know that we only try the second read via the packfile store, we can skip repreparing loose objects and only reprepare the packfile store. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 10 ++++++ odb.c | 22 ++++++------- odb.h | 24 -------------- odb/source-files.c | 15 +++++++++ odb/source.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++ packfile.c | 10 +++++- 6 files changed, 122 insertions(+), 37 deletions(-) diff --git a/object-file.c b/object-file.c index 7ef8291a48..eefde72c7d 100644 --- a/object-file.c +++ b/object-file.c @@ -546,6 +546,16 @@ int odb_source_loose_read_object_info(struct odb_source *source, enum object_info_flags flags) { static struct strbuf buf = STRBUF_INIT; + + /* + * The second read shouldn't cause new loose objects to show up, unless + * there was a race condition with a secondary process. We don't care + * about this case though, so we simply skip reading loose objects a + * second time. + */ + if (flags & OBJECT_INFO_SECOND_READ) + return -1; + odb_loose_path(source, &buf, oid); return read_object_info_from_path(source, buf.buf, oid, oi, flags); } diff --git a/odb.c b/odb.c index f7487eb0df..c0b8cd062b 100644 --- a/odb.c +++ b/odb.c @@ -688,22 +688,20 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - /* Most likely it's a loose object. */ - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!packfile_store_read_object_info(files->packed, real, oi, flags) || - !odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, real, oi, flags)) return 0; - } - /* Not a loose object; someone else may have just packed it. */ + /* + * When the object hasn't been found we try a second read and + * tell the sources so. This may cause them to invalidate + * caches or reload on-disk state. 
+ */ if (!(flags & OBJECT_INFO_QUICK)) { - odb_reprepare(odb->repo->objects); - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!packfile_store_read_object_info(files->packed, real, oi, flags)) + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, real, oi, + flags | OBJECT_INFO_SECOND_READ)) return 0; - } } /* diff --git a/odb.h b/odb.h index e13b5b7c44..70ffb033f9 100644 --- a/odb.h +++ b/odb.h @@ -339,30 +339,6 @@ struct object_info { */ #define OBJECT_INFO_INIT { 0 } -/* Flags that can be passed to `odb_read_object_info_extended()`. */ -enum object_info_flags { - /* Invoke lookup_replace_object() on the given hash. */ - OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), - - /* Do not reprepare object sources when the first lookup has failed. */ - OBJECT_INFO_QUICK = (1 << 1), - - /* - * Do not attempt to fetch the object if missing (even if fetch_is_missing is - * nonzero). - */ - OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), - - /* Die if object corruption (not just an object being missing) was detected. */ - OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), - - /* - * This is meant for bulk prefetching of missing blobs in a partial - * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. - */ - OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), -}; - /* * Read object info from the object database and populate the `object_info` * structure. Returns 0 on success, a negative error code otherwise. 
diff --git a/odb/source-files.c b/odb/source-files.c index 20a24f524a..f2969a1214 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -41,6 +41,20 @@ static void odb_source_files_reprepare(struct odb_source *source) packfile_store_reprepare(files->packed); } +static int odb_source_files_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + + if (!packfile_store_read_object_info(files->packed, oid, oi, flags) || + !odb_source_loose_read_object_info(source, oid, oi, flags)) + return 0; + + return -1; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -55,6 +69,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.free = odb_source_files_free; files->base.close = odb_source_files_close; files->base.reprepare = odb_source_files_reprepare; + files->base.read_object_info = odb_source_files_read_object_info; /* * Ideally, we would only ever store absolute paths in the source. This diff --git a/odb/source.h b/odb/source.h index 0e6c6abdb1..150becafe6 100644 --- a/odb/source.h +++ b/odb/source.h @@ -12,6 +12,45 @@ enum odb_source_type { ODB_SOURCE_FILES, }; +/* Flags that can be passed to `odb_read_object_info_extended()`. */ +enum object_info_flags { + /* Invoke lookup_replace_object() on the given hash. */ + OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), + + /* Do not reprepare object sources when the first lookup has failed. */ + OBJECT_INFO_QUICK = (1 << 1), + + /* + * Do not attempt to fetch the object if missing (even if fetch_is_missing is + * nonzero). + */ + OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), + + /* Die if object corruption (not just an object being missing) was detected. 
*/ + OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), + + /* + * We have already tried reading the object, but it couldn't be found + * via any of the attached sources, and are now doing a second read. + * This second read asks the individual sources to also evaluate + * whether any on-disk state may have changed that may have caused the + * object to appear. + * + * This flag is for internal use, only. The second read only occurs + * when `OBJECT_INFO_QUICK` was not passed. + */ + OBJECT_INFO_SECOND_READ = (1 << 4), + + /* + * This is meant for bulk prefetching of missing blobs in a partial + * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. + */ + OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), +}; + +struct object_id; +struct object_info; + /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -72,6 +111,33 @@ struct odb_source { * example just been repacked so that new objects will become visible. */ void (*reprepare)(struct odb_source *source); + + /* + * This callback is expected to read object information from the object + * database source. The object info will be partially populated with + * pointers for each bit of information that was requested by the + * caller. + * + * The flags field is a combination of `OBJECT_INFO` flags. Only the + * following fields need to be handled by the backend: + * + * - `OBJECT_INFO_QUICK` indicates it is fine to use caches without + * re-verifying the data. + * + * - `OBJECT_INFO_SECOND_READ` indicates that the initial object + * lookup has failed and that the object sources should check + * whether any of its on-disk state has changed that may have + * caused the object to appear. Sources are free to ignore the + * second read in case they know that the first read would have + * already surfaced the object without reloading any on-disk state. 
+ * + * The callback is expected to return a negative error code in case + * reading the object has failed, 0 otherwise. + */ + int (*read_object_info)(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags); }; /* @@ -131,4 +197,16 @@ static inline void odb_source_reprepare(struct odb_source *source) source->reprepare(source); } +/* + * Read an object from the object database source identified by its object ID. + * Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + return source->read_object_info(source, oid, oi, flags); +} + #endif diff --git a/packfile.c b/packfile.c index da1c0dfa39..71db10e7c6 100644 --- a/packfile.c +++ b/packfile.c @@ -2181,11 +2181,19 @@ int packfile_store_freshen_object(struct packfile_store *store, int packfile_store_read_object_info(struct packfile_store *store, const struct object_id *oid, struct object_info *oi, - enum object_info_flags flags UNUSED) + enum object_info_flags flags) { struct pack_entry e; int ret; + /* + * In case the first read didn't surface the object, we have to reload + * packfiles. This may cause us to discover new packfiles that have + * been added since the last time we have prepared the packfile store. + */ + if (flags & OBJECT_INFO_SECOND_READ) + packfile_store_reprepare(store); + if (!find_pack_entry(store, oid, &e)) return 1;