From 26b974b3a9608adee6f964e9effbac86d0220bc3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:08:54 -0500 Subject: [PATCH 1/6] check_connected(): delay opening new_pack In check_connected(), if the transport tells us we got a single packfile that has already been verified as self-contained and connected, then we can skip checking connectivity for any tips that are mentioned in that pack. This goes back to c6807a40dc (clone: open a shortcut for connectivity check, 2013-05-26). We don't need to open that pack until we are about to start sending oids to our child rev-list process, since that's when we check whether they are in the self-contained pack. Let's push the opening of that pack further down in the function. That saves us from having to clean it up when we leave the function early (and by the time have opened the rev-list process, we never leave the function early, since we have to clean up the child process). Signed-off-by: Jeff King Reviewed-by: Jacob Keller Signed-off-by: Junio C Hamano --- connected.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/connected.c b/connected.c index 79403108dd..530357de54 100644 --- a/connected.c +++ b/connected.c @@ -45,20 +45,6 @@ int check_connected(oid_iterate_fn fn, void *cb_data, return err; } - if (transport && transport->smart_options && - transport->smart_options->self_contained_and_connected && - transport->pack_lockfiles.nr == 1 && - strip_suffix(transport->pack_lockfiles.items[0].string, - ".keep", &base_len)) { - struct strbuf idx_file = STRBUF_INIT; - strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string, - base_len); - strbuf_addstr(&idx_file, ".idx"); - new_pack = add_packed_git(the_repository, idx_file.buf, - idx_file.len, 1); - strbuf_release(&idx_file); - } - if (repo_has_promisor_remote(the_repository)) { /* * For partial clones, we don't want to have to do a regular @@ -90,7 +76,6 @@ int check_connected(oid_iterate_fn fn, void *cb_data, promisor_pack_found: ; } while ((oid = fn(cb_data)) != NULL); - free(new_pack); return 0; } @@ -127,15 +112,27 @@ no_promisor_pack_found: else rev_list.no_stderr = opt->quiet; - if (start_command(&rev_list)) { - free(new_pack); + if (start_command(&rev_list)) return error(_("Could not run 'git rev-list'")); - } sigchain_push(SIGPIPE, SIG_IGN); rev_list_in = xfdopen(rev_list.in, "w"); + if (transport && transport->smart_options && + transport->smart_options->self_contained_and_connected && + transport->pack_lockfiles.nr == 1 && + strip_suffix(transport->pack_lockfiles.items[0].string, + ".keep", &base_len)) { + struct strbuf idx_file = STRBUF_INIT; + strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string, + base_len); + strbuf_addstr(&idx_file, ".idx"); + new_pack = add_packed_git(the_repository, idx_file.buf, + idx_file.len, 1); + strbuf_release(&idx_file); + } + do { /* * If index-pack already checked that: From 0921da1724dab83ea1d266b996544674d0e318e4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:09:56 -0500 Subject: [PATCH 2/6] check_connected(): fix leak of pack-index mmap Since c6807a40dc (clone: open a shortcut for connectivity check, 2013-05-26), we may open a one-off packed_git struct to check what's in the pack we just received. At the end of the function we throw away the struct (rather than linking it into the repository struct as usual). We used to leak the struct until dd4143e7bf (connected.c: free the "struct packed_git", 2022-11-08), which calls free(). But that's not sufficient; inside the struct we'll have mmap'd the pack idx data from disk, which needs an munmap() call. Building with SANITIZE=leak doesn't detect this, because we are leaking our own mmap(), and it only finds heap allocations from malloc(). But if we use our compat mmap implementation like this: make NO_MMAP=MapsBecomeMallocs SANITIZE=leak then LSan will notice the leak, because now it's a regular heap buffer allocated by malloc(). We can fix it by calling close_pack(), which will free any associated memory. Note that we need to check for NULL ourselves; unlike free(), it is not safe to pass a NULL pointer to close_pack(). Signed-off-by: Jeff King Reviewed-by: Jacob Keller Signed-off-by: Junio C Hamano --- connected.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/connected.c b/connected.c index 530357de54..6718503649 100644 --- a/connected.c +++ b/connected.c @@ -159,6 +159,9 @@ no_promisor_pack_found: err = error_errno(_("failed to close rev-list's stdin")); sigchain_pop(SIGPIPE); - free(new_pack); + if (new_pack) { + close_pack(new_pack); + free(new_pack); + } return finish_command(&rev_list) || err; } From e2f11392403ccb9ad304546d5be65dc917e0c95f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:12:29 -0500 Subject: [PATCH 3/6] pack-revindex: avoid double-loading .rev files The usual entry point for loading the pack revindex is the load_pack_revindex() function. It returns immediately if the packed_git has a non-NULL revindex or revindex data field (representing an in-memory or mmap'd .rev file, respectively), since the data is already loaded. But in 5a6072f631 (fsck: validate .rev file header, 2023-04-17) the fsck code path switched to calling load_pack_revindex_from_disk() directly, since it wants to check the on-disk data (if there is any). But that function does _not_ check to see if the data has already been loaded; it just maps the file, overwriting the revindex_map pointer (and pointing revindex_data inside that map). And in that case we've leaked the mmap() pointed to by revindex_map (if it was non-NULL). This usually doesn't happen, since fsck wouldn't need to load the revindex for any reason before we get to these checks. But there are some cases where it does. For example, is_promisor_object() runs odb_for_each_object() with the PACK_ORDER flag, which uses the revindex. This happens a few times in our test suite, but SANITIZE=leak doesn't detect it because we are leaking an mmap(), not a heap-allocated buffer from malloc(). However, if you build with NO_MMAP, then our compat mmap will read into a heap buffer instead, and LSan will complain. This causes failures in t5601, t0410, t5702, and t5616. We can fix it by checking for existing revindex_data when loading from disk. This is redundant when we're called from load_pack_revindex(), but it's a cheap check. The alternative is to teach check_pack_rev_indexes() in fsck to skip the load, but that seems messier; it doesn't otherwise know about internals like revindex_map and revindex_data. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-revindex.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pack-revindex.c b/pack-revindex.c index 8598b941c8..392eb04e01 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -277,6 +277,10 @@ int load_pack_revindex_from_disk(struct packed_git *p) { char *revindex_name; int ret; + + if (p->revindex_data) + return 0; + if (open_pack_index(p)) return -1; From b68e875bec29e538a67ad38009ea1c02fa877258 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 6 Mar 2026 21:24:59 -0500 Subject: [PATCH 4/6] object-file: fix mmap() leak in odb_source_loose_read_object_stream() We mmap() a loose object file, storing the result in the local variable "mapped", which is eventually assigned into our stream struct as "st.mapped". If we hit an error, we jump to an error label which does: munmap(st.mapped, st.mapsize); to clean up. But this is wrong; we don't assign st.mapped until the end of the function, after all of the "goto error" jumps. So this munmap() is never cleaning up anything (st.mapped is always NULL, because we initialize the struct with calloc). Instead, we should feed the local variable to munmap(). This leak is due to 595296e124 (streaming: allocate stream inside the backend-specific logic, 2025-11-23), which introduced the local variable. Before that, we assigned the mmap result directly into st.mapped. It was probably switched there so that we do not have to allocate/free the struct when the map operation fails (e.g., because we don't have the loose object). Before that commit, the struct was passed in from the caller, so there was no allocation at all. You can see the leak in the test suite by building with: make SANITIZE=leak NO_MMAP=1 CC=clang and running t1060. We need NO_MMAP so that the mmap() is backed by an actual malloc(), which allows LSan to detect it. And the leak seems not to be detected when compiling with gcc, probably due to some internal compiler decisions about how the stack memory is written. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index e7e4c3348f..b156cc278b 100644 --- a/object-file.c +++ b/object-file.c @@ -2150,7 +2150,7 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out, return 0; error: git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); + munmap(mapped, mapsize); free(st); return -1; } From 00611d86c66f4230eb229b2b7dc5ac413aef2221 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:13:05 -0500 Subject: [PATCH 5/6] Makefile: turn on NO_MMAP when building with LSan The past few commits fixed some cases where we leak memory allocated by mmap(). Building with SANITIZE=leak doesn't detect these because it covers only heap buffers allocated by malloc(). But if we build with NO_MMAP, our compat mmap() implementation will allocate a heap buffer and pread() into it. And thus Lsan will detect these leaks for free. Using NO_MMAP is less performant, of course, since we have to use extra memory and read in the whole file, rather than faulting in pages from disk. But LSan builds are already slow, and this doesn't make them measurably worse. Getting extra coverage for our leak-checking is worth it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 8aa489f3b6..6b121daf1e 100644 --- a/Makefile +++ b/Makefile @@ -1594,6 +1594,7 @@ BASIC_CFLAGS += -DSHA1DC_FORCE_ALIGNED_ACCESS endif ifneq ($(filter leak,$(SANITIZERS)),) BASIC_CFLAGS += -O0 +NO_MMAP = CatchMapLeaks SANITIZE_LEAK = YesCompiledWithIt endif ifneq ($(filter address,$(SANITIZERS)),) From a8a69bbb64e1d25b327aed5925b1fbc086a0ba69 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 6 Mar 2026 11:25:13 -0500 Subject: [PATCH 6/6] meson: turn on NO_MMAP when building with LSan The previous commit taught the Makefile to turn on NO_MMAP in this instance. We should do the same with meson for consistency. We already do this for ASan builds, so we can just tweak one conditional. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index dd52efd1c8..fc5d0f5954 100644 --- a/meson.build +++ b/meson.build @@ -1417,7 +1417,7 @@ else 'getpagesize' : [], } - if get_option('b_sanitize').contains('address') + if get_option('b_sanitize').contains('address') or get_option('b_sanitize').contains('leak') libgit_c_args += '-DNO_MMAP' libgit_sources += 'compat/mmap.c' else