From 784ceccb91b82dc8a2c69ddd6f1f5ccc2e2f96f2 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Wed, 14 May 2025 23:50:26 +0800 Subject: [PATCH 1/3] packed-backend: fsck should warn when "packed-refs" file is empty We assume the "packed-refs" won't be empty and instead has at least one line in it (even when there are no refs packed, there is the file header line). Because there is no terminating LF in the empty file, we will report "packedRefEntryNotTerminated(ERROR)" to the user. However, the runtime code paths would accept an empty "packed-refs" file, for example, "create_snapshot" would simply return the "snapshot" without checking the content of "packed-refs". So, we should skip checking the content of "packed-refs" when it is empty during fsck. After 694b7a1999 (repack_without_ref(): write peeled refs in the rewritten file, 2013-04-22), we would always write a header into the "packed-refs" file. So, versions of Git that are not too ancient never write such an empty "packed-refs" file. As an empty file is often a sign of a filesystem-level issue, the way we want to resolve this inconsistency is not to make everybody totally silent but to notice and report the anomaly. Let's create a "FSCK_INFO" message id "EMPTY_PACKED_REFS_FILE" to report to the users that "packed-refs" is empty. Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.adoc | 6 ++++++ fsck.h | 1 + refs/packed-backend.c | 9 +++++++++ t/t0602-reffiles-fsck.sh | 17 +++++++++++++++++ 4 files changed, 33 insertions(+) diff --git a/Documentation/fsck-msgids.adoc b/Documentation/fsck-msgids.adoc index 9601fff228..0ba4f9a27e 100644 --- a/Documentation/fsck-msgids.adoc +++ b/Documentation/fsck-msgids.adoc @@ -59,6 +59,12 @@ `emptyName`:: (WARN) A path contains an empty name. +`emptyPackedRefsFile`:: + (INFO) "packed-refs" file is empty. Report to the + git@vger.kernel.org mailing list if you see this error. 
As only + very early versions of Git would create such an empty + "packed-refs" file, we might tighten this rule in the future. + `extraHeaderEntry`:: (IGNORE) Extra headers found after `tagger`. diff --git a/fsck.h b/fsck.h index b1deae61ee..0c5869ac34 100644 --- a/fsck.h +++ b/fsck.h @@ -84,6 +84,7 @@ enum fsck_msg_type { FUNC(LARGE_PATHNAME, WARN) \ /* infos (reported as warnings, but ignored by default) */ \ FUNC(BAD_FILEMODE, INFO) \ + FUNC(EMPTY_PACKED_REFS_FILE, INFO) \ FUNC(GITMODULES_PARSE, INFO) \ FUNC(GITIGNORE_SYMLINK, INFO) \ FUNC(GITATTRIBUTES_SYMLINK, INFO) \ diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 3ad1ed0787..fb91833e76 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -2103,6 +2103,15 @@ static int packed_fsck(struct ref_store *ref_store, goto cleanup; } + if (!st.st_size) { + struct fsck_ref_report report = { 0 }; + report.path = "packed-refs"; + ret = fsck_report_ref(o, &report, + FSCK_MSG_EMPTY_PACKED_REFS_FILE, + "file is empty"); + goto cleanup; + } + if (strbuf_read(&packed_ref_content, fd, 0) < 0) { ret = error_errno(_("unable to read '%s'"), refs->path); goto cleanup; diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index 9d1dc2144c..f671ac4d3a 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -647,6 +647,23 @@ test_expect_success SYMLINKS 'the filetype of packed-refs should be checked' ' ) ' +test_expect_success 'empty packed-refs should be reported' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + + >.git/packed-refs && + git refs verify 2>err && + cat >expect <<-EOF && + warning: packed-refs: emptyPackedRefsFile: file is empty + EOF + rm .git/packed-refs && + test_cmp expect err + ) +' + test_expect_success 'packed-refs header should be checked' ' test_when_finished "rm -rf repo" && git init repo && From a0dee3f74b4f42076b7c23ca6d9aca61ed064e82 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Wed, 14 May 2025 
23:50:35 +0800 Subject: [PATCH 2/3] packed-backend: extract snapshot allocation in `load_contents` "load_contents" would choose which way to load the content of the "packed-refs" file. However, we cannot directly use this function when checking the consistency because we don't want to open the file. And we also need to reuse the logic to avoid causing repetition. Let's create a new helper function "allocate_snapshot_buffer" to extract the snapshot allocation logic in "load_contents" and update the "load_contents" to align with the behavior. Suggested-by: Jeff King Suggested-by: Patrick Steinhardt Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 53 +++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index fb91833e76..1da44a3d6d 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -517,6 +517,32 @@ static int refname_contains_nul(struct strbuf *refname) #define SMALL_FILE_SIZE (32*1024) +static int allocate_snapshot_buffer(struct snapshot *snapshot, int fd, struct stat *st) +{ + ssize_t bytes_read; + size_t size; + + size = xsize_t(st->st_size); + if (!size) + return 0; + + if (mmap_strategy == MMAP_NONE || size <= SMALL_FILE_SIZE) { + snapshot->buf = xmalloc(size); + bytes_read = read_in_full(fd, snapshot->buf, size); + if (bytes_read < 0 || bytes_read != size) + die_errno("couldn't read %s", snapshot->refs->path); + snapshot->mmapped = 0; + } else { + snapshot->buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + snapshot->mmapped = 1; + } + + snapshot->start = snapshot->buf; + snapshot->eof = snapshot->buf + size; + + return 1; +} + /* * Depending on `mmap_strategy`, either mmap or read the contents of * the `packed-refs` file into the snapshot. 
Return 1 if the file @@ -525,10 +551,9 @@ static int refname_contains_nul(struct strbuf *refname) */ static int load_contents(struct snapshot *snapshot) { - int fd; struct stat st; - size_t size; - ssize_t bytes_read; + int ret; + int fd; fd = open(snapshot->refs->path, O_RDONLY); if (fd < 0) { @@ -550,27 +575,11 @@ static int load_contents(struct snapshot *snapshot) if (fstat(fd, &st) < 0) die_errno("couldn't stat %s", snapshot->refs->path); - size = xsize_t(st.st_size); - if (!size) { - close(fd); - return 0; - } else if (mmap_strategy == MMAP_NONE || size <= SMALL_FILE_SIZE) { - snapshot->buf = xmalloc(size); - bytes_read = read_in_full(fd, snapshot->buf, size); - if (bytes_read < 0 || bytes_read != size) - die_errno("couldn't read %s", snapshot->refs->path); - snapshot->mmapped = 0; - } else { - snapshot->buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - snapshot->mmapped = 1; - } + ret = allocate_snapshot_buffer(snapshot, fd, &st); + close(fd); - - snapshot->start = snapshot->buf; - snapshot->eof = snapshot->buf + size; - - return 1; + return ret; } static const char *find_reference_location_1(struct snapshot *snapshot, From 86ddd588f24acf3960489dccb8aed82dc570796b Mon Sep 17 00:00:00 2001 From: shejialuo Date: Wed, 14 May 2025 23:50:42 +0800 Subject: [PATCH 3/3] packed-backend: mmap large "packed-refs" file during fsck During fsck, we use "strbuf_read" to read the content of "packed-refs" without using the mmap mechanism. This is a bad practice which would consume more memory than using the mmap mechanism. Besides, as all other code paths in "packed-backend.c" load the file this way, we should make "fsck" align with the current codebase. As we have introduced the helper function "allocate_snapshot_buffer", we can simply use this function to use the mmap mechanism. 
Suggested-by: Jeff King Suggested-by: Patrick Steinhardt Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1da44a3d6d..7fd73a0e6d 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -2068,7 +2068,7 @@ static int packed_fsck(struct ref_store *ref_store, { struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ, "fsck"); - struct strbuf packed_ref_content = STRBUF_INIT; + struct snapshot snapshot = { 0 }; unsigned int sorted = 0; struct stat st; int ret = 0; @@ -2112,7 +2112,7 @@ static int packed_fsck(struct ref_store *ref_store, goto cleanup; } - if (!st.st_size) { + if (!allocate_snapshot_buffer(&snapshot, fd, &st)) { struct fsck_ref_report report = { 0 }; report.path = "packed-refs"; ret = fsck_report_ref(o, &report, @@ -2121,21 +2121,16 @@ static int packed_fsck(struct ref_store *ref_store, goto cleanup; } - if (strbuf_read(&packed_ref_content, fd, 0) < 0) { - ret = error_errno(_("unable to read '%s'"), refs->path); - goto cleanup; - } - - ret = packed_fsck_ref_content(o, ref_store, &sorted, packed_ref_content.buf, - packed_ref_content.buf + packed_ref_content.len); + ret = packed_fsck_ref_content(o, ref_store, &sorted, snapshot.start, + snapshot.eof); if (!ret && sorted) - ret = packed_fsck_ref_sorted(o, ref_store, packed_ref_content.buf, - packed_ref_content.buf + packed_ref_content.len); + ret = packed_fsck_ref_sorted(o, ref_store, snapshot.start, + snapshot.eof); cleanup: if (fd >= 0) close(fd); - strbuf_release(&packed_ref_content); + clear_snapshot_buffer(&snapshot); return ret; }