From 0d30feef3c55f63f8db1dc1e52071090d16dfaaf Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Apr 2023 16:21:38 +0000 Subject: [PATCH 1/4] fsck: create scaffolding for rev-index checks The 'fsck' builtin checks many of Git's on-disk data structures, but does not currently validate the pack rev-index files (a .rev file to pair with a .pack and .idx file). Before doing a more-involved check process, create the scaffolding within builtin/fsck.c to have a new error type and add that error type when the API method verify_pack_revindex() returns an error. That method does nothing currently, but we will add checks to it in later changes. For now, check that 'git fsck' succeeds without any errors in the normal case. Future checks will be paired with tests that corrupt the .rev file appropriately. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/fsck.c | 30 ++++++++++++++++++++++++++++++ pack-revindex.c | 11 +++++++++++ pack-revindex.h | 8 ++++++++ t/t5325-reverse-index.sh | 14 ++++++++++++++ 4 files changed, 63 insertions(+) diff --git a/builtin/fsck.c b/builtin/fsck.c index 095b39d398..2ab78129bd 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -24,6 +24,7 @@ #include "resolve-undo.h" #include "run-command.h" #include "worktree.h" +#include "pack-revindex.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@ -53,6 +54,7 @@ static int name_objects; #define ERROR_REFS 010 #define ERROR_COMMIT_GRAPH 020 #define ERROR_MULTI_PACK_INDEX 040 +#define ERROR_PACK_REV_INDEX 0100 static const char *describe_object(const struct object_id *oid) { @@ -856,6 +858,32 @@ static int mark_packed_for_connectivity(const struct object_id *oid, return 0; } +static int check_pack_rev_indexes(struct repository *r, int show_progress) +{ + struct progress *progress = NULL; + uint32_t pack_count = 0; + int res = 0; + + if (show_progress) { + for (struct packed_git *p = get_all_packs(the_repository); p; p = p->next) + pack_count++; + progress = 
start_delayed_progress("Verifying reverse pack-indexes", pack_count); + pack_count = 0; + } + + for (struct packed_git *p = get_all_packs(the_repository); p; p = p->next) { + if (!load_pack_revindex(the_repository, p) && + verify_pack_revindex(p)) { + error(_("invalid rev-index for pack '%s'"), p->pack_name); + res = ERROR_PACK_REV_INDEX; + } + display_progress(progress, ++pack_count); + } + stop_progress(&progress); + + return res; +} + static char const * const fsck_usage[] = { N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n" " [--[no-]full] [--strict] [--verbose] [--lost-found]\n" @@ -1019,6 +1047,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) free_worktrees(worktrees); } + errors_found |= check_pack_rev_indexes(the_repository, show_progress); + check_connectivity(); if (the_repository->settings.core_commit_graph) { diff --git a/pack-revindex.c b/pack-revindex.c index 29f5358b25..c3f2aaa3fe 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -301,6 +301,17 @@ int load_pack_revindex(struct repository *r, struct packed_git *p) return -1; } +/* + * verify_pack_revindex verifies that the on-disk rev-index for the given + * pack-file is the same that would be created if written from scratch. + * + * A negative number is returned on error. + */ +int verify_pack_revindex(struct packed_git *p) +{ + return 0; +} + int load_midx_revindex(struct multi_pack_index *m) { struct strbuf revindex_name = STRBUF_INIT; diff --git a/pack-revindex.h b/pack-revindex.h index 46e834064e..c8861873b0 100644 --- a/pack-revindex.h +++ b/pack-revindex.h @@ -51,6 +51,14 @@ struct repository; */ int load_pack_revindex(struct repository *r, struct packed_git *p); +/* + * verify_pack_revindex verifies that the on-disk rev-index for the given + * pack-file is the same that would be created if written from scratch. + * + * A negative number is returned on error. 
+ */ +int verify_pack_revindex(struct packed_git *p); + /* * load_midx_revindex loads the '.rev' file corresponding to the given * multi-pack index by mmap-ing it and assigning pointers in the diff --git a/t/t5325-reverse-index.sh b/t/t5325-reverse-index.sh index 0548fce1aa..206c412f50 100755 --- a/t/t5325-reverse-index.sh +++ b/t/t5325-reverse-index.sh @@ -131,4 +131,18 @@ test_expect_success 'revindex in-memory vs on-disk' ' test_cmp on-disk in-core ) ' + +test_expect_success 'fsck succeeds on good rev-index' ' + test_when_finished rm -fr repo && + git init repo && + ( + cd repo && + + test_commit commit && + git -c pack.writeReverseIndex=true repack -ad && + git fsck 2>err && + test_must_be_empty err + ) +' + test_done From d975fe1fa57d57cfd21a97f96f4a94b99f50f2f4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Apr 2023 16:21:39 +0000 Subject: [PATCH 2/4] fsck: check rev-index checksums The previous change added calls to verify_pack_revindex() in builtin/fsck.c, but the implementation of the method was left empty. Add the first and most-obvious check to this method: checksum verification. While here, create a helper method in the test script that makes it easy to adjust the .rev file and check that 'git fsck' reports the correct error message. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- pack-revindex.c | 10 ++++++++++ t/t5325-reverse-index.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/pack-revindex.c b/pack-revindex.c index c3f2aaa3fe..007a806994 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -5,6 +5,7 @@ #include "packfile.h" #include "config.h" #include "midx.h" +#include "csum-file.h" struct revindex_entry { off_t offset; @@ -309,6 +310,15 @@ int load_pack_revindex(struct repository *r, struct packed_git *p) */ int verify_pack_revindex(struct packed_git *p) { + /* Do not bother checking if not initialized. 
*/ + if (!p->revindex_map) + return 0; + + if (!hashfile_checksum_valid((const unsigned char *)p->revindex_map, p->revindex_size)) { + error(_("invalid checksum")); + return -1; + } + return 0; } diff --git a/t/t5325-reverse-index.sh b/t/t5325-reverse-index.sh index 206c412f50..6b7c709a1f 100755 --- a/t/t5325-reverse-index.sh +++ b/t/t5325-reverse-index.sh @@ -145,4 +145,44 @@ test_expect_success 'fsck succeeds on good rev-index' ' ) ' +test_expect_success 'set up rev-index corruption tests' ' + git init corrupt && + ( + cd corrupt && + + test_commit commit && + git -c pack.writeReverseIndex=true repack -ad && + + revfile=$(ls .git/objects/pack/pack-*.rev) && + chmod a+w $revfile && + cp $revfile $revfile.bak + ) +' + +corrupt_rev_and_verify () { + ( + pos="$1" && + value="$2" && + error="$3" && + + cd corrupt && + revfile=$(ls .git/objects/pack/pack-*.rev) && + + # Reset to original rev-file. + cp $revfile.bak $revfile && + + printf "$value" | dd of=$revfile bs=1 seek="$pos" conv=notrunc && + test_must_fail git fsck 2>err && + grep "$error" err + ) +} + +test_expect_success 'fsck catches invalid checksum' ' + revfile=$(ls corrupt/.git/objects/pack/pack-*.rev) && + orig_size=$(wc -c <$revfile) && + hashpos=$((orig_size - 10)) && + corrupt_rev_and_verify $hashpos bogus \ + "invalid checksum" +' + test_done From 5f658d1b577722111564f51962d6af33d1fe96c6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Apr 2023 16:21:40 +0000 Subject: [PATCH 3/4] fsck: check rev-index position values When checking a rev-index file, it may be helpful to identify exactly which positions are incorrect. Compare the rev-index to a freshly-computed in-memory rev-index and report the comparison failures. This additional check (on top of the checksum validation) can help find files that were corrupted by a single bit flip on-disk or perhaps were written incorrectly due to a bug in Git. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- pack-revindex.c | 25 +++++++++++++++++++++---- t/t5325-reverse-index.sh | 5 +++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/pack-revindex.c b/pack-revindex.c index 007a806994..62a9846470 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -310,16 +310,33 @@ int load_pack_revindex(struct repository *r, struct packed_git *p) */ int verify_pack_revindex(struct packed_git *p) { + int res = 0; + /* Do not bother checking if not initialized. */ - if (!p->revindex_map) - return 0; + if (!p->revindex_map || !p->revindex_data) + return res; if (!hashfile_checksum_valid((const unsigned char *)p->revindex_map, p->revindex_size)) { error(_("invalid checksum")); - return -1; + res = -1; } - return 0; + /* This may fail due to a broken .idx. */ + if (create_pack_revindex_in_memory(p)) + return res; + + for (size_t i = 0; i < p->num_objects; i++) { + uint32_t nr = p->revindex[i].nr; + uint32_t rev_val = get_be32(p->revindex_data + i); + + if (nr != rev_val) { + error(_("invalid rev-index position at %"PRIu64": %"PRIu32" != %"PRIu32""), + (uint64_t)i, nr, rev_val); + res = -1; + } + } + + return res; } int load_midx_revindex(struct multi_pack_index *m) diff --git a/t/t5325-reverse-index.sh b/t/t5325-reverse-index.sh index 6b7c709a1f..5c3c80f88f 100755 --- a/t/t5325-reverse-index.sh +++ b/t/t5325-reverse-index.sh @@ -185,4 +185,9 @@ test_expect_success 'fsck catches invalid checksum' ' "invalid checksum" ' +test_expect_success 'fsck catches invalid row position' ' + corrupt_rev_and_verify 14 "\07" \ + "invalid rev-index position" +' + test_done From 5a6072f631dcf4d9f65e83b08d14c82e2af45dd8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Apr 2023 16:21:41 +0000 Subject: [PATCH 4/4] fsck: validate .rev file header While parsing a .rev file, we check the header information to be sure it makes sense. This happens before doing any additional validation such as a checksum or value check. 
In order to differentiate between a bad header and a non-existent file, we need to update the API for loading a reverse index. Make load_pack_revindex_from_disk() non-static and specify that a positive value means "the file does not exist" while other errors during parsing are negative values. Since an invalid header prevents setting up the structures we would use for further validations, we can stop at that point. The place where we can distinguish between a missing file and a corrupt file is inside load_revindex_from_disk(), which is used both by pack rev-indexes and multi-pack-index rev-indexes. Some tests in t5326 demonstrate that it is critical to update existing callers that only checked for a negative return value so that they also treat the new positive value as a failure. Add tests that check the three header values. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/fsck.c | 10 ++++++++-- pack-bitmap.c | 4 ++-- pack-revindex.c | 5 +++-- pack-revindex.h | 8 ++++++++ t/t5325-reverse-index.sh | 15 +++++++++++++++ 5 files changed, 36 insertions(+), 6 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 2ab78129bd..2414190c04 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -872,8 +872,14 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress) } for (struct packed_git *p = get_all_packs(the_repository); p; p = p->next) { - if (!load_pack_revindex(the_repository, p) && - verify_pack_revindex(p)) { + int load_error = load_pack_revindex_from_disk(p); + + if (load_error < 0) { + error(_("unable to load rev-index for pack '%s'"), p->pack_name); + res = ERROR_PACK_REV_INDEX; + } else if (!load_error && + !load_pack_revindex(the_repository, p) && + verify_pack_revindex(p)) { error(_("invalid rev-index for pack '%s'"), p->pack_name); res = ERROR_PACK_REV_INDEX; } diff --git a/pack-bitmap.c b/pack-bitmap.c index 38b35c4823..3828aab612 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -379,7 +379,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, goto cleanup; } - if 
(load_midx_revindex(bitmap_git->midx) < 0) { + if (load_midx_revindex(bitmap_git->midx)) { warning(_("multi-pack bitmap is missing required reverse index")); goto cleanup; } @@ -2140,7 +2140,7 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, if (!bitmap_is_midx(bitmap_git)) load_reverse_index(r, bitmap_git); - else if (load_midx_revindex(bitmap_git->midx) < 0) + else if (load_midx_revindex(bitmap_git->midx)) BUG("rebuild_existing_bitmaps: missing required rev-cache " "extension"); diff --git a/pack-revindex.c b/pack-revindex.c index 62a9846470..146334e2c9 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -212,7 +212,8 @@ static int load_revindex_from_disk(char *revindex_name, fd = git_open(revindex_name); if (fd < 0) { - ret = -1; + /* "No file" means return 1. */ + ret = 1; goto cleanup; } if (fstat(fd, &st)) { @@ -264,7 +265,7 @@ cleanup: return ret; } -static int load_pack_revindex_from_disk(struct packed_git *p) +int load_pack_revindex_from_disk(struct packed_git *p) { char *revindex_name; int ret; diff --git a/pack-revindex.h b/pack-revindex.h index c8861873b0..6dd47efea1 100644 --- a/pack-revindex.h +++ b/pack-revindex.h @@ -51,6 +51,14 @@ struct repository; */ int load_pack_revindex(struct repository *r, struct packed_git *p); +/* + * Specifically load a pack revindex from disk. + * + * Returns 0 on success, 1 on "no .rev file", and -1 when there is an + * error parsing the .rev file. + */ +int load_pack_revindex_from_disk(struct packed_git *p); + /* * verify_pack_revindex verifies that the on-disk rev-index for the given * pack-file is the same that would be created if written from scratch. 
diff --git a/t/t5325-reverse-index.sh b/t/t5325-reverse-index.sh index 5c3c80f88f..431a603ca0 100755 --- a/t/t5325-reverse-index.sh +++ b/t/t5325-reverse-index.sh @@ -190,4 +190,19 @@ test_expect_success 'fsck catches invalid row position' ' "invalid rev-index position" ' +test_expect_success 'fsck catches invalid header: magic number' ' + corrupt_rev_and_verify 1 "\07" \ + "reverse-index file .* has unknown signature" +' + +test_expect_success 'fsck catches invalid header: version' ' + corrupt_rev_and_verify 7 "\02" \ + "reverse-index file .* has unsupported version" +' + +test_expect_success 'fsck catches invalid header: hash function' ' + corrupt_rev_and_verify 11 "\03" \ + "reverse-index file .* has unsupported hash id" +' + test_done