diff --git a/object-file.c b/object-file.c index b8265021f9..3094140055 100644 --- a/object-file.c +++ b/object-file.c @@ -129,18 +129,15 @@ int check_object_signature(struct repository *r, const struct object_id *oid, return !oideq(oid, &real_oid) ? -1 : 0; } -int stream_object_signature(struct repository *r, const struct object_id *oid) +int stream_object_signature(struct repository *r, + struct odb_read_stream *st, + const struct object_id *oid) { struct object_id real_oid; - struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = odb_read_stream_open(r->objects, oid, NULL); - if (!st) - return -1; - /* Generate the header */ hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); @@ -160,7 +157,6 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - odb_read_stream_close(st); return !oideq(oid, &real_oid) ? -1 : 0; } diff --git a/object-file.h b/object-file.h index 47fad6663b..ff6da65296 100644 --- a/object-file.h +++ b/object-file.h @@ -166,7 +166,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid, * Try reading the object named with "oid" using * the streaming interface and rehash it to do the same. */ -int stream_object_signature(struct repository *r, const struct object_id *oid); +int stream_object_signature(struct repository *r, + struct odb_read_stream *stream, + const struct object_id *oid); enum finalize_object_file_flags { FOF_SKIP_COLLISION_CHECK = 1, diff --git a/object.c b/object.c index 99b6df3780..465902ecc6 100644 --- a/object.c +++ b/object.c @@ -6,6 +6,7 @@ #include "object.h" #include "replace-object.h" #include "object-file.h" +#include "odb/streaming.h" #include "blob.h" #include "statinfo.h" #include "tree.h" @@ -343,9 +344,21 @@ struct object *parse_object_with_flags(struct repository *r, if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) && odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) { - if (!skip_hash && stream_object_signature(r, repl) < 0) { - error(_("hash mismatch %s"), oid_to_hex(oid)); - return NULL; + if (!skip_hash) { + struct odb_read_stream *stream = odb_read_stream_open(r->objects, oid, NULL); + + if (!stream) { + error(_("unable to open object stream for %s"), oid_to_hex(oid)); + return NULL; + } + + if (stream_object_signature(r, stream, repl) < 0) { + error(_("hash mismatch %s"), oid_to_hex(oid)); + odb_read_stream_close(stream); + return NULL; + } + + odb_read_stream_close(stream); } parse_blob_buffer(lookup_blob(r, oid)); return lookup_object(r, oid); diff --git a/pack-check.c b/pack-check.c index 67cb2cf72f..7378c80730 100644 --- a/pack-check.c +++ b/pack-check.c @@ -9,6 +9,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" struct idx_entry { off_t offset; @@ -104,6 +105,7 @@ static int verify_packfile(struct repository *r, QSORT(entries, nr_objects, compare_entries); for (i = 0; i < nr_objects; i++) { + struct odb_read_stream *stream = NULL; void *data; struct object_id oid; enum object_type type; @@ -152,7 +154,9 @@ static int verify_packfile(struct repository *r, type) < 0) err = error("packed %s from %s is corrupt", oid_to_hex(&oid), p->pack_name); - else if (!data && stream_object_signature(r, &oid) < 0) + else if (!data && + (packfile_read_object_stream(&stream, &oid, p, entries[i].offset) < 0 || + stream_object_signature(r, stream, &oid) < 0)) err = error("packed %s from %s is corrupt", oid_to_hex(&oid), p->pack_name); else if (fn) { @@ -163,12 +167,14 @@ static int verify_packfile(struct repository *r, } if (((base_count + i) & 1023) == 0) display_progress(progress, base_count + i); - free(data); + if (stream) + odb_read_stream_close(stream); + free(data); } + display_progress(progress, base_count + i); free(entries); - return err; } diff --git a/packfile.c b/packfile.c index ce837f852a..4a6d4a80ea 100644 --- a/packfile.c +++ b/packfile.c @@ -2621,32 +2621,28 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st) return 0; } -int packfile_store_read_object_stream(struct odb_read_stream **out, - struct packfile_store *store, - const struct object_id *oid) +int packfile_read_object_stream(struct odb_read_stream **out, + const struct object_id *oid, + struct packed_git *pack, + off_t offset) { struct odb_packed_read_stream *stream; struct pack_window *window = NULL; - struct object_info oi = OBJECT_INFO_INIT; enum object_type in_pack_type; unsigned long size; - oi.sizep = &size; + in_pack_type = unpack_object_header(pack, &window, &offset, &size); + unuse_pack(&window); - if (packfile_store_read_object_info(store, oid, &oi, 0) || - oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA || - oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA || - repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) + if (repo_settings_get_big_file_threshold(pack->repo) >= size) return -1; - in_pack_type = unpack_object_header(oi.u.packed.pack, - &window, - &oi.u.packed.offset, - &size); - unuse_pack(&window); switch (in_pack_type) { default: return -1; /* we do not do deltas for now */ + case OBJ_BAD: + mark_bad_packed_object(pack, oid); + return -1; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: @@ -2660,10 +2656,22 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, stream->base.type = in_pack_type; stream->base.size = size; stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = oi.u.packed.pack; - stream->pos = oi.u.packed.offset; + stream->pack = pack; + stream->pos = offset; *out = &stream->base; return 0; } + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + + if (!find_pack_entry(store, oid, &e)) + return -1; + + return packfile_read_object_stream(out, oid, e.p, e.offset); +} diff --git a/packfile.h b/packfile.h index 224142fd34..85ce44f8ee 100644 --- a/packfile.h +++ b/packfile.h @@ -449,6 +449,11 @@ off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs, off_t *curpos, enum object_type type, off_t delta_obj_offset); +int packfile_read_object_stream(struct odb_read_stream **out, + const struct object_id *oid, + struct packed_git *pack, + off_t offset); + void release_pack_memory(size_t); /* global flag to enable extra checks when accessing packed objects */ diff --git a/t/helper/test-genrandom.c b/t/helper/test-genrandom.c index 51b67f2f87..d681961abb 100644 --- a/t/helper/test-genrandom.c +++ b/t/helper/test-genrandom.c @@ -6,6 +6,7 @@ #include "test-tool.h" #include "git-compat-util.h" +#include "parse.h" int cmd__genrandom(int argc, const char **argv) { @@ -22,7 +23,9 @@ int cmd__genrandom(int argc, const char **argv) next = next * 11 + *c; } while (*c++); - count = (argc == 3) ? strtoul(argv[2], NULL, 0) : ULONG_MAX; + count = ULONG_MAX; + if (argc == 3 && !git_parse_ulong(argv[2], &count)) + return error_errno("cannot parse argument '%s'", argv[2]); while (count--) { next = next * 1103515245 + 12345; diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 0eee3bb878..5499be8dc9 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -643,7 +643,7 @@ test_expect_success 'object reference via commit text search' ' ' test_expect_success 'setup blobs which are likely to delta' ' - test-tool genrandom foo 10240 >foo && + test-tool genrandom foo 10k >foo && { cat foo && echo plus; } >foo-plus && git add foo foo-plus && git commit -m foo && diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 5be273611a..7d40d08521 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -104,9 +104,9 @@ test_expect_success 'packsize limit' ' # mid1 and mid2 will fit within 256k limit but # appending mid3 will bust the limit and will # result in a separate packfile. - test-tool genrandom "a" $(( 66 * 1024 )) >mid1 && - test-tool genrandom "b" $(( 80 * 1024 )) >mid2 && - test-tool genrandom "c" $(( 128 * 1024 )) >mid3 && + test-tool genrandom "a" 66k >mid1 && + test-tool genrandom "b" 80k >mid2 && + test-tool genrandom "c" 128k >mid3 && git add mid1 mid2 mid3 && count=0 && diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 3fae05f9d9..54e81c2636 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -852,6 +852,44 @@ test_expect_success 'fsck errors in packed objects' ' ! grep corrupt out ' +test_expect_success 'fsck handles multiple packfiles with big blobs' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + # We construct two packfiles with two objects in common and one + # object not in common. The objects in common can then be + # corrupted in one of the packfiles, respectively. The other + # objects that are unique to the packs are merely used to not + # have both packs contain the same data. + blob_one=$(test-tool genrandom one 200k | git hash-object -t blob -w --stdin) && + blob_two=$(test-tool genrandom two 200k | git hash-object -t blob -w --stdin) && + blob_three=$(test-tool genrandom three 200k | git hash-object -t blob -w --stdin) && + blob_four=$(test-tool genrandom four 200k | git hash-object -t blob -w --stdin) && + pack_one=$(printf "%s\n" "$blob_one" "$blob_two" "$blob_three" | git pack-objects .git/objects/pack/pack) && + pack_two=$(printf "%s\n" "$blob_two" "$blob_three" "$blob_four" | git pack-objects .git/objects/pack/pack) && + chmod a+w .git/objects/pack/pack-*.pack && + + # Corrupt blob two in the first pack. + git verify-pack -v .git/objects/pack/pack-$pack_one >objects && + offset_one=$(sed objects && + offset_two=$(sed err && + test_grep "unknown object type 0 at offset $offset_one in .git/objects/pack/pack-$pack_one.pack" err && + test_grep "unknown object type 0 at offset $offset_two in .git/objects/pack/pack-$pack_two.pack" err + ) +' + test_expect_success 'fsck fails on corrupt packfile' ' hsh=$(git commit-tree -m mycommit HEAD^{tree}) && pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) && @@ -918,7 +956,7 @@ test_expect_success 'fsck detects trailing loose garbage (large blob)' ' test_expect_success 'fsck detects truncated loose object' ' # make it big enough that we know we will truncate in the data # portion, not the header - test-tool genrandom truncate 4096 >file && + test-tool genrandom truncate 4k >file && blob=$(git hash-object -w file) && file=$(sha1_file $blob) && test_when_finished "remove_object $blob" && diff --git a/t/t5301-sliding-window.sh b/t/t5301-sliding-window.sh index ff6b5159a3..3c3666b278 100755 --- a/t/t5301-sliding-window.sh +++ b/t/t5301-sliding-window.sh @@ -12,7 +12,7 @@ test_expect_success 'setup' ' for i in a b c do echo $i >$i && - test-tool genrandom "$i" 32768 >>$i && + test-tool genrandom "$i" 32k >>$i && git update-index --add $i || return 1 done && echo d >d && cat c >>d && git update-index --add d && diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 310b708c5c..f693cb5669 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -242,7 +242,7 @@ test_bitmap_cases () { ' test_expect_success 'splitting packs does not generate bogus bitmaps' ' - test-tool genrandom foo $((1024 * 1024)) >rand && + test-tool genrandom foo 1m >rand && git add rand && git commit -m "commit with big file" && git -c pack.packSizeLimit=500k repack -adb && diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index 532e6f0fea..357822c01a 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -20,7 +20,7 @@ test_expect_success 'setup: create "template" repository' ' test_commit -C template 1 && test_commit -C template 2 && test_commit -C template 3 && - test-tool genrandom foo 10240 >template/foo && + test-tool genrandom foo 10k >template/foo && git -C template add foo && git -C template commit -m foo ' @@ -499,7 +499,7 @@ test_expect_success "clone with promisor.advertise set to 'true' but don't delet test_expect_success "setup for subsequent fetches" ' # Generate new commit with large blob - test-tool genrandom bar 10240 >template/bar && + test-tool genrandom bar 10k >template/bar && git -C template add bar && git -C template commit -m bar && diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index acc2589f21..63ef63fc50 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -321,7 +321,7 @@ test_expect_success 'no bitmaps created if .keep files present' ' test_expect_success 'auto-bitmaps do not complain if unavailable' ' test_config -C bare.git pack.packSizeLimit 1M && - blob=$(test-tool genrandom big $((1024*1024)) | + blob=$(test-tool genrandom big 1m | git -C bare.git hash-object -w --stdin) && git -C bare.git update-ref refs/tags/big $blob && @@ -497,9 +497,9 @@ test_expect_success '--filter works with --max-pack-size' ' cd max-pack-size && test_commit base && # two blobs which exceed the maximum pack size - test-tool genrandom foo 1048576 >foo && + test-tool genrandom foo 1m >foo && git hash-object -w foo && - test-tool genrandom bar 1048576 >bar && + test-tool genrandom bar 1m >bar && git hash-object -w bar && git add foo bar && git commit -m "adding foo and bar"