Merge branch 'ps/fsck-stream-from-the-right-object-instance' into jch

"fsck" iterates over packfiles and its access to pack data caused
the list to be permuted, which made it loop forever; the code
"fsck" uses to access pack data has been updated to avoid this.

* ps/fsck-stream-from-the-right-object-instance:
  pack-check: fix verification of large objects
  packfile: expose function to read object stream for an offset
  object-file: adapt `stream_object_signature()` to take a stream
  t/helper: improve "genrandom" test helper
This commit is contained in:
Junio C Hamano
2026-03-04 10:53:10 -08:00
14 changed files with 114 additions and 43 deletions

View File

@@ -129,18 +129,15 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
return !oideq(oid, &real_oid) ? -1 : 0;
}
int stream_object_signature(struct repository *r, const struct object_id *oid)
int stream_object_signature(struct repository *r,
struct odb_read_stream *st,
const struct object_id *oid)
{
struct object_id real_oid;
struct odb_read_stream *st;
struct git_hash_ctx c;
char hdr[MAX_HEADER_LEN];
int hdrlen;
st = odb_read_stream_open(r->objects, oid, NULL);
if (!st)
return -1;
/* Generate the header */
hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
@@ -160,7 +157,6 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
git_hash_update(&c, buf, readlen);
}
git_hash_final_oid(&real_oid, &c);
odb_read_stream_close(st);
return !oideq(oid, &real_oid) ? -1 : 0;
}

View File

@@ -166,7 +166,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
* Rehash the object read from the already-opened "stream" and
* check that the resulting hash matches "oid".
*/
int stream_object_signature(struct repository *r, const struct object_id *oid);
int stream_object_signature(struct repository *r,
struct odb_read_stream *stream,
const struct object_id *oid);
enum finalize_object_file_flags {
FOF_SKIP_COLLISION_CHECK = 1,

View File

@@ -6,6 +6,7 @@
#include "object.h"
#include "replace-object.h"
#include "object-file.h"
#include "odb/streaming.h"
#include "blob.h"
#include "statinfo.h"
#include "tree.h"
@@ -343,9 +344,21 @@ struct object *parse_object_with_flags(struct repository *r,
if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) &&
odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) {
if (!skip_hash && stream_object_signature(r, repl) < 0) {
error(_("hash mismatch %s"), oid_to_hex(oid));
return NULL;
if (!skip_hash) {
struct odb_read_stream *stream = odb_read_stream_open(r->objects, oid, NULL);
if (!stream) {
error(_("unable to open object stream for %s"), oid_to_hex(oid));
return NULL;
}
if (stream_object_signature(r, stream, repl) < 0) {
error(_("hash mismatch %s"), oid_to_hex(oid));
odb_read_stream_close(stream);
return NULL;
}
odb_read_stream_close(stream);
}
parse_blob_buffer(lookup_blob(r, oid));
return lookup_object(r, oid);

View File

@@ -9,6 +9,7 @@
#include "packfile.h"
#include "object-file.h"
#include "odb.h"
#include "odb/streaming.h"
struct idx_entry {
off_t offset;
@@ -104,6 +105,7 @@ static int verify_packfile(struct repository *r,
QSORT(entries, nr_objects, compare_entries);
for (i = 0; i < nr_objects; i++) {
struct odb_read_stream *stream = NULL;
void *data;
struct object_id oid;
enum object_type type;
@@ -152,7 +154,9 @@ static int verify_packfile(struct repository *r,
type) < 0)
err = error("packed %s from %s is corrupt",
oid_to_hex(&oid), p->pack_name);
else if (!data && stream_object_signature(r, &oid) < 0)
else if (!data &&
(packfile_read_object_stream(&stream, &oid, p, entries[i].offset) < 0 ||
stream_object_signature(r, stream, &oid) < 0))
err = error("packed %s from %s is corrupt",
oid_to_hex(&oid), p->pack_name);
else if (fn) {
@@ -163,12 +167,14 @@ static int verify_packfile(struct repository *r,
}
if (((base_count + i) & 1023) == 0)
display_progress(progress, base_count + i);
free(data);
if (stream)
odb_read_stream_close(stream);
free(data);
}
display_progress(progress, base_count + i);
free(entries);
return err;
}

View File

@@ -2621,32 +2621,28 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st)
return 0;
}
int packfile_store_read_object_stream(struct odb_read_stream **out,
struct packfile_store *store,
const struct object_id *oid)
int packfile_read_object_stream(struct odb_read_stream **out,
const struct object_id *oid,
struct packed_git *pack,
off_t offset)
{
struct odb_packed_read_stream *stream;
struct pack_window *window = NULL;
struct object_info oi = OBJECT_INFO_INIT;
enum object_type in_pack_type;
unsigned long size;
oi.sizep = &size;
in_pack_type = unpack_object_header(pack, &window, &offset, &size);
unuse_pack(&window);
if (packfile_store_read_object_info(store, oid, &oi, 0) ||
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
if (repo_settings_get_big_file_threshold(pack->repo) >= size)
return -1;
in_pack_type = unpack_object_header(oi.u.packed.pack,
&window,
&oi.u.packed.offset,
&size);
unuse_pack(&window);
switch (in_pack_type) {
default:
return -1; /* we do not do deltas for now */
case OBJ_BAD:
mark_bad_packed_object(pack, oid);
return -1;
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
@@ -2660,10 +2656,22 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
stream->base.type = in_pack_type;
stream->base.size = size;
stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
stream->pack = oi.u.packed.pack;
stream->pos = oi.u.packed.offset;
stream->pack = pack;
stream->pos = offset;
*out = &stream->base;
return 0;
}
int packfile_store_read_object_stream(struct odb_read_stream **out,
struct packfile_store *store,
const struct object_id *oid)
{
struct pack_entry e;
if (!find_pack_entry(store, oid, &e))
return -1;
return packfile_read_object_stream(out, oid, e.p, e.offset);
}

View File

@@ -449,6 +449,11 @@ off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
off_t *curpos, enum object_type type,
off_t delta_obj_offset);
int packfile_read_object_stream(struct odb_read_stream **out,
const struct object_id *oid,
struct packed_git *pack,
off_t offset);
void release_pack_memory(size_t);
/* global flag to enable extra checks when accessing packed objects */

View File

@@ -6,6 +6,7 @@
#include "test-tool.h"
#include "git-compat-util.h"
#include "parse.h"
int cmd__genrandom(int argc, const char **argv)
{
@@ -22,7 +23,9 @@ int cmd__genrandom(int argc, const char **argv)
next = next * 11 + *c;
} while (*c++);
count = (argc == 3) ? strtoul(argv[2], NULL, 0) : ULONG_MAX;
count = ULONG_MAX;
if (argc == 3 && !git_parse_ulong(argv[2], &count))
return error_errno("cannot parse argument '%s'", argv[2]);
while (count--) {
next = next * 1103515245 + 12345;

View File

@@ -643,7 +643,7 @@ test_expect_success 'object reference via commit text search' '
'
test_expect_success 'setup blobs which are likely to delta' '
test-tool genrandom foo 10240 >foo &&
test-tool genrandom foo 10k >foo &&
{ cat foo && echo plus; } >foo-plus &&
git add foo foo-plus &&
git commit -m foo &&

View File

@@ -104,9 +104,9 @@ test_expect_success 'packsize limit' '
# mid1 and mid2 will fit within 256k limit but
# appending mid3 will bust the limit and will
# result in a separate packfile.
test-tool genrandom "a" $(( 66 * 1024 )) >mid1 &&
test-tool genrandom "b" $(( 80 * 1024 )) >mid2 &&
test-tool genrandom "c" $(( 128 * 1024 )) >mid3 &&
test-tool genrandom "a" 66k >mid1 &&
test-tool genrandom "b" 80k >mid2 &&
test-tool genrandom "c" 128k >mid3 &&
git add mid1 mid2 mid3 &&
count=0 &&

View File

@@ -852,6 +852,44 @@ test_expect_success 'fsck errors in packed objects' '
! grep corrupt out
'
# Check that fsck reports a corrupted object in each of two packs, even
# though every corrupted object also exists intact in the other pack.
test_expect_success 'fsck handles multiple packfiles with big blobs' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
# We construct two packfiles with two objects in common and one
# object not in common. The objects in common can then be
# corrupted in one of the packfiles, respectively. The other
# objects that are unique to the packs are merely used to not
# have both packs contain the same data.
blob_one=$(test-tool genrandom one 200k | git hash-object -t blob -w --stdin) &&
blob_two=$(test-tool genrandom two 200k | git hash-object -t blob -w --stdin) &&
blob_three=$(test-tool genrandom three 200k | git hash-object -t blob -w --stdin) &&
blob_four=$(test-tool genrandom four 200k | git hash-object -t blob -w --stdin) &&
pack_one=$(printf "%s\n" "$blob_one" "$blob_two" "$blob_three" | git pack-objects .git/objects/pack/pack) &&
pack_two=$(printf "%s\n" "$blob_two" "$blob_three" "$blob_four" | git pack-objects .git/objects/pack/pack) &&
# Make the packs writable so that the dd invocations below can
# modify them in place.
chmod a+w .git/objects/pack/pack-*.pack &&
# Corrupt blob two in the first pack.
git verify-pack -v .git/objects/pack/pack-$pack_one >objects &&
# Take the last space-separated field of the line that starts
# with the blob id; it is used below as the byte offset to
# overwrite.
offset_one=$(sed <objects -n "s/^$blob_two .* \(.*\)$/\1/p") &&
# Overwrite the single byte at that offset with NUL without
# truncating the rest of the pack.
printf "\0" | dd of=.git/objects/pack/pack-$pack_one.pack bs=1 conv=notrunc seek=$offset_one &&
# Corrupt blob three in the second pack.
git verify-pack -v .git/objects/pack/pack-$pack_two >objects &&
offset_two=$(sed <objects -n "s/^$blob_three .* \(.*\)$/\1/p") &&
printf "\0" | dd of=.git/objects/pack/pack-$pack_two.pack bs=1 conv=notrunc seek=$offset_two &&
# We now expect to see two failures for the corrupted objects,
# even though they exist in a non-corrupted form in the
# respective other pack.
# NOTE(review): the blobs are 200k each, so the 100k threshold
# is below their size; presumably this is what routes them
# through the big-object code path in fsck -- confirm.
test_must_fail git -c core.bigFileThreshold=100k fsck 2>err &&
test_grep "unknown object type 0 at offset $offset_one in .git/objects/pack/pack-$pack_one.pack" err &&
test_grep "unknown object type 0 at offset $offset_two in .git/objects/pack/pack-$pack_two.pack" err
)
'
test_expect_success 'fsck fails on corrupt packfile' '
hsh=$(git commit-tree -m mycommit HEAD^{tree}) &&
pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) &&
@@ -918,7 +956,7 @@ test_expect_success 'fsck detects trailing loose garbage (large blob)' '
test_expect_success 'fsck detects truncated loose object' '
# make it big enough that we know we will truncate in the data
# portion, not the header
test-tool genrandom truncate 4096 >file &&
test-tool genrandom truncate 4k >file &&
blob=$(git hash-object -w file) &&
file=$(sha1_file $blob) &&
test_when_finished "remove_object $blob" &&

View File

@@ -12,7 +12,7 @@ test_expect_success 'setup' '
for i in a b c
do
echo $i >$i &&
test-tool genrandom "$i" 32768 >>$i &&
test-tool genrandom "$i" 32k >>$i &&
git update-index --add $i || return 1
done &&
echo d >d && cat c >>d && git update-index --add d &&

View File

@@ -242,7 +242,7 @@ test_bitmap_cases () {
'
test_expect_success 'splitting packs does not generate bogus bitmaps' '
test-tool genrandom foo $((1024 * 1024)) >rand &&
test-tool genrandom foo 1m >rand &&
git add rand &&
git commit -m "commit with big file" &&
git -c pack.packSizeLimit=500k repack -adb &&

View File

@@ -20,7 +20,7 @@ test_expect_success 'setup: create "template" repository' '
test_commit -C template 1 &&
test_commit -C template 2 &&
test_commit -C template 3 &&
test-tool genrandom foo 10240 >template/foo &&
test-tool genrandom foo 10k >template/foo &&
git -C template add foo &&
git -C template commit -m foo
'
@@ -499,7 +499,7 @@ test_expect_success "clone with promisor.advertise set to 'true' but don't delet
test_expect_success "setup for subsequent fetches" '
# Generate new commit with large blob
test-tool genrandom bar 10240 >template/bar &&
test-tool genrandom bar 10k >template/bar &&
git -C template add bar &&
git -C template commit -m bar &&

View File

@@ -321,7 +321,7 @@ test_expect_success 'no bitmaps created if .keep files present' '
test_expect_success 'auto-bitmaps do not complain if unavailable' '
test_config -C bare.git pack.packSizeLimit 1M &&
blob=$(test-tool genrandom big $((1024*1024)) |
blob=$(test-tool genrandom big 1m |
git -C bare.git hash-object -w --stdin) &&
git -C bare.git update-ref refs/tags/big $blob &&
@@ -497,9 +497,9 @@ test_expect_success '--filter works with --max-pack-size' '
cd max-pack-size &&
test_commit base &&
# two blobs which exceed the maximum pack size
test-tool genrandom foo 1048576 >foo &&
test-tool genrandom foo 1m >foo &&
git hash-object -w foo &&
test-tool genrandom bar 1048576 >bar &&
test-tool genrandom bar 1m >bar &&
git hash-object -w bar &&
git add foo bar &&
git commit -m "adding foo and bar"