mirror of
https://github.com/git/git.git
synced 2026-03-05 06:57:37 +01:00
Merge branch 'ps/fsck-stream-from-the-right-object-instance' into jch
"fsck" iterates over packfiles and its access to pack data caused the list to be permuted, which caused it to loop forever; the code to access pack data by "fsck" has been updated to avoid this. * ps/fsck-stream-from-the-right-object-instance: pack-check: fix verification of large objects packfile: expose function to read object stream for an offset object-file: adapt `stream_object_signature()` to take a stream t/helper: improve "genrandom" test helper
This commit is contained in:
@@ -129,18 +129,15 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
|
||||
return !oideq(oid, &real_oid) ? -1 : 0;
|
||||
}
|
||||
|
||||
int stream_object_signature(struct repository *r, const struct object_id *oid)
|
||||
int stream_object_signature(struct repository *r,
|
||||
struct odb_read_stream *st,
|
||||
const struct object_id *oid)
|
||||
{
|
||||
struct object_id real_oid;
|
||||
struct odb_read_stream *st;
|
||||
struct git_hash_ctx c;
|
||||
char hdr[MAX_HEADER_LEN];
|
||||
int hdrlen;
|
||||
|
||||
st = odb_read_stream_open(r->objects, oid, NULL);
|
||||
if (!st)
|
||||
return -1;
|
||||
|
||||
/* Generate the header */
|
||||
hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
|
||||
|
||||
@@ -160,7 +157,6 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
|
||||
git_hash_update(&c, buf, readlen);
|
||||
}
|
||||
git_hash_final_oid(&real_oid, &c);
|
||||
odb_read_stream_close(st);
|
||||
return !oideq(oid, &real_oid) ? -1 : 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -166,7 +166,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
|
||||
* Try reading the object named with "oid" using
|
||||
* the streaming interface and rehash it to do the same.
|
||||
*/
|
||||
int stream_object_signature(struct repository *r, const struct object_id *oid);
|
||||
int stream_object_signature(struct repository *r,
|
||||
struct odb_read_stream *stream,
|
||||
const struct object_id *oid);
|
||||
|
||||
enum finalize_object_file_flags {
|
||||
FOF_SKIP_COLLISION_CHECK = 1,
|
||||
|
||||
19
object.c
19
object.c
@@ -6,6 +6,7 @@
|
||||
#include "object.h"
|
||||
#include "replace-object.h"
|
||||
#include "object-file.h"
|
||||
#include "odb/streaming.h"
|
||||
#include "blob.h"
|
||||
#include "statinfo.h"
|
||||
#include "tree.h"
|
||||
@@ -343,9 +344,21 @@ struct object *parse_object_with_flags(struct repository *r,
|
||||
|
||||
if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) &&
|
||||
odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) {
|
||||
if (!skip_hash && stream_object_signature(r, repl) < 0) {
|
||||
error(_("hash mismatch %s"), oid_to_hex(oid));
|
||||
return NULL;
|
||||
if (!skip_hash) {
|
||||
struct odb_read_stream *stream = odb_read_stream_open(r->objects, oid, NULL);
|
||||
|
||||
if (!stream) {
|
||||
error(_("unable to open object stream for %s"), oid_to_hex(oid));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (stream_object_signature(r, stream, repl) < 0) {
|
||||
error(_("hash mismatch %s"), oid_to_hex(oid));
|
||||
odb_read_stream_close(stream);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
odb_read_stream_close(stream);
|
||||
}
|
||||
parse_blob_buffer(lookup_blob(r, oid));
|
||||
return lookup_object(r, oid);
|
||||
|
||||
12
pack-check.c
12
pack-check.c
@@ -9,6 +9,7 @@
|
||||
#include "packfile.h"
|
||||
#include "object-file.h"
|
||||
#include "odb.h"
|
||||
#include "odb/streaming.h"
|
||||
|
||||
struct idx_entry {
|
||||
off_t offset;
|
||||
@@ -104,6 +105,7 @@ static int verify_packfile(struct repository *r,
|
||||
QSORT(entries, nr_objects, compare_entries);
|
||||
|
||||
for (i = 0; i < nr_objects; i++) {
|
||||
struct odb_read_stream *stream = NULL;
|
||||
void *data;
|
||||
struct object_id oid;
|
||||
enum object_type type;
|
||||
@@ -152,7 +154,9 @@ static int verify_packfile(struct repository *r,
|
||||
type) < 0)
|
||||
err = error("packed %s from %s is corrupt",
|
||||
oid_to_hex(&oid), p->pack_name);
|
||||
else if (!data && stream_object_signature(r, &oid) < 0)
|
||||
else if (!data &&
|
||||
(packfile_read_object_stream(&stream, &oid, p, entries[i].offset) < 0 ||
|
||||
stream_object_signature(r, stream, &oid) < 0))
|
||||
err = error("packed %s from %s is corrupt",
|
||||
oid_to_hex(&oid), p->pack_name);
|
||||
else if (fn) {
|
||||
@@ -163,12 +167,14 @@ static int verify_packfile(struct repository *r,
|
||||
}
|
||||
if (((base_count + i) & 1023) == 0)
|
||||
display_progress(progress, base_count + i);
|
||||
free(data);
|
||||
|
||||
if (stream)
|
||||
odb_read_stream_close(stream);
|
||||
free(data);
|
||||
}
|
||||
|
||||
display_progress(progress, base_count + i);
|
||||
free(entries);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
40
packfile.c
40
packfile.c
@@ -2621,32 +2621,28 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int packfile_store_read_object_stream(struct odb_read_stream **out,
|
||||
struct packfile_store *store,
|
||||
const struct object_id *oid)
|
||||
int packfile_read_object_stream(struct odb_read_stream **out,
|
||||
const struct object_id *oid,
|
||||
struct packed_git *pack,
|
||||
off_t offset)
|
||||
{
|
||||
struct odb_packed_read_stream *stream;
|
||||
struct pack_window *window = NULL;
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
enum object_type in_pack_type;
|
||||
unsigned long size;
|
||||
|
||||
oi.sizep = &size;
|
||||
in_pack_type = unpack_object_header(pack, &window, &offset, &size);
|
||||
unuse_pack(&window);
|
||||
|
||||
if (packfile_store_read_object_info(store, oid, &oi, 0) ||
|
||||
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
|
||||
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
|
||||
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
|
||||
if (repo_settings_get_big_file_threshold(pack->repo) >= size)
|
||||
return -1;
|
||||
|
||||
in_pack_type = unpack_object_header(oi.u.packed.pack,
|
||||
&window,
|
||||
&oi.u.packed.offset,
|
||||
&size);
|
||||
unuse_pack(&window);
|
||||
switch (in_pack_type) {
|
||||
default:
|
||||
return -1; /* we do not do deltas for now */
|
||||
case OBJ_BAD:
|
||||
mark_bad_packed_object(pack, oid);
|
||||
return -1;
|
||||
case OBJ_COMMIT:
|
||||
case OBJ_TREE:
|
||||
case OBJ_BLOB:
|
||||
@@ -2660,10 +2656,22 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
|
||||
stream->base.type = in_pack_type;
|
||||
stream->base.size = size;
|
||||
stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
|
||||
stream->pack = oi.u.packed.pack;
|
||||
stream->pos = oi.u.packed.offset;
|
||||
stream->pack = pack;
|
||||
stream->pos = offset;
|
||||
|
||||
*out = &stream->base;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int packfile_store_read_object_stream(struct odb_read_stream **out,
|
||||
struct packfile_store *store,
|
||||
const struct object_id *oid)
|
||||
{
|
||||
struct pack_entry e;
|
||||
|
||||
if (!find_pack_entry(store, oid, &e))
|
||||
return -1;
|
||||
|
||||
return packfile_read_object_stream(out, oid, e.p, e.offset);
|
||||
}
|
||||
|
||||
@@ -449,6 +449,11 @@ off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
|
||||
off_t *curpos, enum object_type type,
|
||||
off_t delta_obj_offset);
|
||||
|
||||
int packfile_read_object_stream(struct odb_read_stream **out,
|
||||
const struct object_id *oid,
|
||||
struct packed_git *pack,
|
||||
off_t offset);
|
||||
|
||||
void release_pack_memory(size_t);
|
||||
|
||||
/* global flag to enable extra checks when accessing packed objects */
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include "test-tool.h"
|
||||
#include "git-compat-util.h"
|
||||
#include "parse.h"
|
||||
|
||||
int cmd__genrandom(int argc, const char **argv)
|
||||
{
|
||||
@@ -22,7 +23,9 @@ int cmd__genrandom(int argc, const char **argv)
|
||||
next = next * 11 + *c;
|
||||
} while (*c++);
|
||||
|
||||
count = (argc == 3) ? strtoul(argv[2], NULL, 0) : ULONG_MAX;
|
||||
count = ULONG_MAX;
|
||||
if (argc == 3 && !git_parse_ulong(argv[2], &count))
|
||||
return error_errno("cannot parse argument '%s'", argv[2]);
|
||||
|
||||
while (count--) {
|
||||
next = next * 1103515245 + 12345;
|
||||
|
||||
@@ -643,7 +643,7 @@ test_expect_success 'object reference via commit text search' '
|
||||
'
|
||||
|
||||
test_expect_success 'setup blobs which are likely to delta' '
|
||||
test-tool genrandom foo 10240 >foo &&
|
||||
test-tool genrandom foo 10k >foo &&
|
||||
{ cat foo && echo plus; } >foo-plus &&
|
||||
git add foo foo-plus &&
|
||||
git commit -m foo &&
|
||||
|
||||
@@ -104,9 +104,9 @@ test_expect_success 'packsize limit' '
|
||||
# mid1 and mid2 will fit within 256k limit but
|
||||
# appending mid3 will bust the limit and will
|
||||
# result in a separate packfile.
|
||||
test-tool genrandom "a" $(( 66 * 1024 )) >mid1 &&
|
||||
test-tool genrandom "b" $(( 80 * 1024 )) >mid2 &&
|
||||
test-tool genrandom "c" $(( 128 * 1024 )) >mid3 &&
|
||||
test-tool genrandom "a" 66k >mid1 &&
|
||||
test-tool genrandom "b" 80k >mid2 &&
|
||||
test-tool genrandom "c" 128k >mid3 &&
|
||||
git add mid1 mid2 mid3 &&
|
||||
|
||||
count=0 &&
|
||||
|
||||
@@ -852,6 +852,44 @@ test_expect_success 'fsck errors in packed objects' '
|
||||
! grep corrupt out
|
||||
'
|
||||
|
||||
test_expect_success 'fsck handles multiple packfiles with big blobs' '
|
||||
test_when_finished "rm -rf repo" &&
|
||||
git init repo &&
|
||||
(
|
||||
cd repo &&
|
||||
|
||||
# We construct two packfiles with two objects in common and one
|
||||
# object not in common. The objects in common can then be
|
||||
# corrupted in one of the packfiles, respectively. The other
|
||||
# objects that are unique to the packs are merely used to not
|
||||
# have both packs contain the same data.
|
||||
blob_one=$(test-tool genrandom one 200k | git hash-object -t blob -w --stdin) &&
|
||||
blob_two=$(test-tool genrandom two 200k | git hash-object -t blob -w --stdin) &&
|
||||
blob_three=$(test-tool genrandom three 200k | git hash-object -t blob -w --stdin) &&
|
||||
blob_four=$(test-tool genrandom four 200k | git hash-object -t blob -w --stdin) &&
|
||||
pack_one=$(printf "%s\n" "$blob_one" "$blob_two" "$blob_three" | git pack-objects .git/objects/pack/pack) &&
|
||||
pack_two=$(printf "%s\n" "$blob_two" "$blob_three" "$blob_four" | git pack-objects .git/objects/pack/pack) &&
|
||||
chmod a+w .git/objects/pack/pack-*.pack &&
|
||||
|
||||
# Corrupt blob two in the first pack.
|
||||
git verify-pack -v .git/objects/pack/pack-$pack_one >objects &&
|
||||
offset_one=$(sed <objects -n "s/^$blob_two .* \(.*\)$/\1/p") &&
|
||||
printf "\0" | dd of=.git/objects/pack/pack-$pack_one.pack bs=1 conv=notrunc seek=$offset_one &&
|
||||
|
||||
# Corrupt blob three in the second pack.
|
||||
git verify-pack -v .git/objects/pack/pack-$pack_two >objects &&
|
||||
offset_two=$(sed <objects -n "s/^$blob_three .* \(.*\)$/\1/p") &&
|
||||
printf "\0" | dd of=.git/objects/pack/pack-$pack_two.pack bs=1 conv=notrunc seek=$offset_two &&
|
||||
|
||||
# We now expect to see two failures for the corrupted objects,
|
||||
# even though they exist in a non-corrupted form in the
|
||||
# respective other pack.
|
||||
test_must_fail git -c core.bigFileThreshold=100k fsck 2>err &&
|
||||
test_grep "unknown object type 0 at offset $offset_one in .git/objects/pack/pack-$pack_one.pack" err &&
|
||||
test_grep "unknown object type 0 at offset $offset_two in .git/objects/pack/pack-$pack_two.pack" err
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'fsck fails on corrupt packfile' '
|
||||
hsh=$(git commit-tree -m mycommit HEAD^{tree}) &&
|
||||
pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) &&
|
||||
@@ -918,7 +956,7 @@ test_expect_success 'fsck detects trailing loose garbage (large blob)' '
|
||||
test_expect_success 'fsck detects truncated loose object' '
|
||||
# make it big enough that we know we will truncate in the data
|
||||
# portion, not the header
|
||||
test-tool genrandom truncate 4096 >file &&
|
||||
test-tool genrandom truncate 4k >file &&
|
||||
blob=$(git hash-object -w file) &&
|
||||
file=$(sha1_file $blob) &&
|
||||
test_when_finished "remove_object $blob" &&
|
||||
|
||||
@@ -12,7 +12,7 @@ test_expect_success 'setup' '
|
||||
for i in a b c
|
||||
do
|
||||
echo $i >$i &&
|
||||
test-tool genrandom "$i" 32768 >>$i &&
|
||||
test-tool genrandom "$i" 32k >>$i &&
|
||||
git update-index --add $i || return 1
|
||||
done &&
|
||||
echo d >d && cat c >>d && git update-index --add d &&
|
||||
|
||||
@@ -242,7 +242,7 @@ test_bitmap_cases () {
|
||||
'
|
||||
|
||||
test_expect_success 'splitting packs does not generate bogus bitmaps' '
|
||||
test-tool genrandom foo $((1024 * 1024)) >rand &&
|
||||
test-tool genrandom foo 1m >rand &&
|
||||
git add rand &&
|
||||
git commit -m "commit with big file" &&
|
||||
git -c pack.packSizeLimit=500k repack -adb &&
|
||||
|
||||
@@ -20,7 +20,7 @@ test_expect_success 'setup: create "template" repository' '
|
||||
test_commit -C template 1 &&
|
||||
test_commit -C template 2 &&
|
||||
test_commit -C template 3 &&
|
||||
test-tool genrandom foo 10240 >template/foo &&
|
||||
test-tool genrandom foo 10k >template/foo &&
|
||||
git -C template add foo &&
|
||||
git -C template commit -m foo
|
||||
'
|
||||
@@ -499,7 +499,7 @@ test_expect_success "clone with promisor.advertise set to 'true' but don't delet
|
||||
|
||||
test_expect_success "setup for subsequent fetches" '
|
||||
# Generate new commit with large blob
|
||||
test-tool genrandom bar 10240 >template/bar &&
|
||||
test-tool genrandom bar 10k >template/bar &&
|
||||
git -C template add bar &&
|
||||
git -C template commit -m bar &&
|
||||
|
||||
|
||||
@@ -321,7 +321,7 @@ test_expect_success 'no bitmaps created if .keep files present' '
|
||||
|
||||
test_expect_success 'auto-bitmaps do not complain if unavailable' '
|
||||
test_config -C bare.git pack.packSizeLimit 1M &&
|
||||
blob=$(test-tool genrandom big $((1024*1024)) |
|
||||
blob=$(test-tool genrandom big 1m |
|
||||
git -C bare.git hash-object -w --stdin) &&
|
||||
git -C bare.git update-ref refs/tags/big $blob &&
|
||||
|
||||
@@ -497,9 +497,9 @@ test_expect_success '--filter works with --max-pack-size' '
|
||||
cd max-pack-size &&
|
||||
test_commit base &&
|
||||
# two blobs which exceed the maximum pack size
|
||||
test-tool genrandom foo 1048576 >foo &&
|
||||
test-tool genrandom foo 1m >foo &&
|
||||
git hash-object -w foo &&
|
||||
test-tool genrandom bar 1048576 >bar &&
|
||||
test-tool genrandom bar 1m >bar &&
|
||||
git hash-object -w bar &&
|
||||
git add foo bar &&
|
||||
git commit -m "adding foo and bar"
|
||||
|
||||
Reference in New Issue
Block a user