Merge branch 'np/pack' into next

* np/pack:
  let the GIT native protocol use offsets to delta base when possible
  make pack data reuse compatible with both delta types
  make git-pack-objects able to create deltas with offset to base
  teach git-index-pack about deltas with offset to base
  teach git-unpack-objects about deltas with offset to base
  introduce delta objects with offset to base
This commit is contained in:
Junio C Hamano
2006-09-27 00:47:23 -07:00
8 changed files with 431 additions and 213 deletions

View File

@@ -29,6 +29,7 @@ struct object_entry {
enum object_type type;
enum object_type in_pack_type; /* could be delta */
unsigned long delta_size; /* delta data size (uncompressed) */
#define in_pack_header_size delta_size /* only when reusing pack data */
struct object_entry *delta; /* delta base object */
struct packed_git *in_pack; /* already in pack */
unsigned int in_pack_offset;
@@ -60,6 +61,8 @@ static int non_empty;
static int no_reuse_delta;
static int local;
static int incremental;
static int allow_ofs_delta;
static struct object_entry **sorted_by_sha, **sorted_by_type;
static struct object_entry *objects;
static int nr_objects, nr_alloc, nr_result;
@@ -84,17 +87,25 @@ static int object_ix_hashsz;
* Pack index for existing packs give us easy access to the offsets into
* corresponding pack file where each object's data starts, but the entries
* do not store the size of the compressed representation (uncompressed
* size is easily available by examining the pack entry header). We build
* a hashtable of existing packs (pack_revindex), and keep reverse index
* here -- pack index file is sorted by object name mapping to offset; this
* pack_revindex[].revindex array is an ordered list of offsets, so if you
* know the offset of an object, next offset is where its packed
* representation ends.
* size is easily available by examining the pack entry header). It is
* also rather expensive to find the sha1 for an object given its offset.
*
* We build a hashtable of existing packs (pack_revindex), and keep reverse
* index here -- pack index file is sorted by object name mapping to offset;
* this pack_revindex[].revindex array is a list of offset/index_nr pairs
* ordered by offset, so if you know the offset of an object, next offset
* is where its packed representation ends and the index_nr can be used to
* get the object sha1 from the main index.
*/
/*
 * One record of a pack's reverse index: where an object starts in the
 * pack file, and which slot of the pack index describes that object.
 */
struct revindex_entry {
unsigned int offset; /* object's starting offset in the pack (host byte order) */
unsigned int nr; /* slot number in the pack index; -1 in the trailing sentinel record */
};
struct pack_revindex {
struct packed_git *p;
unsigned long *revindex;
} *pack_revindex = NULL;
struct revindex_entry *revindex;
};
static struct pack_revindex *pack_revindex;
static int pack_revindex_hashsz;
/*
@@ -141,14 +152,9 @@ static void prepare_pack_ix(void)
static int cmp_offset(const void *a_, const void *b_)
{
unsigned long a = *(unsigned long *) a_;
unsigned long b = *(unsigned long *) b_;
if (a < b)
return -1;
else if (a == b)
return 0;
else
return 1;
const struct revindex_entry *a = a_;
const struct revindex_entry *b = b_;
return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
}
/*
@@ -161,25 +167,27 @@ static void prepare_pack_revindex(struct pack_revindex *rix)
int i;
void *index = p->index_base + 256;
rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
for (i = 0; i < num_ent; i++) {
unsigned int hl = *((unsigned int *)((char *) index + 24*i));
rix->revindex[i] = ntohl(hl);
rix->revindex[i].offset = ntohl(hl);
rix->revindex[i].nr = i;
}
/* This knows the pack format -- the 20-byte trailer
* follows immediately after the last object data.
*/
rix->revindex[num_ent] = p->pack_size - 20;
qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
rix->revindex[num_ent].offset = p->pack_size - 20;
rix->revindex[num_ent].nr = -1;
qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
}
static unsigned long find_packed_object_size(struct packed_git *p,
unsigned long ofs)
static struct revindex_entry * find_packed_object(struct packed_git *p,
unsigned int ofs)
{
int num;
int lo, hi;
struct pack_revindex *rix;
unsigned long *revindex;
struct revindex_entry *revindex;
num = pack_revindex_ix(p);
if (num < 0)
die("internal error: pack revindex uninitialized");
@@ -191,10 +199,10 @@ static unsigned long find_packed_object_size(struct packed_git *p,
hi = num_packed_objects(p) + 1;
do {
int mi = (lo + hi) / 2;
if (revindex[mi] == ofs) {
return revindex[mi+1] - ofs;
if (revindex[mi].offset == ofs) {
return revindex + mi;
}
else if (ofs < revindex[mi])
else if (ofs < revindex[mi].offset)
hi = mi;
else
lo = mi + 1;
@@ -202,6 +210,20 @@ static unsigned long find_packed_object_size(struct packed_git *p,
die("internal error: pack revindex corrupt");
}
/*
 * Return the number of bytes the object starting at pack offset 'ofs'
 * occupies inside pack 'p'.
 *
 * The reverse index is ordered by offset, so the record that follows
 * the one for 'ofs' holds the offset at which this object's packed
 * representation ends.
 */
static unsigned long find_packed_object_size(struct packed_git *p,
					     unsigned long ofs)
{
	struct revindex_entry *here = find_packed_object(p, ofs);
	struct revindex_entry *following = here + 1;

	return following->offset - ofs;
}
/*
 * Return a pointer to the 20-byte object name, stored in the pack
 * index of 'p', of the object that starts at pack offset 'ofs'.
 *
 * Index records are 24 bytes each: a 4-byte offset followed by the
 * object name.  The records begin 256 elements into index_base
 * (presumably just past the fan-out table -- confirm against the
 * index layout).
 */
static unsigned char *find_packed_object_name(struct packed_git *p,
					      unsigned long ofs)
{
	struct revindex_entry *hit = find_packed_object(p, ofs);
	unsigned char *records = (unsigned char *)(p->index_base + 256);

	/* step to the record, then over its leading 4-byte offset */
	return records + 24 * hit->nr + 4;
}
static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
unsigned long othersize, delta_size;
@@ -232,7 +254,7 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
int n = 1;
unsigned char c;
if (type < OBJ_COMMIT || type > OBJ_DELTA)
if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
die("bad type %d", type);
c = (type << 4) | (size & 15);
@@ -247,6 +269,10 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
return n;
}
/*
* we are going to reuse the existing object data as is. make
* sure it is not corrupt.
*/
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
{
z_stream stream;
@@ -278,32 +304,6 @@ static int check_inflate(unsigned char *data, unsigned long len, unsigned long e
return st;
}
/*
* we are going to reuse the existing pack entry data. make
* sure it is not corrupt.
*/
static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len)
{
enum object_type type;
unsigned long size, used;
if (pack_to_stdout)
return 0;
/* the caller has already called use_packed_git() for us,
* so it is safe to access the pack data from mmapped location.
* make sure the entry inflates correctly.
*/
used = unpack_object_header_gently(data, len, &type, &size);
if (!used)
return -1;
if (type == OBJ_DELTA)
used += 20; /* skip base object name */
data += used;
len -= used;
return check_inflate(data, len, entry->size);
}
static int revalidate_loose_object(struct object_entry *entry,
unsigned char *map,
unsigned long mapsize)
@@ -334,13 +334,10 @@ static unsigned long write_object(struct sha1file *f,
enum object_type obj_type;
int to_reuse = 0;
if (entry->preferred_base)
return 0;
obj_type = entry->type;
if (! entry->in_pack)
to_reuse = 0; /* can't reuse what we don't have */
else if (obj_type == OBJ_DELTA)
else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
to_reuse = 1; /* check_object() decided it for us */
else if (obj_type != entry->in_pack_type)
to_reuse = 0; /* pack has delta which is unusable */
@@ -380,18 +377,35 @@ static unsigned long write_object(struct sha1file *f,
if (entry->delta) {
buf = delta_against(buf, size, entry);
size = entry->delta_size;
obj_type = OBJ_DELTA;
obj_type = (allow_ofs_delta && entry->delta->offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA;
}
/*
* The object header is a byte of 'type' followed by zero or
* more bytes of length. For deltas, the 20 bytes of delta
* sha1 follows that.
* more bytes of length.
*/
hdrlen = encode_header(obj_type, size, header);
sha1write(f, header, hdrlen);
if (entry->delta) {
sha1write(f, entry->delta, 20);
if (obj_type == OBJ_OFS_DELTA) {
/*
* Deltas with relative base contain an additional
* encoding of the relative offset for the delta
* base from this object's position in the pack.
*/
unsigned long ofs = entry->offset - entry->delta->offset;
unsigned pos = sizeof(header) - 1;
header[pos] = ofs & 127;
while (ofs >>= 7)
header[--pos] = 128 | (--ofs & 127);
sha1write(f, header + pos, sizeof(header) - pos);
hdrlen += sizeof(header) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
/*
* Deltas with a base reference contain
* an additional 20 bytes for the base sha1.
*/
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
}
datalen = sha1write_compressed(f, buf, size);
@@ -399,21 +413,41 @@ static unsigned long write_object(struct sha1file *f,
}
else {
struct packed_git *p = entry->in_pack;
if (entry->delta) {
obj_type = (allow_ofs_delta && entry->delta->offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA;
reused_delta++;
}
hdrlen = encode_header(obj_type, entry->size, header);
sha1write(f, header, hdrlen);
if (obj_type == OBJ_OFS_DELTA) {
unsigned long ofs = entry->offset - entry->delta->offset;
unsigned pos = sizeof(header) - 1;
header[pos] = ofs & 127;
while (ofs >>= 7)
header[--pos] = 128 | (--ofs & 127);
sha1write(f, header + pos, sizeof(header) - pos);
hdrlen += sizeof(header) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
}
use_packed_git(p);
datalen = find_packed_object_size(p, entry->in_pack_offset);
buf = (char *) p->pack_base + entry->in_pack_offset;
if (revalidate_pack_entry(entry, buf, datalen))
buf = (char *) p->pack_base
+ entry->in_pack_offset
+ entry->in_pack_header_size;
datalen = find_packed_object_size(p, entry->in_pack_offset)
- entry->in_pack_header_size;
//fprintf(stderr, "reusing %d at %d header %d size %d\n", obj_type, entry->in_pack_offset, entry->in_pack_header_size, datalen);
if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
sha1write(f, buf, datalen);
unuse_packed_git(p);
hdrlen = 0; /* not really */
if (obj_type == OBJ_DELTA)
reused_delta++;
reused++;
}
if (obj_type == OBJ_DELTA)
if (entry->delta)
written_delta++;
written++;
return hdrlen + datalen;
@@ -423,17 +457,16 @@ static unsigned long write_one(struct sha1file *f,
struct object_entry *e,
unsigned long offset)
{
if (e->offset)
if (e->offset || e->preferred_base)
/* offset starts from header size and cannot be zero
* if it is written already.
*/
return offset;
e->offset = offset;
offset += write_object(f, e);
/* if we are deltified, write out its base object. */
/* if we are deltified, write out its base object first. */
if (e->delta)
offset = write_one(f, e->delta, offset);
return offset;
e->offset = offset;
return offset + write_object(f, e);
}
static void write_pack_file(void)
@@ -899,26 +932,64 @@ static void check_object(struct object_entry *entry)
char type[20];
if (entry->in_pack && !entry->preferred_base) {
unsigned char base[20];
unsigned long size;
struct object_entry *base_entry;
struct packed_git *p = entry->in_pack;
unsigned long left = p->pack_size - entry->in_pack_offset;
unsigned long size, used;
unsigned char *buf;
struct object_entry *base_entry = NULL;
use_packed_git(p);
buf = p->pack_base;
buf += entry->in_pack_offset;
/* We want in_pack_type even if we do not reuse delta.
* There is no point not reusing non-delta representations.
*/
check_reuse_pack_delta(entry->in_pack,
entry->in_pack_offset,
base, &size,
&entry->in_pack_type);
used = unpack_object_header_gently(buf, left,
&entry->in_pack_type, &size);
if (!used || left - used <= 20)
die("corrupt pack for %s", sha1_to_hex(entry->sha1));
/* Check if it is delta, and the base is also an object
* we are going to pack. If so we will reuse the existing
* delta.
*/
if (!no_reuse_delta &&
entry->in_pack_type == OBJ_DELTA &&
(base_entry = locate_object_entry(base)) &&
(!base_entry->preferred_base)) {
if (!no_reuse_delta) {
unsigned char c, *base_name;
unsigned long ofs;
/* there is at least 20 bytes left in the pack */
switch (entry->in_pack_type) {
case OBJ_REF_DELTA:
base_name = buf + used;
used += 20;
break;
case OBJ_OFS_DELTA:
c = buf[used++];
ofs = c & 127;
while (c & 128) {
ofs += 1;
if (!ofs || ofs & ~(~0UL >> 7))
die("delta base offset overflow in pack for %s",
sha1_to_hex(entry->sha1));
c = buf[used++];
ofs = (ofs << 7) + (c & 127);
}
if (ofs >= entry->in_pack_offset)
die("delta base offset out of bound for %s",
sha1_to_hex(entry->sha1));
ofs = entry->in_pack_offset - ofs;
base_name = find_packed_object_name(p, ofs);
break;
default:
base_name = NULL;
}
if (base_name)
base_entry = locate_object_entry(base_name);
}
unuse_packed_git(p);
entry->in_pack_header_size = used;
if (base_entry && !base_entry->preferred_base) {
/* Depth value does not matter - find_deltas()
* will never consider reused delta as the
@@ -927,9 +998,9 @@ static void check_object(struct object_entry *entry)
*/
/* uncompressed size of the delta data */
entry->size = entry->delta_size = size;
entry->size = size;
entry->delta = base_entry;
entry->type = OBJ_DELTA;
entry->type = entry->in_pack_type;
entry->delta_sibling = base_entry->delta_child;
base_entry->delta_child = entry;
@@ -1484,6 +1555,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
no_reuse_delta = 1;
continue;
}
if (!strcmp("--delta-base-offset", arg)) {
allow_ofs_delta = 1;
continue;
}
if (!strcmp("--stdout", arg)) {
pack_to_stdout = 1;
continue;

View File

@@ -15,7 +15,7 @@ static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-fil
/* We always read in 4kB chunks. */
static unsigned char buffer[4096];
static unsigned long offset, len;
static unsigned long offset, len, consumed_bytes;
static SHA_CTX ctx;
/*
@@ -51,6 +51,7 @@ static void use(int bytes)
die("used more bytes than were available");
len -= bytes;
offset += bytes;
consumed_bytes += bytes;
}
static void *get_data(unsigned long size)
@@ -89,35 +90,49 @@ static void *get_data(unsigned long size)
struct delta_info {
unsigned char base_sha1[20];
unsigned long base_offset;
unsigned long size;
void *delta;
unsigned nr;
struct delta_info *next;
};
static struct delta_info *delta_list;
static void add_delta_to_list(unsigned char *base_sha1, void *delta, unsigned long size)
static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
unsigned long base_offset,
void *delta, unsigned long size)
{
struct delta_info *info = xmalloc(sizeof(*info));
hashcpy(info->base_sha1, base_sha1);
info->base_offset = base_offset;
info->size = size;
info->delta = delta;
info->nr = nr;
info->next = delta_list;
delta_list = info;
}
static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size);
static void write_object(void *buf, unsigned long size, const char *type)
{
struct obj_info {
unsigned long offset;
unsigned char sha1[20];
if (write_sha1_file(buf, size, type, sha1) < 0)
};
static struct obj_info *obj_list;
static void added_object(unsigned nr, const char *type, void *data,
unsigned long size);
static void write_object(unsigned nr, void *buf, unsigned long size,
const char *type)
{
if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
die("failed to write object");
added_object(sha1, type, buf, size);
added_object(nr, type, buf, size);
}
static void resolve_delta(const char *type,
static void resolve_delta(unsigned nr, const char *type,
void *base, unsigned long base_size,
void *delta, unsigned long delta_size)
{
@@ -130,20 +145,23 @@ static void resolve_delta(const char *type,
if (!result)
die("failed to apply delta");
free(delta);
write_object(result, result_size, type);
write_object(nr, result, result_size, type);
free(result);
}
static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size)
static void added_object(unsigned nr, const char *type, void *data,
unsigned long size)
{
struct delta_info **p = &delta_list;
struct delta_info *info;
while ((info = *p) != NULL) {
if (!hashcmp(info->base_sha1, sha1)) {
if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
info->base_offset == obj_list[nr].offset) {
*p = info->next;
p = &delta_list;
resolve_delta(type, data, size, info->delta, info->size);
resolve_delta(info->nr, type, data, size,
info->delta, info->size);
free(info);
continue;
}
@@ -151,7 +169,8 @@ static void added_object(unsigned char *sha1, const char *type, void *data, unsi
}
}
static void unpack_non_delta_entry(enum object_type kind, unsigned long size)
static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
unsigned nr)
{
void *buf = get_data(size);
const char *type;
@@ -164,30 +183,80 @@ static void unpack_non_delta_entry(enum object_type kind, unsigned long size)
default: die("bad type %d", kind);
}
if (!dry_run && buf)
write_object(buf, size, type);
write_object(nr, buf, size, type);
free(buf);
}
static void unpack_delta_entry(unsigned long delta_size)
static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
unsigned nr)
{
void *delta_data, *base;
unsigned long base_size;
char type[20];
unsigned char base_sha1[20];
hashcpy(base_sha1, fill(20));
use(20);
if (kind == OBJ_REF_DELTA) {
hashcpy(base_sha1, fill(20));
use(20);
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
return;
}
if (!has_sha1_file(base_sha1)) {
hashcpy(obj_list[nr].sha1, null_sha1);
add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
return;
}
} else {
unsigned base_found = 0;
unsigned char *pack, c;
unsigned long base_offset;
unsigned lo, mid, hi;
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
return;
pack = fill(1);
c = *pack;
use(1);
base_offset = c & 127;
while (c & 128) {
base_offset += 1;
if (!base_offset || base_offset & ~(~0UL >> 7))
die("offset value overflow for delta base object");
pack = fill(1);
c = *pack;
use(1);
base_offset = (base_offset << 7) + (c & 127);
}
base_offset = obj_list[nr].offset - base_offset;
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
return;
}
lo = 0;
hi = nr;
while (lo < hi) {
mid = (lo + hi)/2;
if (base_offset < obj_list[mid].offset) {
hi = mid;
} else if (base_offset > obj_list[mid].offset) {
lo = mid + 1;
} else {
hashcpy(base_sha1, obj_list[mid].sha1);
base_found = !is_null_sha1(base_sha1);
break;
}
}
if (!base_found) {
/* The delta base object is itself a delta that
has not been resolved yet. */
hashcpy(obj_list[nr].sha1, null_sha1);
add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
return;
}
}
if (!has_sha1_file(base_sha1)) {
add_delta_to_list(base_sha1, delta_data, delta_size);
return;
}
base = read_sha1_file(base_sha1, type, &base_size);
if (!base) {
error("failed to read delta-pack base object %s",
@@ -197,7 +266,7 @@ static void unpack_delta_entry(unsigned long delta_size)
has_errors = 1;
return;
}
resolve_delta(type, base, base_size, delta_data, delta_size);
resolve_delta(nr, type, base, base_size, delta_data, delta_size);
free(base);
}
@@ -208,6 +277,8 @@ static void unpack_one(unsigned nr, unsigned total)
unsigned long size;
enum object_type type;
obj_list[nr].offset = consumed_bytes;
pack = fill(1);
c = *pack;
use(1);
@@ -216,7 +287,7 @@ static void unpack_one(unsigned nr, unsigned total)
shift = 4;
while (c & 0x80) {
pack = fill(1);
c = *pack++;
c = *pack;
use(1);
size += (c & 0x7f) << shift;
shift += 7;
@@ -225,13 +296,14 @@ static void unpack_one(unsigned nr, unsigned total)
static unsigned long last_sec;
static unsigned last_percent;
struct timeval now;
unsigned percentage = (nr * 100) / total;
unsigned percentage = ((nr+1) * 100) / total;
gettimeofday(&now, NULL);
if (percentage != last_percent || now.tv_sec != last_sec) {
last_sec = now.tv_sec;
last_percent = percentage;
fprintf(stderr, "%4u%% (%u/%u) done\r", percentage, nr, total);
fprintf(stderr, "%4u%% (%u/%u) done\r",
percentage, (nr+1), total);
}
}
switch (type) {
@@ -239,10 +311,11 @@ static void unpack_one(unsigned nr, unsigned total)
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
unpack_non_delta_entry(type, size);
unpack_non_delta_entry(type, size, nr);
return;
case OBJ_DELTA:
unpack_delta_entry(size);
case OBJ_REF_DELTA:
case OBJ_OFS_DELTA:
unpack_delta_entry(type, size, nr);
return;
default:
error("bad object type %d", type);
@@ -265,9 +338,10 @@ static void unpack_all(void)
die("unknown pack file version %d", ntohl(hdr->hdr_version));
fprintf(stderr, "Unpacking %d objects\n", nr_objects);
obj_list = xmalloc(nr_objects * sizeof(*obj_list));
use(sizeof(struct pack_header));
for (i = 0; i < nr_objects; i++)
unpack_one(i+1, nr_objects);
unpack_one(i, nr_objects);
if (delta_list)
die("unresolved deltas left after unpacking");
}

View File

@@ -274,8 +274,9 @@ enum object_type {
OBJ_TREE = 2,
OBJ_BLOB = 3,
OBJ_TAG = 4,
/* 5/6 for future expansion */
OBJ_DELTA = 7,
/* 5 for future expansion */
OBJ_OFS_DELTA = 6,
OBJ_REF_DELTA = 7,
OBJ_BAD,
};

View File

@@ -166,12 +166,13 @@ static int find_common(int fd[2], unsigned char *result_sha1,
}
if (!fetching)
packet_write(fd[1], "want %s%s%s%s%s\n",
packet_write(fd[1], "want %s%s%s%s%s%s\n",
sha1_to_hex(remote),
(multi_ack ? " multi_ack" : ""),
(use_sideband == 2 ? " side-band-64k" : ""),
(use_sideband == 1 ? " side-band" : ""),
(use_thin_pack ? " thin-pack" : ""));
(use_thin_pack ? " thin-pack" : ""),
" ofs-delta");
else
packet_write(fd[1], "want %s\n", sha1_to_hex(remote));
fetching++;

View File

@@ -18,10 +18,15 @@ struct object_entry
unsigned char sha1[20];
};
/*
 * Identifies the base of a delta, either by object name or by pack
 * offset.  Values are compared with memcmp() over the whole union, so
 * the union is zeroed before the (shorter) offset member is stored.
 */
union delta_base {
unsigned char sha1[20]; /* base object name, for OBJ_REF_DELTA entries */
unsigned long offset; /* base object's offset in the pack, for OBJ_OFS_DELTA entries */
};
struct delta_entry
{
struct object_entry *obj;
unsigned char base_sha1[20];
union delta_base base;
};
static const char *pack_name;
@@ -134,13 +139,13 @@ static void *unpack_entry_data(unsigned long offset,
static void *unpack_raw_entry(unsigned long offset,
enum object_type *obj_type,
unsigned long *obj_size,
unsigned char *delta_base,
union delta_base *delta_base,
unsigned long *next_obj_offset)
{
unsigned long pack_limit = pack_size - 20;
unsigned long pos = offset;
unsigned char c;
unsigned long size;
unsigned long size, base_offset;
unsigned shift;
enum object_type type;
void *data;
@@ -158,29 +163,46 @@ static void *unpack_raw_entry(unsigned long offset,
}
switch (type) {
case OBJ_DELTA:
case OBJ_REF_DELTA:
if (pos + 20 >= pack_limit)
bad_object(offset, "object extends past end of pack");
hashcpy(delta_base, pack_base + pos);
hashcpy(delta_base->sha1, pack_base + pos);
pos += 20;
/* fallthru */
break;
case OBJ_OFS_DELTA:
memset(delta_base, 0, sizeof(*delta_base));
c = pack_base[pos++];
base_offset = c & 127;
while (c & 128) {
base_offset += 1;
if (!base_offset || base_offset & ~(~0UL >> 7))
bad_object(offset, "offset value overflow for delta base object");
if (pos >= pack_limit)
bad_object(offset, "object extends past end of pack");
c = pack_base[pos++];
base_offset = (base_offset << 7) + (c & 127);
}
delta_base->offset = offset - base_offset;
if (delta_base->offset >= offset)
bad_object(offset, "delta base offset is out of bound");
break;
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
data = unpack_entry_data(offset, &pos, size);
break;
default:
bad_object(offset, "bad object type %d", type);
}
data = unpack_entry_data(offset, &pos, size);
*obj_type = type;
*obj_size = size;
*next_obj_offset = pos;
return data;
}
static int find_delta(const unsigned char *base_sha1)
static int find_delta(const union delta_base *base)
{
int first = 0, last = nr_deltas;
@@ -189,7 +211,7 @@ static int find_delta(const unsigned char *base_sha1)
struct delta_entry *delta = &deltas[next];
int cmp;
cmp = hashcmp(base_sha1, delta->base_sha1);
cmp = memcmp(base, &delta->base, sizeof(*base));
if (!cmp)
return next;
if (cmp < 0) {
@@ -201,18 +223,18 @@ static int find_delta(const unsigned char *base_sha1)
return -first-1;
}
static int find_deltas_based_on_sha1(const unsigned char *base_sha1,
int *first_index, int *last_index)
static int find_delta_childs(const union delta_base *base,
int *first_index, int *last_index)
{
int first = find_delta(base_sha1);
int first = find_delta(base);
int last = first;
int end = nr_deltas - 1;
if (first < 0)
return -1;
while (first > 0 && !hashcmp(deltas[first - 1].base_sha1, base_sha1))
while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base)))
--first;
while (last < end && !hashcmp(deltas[last + 1].base_sha1, base_sha1))
while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base)))
++last;
*first_index = first;
*last_index = last;
@@ -253,13 +275,13 @@ static void resolve_delta(struct delta_entry *delta, void *base_data,
void *result;
unsigned long result_size;
enum object_type delta_type;
unsigned char base_sha1[20];
union delta_base delta_base;
unsigned long next_obj_offset;
int j, first, last;
obj->real_type = type;
delta_data = unpack_raw_entry(obj->offset, &delta_type,
&delta_size, base_sha1,
&delta_size, &delta_base,
&next_obj_offset);
result = patch_delta(base_data, base_size, delta_data, delta_size,
&result_size);
@@ -267,10 +289,22 @@ static void resolve_delta(struct delta_entry *delta, void *base_data,
if (!result)
bad_object(obj->offset, "failed to apply delta");
sha1_object(result, result_size, type, obj->sha1);
if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) {
hashcpy(delta_base.sha1, obj->sha1);
if (!find_delta_childs(&delta_base, &first, &last)) {
for (j = first; j <= last; j++)
resolve_delta(&deltas[j], result, result_size, type);
if (deltas[j].obj->type == OBJ_REF_DELTA)
resolve_delta(&deltas[j], result, result_size, type);
}
memset(&delta_base, 0, sizeof(delta_base));
delta_base.offset = obj->offset;
if (!find_delta_childs(&delta_base, &first, &last)) {
for (j = first; j <= last; j++)
if (deltas[j].obj->type == OBJ_OFS_DELTA)
resolve_delta(&deltas[j], result, result_size, type);
}
free(result);
}
@@ -278,14 +312,14 @@ static int compare_delta_entry(const void *a, const void *b)
{
const struct delta_entry *delta_a = a;
const struct delta_entry *delta_b = b;
return hashcmp(delta_a->base_sha1, delta_b->base_sha1);
return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base));
}
static void parse_pack_objects(void)
{
int i;
unsigned long offset = sizeof(struct pack_header);
unsigned char base_sha1[20];
struct delta_entry *delta = deltas;
void *data;
unsigned long data_size;
@@ -299,12 +333,12 @@ static void parse_pack_objects(void)
struct object_entry *obj = &objects[i];
obj->offset = offset;
data = unpack_raw_entry(offset, &obj->type, &data_size,
base_sha1, &offset);
&delta->base, &offset);
obj->real_type = obj->type;
if (obj->type == OBJ_DELTA) {
struct delta_entry *delta = &deltas[nr_deltas++];
if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
nr_deltas++;
delta->obj = obj;
hashcpy(delta->base_sha1, base_sha1);
delta++;
} else
sha1_object(data, data_size, obj->type, obj->sha1);
free(data);
@@ -312,7 +346,7 @@ static void parse_pack_objects(void)
if (offset != pack_size - 20)
die("packfile '%s' has junk at the end", pack_name);
/* Sort deltas by base SHA1 for fast searching */
/* Sort deltas by base SHA1/offset for fast searching */
qsort(deltas, nr_deltas, sizeof(struct delta_entry),
compare_delta_entry);
@@ -326,22 +360,37 @@ static void parse_pack_objects(void)
*/
for (i = 0; i < nr_objects; i++) {
struct object_entry *obj = &objects[i];
int j, first, last;
union delta_base base;
int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
if (obj->type == OBJ_DELTA)
if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
continue;
if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
hashcpy(base.sha1, obj->sha1);
ref = !find_delta_childs(&base, &ref_first, &ref_last);
memset(&base, 0, sizeof(base));
base.offset = obj->offset;
ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
if (!ref && !ofs)
continue;
data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
base_sha1, &offset);
for (j = first; j <= last; j++)
resolve_delta(&deltas[j], data, data_size, obj->type);
&base, &offset);
if (ref)
for (j = ref_first; j <= ref_last; j++)
if (deltas[j].obj->type == OBJ_REF_DELTA)
resolve_delta(&deltas[j], data,
data_size, obj->type);
if (ofs)
for (j = ofs_first; j <= ofs_last; j++)
if (deltas[j].obj->type == OBJ_OFS_DELTA)
resolve_delta(&deltas[j], data,
data_size, obj->type);
free(data);
}
/* Check for unresolved deltas */
for (i = 0; i < nr_deltas; i++) {
if (deltas[i].obj->real_type == OBJ_DELTA)
if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
deltas[i].obj->real_type == OBJ_OFS_DELTA)
die("packfile '%s' has unresolved deltas", pack_name);
}
}

3
pack.h
View File

@@ -16,7 +16,4 @@ struct pack_header {
};
extern int verify_pack(struct packed_git *, int);
extern int check_reuse_pack_delta(struct packed_git *, unsigned long,
unsigned char *, unsigned long *,
enum object_type *);
#endif

View File

@@ -883,26 +883,61 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
return unpack_sha1_rest(&stream, hdr, *size);
}
/*
 * Decode the base-object reference that follows a delta entry's header.
 *
 *   p                 pack holding the delta
 *   offset            pack offset of the base reference itself
 *   kind              OBJ_OFS_DELTA or OBJ_REF_DELTA
 *   delta_obj_offset  pack offset where the delta entry begins
 *   base_obj_offset   out: pack offset of the base object
 *
 * Returns the pack offset just past the base reference.  Dies when the
 * pack is truncated, the encoded offset overflows or points outside the
 * range before the delta, the named base is not in this pack, or 'kind'
 * is not a delta type.
 */
static unsigned long get_delta_base(struct packed_git *p,
				    unsigned long offset,
				    enum object_type kind,
				    unsigned long delta_obj_offset,
				    unsigned long *base_obj_offset)
{
	unsigned char *ref = (unsigned char *) p->pack_base + offset;
	unsigned long base;

	/* either delta type needs at least 20 more bytes in the pack */
	if (p->pack_size <= offset + 20)
		die("truncated pack file");

	switch (kind) {
	case OBJ_OFS_DELTA: {
		/*
		 * The distance back to the base is stored 7 bits per
		 * byte, most significant group first; the high bit
		 * marks a continuation, and one is added for every
		 * continuation byte.
		 */
		unsigned nbytes = 0;
		unsigned char byte = ref[nbytes++];
		unsigned long distance = byte & 127;

		while (byte & 128) {
			distance += 1;
			if (!distance || distance & ~(~0UL >> 7))
				die("offset value overflow for delta base object");
			byte = ref[nbytes++];
			distance = (distance << 7) + (byte & 127);
		}
		base = delta_obj_offset - distance;
		/* rejects a zero distance as well as unsigned wrap-around */
		if (base >= delta_obj_offset)
			die("delta base offset out of bound");
		offset += nbytes;
		break;
	}
	case OBJ_REF_DELTA:
		/* The base entry _must_ be in the same pack */
		base = find_pack_entry_one(ref, p);
		if (!base)
			die("failed to find delta-pack base object %s",
			    sha1_to_hex(ref));
		offset += 20;
		break;
	default:
		die("I am totally screwed");
	}

	*base_obj_offset = base;
	return offset;
}
/* forward declaration for a mutually recursive function */
static int packed_object_info(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep);
static int packed_delta_info(struct packed_git *p,
unsigned long offset,
enum object_type kind,
unsigned long obj_offset,
char *type,
unsigned long *sizep)
{
unsigned long base_offset;
unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset;
if (p->pack_size < offset + 20)
die("truncated pack file");
/* The base entry _must_ be in the same pack */
base_offset = find_pack_entry_one(base_sha1, p);
if (!base_offset)
die("failed to find delta-pack base object %s",
sha1_to_hex(base_sha1));
offset += 20;
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
/* We choose to only get the type of the base object and
* ignore potentially corrupt pack file that expects the delta
@@ -965,25 +1000,6 @@ static unsigned long unpack_object_header(struct packed_git *p, unsigned long of
return offset + used;
}
/*
 * Read the header of the entry at 'offset' in pack 'p', storing its
 * type in *kindp and its size in *sizep.  When the entry is an
 * OBJ_DELTA, also copy the base object name that follows the header
 * into 'base' and return 0; for any other type return -1 and leave
 * 'base' untouched.
 */
int check_reuse_pack_delta(struct packed_git *p, unsigned long offset,
			   unsigned char *base, unsigned long *sizep,
			   enum object_type *kindp)
{
	unsigned long after_header;
	int status;

	use_packed_git(p);
	after_header = unpack_object_header(p, offset, kindp, sizep);
	if (*kindp == OBJ_DELTA) {
		hashcpy(base, (unsigned char *) p->pack_base + after_header);
		status = 0;
	} else {
		status = -1;
	}
	unuse_packed_git(p);
	return status;
}
void packed_object_info_detail(struct packed_git *p,
unsigned long offset,
char *type,
@@ -992,11 +1008,12 @@ void packed_object_info_detail(struct packed_git *p,
unsigned int *delta_chain_length,
unsigned char *base_sha1)
{
unsigned long val;
unsigned long obj_offset, val;
unsigned char *next_sha1;
enum object_type kind;
*delta_chain_length = 0;
obj_offset = offset;
offset = unpack_object_header(p, offset, &kind, size);
for (;;) {
@@ -1011,7 +1028,13 @@ void packed_object_info_detail(struct packed_git *p,
strcpy(type, type_names[kind]);
*store_size = 0; /* notyet */
return;
case OBJ_DELTA:
case OBJ_OFS_DELTA:
get_delta_base(p, offset, kind, obj_offset, &offset);
if (*delta_chain_length == 0) {
/* TODO: find base_sha1 as pointed by offset */
}
break;
case OBJ_REF_DELTA:
if (p->pack_size <= offset + 20)
die("pack file %s records an incomplete delta base",
p->pack_name);
@@ -1021,6 +1044,7 @@ void packed_object_info_detail(struct packed_git *p,
offset = find_pack_entry_one(next_sha1, p);
break;
}
obj_offset = offset;
offset = unpack_object_header(p, offset, &kind, &val);
(*delta_chain_length)++;
}
@@ -1029,15 +1053,15 @@ void packed_object_info_detail(struct packed_git *p,
static int packed_object_info(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep)
{
unsigned long size;
unsigned long size, obj_offset = offset;
enum object_type kind;
offset = unpack_object_header(p, offset, &kind, &size);
if (kind == OBJ_DELTA)
return packed_delta_info(p, offset, type, sizep);
switch (kind) {
case OBJ_OFS_DELTA:
case OBJ_REF_DELTA:
return packed_delta_info(p, offset, kind, obj_offset, type, sizep);
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
@@ -1083,23 +1107,15 @@ static void *unpack_compressed_entry(struct packed_git *p,
static void *unpack_delta_entry(struct packed_git *p,
unsigned long offset,
unsigned long delta_size,
enum object_type kind,
unsigned long obj_offset,
char *type,
unsigned long *sizep)
{
void *delta_data, *result, *base;
unsigned long result_size, base_size, base_offset;
unsigned char *base_sha1;
if (p->pack_size < offset + 20)
die("truncated pack file");
/* The base entry _must_ be in the same pack */
base_sha1 = (unsigned char*)p->pack_base + offset;
base_offset = find_pack_entry_one(base_sha1, p);
if (!base_offset)
die("failed to find delta-pack base object %s",
sha1_to_hex(base_sha1));
offset += 20;
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
base = unpack_entry_gently(p, base_offset, type, &base_size);
if (!base)
die("failed to read delta base object at %lu from %s",
@@ -1136,13 +1152,14 @@ static void *unpack_entry(struct pack_entry *entry,
void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
char *type, unsigned long *sizep)
{
unsigned long size;
unsigned long size, obj_offset = offset;
enum object_type kind;
offset = unpack_object_header(p, offset, &kind, &size);
switch (kind) {
case OBJ_DELTA:
return unpack_delta_entry(p, offset, size, type, sizep);
case OBJ_OFS_DELTA:
case OBJ_REF_DELTA:
return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep);
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:

View File

@@ -22,7 +22,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n
static unsigned long oldest_have;
static int multi_ack, nr_our_refs;
static int use_thin_pack;
static int use_thin_pack, use_ofs_delta;
static struct object_array have_obj;
static struct object_array want_obj;
static unsigned int timeout;
@@ -143,7 +143,9 @@ static void create_pack_file(void)
close(pu_pipe[1]);
close(pe_pipe[0]);
close(pe_pipe[1]);
execl_git_cmd("pack-objects", "--stdout", "--progress", NULL);
execl_git_cmd("pack-objects", "--stdout", "--progress",
use_ofs_delta ? "--delta-base-offset" : NULL,
NULL);
kill(pid_rev_list, SIGKILL);
die("git-upload-pack: unable to exec git-pack-objects");
}
@@ -476,6 +478,8 @@ static void receive_needs(void)
multi_ack = 1;
if (strstr(line+45, "thin-pack"))
use_thin_pack = 1;
if (strstr(line+45, "ofs-delta"))
use_ofs_delta = 1;
if (strstr(line+45, "side-band-64k"))
use_sideband = LARGE_PACKET_MAX;
else if (strstr(line+45, "side-band"))
@@ -501,7 +505,7 @@ static void receive_needs(void)
static int send_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
{
static const char *capabilities = "multi_ack thin-pack side-band side-band-64k";
static const char *capabilities = "multi_ack thin-pack side-band side-band-64k ofs-delta";
struct object *o = parse_object(sha1);
if (!o)