From eb32d236df0c16b936b04f0c5402addb61cdb311 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:06:49 -0400 Subject: [PATCH 1/6] introduce delta objects with offset to base This adds a new object, namely OBJ_OFS_DELTA, renames OBJ_DELTA to OBJ_REF_DELTA to better make the distinction between those two delta objects, and adds support for the handling of those new delta objects in sha1_file.c only. The OBJ_OFS_DELTA contains a relative offset from the delta object's position in a pack instead of the 20-byte SHA1 reference to identify the base object. Since the base is likely to be not so far away, the relative offset is more likely to have a smaller encoding on average than an absolute offset. And for those delta objects the base must always be stored first because there is no way to know the distance of later objects when streaming a pack. Hence this relative offset is always meant to be negative. The offset encoding is slightly denser than the one used for object size -- credits to (whoever this is) for bringing it to my attention. This allows for pack size reduction between 3.2% (Linux-2.6) to over 5% (linux-historic). Runtime pack access should be faster too since delta replay does skip a search in the pack index for each delta in a chain. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 16 +++---- builtin-unpack-objects.c | 2 +- cache.h | 5 ++- index-pack.c | 8 ++-- sha1_file.c | 96 +++++++++++++++++++++++++++------------- 5 files changed, 82 insertions(+), 45 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 96c069a81d..c62734a2a6 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -232,7 +232,7 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha int n = 1; unsigned char c; - if (type < OBJ_COMMIT || type > OBJ_DELTA) + if (type < OBJ_COMMIT || type > OBJ_REF_DELTA) die("bad type %d", type); c = (type << 4) | (size & 15); @@ -297,7 +297,7 @@ static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data used = unpack_object_header_gently(data, len, &type, &size); if (!used) return -1; - if (type == OBJ_DELTA) + if (type == OBJ_REF_DELTA) used += 20; /* skip base object name */ data += used; len -= used; @@ -340,7 +340,7 @@ static unsigned long write_object(struct sha1file *f, obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (obj_type == OBJ_DELTA) + else if (obj_type == OBJ_REF_DELTA) to_reuse = 1; /* check_object() decided it for us */ else if (obj_type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ @@ -380,7 +380,7 @@ static unsigned long write_object(struct sha1file *f, if (entry->delta) { buf = delta_against(buf, size, entry); size = entry->delta_size; - obj_type = OBJ_DELTA; + obj_type = OBJ_REF_DELTA; } /* * The object header is a byte of 'type' followed by zero or @@ -409,11 +409,11 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, buf, datalen); unuse_packed_git(p); hdrlen = 0; /* not really */ - if (obj_type == OBJ_DELTA) + if (obj_type == OBJ_REF_DELTA) reused_delta++; reused++; } - if (obj_type == OBJ_DELTA) + if (obj_type == OBJ_REF_DELTA) written_delta++; written++; return hdrlen + datalen; @@ -916,7 +916,7 @@ static void check_object(struct object_entry *entry) * delta. */ if (!no_reuse_delta && - entry->in_pack_type == OBJ_DELTA && + entry->in_pack_type == OBJ_REF_DELTA && (base_entry = locate_object_entry(base)) && (!base_entry->preferred_base)) { @@ -929,7 +929,7 @@ static void check_object(struct object_entry *entry) /* uncompressed size of the delta data */ entry->size = entry->delta_size = size; entry->delta = base_entry; - entry->type = OBJ_DELTA; + entry->type = OBJ_REF_DELTA; entry->delta_sibling = base_entry->delta_child; base_entry->delta_child = entry; diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 4f96bcae32..c6c6368956 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -241,7 +241,7 @@ static void unpack_one(unsigned nr, unsigned total) case OBJ_TAG: unpack_non_delta_entry(type, size); return; - case OBJ_DELTA: + case OBJ_REF_DELTA: unpack_delta_entry(size); return; default: diff --git a/cache.h b/cache.h index 97debd03c5..3c5415e77d 100644 --- a/cache.h +++ b/cache.h @@ -274,8 +274,9 @@ enum object_type { OBJ_TREE = 2, OBJ_BLOB = 3, OBJ_TAG = 4, - /* 5/6 for future expansion */ - OBJ_DELTA = 7, + /* 5 for future expansion */ + OBJ_OFS_DELTA = 6, + OBJ_REF_DELTA = 7, OBJ_BAD, }; diff --git a/index-pack.c b/index-pack.c index 80bc6cb45b..aef7f0a32e 100644 --- a/index-pack.c +++ b/index-pack.c @@ -158,7 +158,7 @@ static void *unpack_raw_entry(unsigned long offset, } switch (type) { - case OBJ_DELTA: + case OBJ_REF_DELTA: if (pos + 20 >= pack_limit) bad_object(offset, "object extends past end of pack"); hashcpy(delta_base, pack_base + pos); @@ -301,7 +301,7 @@ static void parse_pack_objects(void) data = unpack_raw_entry(offset, &obj->type, &data_size, base_sha1, &offset); obj->real_type = obj->type; - if (obj->type == OBJ_DELTA) { + if (obj->type == OBJ_REF_DELTA) { struct delta_entry *delta = &deltas[nr_deltas++]; delta->obj = obj; hashcpy(delta->base_sha1, base_sha1); @@ -328,7 +328,7 @@ static void parse_pack_objects(void) struct object_entry *obj = &objects[i]; int j, first, last; - if (obj->type == OBJ_DELTA) + if (obj->type == OBJ_REF_DELTA) continue; if (find_deltas_based_on_sha1(obj->sha1, &first, &last)) continue; @@ -341,7 +341,7 @@ static void parse_pack_objects(void) /* Check for unresolved deltas */ for (i = 0; i < nr_deltas; i++) { - if (deltas[i].obj->real_type == OBJ_DELTA) + if (deltas[i].obj->real_type == OBJ_REF_DELTA) die("packfile '%s' has unresolved deltas", pack_name); } } diff --git a/sha1_file.c b/sha1_file.c index 27b1ebb720..fdb4588280 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -883,26 +883,61 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l return unpack_sha1_rest(&stream, hdr, *size); } +static unsigned long get_delta_base(struct packed_git *p, + unsigned long offset, + enum object_type kind, + unsigned long delta_obj_offset, + unsigned long *base_obj_offset) +{ + unsigned char *base_info = (unsigned char *) p->pack_base + offset; + unsigned long base_offset; + + /* there must be at least 20 bytes left regardless of delta type */ + if (p->pack_size <= offset + 20) + die("truncated pack file"); + + if (kind == OBJ_OFS_DELTA) { + unsigned used = 0; + unsigned char c = base_info[used++]; + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || base_offset & ~(~0UL >> 7)) + die("offset value overflow for delta base object"); + c = base_info[used++]; + base_offset = (base_offset << 7) + (c & 127); + } + base_offset = delta_obj_offset - base_offset; + if (base_offset >= delta_obj_offset) + die("delta base offset out of bound"); + offset += used; + } else if (kind == OBJ_REF_DELTA) { + /* The base entry _must_ be in the same pack */ + base_offset = find_pack_entry_one(base_info, p); + if (!base_offset) + die("failed to find delta-pack base object %s", + sha1_to_hex(base_info)); + offset += 20; + } else + die("I am totally screwed"); + *base_obj_offset = base_offset; + return offset; +} + /* forward declaration for a mutually recursive function */ static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep); static int packed_delta_info(struct packed_git *p, unsigned long offset, + enum object_type kind, + unsigned long obj_offset, char *type, unsigned long *sizep) { unsigned long base_offset; - unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset; - if (p->pack_size < offset + 20) - die("truncated pack file"); - /* The base entry _must_ be in the same pack */ - base_offset = find_pack_entry_one(base_sha1, p); - if (!base_offset) - die("failed to find delta-pack base object %s", - sha1_to_hex(base_sha1)); - offset += 20; + offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta @@ -975,7 +1010,7 @@ int check_reuse_pack_delta(struct packed_git *p, unsigned long offset, use_packed_git(p); ptr = offset; ptr = unpack_object_header(p, ptr, kindp, sizep); - if (*kindp != OBJ_DELTA) + if (*kindp != OBJ_REF_DELTA) goto done; hashcpy(base, (unsigned char *) p->pack_base + ptr); status = 0; @@ -992,11 +1027,12 @@ void packed_object_info_detail(struct packed_git *p, unsigned int *delta_chain_length, unsigned char *base_sha1) { - unsigned long val; + unsigned long obj_offset, val; unsigned char *next_sha1; enum object_type kind; *delta_chain_length = 0; + obj_offset = offset; offset = unpack_object_header(p, offset, &kind, size); for (;;) { @@ -1011,7 +1047,13 @@ void packed_object_info_detail(struct packed_git *p, strcpy(type, type_names[kind]); *store_size = 0; /* notyet */ return; - case OBJ_DELTA: + case OBJ_OFS_DELTA: + get_delta_base(p, offset, kind, obj_offset, &offset); + if (*delta_chain_length == 0) { + /* TODO: find base_sha1 as pointed by offset */ + } + break; + case OBJ_REF_DELTA: if (p->pack_size <= offset + 20) die("pack file %s records an incomplete delta base", p->pack_name); @@ -1021,6 +1063,7 @@ void packed_object_info_detail(struct packed_git *p, offset = find_pack_entry_one(next_sha1, p); break; } + obj_offset = offset; offset = unpack_object_header(p, offset, &kind, &val); (*delta_chain_length)++; } @@ -1029,15 +1072,15 @@ void packed_object_info_detail(struct packed_git *p, static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { - unsigned long size; + unsigned long size, obj_offset = offset; enum object_type kind; offset = unpack_object_header(p, offset, &kind, &size); - if (kind == OBJ_DELTA) - return packed_delta_info(p, offset, type, sizep); - switch (kind) { + case OBJ_OFS_DELTA: + case OBJ_REF_DELTA: + return packed_delta_info(p, offset, kind, obj_offset, type, sizep); case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: @@ -1083,23 +1126,15 @@ static void *unpack_compressed_entry(struct packed_git *p, static void *unpack_delta_entry(struct packed_git *p, unsigned long offset, unsigned long delta_size, + enum object_type kind, + unsigned long obj_offset, char *type, unsigned long *sizep) { void *delta_data, *result, *base; unsigned long result_size, base_size, base_offset; - unsigned char *base_sha1; - - if (p->pack_size < offset + 20) - die("truncated pack file"); - /* The base entry _must_ be in the same pack */ - base_sha1 = (unsigned char*)p->pack_base + offset; - base_offset = find_pack_entry_one(base_sha1, p); - if (!base_offset) - die("failed to find delta-pack base object %s", - sha1_to_hex(base_sha1)); - offset += 20; + offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); base = unpack_entry_gently(p, base_offset, type, &base_size); if (!base) die("failed to read delta base object at %lu from %s", @@ -1136,13 +1171,14 @@ static void *unpack_entry(struct pack_entry *entry, void *unpack_entry_gently(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { - unsigned long size; + unsigned long size, obj_offset = offset; enum object_type kind; offset = unpack_object_header(p, offset, &kind, &size); switch (kind) { - case OBJ_DELTA: - return unpack_delta_entry(p, offset, size, type, sizep); + case OBJ_OFS_DELTA: + case OBJ_REF_DELTA: + return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep); case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: From 209c554ab45701fb78de77fdcec803fef2c8fd39 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:07:39 -0400 Subject: [PATCH 2/6] teach git-unpack-objects about deltas with offset to base For delta resolution to be possible, a list of sha1/offset tuple must be constructed in memory in order to load the appropriate base object. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-unpack-objects.c | 140 ++++++++++++++++++++++++++++++--------- 1 file changed, 107 insertions(+), 33 deletions(-) diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index c6c6368956..e70a71163d 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -15,7 +15,7 @@ static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-fil /* We always read in 4kB chunks. */ static unsigned char buffer[4096]; -static unsigned long offset, len; +static unsigned long offset, len, consumed_bytes; static SHA_CTX ctx; /* @@ -51,6 +51,7 @@ static void use(int bytes) die("used more bytes than were available"); len -= bytes; offset += bytes; + consumed_bytes += bytes; } static void *get_data(unsigned long size) @@ -89,35 +90,49 @@ static void *get_data(unsigned long size) struct delta_info { unsigned char base_sha1[20]; + unsigned long base_offset; unsigned long size; void *delta; + unsigned nr; struct delta_info *next; }; static struct delta_info *delta_list; -static void add_delta_to_list(unsigned char *base_sha1, void *delta, unsigned long size) +static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1, + unsigned long base_offset, + void *delta, unsigned long size) { struct delta_info *info = xmalloc(sizeof(*info)); hashcpy(info->base_sha1, base_sha1); + info->base_offset = base_offset; info->size = size; info->delta = delta; + info->nr = nr; info->next = delta_list; delta_list = info; } -static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size); - -static void write_object(void *buf, unsigned long size, const char *type) -{ +struct obj_info { + unsigned long offset; unsigned char sha1[20]; - if (write_sha1_file(buf, size, type, sha1) < 0) +}; + +static struct obj_info *obj_list; + +static void added_object(unsigned nr, const char *type, void *data, + unsigned long size); + +static void write_object(unsigned nr, void *buf, unsigned long size, + const char *type) +{ + if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0) die("failed to write object"); - added_object(sha1, type, buf, size); + added_object(nr, type, buf, size); } -static void resolve_delta(const char *type, +static void resolve_delta(unsigned nr, const char *type, void *base, unsigned long base_size, void *delta, unsigned long delta_size) { @@ -130,20 +145,23 @@ static void resolve_delta(const char *type, if (!result) die("failed to apply delta"); free(delta); - write_object(result, result_size, type); + write_object(nr, result, result_size, type); free(result); } -static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size) +static void added_object(unsigned nr, const char *type, void *data, + unsigned long size) { struct delta_info **p = &delta_list; struct delta_info *info; while ((info = *p) != NULL) { - if (!hashcmp(info->base_sha1, sha1)) { + if (!hashcmp(info->base_sha1, obj_list[nr].sha1) || + info->base_offset == obj_list[nr].offset) { *p = info->next; p = &delta_list; - resolve_delta(type, data, size, info->delta, info->size); + resolve_delta(info->nr, type, data, size, + info->delta, info->size); free(info); continue; } @@ -151,7 +169,8 @@ static void added_object(unsigned char *sha1, const char *type, void *data, unsi } } -static void unpack_non_delta_entry(enum object_type kind, unsigned long size) +static void unpack_non_delta_entry(enum object_type kind, unsigned long size, + unsigned nr) { void *buf = get_data(size); const char *type; @@ -164,30 +183,80 @@ static void unpack_non_delta_entry(enum object_type kind, unsigned long size) default: die("bad type %d", kind); } if (!dry_run && buf) - write_object(buf, size, type); + write_object(nr, buf, size, type); free(buf); } -static void unpack_delta_entry(unsigned long delta_size) +static void unpack_delta_entry(enum object_type kind, unsigned long delta_size, + unsigned nr) { void *delta_data, *base; unsigned long base_size; char type[20]; unsigned char base_sha1[20]; - hashcpy(base_sha1, fill(20)); - use(20); + if (kind == OBJ_REF_DELTA) { + hashcpy(base_sha1, fill(20)); + use(20); + delta_data = get_data(delta_size); + if (dry_run || !delta_data) { + free(delta_data); + return; + } + if (!has_sha1_file(base_sha1)) { + hashcpy(obj_list[nr].sha1, null_sha1); + add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size); + return; + } + } else { + unsigned base_found = 0; + unsigned char *pack, c; + unsigned long base_offset; + unsigned lo, mid, hi; - delta_data = get_data(delta_size); - if (dry_run || !delta_data) { - free(delta_data); - return; + pack = fill(1); + c = *pack; + use(1); + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || base_offset & ~(~0UL >> 7)) + die("offset value overflow for delta base object"); + pack = fill(1); + c = *pack; + use(1); + base_offset = (base_offset << 7) + (c & 127); + } + base_offset = obj_list[nr].offset - base_offset; + + delta_data = get_data(delta_size); + if (dry_run || !delta_data) { + free(delta_data); + return; + } + lo = 0; + hi = nr; + while (lo < hi) { + mid = (lo + hi)/2; + if (base_offset < obj_list[mid].offset) { + hi = mid; + } else if (base_offset > obj_list[mid].offset) { + lo = mid + 1; + } else { + hashcpy(base_sha1, obj_list[mid].sha1); + base_found = !is_null_sha1(base_sha1); + break; + } + } + if (!base_found) { + /* The delta base object is itself a delta that + has not been resolved yet. */ + hashcpy(obj_list[nr].sha1, null_sha1); + add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size); + return; + } } - if (!has_sha1_file(base_sha1)) { - add_delta_to_list(base_sha1, delta_data, delta_size); - return; - } base = read_sha1_file(base_sha1, type, &base_size); if (!base) { error("failed to read delta-pack base object %s", @@ -197,7 +266,7 @@ static void unpack_delta_entry(unsigned long delta_size) has_errors = 1; return; } - resolve_delta(type, base, base_size, delta_data, delta_size); + resolve_delta(nr, type, base, base_size, delta_data, delta_size); free(base); } @@ -208,6 +277,8 @@ static void unpack_one(unsigned nr, unsigned total) unsigned long size; enum object_type type; + obj_list[nr].offset = consumed_bytes; + pack = fill(1); c = *pack; use(1); @@ -216,7 +287,7 @@ static void unpack_one(unsigned nr, unsigned total) shift = 4; while (c & 0x80) { pack = fill(1); - c = *pack++; + c = *pack; use(1); size += (c & 0x7f) << shift; shift += 7; @@ -225,13 +296,14 @@ static void unpack_one(unsigned nr, unsigned total) static unsigned long last_sec; static unsigned last_percent; struct timeval now; - unsigned percentage = (nr * 100) / total; + unsigned percentage = ((nr+1) * 100) / total; gettimeofday(&now, NULL); if (percentage != last_percent || now.tv_sec != last_sec) { last_sec = now.tv_sec; last_percent = percentage; - fprintf(stderr, "%4u%% (%u/%u) done\r", percentage, nr, total); + fprintf(stderr, "%4u%% (%u/%u) done\r", + percentage, (nr+1), total); } } switch (type) { @@ -239,10 +311,11 @@ static void unpack_one(unsigned nr, unsigned total) case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - unpack_non_delta_entry(type, size); + unpack_non_delta_entry(type, size, nr); return; case OBJ_REF_DELTA: - unpack_delta_entry(size); + case OBJ_OFS_DELTA: + unpack_delta_entry(type, size, nr); return; default: error("bad object type %d", type); @@ -265,9 +338,10 @@ static void unpack_all(void) die("unknown pack file version %d", ntohl(hdr->hdr_version)); fprintf(stderr, "Unpacking %d objects\n", nr_objects); + obj_list = xmalloc(nr_objects * sizeof(*obj_list)); use(sizeof(struct pack_header)); for (i = 0; i < nr_objects; i++) - unpack_one(i+1, nr_objects); + unpack_one(i, nr_objects); if (delta_list) die("unresolved deltas left after unpacking"); } From 53dda6ff6263a3f350514d9edae600468c946ed4 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:08:33 -0400 Subject: [PATCH 3/6] teach git-index-pack about deltas with offset to base Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 111 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/index-pack.c b/index-pack.c index aef7f0a32e..fffddd25c9 100644 --- a/index-pack.c +++ b/index-pack.c @@ -18,10 +18,15 @@ struct object_entry unsigned char sha1[20]; }; +union delta_base { + unsigned char sha1[20]; + unsigned long offset; +}; + struct delta_entry { struct object_entry *obj; - unsigned char base_sha1[20]; + union delta_base base; }; static const char *pack_name; @@ -134,13 +139,13 @@ static void *unpack_entry_data(unsigned long offset, static void *unpack_raw_entry(unsigned long offset, enum object_type *obj_type, unsigned long *obj_size, - unsigned char *delta_base, + union delta_base *delta_base, unsigned long *next_obj_offset) { unsigned long pack_limit = pack_size - 20; unsigned long pos = offset; unsigned char c; - unsigned long size; + unsigned long size, base_offset; unsigned shift; enum object_type type; void *data; @@ -161,26 +166,43 @@ static void *unpack_raw_entry(unsigned long offset, case OBJ_REF_DELTA: if (pos + 20 >= pack_limit) bad_object(offset, "object extends past end of pack"); - hashcpy(delta_base, pack_base + pos); + hashcpy(delta_base->sha1, pack_base + pos); pos += 20; - /* fallthru */ + break; + case OBJ_OFS_DELTA: + memset(delta_base, 0, sizeof(*delta_base)); + c = pack_base[pos++]; + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || base_offset & ~(~0UL >> 7)) + bad_object(offset, "offset value overflow for delta base object"); + if (pos >= pack_limit) + bad_object(offset, "object extends past end of pack"); + c = pack_base[pos++]; + base_offset = (base_offset << 7) + (c & 127); + } + delta_base->offset = offset - base_offset; + if (delta_base->offset >= offset) + bad_object(offset, "delta base offset is out of bound"); + break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - data = unpack_entry_data(offset, &pos, size); break; default: bad_object(offset, "bad object type %d", type); } + data = unpack_entry_data(offset, &pos, size); *obj_type = type; *obj_size = size; *next_obj_offset = pos; return data; } -static int find_delta(const unsigned char *base_sha1) +static int find_delta(const union delta_base *base) { int first = 0, last = nr_deltas; @@ -189,7 +211,7 @@ static int find_delta(const unsigned char *base_sha1) struct delta_entry *delta = &deltas[next]; int cmp; - cmp = hashcmp(base_sha1, delta->base_sha1); + cmp = memcmp(base, &delta->base, sizeof(*base)); if (!cmp) return next; if (cmp < 0) { @@ -201,18 +223,18 @@ static int find_delta(const unsigned char *base_sha1) return -first-1; } -static int find_deltas_based_on_sha1(const unsigned char *base_sha1, - int *first_index, int *last_index) +static int find_delta_childs(const union delta_base *base, + int *first_index, int *last_index) { - int first = find_delta(base_sha1); + int first = find_delta(base); int last = first; int end = nr_deltas - 1; if (first < 0) return -1; - while (first > 0 && !hashcmp(deltas[first - 1].base_sha1, base_sha1)) + while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base))) --first; - while (last < end && !hashcmp(deltas[last + 1].base_sha1, base_sha1)) + while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base))) ++last; *first_index = first; *last_index = last; @@ -253,13 +275,13 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, void *result; unsigned long result_size; enum object_type delta_type; - unsigned char base_sha1[20]; + union delta_base delta_base; unsigned long next_obj_offset; int j, first, last; obj->real_type = type; delta_data = unpack_raw_entry(obj->offset, &delta_type, - &delta_size, base_sha1, + &delta_size, &delta_base, &next_obj_offset); result = patch_delta(base_data, base_size, delta_data, delta_size, &result_size); @@ -267,10 +289,22 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, if (!result) bad_object(obj->offset, "failed to apply delta"); sha1_object(result, result_size, type, obj->sha1); - if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) { + + hashcpy(delta_base.sha1, obj->sha1); + if (!find_delta_childs(&delta_base, &first, &last)) { for (j = first; j <= last; j++) - resolve_delta(&deltas[j], result, result_size, type); + if (deltas[j].obj->type == OBJ_REF_DELTA) + resolve_delta(&deltas[j], result, result_size, type); } + + memset(&delta_base, 0, sizeof(delta_base)); + delta_base.offset = obj->offset; + if (!find_delta_childs(&delta_base, &first, &last)) { + for (j = first; j <= last; j++) + if (deltas[j].obj->type == OBJ_OFS_DELTA) + resolve_delta(&deltas[j], result, result_size, type); + } + free(result); } @@ -278,14 +312,14 @@ static int compare_delta_entry(const void *a, const void *b) { const struct delta_entry *delta_a = a; const struct delta_entry *delta_b = b; - return hashcmp(delta_a->base_sha1, delta_b->base_sha1); + return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base)); } static void parse_pack_objects(void) { int i; unsigned long offset = sizeof(struct pack_header); - unsigned char base_sha1[20]; + struct delta_entry *delta = deltas; void *data; unsigned long data_size; @@ -299,12 +333,12 @@ static void parse_pack_objects(void) struct object_entry *obj = &objects[i]; obj->offset = offset; data = unpack_raw_entry(offset, &obj->type, &data_size, - base_sha1, &offset); + &delta->base, &offset); obj->real_type = obj->type; - if (obj->type == OBJ_REF_DELTA) { - struct delta_entry *delta = &deltas[nr_deltas++]; + if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + nr_deltas++; delta->obj = obj; - hashcpy(delta->base_sha1, base_sha1); + delta++; } else sha1_object(data, data_size, obj->type, obj->sha1); free(data); @@ -312,7 +346,7 @@ static void parse_pack_objects(void) if (offset != pack_size - 20) die("packfile '%s' has junk at the end", pack_name); - /* Sort deltas by base SHA1 for fast searching */ + /* Sort deltas by base SHA1/offset for fast searching */ qsort(deltas, nr_deltas, sizeof(struct delta_entry), compare_delta_entry); @@ -326,22 +360,37 @@ static void parse_pack_objects(void) */ for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - int j, first, last; + union delta_base base; + int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; - if (obj->type == OBJ_REF_DELTA) + if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; - if (find_deltas_based_on_sha1(obj->sha1, &first, &last)) + hashcpy(base.sha1, obj->sha1); + ref = !find_delta_childs(&base, &ref_first, &ref_last); + memset(&base, 0, sizeof(base)); + base.offset = obj->offset; + ofs = !find_delta_childs(&base, &ofs_first, &ofs_last); + if (!ref && !ofs) continue; data = unpack_raw_entry(obj->offset, &obj->type, &data_size, - base_sha1, &offset); - for (j = first; j <= last; j++) - resolve_delta(&deltas[j], data, data_size, obj->type); + &base, &offset); + if (ref) + for (j = ref_first; j <= ref_last; j++) + if (deltas[j].obj->type == OBJ_REF_DELTA) + resolve_delta(&deltas[j], data, + data_size, obj->type); + if (ofs) + for (j = ofs_first; j <= ofs_last; j++) + if (deltas[j].obj->type == OBJ_OFS_DELTA) + resolve_delta(&deltas[j], data, + data_size, obj->type); free(data); } /* Check for unresolved deltas */ for (i = 0; i < nr_deltas; i++) { - if (deltas[i].obj->real_type == OBJ_REF_DELTA) + if (deltas[i].obj->real_type == OBJ_REF_DELTA || + deltas[i].obj->real_type == OBJ_OFS_DELTA) die("packfile '%s' has unresolved deltas", pack_name); } } From be6b19145f64f62790049c06320c35011f7312a7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:09:44 -0400 Subject: [PATCH 4/6] make git-pack-objects able to create deltas with offset to base This is enabled with --delta-base-offset only, and doesn't work with pack data reuse yet. The idea is to allow for the fetch protocol to use an extension flag to notify the remote end that --delta-base-offset can be used with git-pack-objects. Eventually git-repack will always provide this flag. With this, all delta base objects are now pushed before deltas that depend on them. This is a requirements for OBJ_OFS_DELTA. This is not a requirement for OBJ_REF_DELTA but always doing so makes the code simpler. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 47 +++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index c62734a2a6..221264916d 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -60,6 +60,8 @@ static int non_empty; static int no_reuse_delta; static int local; static int incremental; +static int allow_ofs_delta; + static struct object_entry **sorted_by_sha, **sorted_by_type; static struct object_entry *objects; static int nr_objects, nr_alloc, nr_result; @@ -334,9 +336,6 @@ static unsigned long write_object(struct sha1file *f, enum object_type obj_type; int to_reuse = 0; - if (entry->preferred_base) - return 0; - obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ @@ -380,18 +379,35 @@ static unsigned long write_object(struct sha1file *f, if (entry->delta) { buf = delta_against(buf, size, entry); size = entry->delta_size; - obj_type = OBJ_REF_DELTA; + obj_type = (allow_ofs_delta && entry->delta->offset) ? + OBJ_OFS_DELTA : OBJ_REF_DELTA; } /* * The object header is a byte of 'type' followed by zero or - * more bytes of length. For deltas, the 20 bytes of delta - * sha1 follows that. + * more bytes of length. */ hdrlen = encode_header(obj_type, size, header); sha1write(f, header, hdrlen); - if (entry->delta) { - sha1write(f, entry->delta, 20); + if (obj_type == OBJ_OFS_DELTA) { + /* + * Deltas with relative base contain an additional + * encoding of the relative offset for the delta + * base from this object's position in the pack. + */ + unsigned long ofs = entry->offset - entry->delta->offset; + unsigned pos = sizeof(header) - 1; + header[pos] = ofs & 127; + while (ofs >>= 7) + header[--pos] = 128 | (--ofs & 127); + sha1write(f, header + pos, sizeof(header) - pos); + hdrlen += sizeof(header) - pos; + } else if (obj_type == OBJ_REF_DELTA) { + /* + * Deltas with a base reference contain + * an additional 20 bytes for the base sha1. + */ + sha1write(f, entry->delta->sha1, 20); hdrlen += 20; } datalen = sha1write_compressed(f, buf, size); @@ -413,7 +429,7 @@ static unsigned long write_object(struct sha1file *f, reused_delta++; reused++; } - if (obj_type == OBJ_REF_DELTA) + if (entry->delta) written_delta++; written++; return hdrlen + datalen; @@ -423,17 +439,16 @@ static unsigned long write_one(struct sha1file *f, struct object_entry *e, unsigned long offset) { - if (e->offset) + if (e->offset || e->preferred_base) /* offset starts from header size and cannot be zero * if it is written already. */ return offset; - e->offset = offset; - offset += write_object(f, e); - /* if we are deltified, write out its base object. */ + /* if we are deltified, write out its base object first. */ if (e->delta) offset = write_one(f, e->delta, offset); - return offset; + e->offset = offset; + return offset + write_object(f, e); } static void write_pack_file(void) @@ -1484,6 +1499,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) no_reuse_delta = 1; continue; } + if (!strcmp("--delta-base-offset", arg)) { + allow_ofs_delta = no_reuse_delta = 1; + continue; + } if (!strcmp("--stdout", arg)) { pack_to_stdout = 1; continue; From 780e6e735be189097dad4b223d8edeb18cce1928 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 22 Sep 2006 21:25:04 -0400 Subject: [PATCH 5/6] make pack data reuse compatible with both delta types This is the missing part to git-pack-objects allowing it to reuse delta data to/from any of the two delta types. It can reuse delta from any type, and it outputs base offsets when --allow-delta-base-offset is provided and the base is also included in the pack. Otherwise it outputs base sha1 references just like it always did. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 206 ++++++++++++++++++++++++++--------------- pack.h | 3 - sha1_file.c | 19 ---- 3 files changed, 131 insertions(+), 97 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 221264916d..6db97b685f 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -29,6 +29,7 @@ struct object_entry { enum object_type type; enum object_type in_pack_type; /* could be delta */ unsigned long delta_size; /* delta data size (uncompressed) */ +#define in_pack_header_size delta_size /* only when reusing pack data */ struct object_entry *delta; /* delta base object */ struct packed_git *in_pack; /* already in pack */ unsigned int in_pack_offset; @@ -86,17 +87,25 @@ static int object_ix_hashsz; * Pack index for existing packs give us easy access to the offsets into * corresponding pack file where each object's data starts, but the entries * do not store the size of the compressed representation (uncompressed - * size is easily available by examining the pack entry header). We build - * a hashtable of existing packs (pack_revindex), and keep reverse index - * here -- pack index file is sorted by object name mapping to offset; this - * pack_revindex[].revindex array is an ordered list of offsets, so if you - * know the offset of an object, next offset is where its packed - * representation ends. + * size is easily available by examining the pack entry header). It is + * also rather expensive to find the sha1 for an object given its offset. + * + * We build a hashtable of existing packs (pack_revindex), and keep reverse + * index here -- pack index file is sorted by object name mapping to offset; + * this pack_revindex[].revindex array is a list of offset/index_nr pairs + * ordered by offset, so if you know the offset of an object, next offset + * is where its packed representation ends and the index_nr can be used to + * get the object sha1 from the main index. */ +struct revindex_entry { + unsigned int offset; + unsigned int nr; +}; struct pack_revindex { struct packed_git *p; - unsigned long *revindex; -} *pack_revindex = NULL; + struct revindex_entry *revindex; +}; +static struct pack_revindex *pack_revindex; static int pack_revindex_hashsz; /* @@ -143,14 +152,9 @@ static void prepare_pack_ix(void) static int cmp_offset(const void *a_, const void *b_) { - unsigned long a = *(unsigned long *) a_; - unsigned long b = *(unsigned long *) b_; - if (a < b) - return -1; - else if (a == b) - return 0; - else - return 1; + const struct revindex_entry *a = a_; + const struct revindex_entry *b = b_; + return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0; } /* @@ -163,25 +167,27 @@ static void prepare_pack_revindex(struct pack_revindex *rix) int i; void *index = p->index_base + 256; - rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1)); + rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1)); for (i = 0; i < num_ent; i++) { unsigned int hl = *((unsigned int *)((char *) index + 24*i)); - rix->revindex[i] = ntohl(hl); + rix->revindex[i].offset = ntohl(hl); + rix->revindex[i].nr = i; } /* This knows the pack format -- the 20-byte trailer * follows immediately after the last object data. */ - rix->revindex[num_ent] = p->pack_size - 20; - qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset); + rix->revindex[num_ent].offset = p->pack_size - 20; + rix->revindex[num_ent].nr = -1; + qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset); } -static unsigned long find_packed_object_size(struct packed_git *p, - unsigned long ofs) +static struct revindex_entry * find_packed_object(struct packed_git *p, + unsigned int ofs) { int num; int lo, hi; struct pack_revindex *rix; - unsigned long *revindex; + struct revindex_entry *revindex; num = pack_revindex_ix(p); if (num < 0) die("internal error: pack revindex uninitialized"); @@ -193,10 +199,10 @@ static unsigned long find_packed_object_size(struct packed_git *p, hi = num_packed_objects(p) + 1; do { int mi = (lo + hi) / 2; - if (revindex[mi] == ofs) { - return revindex[mi+1] - ofs; + if (revindex[mi].offset == ofs) { + return revindex + mi; } - else if (ofs < revindex[mi]) + else if (ofs < revindex[mi].offset) hi = mi; else lo = mi + 1; @@ -204,6 +210,20 @@ static unsigned long find_packed_object_size(struct packed_git *p, die("internal error: pack revindex corrupt"); } +static unsigned long find_packed_object_size(struct packed_git *p, + unsigned long ofs) +{ + struct revindex_entry *entry = find_packed_object(p, ofs); + return entry[1].offset - ofs; +} + +static unsigned char *find_packed_object_name(struct packed_git *p, + unsigned long ofs) +{ + struct revindex_entry *entry = find_packed_object(p, ofs); + return (unsigned char *)(p->index_base + 256) + 24 * entry->nr + 4; +} + static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) { unsigned long othersize, delta_size; @@ -249,6 +269,10 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha return n; } +/* + * we are going to reuse the existing object data as is. make + * sure it is not corrupt. + */ static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect) { z_stream stream; @@ -280,32 +304,6 @@ static int check_inflate(unsigned char *data, unsigned long len, unsigned long e return st; } -/* - * we are going to reuse the existing pack entry data. make - * sure it is not corrupt. - */ -static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len) -{ - enum object_type type; - unsigned long size, used; - - if (pack_to_stdout) - return 0; - - /* the caller has already called use_packed_git() for us, - * so it is safe to access the pack data from mmapped location. - * make sure the entry inflates correctly. - */ - used = unpack_object_header_gently(data, len, &type, &size); - if (!used) - return -1; - if (type == OBJ_REF_DELTA) - used += 20; /* skip base object name */ - data += used; - len -= used; - return check_inflate(data, len, entry->size); -} - static int revalidate_loose_object(struct object_entry *entry, unsigned char *map, unsigned long mapsize) @@ -339,7 +337,7 @@ static unsigned long write_object(struct sha1file *f, obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (obj_type == OBJ_REF_DELTA) + else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) to_reuse = 1; /* check_object() decided it for us */ else if (obj_type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ @@ -415,18 +413,38 @@ static unsigned long write_object(struct sha1file *f, } else { struct packed_git *p = entry->in_pack; + + if (entry->delta) { + obj_type = (allow_ofs_delta && entry->delta->offset) ? + OBJ_OFS_DELTA : OBJ_REF_DELTA; + reused_delta++; + } + hdrlen = encode_header(obj_type, entry->size, header); + sha1write(f, header, hdrlen); + if (obj_type == OBJ_OFS_DELTA) { + unsigned long ofs = entry->offset - entry->delta->offset; + unsigned pos = sizeof(header) - 1; + header[pos] = ofs & 127; + while (ofs >>= 7) + header[--pos] = 128 | (--ofs & 127); + sha1write(f, header + pos, sizeof(header) - pos); + hdrlen += sizeof(header) - pos; + } else if (obj_type == OBJ_REF_DELTA) { + sha1write(f, entry->delta->sha1, 20); + hdrlen += 20; + } + use_packed_git(p); - - datalen = find_packed_object_size(p, entry->in_pack_offset); - buf = (char *) p->pack_base + entry->in_pack_offset; - - if (revalidate_pack_entry(entry, buf, datalen)) + buf = (char *) p->pack_base + + entry->in_pack_offset + + entry->in_pack_header_size; + datalen = find_packed_object_size(p, entry->in_pack_offset) + - entry->in_pack_header_size; +//fprintf(stderr, "reusing %d at %d header %d size %d\n", obj_type, entry->in_pack_offset, entry->in_pack_header_size, datalen); + if (!pack_to_stdout && check_inflate(buf, datalen, entry->size)) die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); sha1write(f, buf, datalen); unuse_packed_git(p); - hdrlen = 0; /* not really */ - if (obj_type == OBJ_REF_DELTA) - reused_delta++; reused++; } if (entry->delta) @@ -914,26 +932,64 @@ static void check_object(struct object_entry *entry) char type[20]; if (entry->in_pack && !entry->preferred_base) { - unsigned char base[20]; - unsigned long size; - struct object_entry *base_entry; + struct packed_git *p = entry->in_pack; + unsigned long left = p->pack_size - entry->in_pack_offset; + unsigned long size, used; + unsigned char *buf; + struct object_entry *base_entry = NULL; + + use_packed_git(p); + buf = p->pack_base; + buf += entry->in_pack_offset; /* We want in_pack_type even if we do not reuse delta. * There is no point not reusing non-delta representations. */ - check_reuse_pack_delta(entry->in_pack, - entry->in_pack_offset, - base, &size, - &entry->in_pack_type); + used = unpack_object_header_gently(buf, left, + &entry->in_pack_type, &size); + if (!used || left - used <= 20) + die("corrupt pack for %s", sha1_to_hex(entry->sha1)); /* Check if it is delta, and the base is also an object * we are going to pack. If so we will reuse the existing * delta. */ - if (!no_reuse_delta && - entry->in_pack_type == OBJ_REF_DELTA && - (base_entry = locate_object_entry(base)) && - (!base_entry->preferred_base)) { + if (!no_reuse_delta) { + unsigned char c, *base_name; + unsigned long ofs; + /* there is at least 20 bytes left in the pack */ + switch (entry->in_pack_type) { + case OBJ_REF_DELTA: + base_name = buf + used; + used += 20; + break; + case OBJ_OFS_DELTA: + c = buf[used++]; + ofs = c & 127; + while (c & 128) { + ofs += 1; + if (!ofs || ofs & ~(~0UL >> 7)) + die("delta base offset overflow in pack for %s", + sha1_to_hex(entry->sha1)); + c = buf[used++]; + ofs = (ofs << 7) + (c & 127); + } + if (ofs >= entry->in_pack_offset) + die("delta base offset out of bound for %s", + sha1_to_hex(entry->sha1)); + ofs = entry->in_pack_offset - ofs; + base_name = find_packed_object_name(p, ofs); + break; + default: + base_name = NULL; + } + if (base_name) + base_entry = locate_object_entry(base_name); + } + unuse_packed_git(p); + entry->in_pack_header_size = used; + + if (base_entry && !base_entry->preferred_base) { /* Depth value does not matter - find_deltas() * will never consider reused delta as the @@ -942,9 +998,9 @@ static void check_object(struct object_entry *entry) */ /* uncompressed size of the delta data */ - entry->size = entry->delta_size = size; + entry->size = size; entry->delta = base_entry; - entry->type = OBJ_REF_DELTA; + entry->type = entry->in_pack_type; entry->delta_sibling = base_entry->delta_child; base_entry->delta_child = entry; @@ -1500,7 +1556,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) continue; } if (!strcmp("--delta-base-offset", arg)) { - allow_ofs_delta = no_reuse_delta = 1; + allow_ofs_delta = 1; continue; } if (!strcmp("--stdout", arg)) { diff --git a/pack.h b/pack.h index 05557da152..4c9bddd4f2 100644 --- a/pack.h +++ b/pack.h @@ -16,7 +16,4 @@ struct pack_header { }; extern int verify_pack(struct packed_git *, int); -extern int check_reuse_pack_delta(struct packed_git *, unsigned long, - unsigned char *, unsigned long *, - enum object_type *); #endif diff --git a/sha1_file.c b/sha1_file.c index fdb4588280..18c2f88112 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1000,25 +1000,6 @@ static unsigned long unpack_object_header(struct packed_git *p, unsigned long of return offset + used; } -int check_reuse_pack_delta(struct packed_git *p, unsigned long offset, - unsigned char *base, unsigned long *sizep, - enum object_type *kindp) -{ - unsigned long ptr; - int status = -1; - - use_packed_git(p); - ptr = offset; - ptr = unpack_object_header(p, ptr, kindp, sizep); - if (*kindp != OBJ_REF_DELTA) - goto done; - hashcpy(base, (unsigned char *) p->pack_base + ptr); - status = 0; - done: - unuse_packed_git(p); - return status; -} - void packed_object_info_detail(struct packed_git *p, unsigned long offset, char *type, From e4fe4b8ef7cdde842a9e5e2594d0fba1367d9dd3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 26 Sep 2006 11:27:39 -0400 Subject: [PATCH 6/6] let the GIT native protocol use offsets to delta base when possible There is no reason not to always do this when both ends agree. Therefore a client that can accept offsets to delta base always sends the "ofs-delta" flag. The server will stream a pack with or without offset to delta base depending on whether that flag is provided or not with no additional cost. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- fetch-pack.c | 5 +++-- upload-pack.c | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index e8708aa802..474d54520e 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -166,12 +166,13 @@ static int find_common(int fd[2], unsigned char *result_sha1, } if (!fetching) - packet_write(fd[1], "want %s%s%s%s%s\n", + packet_write(fd[1], "want %s%s%s%s%s%s\n", sha1_to_hex(remote), (multi_ack ? " multi_ack" : ""), (use_sideband == 2 ? " side-band-64k" : ""), (use_sideband == 1 ? " side-band" : ""), - (use_thin_pack ? " thin-pack" : "")); + (use_thin_pack ? " thin-pack" : ""), + " ofs-delta"); else packet_write(fd[1], "want %s\n", sha1_to_hex(remote)); fetching++; diff --git a/upload-pack.c b/upload-pack.c index 189b239cc0..9ec3775049 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -16,7 +16,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n #define OUR_REF (1U << 1) #define WANTED (1U << 2) static int multi_ack, nr_our_refs; -static int use_thin_pack; +static int use_thin_pack, use_ofs_delta; static struct object_array have_obj; static struct object_array want_obj; static unsigned int timeout; @@ -137,7 +137,9 @@ static void create_pack_file(void) close(pu_pipe[1]); close(pe_pipe[0]); close(pe_pipe[1]); - execl_git_cmd("pack-objects", "--stdout", "--progress", NULL); + execl_git_cmd("pack-objects", "--stdout", "--progress", + use_ofs_delta ? "--delta-base-offset" : NULL, + NULL); kill(pid_rev_list, SIGKILL); die("git-upload-pack: unable to exec git-pack-objects"); } @@ -393,6 +395,8 @@ static void receive_needs(void) multi_ack = 1; if (strstr(line+45, "thin-pack")) use_thin_pack = 1; + if (strstr(line+45, "ofs-delta")) + use_ofs_delta = 1; if (strstr(line+45, "side-band-64k")) use_sideband = LARGE_PACKET_MAX; else if (strstr(line+45, "side-band")) @@ -418,7 +422,7 @@ static void receive_needs(void) static int send_ref(const char *refname, const unsigned char *sha1) { - static const char *capabilities = "multi_ack thin-pack side-band side-band-64k"; + static const char *capabilities = "multi_ack thin-pack side-band side-band-64k ofs-delta"; struct object *o = parse_object(sha1); if (!o)