From fa736f72b0840950e32dfd8bc470e91831107b10 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 May 2007 12:31:28 -0400 Subject: [PATCH 1/8] allow for undeltified objects not to be reused Currently non deltified object data is always reused when possible. This means that any change to core.compression has no effect on those objects as they don't get recompressed when repacking them. Let's add a --no-reuse-object flag to git-repack in order to force recompression of all objects when desired. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.txt | 7 +++++++ builtin-pack-objects.c | 22 ++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index bd3ee456e3..ce892147dd 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -127,6 +127,13 @@ base-name:: This flag tells the command not to reuse existing deltas but compute them from scratch. +--no-reuse-object:: + This flag tells the command not to reuse existing object data at all, + including non deltified object, forcing recompression of everything. + This implies --no-reuse-delta. Useful only in the obscur case where + wholesale enforcement of a different compression level on the + packed data is desired. + --delta-base-offset:: A packed archive can express base object of a delta as either 20-byte object name or as an offset in the diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 966f843e43..d94c79a339 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -17,9 +17,9 @@ static const char pack_usage[] = "\ git-pack-objects [{ -q | --progress | --all-progress }] \n\ [--local] [--incremental] [--window=N] [--depth=N] \n\ - [--no-reuse-delta] [--delta-base-offset] [--non-empty] \n\ - [--revs [--unpacked | --all]*] [--reflog] [--stdout | base-name] \n\ - [type; - if (! 
entry->in_pack) + if (no_reuse_object) + to_reuse = 0; /* explicit */ + else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) to_reuse = 1; /* check_object() decided it for us */ @@ -425,7 +427,7 @@ static unsigned long write_object(struct sha1file *f, * and we do not need to deltify it. */ - if (!entry->in_pack && !entry->delta) { + if (!no_reuse_object && !entry->in_pack && !entry->delta) { unsigned char *map; unsigned long mapsize; map = map_sha1_file(entry->sha1, &mapsize); @@ -1125,8 +1127,8 @@ static void check_object(struct object_entry *entry) buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail); /* - * We want in_pack_type even if we do not reuse delta. - * There is no point not reusing non-delta representations. + * We want in_pack_type even if we do not reuse delta + * since non-delta representations could still be reused. */ used = unpack_object_header_gently(buf, avail, &entry->in_pack_type, @@ -1655,6 +1657,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) no_reuse_delta = 1; continue; } + if (!strcmp("--no-reuse-object", arg)) { + no_reuse_object = no_reuse_delta = 1; + continue; + } if (!strcmp("--delta-base-offset", arg)) { allow_ofs_delta = 1; continue; From 479b56ba50144b30f28c5b225d412125c07def9f Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 May 2007 12:59:40 -0400 Subject: [PATCH 2/8] make "repack -f" imply "pack-objects --no-reuse-object" Recomputing delta is much more expensive than recompressing anyway, and when the user says 'repack -f', it is a sign that the user is willing to spend CPU cycles. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- git-repack.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git-repack.sh b/git-repack.sh index ddfa8b44a1..8bf66a4fe8 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -8,7 +8,7 @@ SUBDIRECTORY_OK='Yes' . 
git-sh-setup no_update_info= all_into_one= remove_redundant= -local= quiet= no_reuse_delta= extra= +local= quiet= no_reuse= extra= while case "$#" in 0) break ;; esac do case "$1" in @@ -16,7 +16,7 @@ do -a) all_into_one=t ;; -d) remove_redundant=t ;; -q) quiet=-q ;; - -f) no_reuse_delta=--no-reuse-delta ;; + -f) no_reuse=--no-reuse-object ;; -l) local=--local ;; --window=*) extra="$extra $1" ;; --depth=*) extra="$extra $1" ;; @@ -61,7 +61,7 @@ case ",$all_into_one," in ;; esac -args="$args $local $quiet $no_reuse_delta$extra" +args="$args $local $quiet $no_reuse$extra" name=$(git-pack-objects --non-empty --all --reflog $args Date: Wed, 9 May 2007 14:42:42 -0400 Subject: [PATCH 3/8] deprecate the new loose object header format Now that we encourage and actively preserve objects in a packed form more aggressively than we did at the time the new loose object format and core.legacyheaders were introduced, that extra loose object format doesn't appear to be worth it anymore. Because the packing of loose objects has to go through the delta match loop anyway, and since most of them should end up being deltified in most cases, there is really little advantage to have this parallel loose object format as the CPU savings it might provide is rather lost in the noise in the end. This patch gets rid of core.legacyheaders, preserves the legacy format as the only writable loose object format and deprecates the other one to keep things simpler. 
Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- Documentation/config.txt | 13 -------- builtin-pack-objects.c | 69 ---------------------------------------- cache.h | 2 -- config.c | 5 --- environment.c | 1 - sha1_file.c | 47 +++++++-------------------- 6 files changed, 11 insertions(+), 126 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index ea434af9db..d6d89ba463 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -209,19 +209,6 @@ core.compression:: compression, and 1..9 are various speed/size tradeoffs, 9 being slowest. -core.legacyheaders:: - A boolean which - changes the format of loose objects so that they are more - efficient to pack and to send out of the repository over git - native protocol, since v1.4.2. However, loose objects - written in the new format cannot be read by git older than - that version; people fetching from your repository using - older versions of git over dumb transports (e.g. http) - will also be affected. -+ -To let git use the new loose object format, you have to -set core.legacyheaders to false. - core.packedGitWindowSize:: Number of bytes of a pack file to map into memory in a single mapping operation. Larger window sizes may allow diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index d94c79a339..5fa98132fe 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -346,56 +346,6 @@ static void copy_pack_data(struct sha1file *f, } } -static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect) -{ - z_stream stream; - unsigned char fakebuf[4096]; - int st; - - memset(&stream, 0, sizeof(stream)); - stream.next_in = data; - stream.avail_in = len; - stream.next_out = fakebuf; - stream.avail_out = sizeof(fakebuf); - inflateInit(&stream); - - while (1) { - st = inflate(&stream, Z_FINISH); - if (st == Z_STREAM_END || st == Z_OK) { - st = (stream.total_out == expect && - stream.total_in == len) ? 
0 : -1; - break; - } - if (st != Z_BUF_ERROR) { - st = -1; - break; - } - stream.next_out = fakebuf; - stream.avail_out = sizeof(fakebuf); - } - inflateEnd(&stream); - return st; -} - -static int revalidate_loose_object(struct object_entry *entry, - unsigned char *map, - unsigned long mapsize) -{ - /* we already know this is a loose object with new type header. */ - enum object_type type; - unsigned long size, used; - - if (pack_to_stdout) - return 0; - - used = unpack_object_header_gently(map, mapsize, &type, &size); - if (!used) - return -1; - map += used; - mapsize -= used; - return check_loose_inflate(map, mapsize, size); -} - static unsigned long write_object(struct sha1file *f, struct object_entry *entry) { @@ -427,25 +377,6 @@ static unsigned long write_object(struct sha1file *f, * and we do not need to deltify it. */ - if (!no_reuse_object && !entry->in_pack && !entry->delta) { - unsigned char *map; - unsigned long mapsize; - map = map_sha1_file(entry->sha1, &mapsize); - if (map && !legacy_loose_object(map)) { - /* We can copy straight into the pack file */ - if (revalidate_loose_object(entry, map, mapsize)) - die("corrupt loose object %s", - sha1_to_hex(entry->sha1)); - sha1write(f, map, mapsize); - munmap(map, mapsize); - written++; - reused++; - return mapsize; - } - if (map) - munmap(map, mapsize); - } - if (!to_reuse) { buf = read_sha1_file(entry->sha1, &type, &size); if (!buf) diff --git a/cache.h b/cache.h index 8e76152645..5725bce6f4 100644 --- a/cache.h +++ b/cache.h @@ -273,7 +273,6 @@ extern void rollback_lock_file(struct lock_file *); extern int delete_ref(const char *, const unsigned char *sha1); /* Environment bits from configuration mechanism */ -extern int use_legacy_headers; extern int trust_executable_bit; extern int has_symlinks; extern int assume_unchanged; @@ -354,7 +353,6 @@ extern int move_temp_to_file(const char *tmpfile, const char *filename); extern int has_sha1_pack(const unsigned char *sha1, const char **ignore); extern int 
has_sha1_file(const unsigned char *sha1); extern void *map_sha1_file(const unsigned char *sha1, unsigned long *); -extern int legacy_loose_object(unsigned char *); extern int has_pack_file(const unsigned char *sha1); extern int has_pack_index(const unsigned char *sha1); diff --git a/config.c b/config.c index 70d1055679..298966f215 100644 --- a/config.c +++ b/config.c @@ -299,11 +299,6 @@ int git_default_config(const char *var, const char *value) return 0; } - if (!strcmp(var, "core.legacyheaders")) { - use_legacy_headers = git_config_bool(var, value); - return 0; - } - if (!strcmp(var, "core.compression")) { int level = git_config_int(var, value); if (level == -1) diff --git a/environment.c b/environment.c index 22316597df..54e3abae98 100644 --- a/environment.c +++ b/environment.c @@ -11,7 +11,6 @@ char git_default_email[MAX_GITNAME]; char git_default_name[MAX_GITNAME]; -int use_legacy_headers = 1; int trust_executable_bit = 1; int has_symlinks = 1; int assume_unchanged; diff --git a/sha1_file.c b/sha1_file.c index 32244d704e..e71552795a 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -972,7 +972,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) return map; } -int legacy_loose_object(unsigned char *map) +static int legacy_loose_object(unsigned char *map) { unsigned int word; @@ -1034,6 +1034,14 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon return inflate(stream, 0); } + + /* + * There used to be a second loose object header format which + * was meant to mimic the in-pack format, allowing for direct + * copy of the object data. This format turned up not to be + * really worth it and we don't write it any longer. But we + * can still read it. 
+ */ used = unpack_object_header_gently(map, mapsize, &type, &size); if (!used || !valid_loose_object_type[type]) return -1; @@ -1962,40 +1970,6 @@ static int write_buffer(int fd, const void *buf, size_t len) return 0; } -static int write_binary_header(unsigned char *hdr, enum object_type type, unsigned long len) -{ - int hdr_len; - unsigned char c; - - c = (type << 4) | (len & 15); - len >>= 4; - hdr_len = 1; - while (len) { - *hdr++ = c | 0x80; - hdr_len++; - c = (len & 0x7f); - len >>= 7; - } - *hdr = c; - return hdr_len; -} - -static void setup_object_header(z_stream *stream, const char *type, unsigned long len) -{ - int obj_type, hdrlen; - - if (use_legacy_headers) { - while (deflate(stream, 0) == Z_OK) - /* nothing */; - return; - } - obj_type = type_from_string(type); - hdrlen = write_binary_header(stream->next_out, obj_type, len); - stream->total_out = hdrlen; - stream->next_out += hdrlen; - stream->avail_out -= hdrlen; -} - int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1) { @@ -2062,7 +2036,8 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha /* First header.. */ stream.next_in = (unsigned char *)hdr; stream.avail_in = hdrlen; - setup_object_header(&stream, type, len); + while (deflate(&stream, 0) == Z_OK) + /* nothing */; /* Then the data itself.. */ stream.next_in = buf; From 960ccca6803c9fb57429d43572a9545a96107e32 Mon Sep 17 00:00:00 2001 From: Dana How Date: Wed, 9 May 2007 13:56:50 -0700 Subject: [PATCH 4/8] Custom compression levels for objects and packs Add config variables pack.compression and core.loosecompression , and switch --compression=level to pack-objects. Loose objects will be compressed using core.loosecompression if set, else core.compression if set, else Z_BEST_SPEED. Packed objects will be compressed using --compression=level if seen, else pack.compression if set, else core.compression if set, else Z_DEFAULT_COMPRESSION. This is the "pack compression level". 
Loose objects added to a pack undeltified will be recompressed to the pack compression level if it is unequal to the current loose compression level by the preceding rules, or if the loose object was written while core.legacyheaders = true. Newly deltified loose objects are always compressed to the current pack compression level. Previously packed objects added to a pack are recompressed to the current pack compression level exactly when their deltification status changes, since the previous pack data cannot be reused. In either case, the --no-reuse-object switch from the first patch below will always force recompression to the current pack compression level, instead of assuming the pack compression level hasn't changed and pack data can be reused when possible. This applies on top of the following patches from Nicolas Pitre: [PATCH] allow for undeltified objects not to be reused [PATCH] make "repack -f" imply "pack-objects --no-reuse-object" Signed-off-by: Dana L. How Signed-off-by: Junio C Hamano --- Documentation/config.txt | 17 +++++++++++++-- Documentation/git-pack-objects.txt | 14 ++++++++++++- builtin-pack-objects.c | 33 ++++++++++++++++++++++++++++-- cache.h | 2 ++ config.c | 18 +++++++++++++++- csum-file.c | 4 ++-- csum-file.h | 2 +- environment.c | 4 +++- 8 files changed, 84 insertions(+), 10 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index ea434af9db..382a31bb4f 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -204,10 +204,16 @@ core.warnAmbiguousRefs:: and might match multiple refs in the .git/refs/ tree. True by default. core.compression:: + An integer -1..9, indicating a default compression level. + -1 is the zlib default. 0 means no compression, + and 1..9 are various speed/size tradeoffs, 9 being slowest. + +core.loosecompression:: An integer -1..9, indicating the compression level for objects that - are not in a pack file. -1 is the zlib and git default. 0 means no + are not in a pack file. 
-1 is the zlib default. 0 means no compression, and 1..9 are various speed/size tradeoffs, 9 being - slowest. + slowest. If not set, defaults to core.compression. If that is + not set, defaults to 1 (best speed). core.legacyheaders:: A boolean which @@ -550,6 +556,13 @@ pack.depth:: The maximum delta depth used by gitlink:git-pack-objects[1] when no maximum depth is given on the command line. Defaults to 50. +pack.compression:: + An integer -1..9, indicating the compression level for objects + in a pack file. -1 is the zlib default. 0 means no + compression, and 1..9 are various speed/size tradeoffs, 9 being + slowest. If not set, defaults to core.compression. If that is + not set, defaults to -1. + pull.octopus:: The default merge strategy to use when pulling multiple branches at once. diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index ce892147dd..2531238df4 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -130,10 +130,22 @@ base-name:: --no-reuse-object:: This flag tells the command not to reuse existing object data at all, including non deltified object, forcing recompression of everything. - This implies --no-reuse-delta. Useful only in the obscur case where + This implies --no-reuse-delta. Useful only in the obscure case where wholesale enforcement of a different compression level on the packed data is desired. +--compression=[N]:: + Specifies compression level for newly-compressed data in the + generated pack. If not specified, pack compression level is + determined first by pack.compression, then by core.compression, + and defaults to -1, the zlib default, if neither is set. + Data copied from loose objects will be recompressed + if core.legacyheaders was true when they were created or if + the loose compression level (see core.loosecompression and + core.compression) is now a different value than the pack + compression level. 
Add --no-reuse-object if you want to force + a uniform compression level on all data no matter the source. + --delta-base-offset:: A packed archive can express base object of a delta as either 20-byte object name or as an offset in the diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index d94c79a339..5c468aa50a 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -68,6 +68,8 @@ static int depth = 50; static int pack_to_stdout; static int num_preferred_base; static struct progress progress_state; +static int pack_compression_level = Z_DEFAULT_COMPRESSION; +static int pack_compression_seen; /* * The object names in objects array are hashed with this hashtable, @@ -427,7 +429,10 @@ static unsigned long write_object(struct sha1file *f, * and we do not need to deltify it. */ - if (!no_reuse_object && !entry->in_pack && !entry->delta) { + /* differing core & pack compression when loose object -> must recompress */ + if (!entry->in_pack && pack_compression_level != zlib_compression_level) + to_reuse = 0; + else if (!no_reuse_object && !entry->in_pack && !entry->delta) { unsigned char *map; unsigned long mapsize; map = map_sha1_file(entry->sha1, &mapsize); @@ -487,7 +492,7 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, entry->delta->sha1, 20); hdrlen += 20; } - datalen = sha1write_compressed(f, buf, size); + datalen = sha1write_compressed(f, buf, size, pack_compression_level); free(buf); } else { @@ -1496,6 +1501,16 @@ static int git_pack_config(const char *k, const char *v) depth = git_config_int(k, v); return 0; } + if (!strcmp(k, "pack.compression")) { + int level = git_config_int(k, v); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad pack compression level %d", level); + pack_compression_level = level; + pack_compression_seen = 1; + return 0; + } return git_default_config(k, v); } @@ -1607,6 +1622,8 @@ int cmd_pack_objects(int argc, const char **argv, 
const char *prefix) rp_ac = 2; git_config(git_pack_config); + if (!pack_compression_seen && core_compression_seen) + pack_compression_level = core_compression_level; progress = isatty(2); for (i = 1; i < argc; i++) { @@ -1627,6 +1644,18 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) incremental = 1; continue; } + if (!prefixcmp(arg, "--compression=")) { + char *end; + int level = strtoul(arg+14, &end, 0); + if (!arg[14] || *end) + usage(pack_usage); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad pack compression level %d", level); + pack_compression_level = level; + continue; + } if (!prefixcmp(arg, "--window=")) { char *end; window = strtoul(arg+9, &end, 0); diff --git a/cache.h b/cache.h index 8e76152645..2b3f359b21 100644 --- a/cache.h +++ b/cache.h @@ -283,6 +283,8 @@ extern int warn_ambiguous_refs; extern int shared_repository; extern const char *apply_default_whitespace; extern int zlib_compression_level; +extern int core_compression_level; +extern int core_compression_seen; extern size_t packed_git_window_size; extern size_t packed_git_limit; extern size_t delta_base_cache_limit; diff --git a/config.c b/config.c index 70d1055679..5627ed6a79 100644 --- a/config.c +++ b/config.c @@ -12,6 +12,8 @@ static FILE *config_file; static const char *config_file_name; static int config_linenr; +static int zlib_compression_seen; + static int get_next_char(void) { int c; @@ -304,13 +306,27 @@ int git_default_config(const char *var, const char *value) return 0; } - if (!strcmp(var, "core.compression")) { + if (!strcmp(var, "core.loosecompression")) { int level = git_config_int(var, value); if (level == -1) level = Z_DEFAULT_COMPRESSION; else if (level < 0 || level > Z_BEST_COMPRESSION) die("bad zlib compression level %d", level); zlib_compression_level = level; + zlib_compression_seen = 1; + return 0; + } + + if (!strcmp(var, "core.compression")) { + int level = 
git_config_int(var, value); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad zlib compression level %d", level); + core_compression_level = level; + core_compression_seen = 1; + if (!zlib_compression_seen) + zlib_compression_level = level; return 0; } diff --git a/csum-file.c b/csum-file.c index 7c806ada48..7088f6e93f 100644 --- a/csum-file.c +++ b/csum-file.c @@ -119,14 +119,14 @@ struct sha1file *sha1fd(int fd, const char *name) return f; } -int sha1write_compressed(struct sha1file *f, void *in, unsigned int size) +int sha1write_compressed(struct sha1file *f, void *in, unsigned int size, int level) { z_stream stream; unsigned long maxsize; void *out; memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, zlib_compression_level); + deflateInit(&stream, level); maxsize = deflateBound(&stream, size); out = xmalloc(maxsize); diff --git a/csum-file.h b/csum-file.h index 7e1339189d..4e8b83e093 100644 --- a/csum-file.h +++ b/csum-file.h @@ -16,7 +16,7 @@ extern struct sha1file *sha1fd(int fd, const char *name); extern struct sha1file *sha1create(const char *fmt, ...) 
__attribute__((format (printf, 1, 2))); extern int sha1close(struct sha1file *, unsigned char *, int); extern int sha1write(struct sha1file *, void *, unsigned int); -extern int sha1write_compressed(struct sha1file *, void *, unsigned int); +extern int sha1write_compressed(struct sha1file *, void *, unsigned int, int); extern void crc32_begin(struct sha1file *); extern uint32_t crc32_end(struct sha1file *); diff --git a/environment.c b/environment.c index 22316597df..b7aeb1a493 100644 --- a/environment.c +++ b/environment.c @@ -24,7 +24,9 @@ const char *git_commit_encoding; const char *git_log_output_encoding; int shared_repository = PERM_UMASK; const char *apply_default_whitespace; -int zlib_compression_level = Z_DEFAULT_COMPRESSION; +int zlib_compression_level = Z_BEST_SPEED; +int core_compression_level; +int core_compression_seen; size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE; size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT; size_t delta_base_cache_limit = 16 * 1024 * 1024; From 0d7566a5baa9cc67fd3361b64bb48757e637fdb0 Mon Sep 17 00:00:00 2001 From: Theodore Tso Date: Wed, 9 May 2007 15:48:39 -0400 Subject: [PATCH 5/8] Add --aggressive option to 'git gc' This option causes 'git gc' to more aggressively optimize the repository at the cost of taking much more wall clock and CPU time. Today this option causes git-pack-objects to use --no-use-delta option, and it allows the --window parameter to be set via the gc.aggressiveWindow configuration parameter. Signed-off-by: "Theodore Ts'o" Signed-off-by: Junio C Hamano --- Documentation/config.txt | 5 +++++ Documentation/git-gc.txt | 16 +++++++++++++++- builtin-gc.c | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index ea434af9db..efcf3019b0 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -384,6 +384,11 @@ format.suffix:: `.patch`. 
Use this variable to change that suffix (make sure to include the dot if you want it). +gc.aggressiveWindow:: + The window size parameter used in the delta compression + algorithm used by 'git gc --aggressive'. This defaults + to 10. + gc.packrefs:: `git gc` does not run `git pack-refs` in a bare repository by default so that older dumb-transport clients can still fetch diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index bc1658434a..4ac839f938 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -8,7 +8,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository SYNOPSIS -------- -'git-gc' [--prune] +'git-gc' [--prune] [--aggressive] DESCRIPTION ----------- @@ -35,6 +35,13 @@ OPTIONS repository at the same time (e.g. never use this option in a cron script). +--aggressive:: + Usually 'git-gc' runs very quickly while providing good disk + space utilization and performance. This option will cause + git-gc to more aggressively optimize the repository at the expense + of taking much more time. The effects of this optimization are + persistent, so this option only needs to be used sporadically; every + few hundred changesets or so. Configuration ------------- @@ -67,6 +74,13 @@ The optional configuration variable 'gc.packrefs' determines if is not run in bare repositories by default, to allow older dumb-transport clients fetch from the repository, but this will change in the future. +The optional configuration variable 'gc.aggressiveWindow' controls how +much time is spent optimizing the delta compression of the objects in +the repository when the --aggressive option is specified. The larger +the value, the more time is spent optimizing the delta compression. See +the documentation for the '--window' option in gitlink:git-repack[1] for +more details. This defaults to 10. 
+ See Also -------- gitlink:git-prune[1] diff --git a/builtin-gc.c b/builtin-gc.c index 3b1f8c2f3e..8ea165aef1 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -15,13 +15,15 @@ #define FAILED_RUN "failed to run %s" -static const char builtin_gc_usage[] = "git-gc [--prune]"; +static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]"; static int pack_refs = -1; +static int aggressive_window = -1; +#define MAX_ADD 10 static const char *argv_pack_refs[] = {"pack-refs", "--prune", NULL}; static const char *argv_reflog[] = {"reflog", "expire", "--all", NULL}; -static const char *argv_repack[] = {"repack", "-a", "-d", "-l", NULL}; +static const char *argv_repack[MAX_ADD] = {"repack", "-a", "-d", "-l", NULL}; static const char *argv_prune[] = {"prune", NULL}; static const char *argv_rerere[] = {"rerere", "gc", NULL}; @@ -34,13 +36,31 @@ static int gc_config(const char *var, const char *value) pack_refs = git_config_bool(var, value); return 0; } + if (!strcmp(var, "gc.aggressivewindow")) { + aggressive_window = git_config_int(var, value); + return 0; + } return git_default_config(var, value); } +static void append_option(const char **cmd, const char *opt, int max_length) +{ + int i; + + for (i = 0; cmd[i]; i++) + ; + + if (i + 2 >= max_length) + die("Too many options specified"); + cmd[i++] = opt; + cmd[i] = NULL; +} + int cmd_gc(int argc, const char **argv, const char *prefix) { int i; int prune = 0; + char buf[80]; git_config(gc_config); @@ -53,6 +73,14 @@ int cmd_gc(int argc, const char **argv, const char *prefix) prune = 1; continue; } + if (!strcmp(arg, "--aggressive")) { + append_option(argv_repack, "-f", MAX_ADD); + if (aggressive_window > 0) { + sprintf(buf, "--window=%d", aggressive_window); + append_option(argv_repack, buf, MAX_ADD); + } + continue; + } /* perhaps other parameters later... 
*/ break; } From 68db31cc289c686c4b4454dfbb121aff59a6c602 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Wed, 9 May 2007 12:33:20 +0200 Subject: [PATCH 6/8] git-update-ref: add --no-deref option for overwriting/detaching ref git-checkout is also adapted to make use of this new option instead of the handcrafted command sequence. Signed-off-by: Sven Verdoolaege Signed-off-by: Junio C Hamano --- Documentation/git-update-ref.txt | 5 ++++- builtin-branch.c | 2 +- builtin-fetch--tool.c | 2 +- builtin-reflog.c | 2 +- builtin-update-ref.c | 11 ++++++++--- fast-import.c | 2 +- git-checkout.sh | 10 +--------- receive-pack.c | 2 +- refs.c | 30 +++++++++++++++++++----------- refs.h | 3 ++- 10 files changed, 39 insertions(+), 30 deletions(-) diff --git a/Documentation/git-update-ref.txt b/Documentation/git-update-ref.txt index 9424feab32..f222616591 100644 --- a/Documentation/git-update-ref.txt +++ b/Documentation/git-update-ref.txt @@ -7,7 +7,7 @@ git-update-ref - Update the object name stored in a ref safely SYNOPSIS -------- -'git-update-ref' [-m ] (-d | []) +'git-update-ref' [-m ] (-d | [--no-deref] []) DESCRIPTION ----------- @@ -36,6 +36,9 @@ them and update them as a regular file (i.e. it will allow the filesystem to follow them, but will overwrite such a symlink to somewhere else with a regular filename). +If --no-deref is given, itself is overwritten, rather than +the result of following the symbolic pointers. 
+ In general, using git-update-ref HEAD "$head" diff --git a/builtin-branch.c b/builtin-branch.c index 7408285050..6bd5843b47 100644 --- a/builtin-branch.c +++ b/builtin-branch.c @@ -462,7 +462,7 @@ static void create_branch(const char *name, const char *start_name, die("Not a valid branch point: '%s'.", start_name); hashcpy(sha1, commit->object.sha1); - lock = lock_any_ref_for_update(ref, NULL); + lock = lock_any_ref_for_update(ref, NULL, 0); if (!lock) die("Failed to lock ref for update: %s.", strerror(errno)); diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c index 2065466f27..b14e78ac3f 100644 --- a/builtin-fetch--tool.c +++ b/builtin-fetch--tool.c @@ -42,7 +42,7 @@ static int update_ref(const char *action, if (!rla) rla = "(reflog update)"; snprintf(msg, sizeof(msg), "%s: %s", rla, action); - lock = lock_any_ref_for_update(refname, oldval); + lock = lock_any_ref_for_update(refname, oldval, 0); if (!lock) return 1; if (write_ref_sha1(lock, sha1, msg) < 0) diff --git a/builtin-reflog.c b/builtin-reflog.c index 4c39f1da98..ce093cad78 100644 --- a/builtin-reflog.c +++ b/builtin-reflog.c @@ -249,7 +249,7 @@ static int expire_reflog(const char *ref, const unsigned char *sha1, int unused, /* we take the lock for the ref itself to prevent it from * getting updated. 
*/ - lock = lock_any_ref_for_update(ref, sha1); + lock = lock_any_ref_for_update(ref, sha1, 0); if (!lock) return error("cannot lock ref '%s'", ref); log_file = xstrdup(git_path("logs/%s", ref)); diff --git a/builtin-update-ref.c b/builtin-update-ref.c index 5ee960bf41..feac2ed12d 100644 --- a/builtin-update-ref.c +++ b/builtin-update-ref.c @@ -3,16 +3,17 @@ #include "builtin.h" static const char git_update_ref_usage[] = -"git-update-ref [-m ] (-d | [])"; +"git-update-ref [-m ] (-d | [--no-deref] [])"; int cmd_update_ref(int argc, const char **argv, const char *prefix) { const char *refname=NULL, *value=NULL, *oldval=NULL, *msg=NULL; struct ref_lock *lock; unsigned char sha1[20], oldsha1[20]; - int i, delete; + int i, delete, ref_flags; delete = 0; + ref_flags = 0; git_config(git_default_config); for (i = 1; i < argc; i++) { @@ -30,6 +31,10 @@ int cmd_update_ref(int argc, const char **argv, const char *prefix) delete = 1; continue; } + if (!strcmp("--no-deref", argv[i])) { + ref_flags |= REF_NODEREF; + continue; + } if (!refname) { refname = argv[i]; continue; @@ -59,7 +64,7 @@ int cmd_update_ref(int argc, const char **argv, const char *prefix) if (oldval && *oldval && get_sha1(oldval, oldsha1)) die("%s: not a valid old SHA1", oldval); - lock = lock_any_ref_for_update(refname, oldval ? oldsha1 : NULL); + lock = lock_any_ref_for_update(refname, oldval ? 
oldsha1 : NULL, ref_flags); if (!lock) die("%s: cannot lock the ref", refname); if (write_ref_sha1(lock, sha1, msg) < 0) diff --git a/fast-import.c b/fast-import.c index 3a2d5ed8e6..ffa00fd3c6 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1271,7 +1271,7 @@ static int update_branch(struct branch *b) if (read_ref(b->name, old_sha1)) hashclr(old_sha1); - lock = lock_any_ref_for_update(b->name, old_sha1); + lock = lock_any_ref_for_update(b->name, old_sha1, 0); if (!lock) return error("Unable to lock %s", b->name); if (!force_update && !is_null_sha1(old_sha1)) { diff --git a/git-checkout.sh b/git-checkout.sh index ed7c2c5f6a..6b6facfd5a 100755 --- a/git-checkout.sh +++ b/git-checkout.sh @@ -270,15 +270,7 @@ if [ "$?" -eq 0 ]; then fi elif test -n "$detached" then - # NEEDSWORK: we would want a command to detach the HEAD - # atomically, instead of this handcrafted command sequence. - # Perhaps: - # git update-ref --detach HEAD $new - # or something like that... - # - git-rev-parse HEAD >"$GIT_DIR/HEAD.new" && - mv "$GIT_DIR/HEAD.new" "$GIT_DIR/HEAD" && - git-update-ref -m "checkout: moving to $arg" HEAD "$detached" || + git-update-ref --no-deref -m "checkout: moving to $arg" HEAD "$detached" || die "Cannot detach HEAD" if test -n "$detach_warn" then diff --git a/receive-pack.c b/receive-pack.c index 26aa26bcb5..d3c422be58 100644 --- a/receive-pack.c +++ b/receive-pack.c @@ -209,7 +209,7 @@ static const char *update(struct command *cmd) return NULL; /* good */ } else { - lock = lock_any_ref_for_update(name, old_sha1); + lock = lock_any_ref_for_update(name, old_sha1, 0); if (!lock) { error("failed to lock %s", name); return "failed to lock"; diff --git a/refs.c b/refs.c index 89876bff87..2ae3235b2f 100644 --- a/refs.c +++ b/refs.c @@ -736,19 +736,20 @@ static int is_refname_available(const char *ref, const char *oldref, return 1; } -static struct ref_lock *lock_ref_sha1_basic(const char *ref, const unsigned char *old_sha1, int *flag) +static struct ref_lock 
*lock_ref_sha1_basic(const char *ref, const unsigned char *old_sha1, int flags, int *type_p) { char *ref_file; const char *orig_ref = ref; struct ref_lock *lock; struct stat st; int last_errno = 0; + int type; int mustexist = (old_sha1 && !is_null_sha1(old_sha1)); lock = xcalloc(1, sizeof(struct ref_lock)); lock->lock_fd = -1; - ref = resolve_ref(ref, lock->old_sha1, mustexist, flag); + ref = resolve_ref(ref, lock->old_sha1, mustexist, &type); if (!ref && errno == EISDIR) { /* we are trying to lock foo but we used to * have foo/bar which now does not exist; @@ -761,8 +762,10 @@ static struct ref_lock *lock_ref_sha1_basic(const char *ref, const unsigned char error("there are still refs under '%s'", orig_ref); goto error_return; } - ref = resolve_ref(orig_ref, lock->old_sha1, mustexist, flag); + ref = resolve_ref(orig_ref, lock->old_sha1, mustexist, &type); } + if (type_p) + *type_p = type; if (!ref) { last_errno = errno; error("unable to resolve reference %s: %s", @@ -780,10 +783,15 @@ static struct ref_lock *lock_ref_sha1_basic(const char *ref, const unsigned char lock->lk = xcalloc(1, sizeof(struct lock_file)); + if (flags & REF_NODEREF) + ref = orig_ref; lock->ref_name = xstrdup(ref); lock->orig_ref_name = xstrdup(orig_ref); ref_file = git_path("%s", ref); - lock->force_write = lstat(ref_file, &st) && errno == ENOENT; + if (lstat(ref_file, &st) && errno == ENOENT) + lock->force_write = 1; + if ((flags & REF_NODEREF) && (type & REF_ISSYMREF)) + lock->force_write = 1; if (safe_create_leading_directories(ref_file)) { last_errno = errno; @@ -806,14 +814,14 @@ struct ref_lock *lock_ref_sha1(const char *ref, const unsigned char *old_sha1) if (check_ref_format(ref)) return NULL; strcpy(refpath, mkpath("refs/%s", ref)); - return lock_ref_sha1_basic(refpath, old_sha1, NULL); + return lock_ref_sha1_basic(refpath, old_sha1, 0, NULL); } -struct ref_lock *lock_any_ref_for_update(const char *ref, const unsigned char *old_sha1) +struct ref_lock *lock_any_ref_for_update(const 
char *ref, const unsigned char *old_sha1, int flags) { if (check_ref_format(ref) == -1) return NULL; - return lock_ref_sha1_basic(ref, old_sha1, NULL); + return lock_ref_sha1_basic(ref, old_sha1, flags, NULL); } static struct lock_file packlock; @@ -858,7 +866,7 @@ int delete_ref(const char *refname, const unsigned char *sha1) struct ref_lock *lock; int err, i, ret = 0, flag = 0; - lock = lock_ref_sha1_basic(refname, sha1, &flag); + lock = lock_ref_sha1_basic(refname, sha1, 0, &flag); if (!lock) return 1; if (!(flag & REF_ISPACKED)) { @@ -909,7 +917,7 @@ int rename_ref(const char *oldref, const char *newref, const char *logmsg) if (!is_refname_available(newref, oldref, get_loose_refs(), 0)) return 1; - lock = lock_ref_sha1_basic(renamed_ref, NULL, NULL); + lock = lock_ref_sha1_basic(renamed_ref, NULL, 0, NULL); if (!lock) return error("unable to lock %s", renamed_ref); lock->force_write = 1; @@ -963,7 +971,7 @@ int rename_ref(const char *oldref, const char *newref, const char *logmsg) } logmoved = log; - lock = lock_ref_sha1_basic(newref, NULL, NULL); + lock = lock_ref_sha1_basic(newref, NULL, 0, NULL); if (!lock) { error("unable to lock %s for update", newref); goto rollback; @@ -979,7 +987,7 @@ int rename_ref(const char *oldref, const char *newref, const char *logmsg) return 0; rollback: - lock = lock_ref_sha1_basic(oldref, NULL, NULL); + lock = lock_ref_sha1_basic(oldref, NULL, 0, NULL); if (!lock) { error("unable to lock %s for rollback", oldref); goto rollbacklog; diff --git a/refs.h b/refs.h index f61f6d934e..f234eb76ba 100644 --- a/refs.h +++ b/refs.h @@ -33,7 +33,8 @@ extern int get_ref_sha1(const char *ref, unsigned char *sha1); extern struct ref_lock *lock_ref_sha1(const char *ref, const unsigned char *old_sha1); /** Locks any ref (for 'HEAD' type refs). 
*/ -extern struct ref_lock *lock_any_ref_for_update(const char *ref, const unsigned char *old_sha1); +#define REF_NODEREF 0x01 +extern struct ref_lock *lock_any_ref_for_update(const char *ref, const unsigned char *old_sha1, int flags); /** Release any lock taken but not written. **/ extern void unlock_ref(struct ref_lock *lock); From fefe49d134b4dadf3a184ba7a8e48388f3c15a24 Mon Sep 17 00:00:00 2001 From: James Bowes Date: Sat, 5 May 2007 16:48:54 -0400 Subject: [PATCH 7/8] Add colour support in rebase and merge tree diff stats output. The rebase and merge commands used diff-tree to display the summary stats of what files had changed from the operation. diff-tree does not read the diff ui configuration options, so the diff.color setting was not used. Have rebase and merge call diff rather than diff-tree, which does read the diff ui options. Signed-off-by: James Bowes Signed-off-by: Junio C Hamano --- git-merge.sh | 3 ++- git-rebase.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/git-merge.sh b/git-merge.sh index 7ebbce4bdb..351676f6d4 100755 --- a/git-merge.sh +++ b/git-merge.sh @@ -90,7 +90,8 @@ finish () { ?*) case "$no_summary" in '') - git-diff-tree --stat --summary -M "$head" "$1" + # We want color (if set), but no pager + GIT_PAGER='' git-diff --stat --summary -M "$head" "$1" ;; esac ;; diff --git a/git-rebase.sh b/git-rebase.sh index 2dc2c4fe9b..61770b5a28 100755 --- a/git-rebase.sh +++ b/git-rebase.sh @@ -307,7 +307,8 @@ fi if test -n "$verbose" then echo "Changes from $mb to $onto:" - git-diff-tree --stat --summary "$mb" "$onto" + # We want color (if set), but no pager + GIT_PAGER='' git-diff --stat --summary "$mb" "$onto" fi # Rewind the head to "$onto"; this saves our current head in ORIG_HEAD. From 7841ce79854868eaaa146c1d018b17fc4f3320be Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Wed, 16 May 2007 20:09:41 +0300 Subject: [PATCH 8/8] connect: display connection progress Make git notify the user about host resolution/connection attempts. This is useful as a progress indicator on slow links, and it helps reassure the user that there are no firewall problems. Signed-off-by: Michael S. Tsirkin Acked-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-archive.c | 2 +- cache.h | 3 ++- connect.c | 30 ++++++++++++++++++++++++------ fetch-pack.c | 2 +- peek-remote.c | 2 +- send-pack.c | 2 +- 6 files changed, 30 insertions(+), 11 deletions(-) diff --git a/builtin-archive.c b/builtin-archive.c index 7f4e409c99..187491bc17 100644 --- a/builtin-archive.c +++ b/builtin-archive.c @@ -45,7 +45,7 @@ static int run_remote_archiver(const char *remote, int argc, } url = xstrdup(remote); - pid = git_connect(fd, url, exec); + pid = git_connect(fd, url, exec, 0); if (pid < 0) return pid; diff --git a/cache.h b/cache.h index aaeb04a1b9..206bbe4fd5 100644 --- a/cache.h +++ b/cache.h @@ -463,7 +463,8 @@ struct ref { #define REF_HEADS (1u << 1) #define REF_TAGS (1u << 2) -extern pid_t git_connect(int fd[2], char *url, const char *prog); +#define CONNECT_VERBOSE (1u << 0) +extern pid_t git_connect(int fd[2], char *url, const char *prog, int flags); extern int finish_connect(pid_t pid); extern int path_match(const char *path, int nr, char **match); extern int match_refs(struct ref *src, struct ref *dst, struct ref ***dst_tail, diff --git a/connect.c b/connect.c index da89c9cfcf..2a26fdbe0d 100644 --- a/connect.c +++ b/connect.c @@ -394,7 +394,7 @@ static enum protocol get_protocol(const char *name) /* * Returns a connected socket() fd, or else die()s. 
*/ -static int git_tcp_connect_sock(char *host) +static int git_tcp_connect_sock(char *host, int flags) { int sockfd = -1, saved_errno = 0; char *colon, *end; @@ -425,10 +425,16 @@ static int git_tcp_connect_sock(char *host) hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; + if (flags & CONNECT_VERBOSE) + fprintf(stderr, "Looking up %s ... ", host); + gai = getaddrinfo(host, port, &hints, &ai); if (gai) die("Unable to look up %s (port %s) (%s)", host, port, gai_strerror(gai)); + if (flags & CONNECT_VERBOSE) + fprintf(stderr, "done.\nConnecting to %s (port %s) ... ", host, port); + for (ai0 = ai; ai; ai = ai->ai_next) { sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); @@ -450,6 +456,9 @@ static int git_tcp_connect_sock(char *host) if (sockfd < 0) die("unable to connect a socket (%s)", strerror(saved_errno)); + if (flags & CONNECT_VERBOSE) + fprintf(stderr, "done.\n"); + return sockfd; } @@ -458,7 +467,7 @@ static int git_tcp_connect_sock(char *host) /* * Returns a connected socket() fd, or else die()s. */ -static int git_tcp_connect_sock(char *host) +static int git_tcp_connect_sock(char *host, int flags) { int sockfd = -1, saved_errno = 0; char *colon, *end; @@ -485,6 +494,9 @@ static int git_tcp_connect_sock(char *host) port = colon + 1; } + if (flags & CONNECT_VERBOSE) + fprintf(stderr, "Looking up %s ... ", host); + he = gethostbyname(host); if (!he) die("Unable to look up %s (%s)", host, hstrerror(h_errno)); @@ -497,6 +509,9 @@ static int git_tcp_connect_sock(char *host) nport = se->s_port; } + if (flags & CONNECT_VERBOSE) + fprintf(stderr, "done.\nConnecting to %s (port %s) ... 
", host, port); + for (ap = he->h_addr_list; *ap; ap++) { sockfd = socket(he->h_addrtype, SOCK_STREAM, 0); if (sockfd < 0) { @@ -521,15 +536,18 @@ static int git_tcp_connect_sock(char *host) if (sockfd < 0) die("unable to connect a socket (%s)", strerror(saved_errno)); + if (flags & CONNECT_VERBOSE) + fprintf(stderr, "done.\n"); + return sockfd; } #endif /* NO_IPV6 */ -static void git_tcp_connect(int fd[2], char *host) +static void git_tcp_connect(int fd[2], char *host, int flags) { - int sockfd = git_tcp_connect_sock(host); + int sockfd = git_tcp_connect_sock(host, flags); fd[0] = sockfd; fd[1] = dup(sockfd); @@ -646,7 +664,7 @@ static void git_proxy_connect(int fd[2], char *host) * * Does not return a negative value on error; it just dies. */ -pid_t git_connect(int fd[2], char *url, const char *prog) +pid_t git_connect(int fd[2], char *url, const char *prog, int flags) { char *host, *path = url; char *end; @@ -719,7 +737,7 @@ pid_t git_connect(int fd[2], char *url, const char *prog) if (git_use_proxy(host)) git_proxy_connect(fd, host); else - git_tcp_connect(fd, host); + git_tcp_connect(fd, host, flags); /* * Separate original protocol components prog and path * from extended components with a NUL byte. diff --git a/fetch-pack.c b/fetch-pack.c index 06f4aeced4..aa59043c03 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -733,7 +733,7 @@ int main(int argc, char **argv) } if (!dest) usage(fetch_pack_usage); - pid = git_connect(fd, dest, uploadpack); + pid = git_connect(fd, dest, uploadpack, verbose ? 
CONNECT_VERBOSE : 0); if (pid < 0) return 1; if (heads && nr_heads) diff --git a/peek-remote.c b/peek-remote.c index 96bfac498b..ceb787170e 100644 --- a/peek-remote.c +++ b/peek-remote.c @@ -64,7 +64,7 @@ int main(int argc, char **argv) if (!dest || i != argc - 1) usage(peek_remote_usage); - pid = git_connect(fd, dest, uploadpack); + pid = git_connect(fd, dest, uploadpack, 0); if (pid < 0) return 1; ret = peek_remote(fd, flags); diff --git a/send-pack.c b/send-pack.c index d5b51628df..83ee87dcf8 100644 --- a/send-pack.c +++ b/send-pack.c @@ -393,7 +393,7 @@ int main(int argc, char **argv) usage(send_pack_usage); verify_remote_names(nr_heads, heads); - pid = git_connect(fd, dest, receivepack); + pid = git_connect(fd, dest, receivepack, verbose ? CONNECT_VERBOSE : 0); if (pid < 0) return 1; ret = send_pack(fd[0], fd[1], nr_heads, heads);