From 844c11ae259bd33b971b9ca389b3f9619427e9a8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 9 Apr 2007 21:13:29 -0700
Subject: [PATCH 001/109] diff-lib: use ce_mode_from_stat() rather than messing
 with modes manually

The diff helpers used to do the magic mode canonicalization and all the
other special mode handling by hand ("trust executable bit" and "has
symlink support" handling).

That's bogus. Use "ce_mode_from_stat()" that does this all for us.

This is also going to be required when we add support for links to other
git repositories.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 diff-lib.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/diff-lib.c b/diff-lib.c
index 5c5b05bfe3..c6d127346a 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -357,7 +357,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed)
 					continue;
 			}
 			else
-				dpath->mode = canon_mode(st.st_mode);
+				dpath->mode = ntohl(ce_mode_from_stat(ce, st.st_mode));
 
 			while (i < entries) {
 				struct cache_entry *nce = active_cache[i];
@@ -374,8 +374,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed)
 					int mode = ntohl(nce->ce_mode);
 					num_compare_stages++;
 					hashcpy(dpath->parent[stage-2].sha1, nce->sha1);
-					dpath->parent[stage-2].mode =
-						canon_mode(mode);
+					dpath->parent[stage-2].mode = ntohl(ce_mode_from_stat(nce, mode));
 					dpath->parent[stage-2].status =
 						DIFF_STATUS_MODIFIED;
 				}
@@ -424,15 +423,7 @@ int run_diff_files(struct rev_info *revs, int silent_on_removed)
 		if (!changed && !revs->diffopt.find_copies_harder)
 			continue;
 		oldmode = ntohl(ce->ce_mode);
-
-		newmode = canon_mode(st.st_mode);
-		if (!trust_executable_bit &&
-		    S_ISREG(newmode) && S_ISREG(oldmode) &&
-		    ((newmode ^ oldmode) == 0111))
-			newmode = oldmode;
-		else if (!has_symlinks &&
-		    S_ISREG(newmode) && S_ISLNK(oldmode))
-			newmode = oldmode;
+		newmode = ntohl(ce_mode_from_stat(ce, st.st_mode));
 		diff_change(&revs->diffopt, oldmode, newmode,
 			    ce->sha1, (changed ? null_sha1 : ce->sha1),
 			    ce->name, NULL);

From 5d5cea67af386cfd53428f1eb404841eca8e9062 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 9 Apr 2007 21:13:58 -0700
Subject: [PATCH 002/109] Avoid overflowing name buffer in deep directory
 structures

This just makes sure that when we do a read_directory(), we check
that the filename fits in the buffer we allocated (with a bit of
slop)

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 dir.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dir.c b/dir.c
index 7426fde330..4f5a2241e6 100644
--- a/dir.c
+++ b/dir.c
@@ -353,6 +353,9 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
 			     !strcmp(de->d_name + 1, "git")))
 				continue;
 			len = strlen(de->d_name);
+			/* Ignore overly long pathnames! */
+			if (len + baselen + 8 > sizeof(fullname))
+				continue;
 			memcpy(fullname + baselen, de->d_name, len+1);
 			if (simplify_away(fullname, baselen + len, simplify))
 				continue;

From 57059091fad25427bce9b3d47e073ce0518d164b Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:28 -0400
Subject: [PATCH 003/109] get rid of num_packed_objects()

The coming index format change doesn't allow for the number of objects
to be determined from the size of the index file directly.  Instead, let's
initialize a field in the packed_git structure with the object count when
the index is validated since the count is always known at that point.

While at it let's reorder some struct packed_git fields to avoid padding
due to needed 64-bit alignment for some of them.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-count-objects.c |  2 +-
 builtin-fsck.c          |  2 +-
 builtin-pack-objects.c  |  4 ++--
 cache.h                 |  8 ++++----
 pack-check.c            |  4 ++--
 sha1_file.c             | 17 ++++++-----------
 sha1_name.c             |  2 +-
 7 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/builtin-count-objects.c b/builtin-count-objects.c
index 6263d8af29..ff90ebd465 100644
--- a/builtin-count-objects.c
+++ b/builtin-count-objects.c
@@ -111,7 +111,7 @@ int cmd_count_objects(int ac, const char **av, const char *prefix)
 		for (p = packed_git; p; p = p->next) {
 			if (!p->pack_local)
 				continue;
-			packed += num_packed_objects(p);
+			packed += p->num_objects;
 			num_pack++;
 		}
 		printf("count: %lu\n", loose);
diff --git a/builtin-fsck.c b/builtin-fsck.c
index 4d8b66c344..44a02d3120 100644
--- a/builtin-fsck.c
+++ b/builtin-fsck.c
@@ -653,7 +653,7 @@ int cmd_fsck(int argc, char **argv, const char *prefix)
 			verify_pack(p, 0);
 
 		for (p = packed_git; p; p = p->next) {
-			uint32_t i, num = num_packed_objects(p);
+			uint32_t i, num = p->num_objects;
 			for (i = 0; i < num; i++)
 				fsck_sha1(nth_packed_object_sha1(p, i));
 		}
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 45ac3e482a..6bff17b130 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -164,7 +164,7 @@ static int cmp_offset(const void *a_, const void *b_)
 static void prepare_pack_revindex(struct pack_revindex *rix)
 {
 	struct packed_git *p = rix->p;
-	int num_ent = num_packed_objects(p);
+	int num_ent = p->num_objects;
 	int i;
 	const char *index = p->index_data;
 
@@ -198,7 +198,7 @@ static struct revindex_entry * find_packed_object(struct packed_git *p,
 		prepare_pack_revindex(rix);
 	revindex = rix->revindex;
 	lo = 0;
-	hi = num_packed_objects(p) + 1;
+	hi = p->num_objects + 1;
 	do {
 		int mi = (lo + hi) / 2;
 		if (revindex[mi].offset == ofs) {
diff --git a/cache.h b/cache.h
index eb57507b80..5b67f4c989 100644
--- a/cache.h
+++ b/cache.h
@@ -376,11 +376,12 @@ struct pack_window {
 extern struct packed_git {
 	struct packed_git *next;
 	struct pack_window *windows;
-	const void *index_data;
-	off_t index_size;
 	off_t pack_size;
-	time_t mtime;
+	const void *index_data;
+	size_t index_size;
+	uint32_t num_objects;
 	int index_version;
+	time_t mtime;
 	int pack_fd;
 	int pack_local;
 	unsigned char sha1[20];
@@ -431,7 +432,6 @@ extern void pack_report(void);
 extern unsigned char* use_pack(struct packed_git *, struct pack_window **, off_t, unsigned int *);
 extern void unuse_pack(struct pack_window **);
 extern struct packed_git *add_packed_git(const char *, int, int);
-extern uint32_t num_packed_objects(const struct packed_git *p);
 extern const unsigned char *nth_packed_object_sha1(const struct packed_git *, uint32_t);
 extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
 extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
diff --git a/pack-check.c b/pack-check.c
index f58083d11e..d04536bbff 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -40,7 +40,7 @@ static int verify_packfile(struct packed_git *p,
 	 * have verified that nr_objects matches between idx and pack,
 	 * we do not do scan-streaming check on the pack file.
 	 */
-	nr_objects = num_packed_objects(p);
+	nr_objects = p->num_objects;
 	for (i = 0, err = 0; i < nr_objects; i++) {
 		const unsigned char *sha1;
 		void *data;
@@ -79,7 +79,7 @@ static void show_pack_info(struct packed_git *p)
 {
 	uint32_t nr_objects, i, chain_histogram[MAX_CHAIN];
 
-	nr_objects = num_packed_objects(p);
+	nr_objects = p->num_objects;
 	memset(chain_histogram, 0, sizeof(chain_histogram));
 
 	for (i = 0; i < nr_objects; i++) {
diff --git a/sha1_file.c b/sha1_file.c
index 4304fe9bbc..d9ca69a916 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -494,6 +494,7 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
 	p->index_version = 1;
 	p->index_data = idx_map;
 	p->index_size = idx_size;
+	p->num_objects = nr;
 	return 0;
 }
 
@@ -605,11 +606,11 @@ static int open_packed_git_1(struct packed_git *p)
 			p->pack_name, ntohl(hdr.hdr_version));
 
 	/* Verify the pack matches its index. */
-	if (num_packed_objects(p) != ntohl(hdr.hdr_entries))
+	if (p->num_objects != ntohl(hdr.hdr_entries))
 		return error("packfile %s claims to have %u objects"
-			" while index size indicates %u objects",
-			p->pack_name, ntohl(hdr.hdr_entries),
-			num_packed_objects(p));
+			     " while index indicates %u objects",
+			     p->pack_name, ntohl(hdr.hdr_entries),
+			     p->num_objects);
 	if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
 		return error("end of packfile %s is unavailable", p->pack_name);
 	if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
@@ -1526,18 +1527,12 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 	return data;
 }
 
-uint32_t num_packed_objects(const struct packed_git *p)
-{
-	/* See check_packed_git_idx() */
-	return (uint32_t)((p->index_size - 20 - 20 - 4*256) / 24);
-}
-
 const unsigned char *nth_packed_object_sha1(const struct packed_git *p,
 					    uint32_t n)
 {
 	const unsigned char *index = p->index_data;
 	index += 4 * 256;
-	if (num_packed_objects(p) <= n)
+	if (n >= p->num_objects)
 		return NULL;
 	return index + 24 * n + 4;
 }
diff --git a/sha1_name.c b/sha1_name.c
index 267ea3f3ed..b0b12bbe9d 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -76,7 +76,7 @@ static int find_short_packed_object(int len, const unsigned char *match, unsigne
 
 	prepare_packed_git();
 	for (p = packed_git; p && found < 2; p = p->next) {
-		uint32_t num = num_packed_objects(p);
+		uint32_t num = p->num_objects;
 		uint32_t first = 0, last = num;
 		while (first < last) {
 			uint32_t mid = (first + last) / 2;

From 8723f216263ba4a0f06be7b93fada863c0931e09 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:29 -0400
Subject: [PATCH 004/109] make overflow test on delta base offset work
 regardless of variable size

This patch introduces the MSB() macro to obtain the desired number of
most significant bits from a given variable independently of the variable
type.

It is then used to better implement the overflow test on the OBJ_OFS_DELTA
base offset variable with the property of always working correctly
regardless of the type/size of that variable.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c   | 2 +-
 builtin-unpack-objects.c | 2 +-
 git-compat-util.h        | 8 ++++++++
 index-pack.c             | 2 +-
 sha1_file.c              | 2 +-
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 6bff17b130..ee607a0d2c 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1014,7 +1014,7 @@ static void check_object(struct object_entry *entry)
 				ofs = c & 127;
 				while (c & 128) {
 					ofs += 1;
-					if (!ofs || ofs & ~(~0UL >> 7))
+					if (!ofs || MSB(ofs, 7))
 						die("delta base offset overflow in pack for %s",
 						    sha1_to_hex(entry->sha1));
 					c = buf[used_0++];
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index 3956c56334..63f7db6831 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -209,7 +209,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		base_offset = c & 127;
 		while (c & 128) {
 			base_offset += 1;
-			if (!base_offset || base_offset & ~(~0UL >> 7))
+			if (!base_offset || MSB(base_offset, 7))
 				die("offset value overflow for delta base object");
 			pack = fill(1);
 			c = *pack;
diff --git a/git-compat-util.h b/git-compat-util.h
index 139fc19108..bcfcb35ecf 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -13,6 +13,14 @@
 
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
+#ifdef __GNUC__
+#define TYPEOF(x) (__typeof__(x))
+#else
+#define TYPEOF(x)
+#endif
+
+#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits))))
+
 #if !defined(__APPLE__) && !defined(__FreeBSD__)
 #define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
 #define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
diff --git a/index-pack.c b/index-pack.c
index 3c768fbc63..0e54aa6844 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -249,7 +249,7 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_
 		base_offset = c & 127;
 		while (c & 128) {
 			base_offset += 1;
-			if (!base_offset || base_offset & ~(~0UL >> 7))
+			if (!base_offset || MSB(base_offset, 7))
 				bad_object(obj->offset, "offset value overflow for delta base object");
 			p = fill(1);
 			c = *p;
diff --git a/sha1_file.c b/sha1_file.c
index d9ca69a916..ebdd497bad 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1150,7 +1150,7 @@ static off_t get_delta_base(struct packed_git *p,
 		base_offset = c & 127;
 		while (c & 128) {
 			base_offset += 1;
-			if (!base_offset || base_offset & ~(~0UL >> 7))
+			if (!base_offset || MSB(base_offset, 7))
 				die("offset value overflow for delta base object");
 			c = base_info[used++];
 			base_offset = (base_offset << 7) + (c & 127);

From d7dd02231f75604e388afb905f7bf8afd1bf4b24 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:30 -0400
Subject: [PATCH 005/109] add overflow tests on pack offset variables

Change a few size and offset variables to more appropriate type, then
add overflow tests on those offsets.  This prevents bad data from being
generated/processed if off_t happens to not be large enough to handle
some big packs.

Better be safe than sorry.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c   | 19 +++++++++++++------
 builtin-unpack-objects.c | 17 +++++++++++------
 index-pack.c             | 14 ++++++++++----
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index ee607a0d2c..d0be879443 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -369,7 +369,7 @@ static int revalidate_loose_object(struct object_entry *entry,
 	return check_loose_inflate(map, mapsize, size);
 }
 
-static off_t write_object(struct sha1file *f,
+static unsigned long write_object(struct sha1file *f,
 				  struct object_entry *entry)
 {
 	unsigned long size;
@@ -503,16 +503,23 @@ static off_t write_one(struct sha1file *f,
 			       struct object_entry *e,
 			       off_t offset)
 {
+	unsigned long size;
+
+	/* offset is non zero if object is written already. */
 	if (e->offset || e->preferred_base)
-		/* offset starts from header size and cannot be zero
-		 * if it is written already.
-		 */
 		return offset;
-	/* if we are deltified, write out its base object first. */
+
+	/* if we are deltified, write out base object first. */
 	if (e->delta)
 		offset = write_one(f, e->delta, offset);
+
 	e->offset = offset;
-	return offset + write_object(f, e);
+	size = write_object(f, e);
+
+	/* make sure off_t is sufficiently large not to wrap */
+	if (offset > offset + size)
+		die("pack too large for current definition of off_t");
+	return offset + size;
 }
 
 static void write_pack_file(void)
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index 63f7db6831..f821906460 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -13,7 +13,8 @@ static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-fil
 
 /* We always read in 4kB chunks. */
 static unsigned char buffer[4096];
-static unsigned long offset, len, consumed_bytes;
+static unsigned int offset, len;
+static off_t consumed_bytes;
 static SHA_CTX ctx;
 
 /*
@@ -49,6 +50,10 @@ static void use(int bytes)
 		die("used more bytes than were available");
 	len -= bytes;
 	offset += bytes;
+
+	/* make sure off_t is sufficiently large not to wrap */
+	if (consumed_bytes > consumed_bytes + bytes)
+		die("pack too large for current definition of off_t");
 	consumed_bytes += bytes;
 }
 
@@ -88,17 +93,17 @@ static void *get_data(unsigned long size)
 
 struct delta_info {
 	unsigned char base_sha1[20];
-	unsigned long base_offset;
+	unsigned nr;
+	off_t base_offset;
 	unsigned long size;
 	void *delta;
-	unsigned nr;
 	struct delta_info *next;
 };
 
 static struct delta_info *delta_list;
 
 static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
-			      unsigned long base_offset,
+			      off_t base_offset,
 			      void *delta, unsigned long size)
 {
 	struct delta_info *info = xmalloc(sizeof(*info));
@@ -113,7 +118,7 @@ static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
 }
 
 struct obj_info {
-	unsigned long offset;
+	off_t offset;
 	unsigned char sha1[20];
 };
 
@@ -200,7 +205,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	} else {
 		unsigned base_found = 0;
 		unsigned char *pack, c;
-		unsigned long base_offset;
+		off_t base_offset;
 		unsigned lo, mid, hi;
 
 		pack = fill(1);
diff --git a/index-pack.c b/index-pack.c
index 0e54aa6844..66fb0bcedf 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -12,7 +12,7 @@ static const char index_pack_usage[] =
 
 struct object_entry
 {
-	unsigned long offset;
+	off_t offset;
 	unsigned long size;
 	unsigned int hdr_size;
 	enum object_type type;
@@ -22,7 +22,7 @@ struct object_entry
 
 union delta_base {
 	unsigned char sha1[20];
-	unsigned long offset;
+	off_t offset;
 };
 
 /*
@@ -83,7 +83,8 @@ static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc)
 
 /* We always read in 4kB chunks. */
 static unsigned char input_buffer[4096];
-static unsigned long input_offset, input_len, consumed_bytes;
+static unsigned int input_offset, input_len;
+static off_t consumed_bytes;
 static SHA_CTX input_ctx;
 static int input_fd, output_fd, pack_fd;
 
@@ -129,6 +130,10 @@ static void use(int bytes)
 		die("used more bytes than were available");
 	input_len -= bytes;
 	input_offset += bytes;
+
+	/* make sure off_t is sufficiently large not to wrap */
+	if (consumed_bytes > consumed_bytes + bytes)
+		die("pack too large for current definition of off_t");
 	consumed_bytes += bytes;
 }
 
@@ -216,7 +221,8 @@ static void *unpack_entry_data(unsigned long offset, unsigned long size)
 static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
 {
 	unsigned char *p, c;
-	unsigned long size, base_offset;
+	unsigned long size;
+	off_t base_offset;
 	unsigned shift;
 
 	obj->offset = consumed_bytes;

From 78d1e84fe5586ed45740b915b52e6856bb3f1c44 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:31 -0400
Subject: [PATCH 006/109] compute a CRC32 for each object as stored in a pack

The most important optimization for performance when repacking is the
ability to reuse data from a previous pack as is and bypass any delta
or even SHA1 computation by simply copying the raw data from one pack
to another directly.

The problem with this is that any data corruption within a copied object
would go unnoticed and the new (repacked) pack would be self-consistent
with its own checksum despite containing a corrupted object.  This is a
real issue that already happened at least once in the past.

In some attempt to prevent this, we validate the copied data by inflating
it and making sure no error is signaled by zlib.  But this is still not
perfect as a significant portion of a pack content is made of object
headers and references to delta base objects which are not deflated and
therefore not validated when repacking, actually making the pack data reuse
still not as safe as it could be.

Of course a full SHA1 validation could be performed, but that implies
full data inflating and delta replaying which is extremely costly, a
cost the data reuse optimization was designed to avoid in the first place.

So the best solution to this is simply to store a CRC32 of the raw pack
data for each object in the pack index.  This way any object in a pack can
be validated before being copied as is in another pack, including header
and any other non deflated data.

Why CRC32 instead of a faster checksum like Adler32?  Quoting Wikipedia:

   Jonathan Stone discovered in 2001 that Adler-32 has a weakness for very
   short messages. He wrote "Briefly, the problem is that, for very short
   packets, Adler32 is guaranteed to give poor coverage of the available
   bits. Don't take my word for it, ask Mark Adler. :-)" The problem is
   that sum A does not wrap for short messages. The maximum value of A for
   a 128-byte message is 32640, which is below the value 65521 used by the
   modulo operation. An extended explanation can be found in RFC 3309,
   which mandates the use of CRC32 instead of Adler-32 for SCTP, the
   Stream Control Transmission Protocol.

In the context of a GIT pack, we have lots of small objects, especially
deltas, which are likely to be quite small and in a size range for which
Adler32 is deemed not to be sufficient.  Another advantage of CRC32 is the
possibility for recovery from certain types of small corruptions like
single bit errors which are the most probable type of corruptions.

OK what this patch does is to compute the CRC32 of each object written to
a pack within pack-objects.  It is not written to the index yet and it is
obviously not validated when reusing pack data yet either.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c |  6 ++++++
 csum-file.c            | 14 ++++++++++++++
 csum-file.h            |  4 ++++
 3 files changed, 24 insertions(+)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index d0be879443..03e36f0183 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -44,6 +44,7 @@ struct object_entry {
 				 * be used as the base objectto delta huge
 				 * objects against.
 				 */
+	uint32_t crc32;		/* crc of raw pack data for this object */
 };
 
 /*
@@ -381,6 +382,9 @@ static unsigned long write_object(struct sha1file *f,
 	enum object_type obj_type;
 	int to_reuse = 0;
 
+	if (!pack_to_stdout)
+		crc32_begin(f);
+
 	obj_type = entry->type;
 	if (! entry->in_pack)
 		to_reuse = 0;	/* can't reuse what we don't have */
@@ -496,6 +500,8 @@ static unsigned long write_object(struct sha1file *f,
 	if (entry->delta)
 		written_delta++;
 	written++;
+	if (!pack_to_stdout)
+		entry->crc32 = crc32_end(f);
 	return hdrlen + datalen;
 }
 
diff --git a/csum-file.c b/csum-file.c
index b7174c6c05..7c806ada48 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -49,6 +49,8 @@ int sha1close(struct sha1file *f, unsigned char *result, int update)
 
 int sha1write(struct sha1file *f, void *buf, unsigned int count)
 {
+	if (f->do_crc)
+		f->crc32 = crc32(f->crc32, buf, count);
 	while (count) {
 		unsigned offset = f->offset;
 		unsigned left = sizeof(f->buffer) - offset;
@@ -91,6 +93,7 @@ struct sha1file *sha1create(const char *fmt, ...)
 	f->fd = fd;
 	f->error = 0;
 	f->offset = 0;
+	f->do_crc = 0;
 	SHA1_Init(&f->ctx);
 	return f;
 }
@@ -111,6 +114,7 @@ struct sha1file *sha1fd(int fd, const char *name)
 	f->fd = fd;
 	f->error = 0;
 	f->offset = 0;
+	f->do_crc = 0;
 	SHA1_Init(&f->ctx);
 	return f;
 }
@@ -143,4 +147,14 @@ int sha1write_compressed(struct sha1file *f, void *in, unsigned int size)
 	return size;
 }
 
+void crc32_begin(struct sha1file *f)
+{
+	f->crc32 = crc32(0, Z_NULL, 0);
+	f->do_crc = 1;
+}
 
+uint32_t crc32_end(struct sha1file *f)
+{
+	f->do_crc = 0;
+	return f->crc32;
+}
diff --git a/csum-file.h b/csum-file.h
index 3ad1a992a7..7e1339189d 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -7,6 +7,8 @@ struct sha1file {
 	unsigned int offset, namelen;
 	SHA_CTX ctx;
 	char name[PATH_MAX];
+	int do_crc;
+	uint32_t crc32;
 	unsigned char buffer[8192];
 };
 
@@ -15,5 +17,7 @@ extern struct sha1file *sha1create(const char *fmt, ...) __attribute__((format (
 extern int sha1close(struct sha1file *, unsigned char *, int);
 extern int sha1write(struct sha1file *, void *, unsigned int);
 extern int sha1write_compressed(struct sha1file *, void *, unsigned int);
+extern void crc32_begin(struct sha1file *);
+extern uint32_t crc32_end(struct sha1file *);
 
 #endif

From ee5743ce191d4f39f976e76fe4c12bf8fc67a590 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:32 -0400
Subject: [PATCH 007/109] compute object CRC32 with index-pack

Same as previous patch but for index-pack.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 index-pack.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/index-pack.c b/index-pack.c
index 66fb0bcedf..d33f723365 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -15,6 +15,7 @@ struct object_entry
 	off_t offset;
 	unsigned long size;
 	unsigned int hdr_size;
+	uint32_t crc32;
 	enum object_type type;
 	enum object_type real_type;
 	unsigned char sha1[20];
@@ -86,6 +87,7 @@ static unsigned char input_buffer[4096];
 static unsigned int input_offset, input_len;
 static off_t consumed_bytes;
 static SHA_CTX input_ctx;
+static uint32_t input_crc32;
 static int input_fd, output_fd, pack_fd;
 
 /* Discard current buffer used content. */
@@ -128,6 +130,7 @@ static void use(int bytes)
 {
 	if (bytes > input_len)
 		die("used more bytes than were available");
+	input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes);
 	input_len -= bytes;
 	input_offset += bytes;
 
@@ -224,8 +227,10 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_
 	unsigned long size;
 	off_t base_offset;
 	unsigned shift;
+	void *data;
 
 	obj->offset = consumed_bytes;
+	input_crc32 = crc32(0, Z_NULL, 0);
 
 	p = fill(1);
 	c = *p;
@@ -276,7 +281,9 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_
 	}
 	obj->hdr_size = consumed_bytes - obj->offset;
 
-	return unpack_entry_data(obj->offset, obj->size);
+	data = unpack_entry_data(obj->offset, obj->size);
+	obj->crc32 = input_crc32;
+	return data;
 }
 
 static void *get_data_from_pack(struct object_entry *obj)
@@ -521,7 +528,7 @@ static void parse_pack_objects(unsigned char *sha1)
 		fputc('\n', stderr);
 }
 
-static int write_compressed(int fd, void *in, unsigned int size)
+static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_crc)
 {
 	z_stream stream;
 	unsigned long maxsize;
@@ -542,6 +549,7 @@ static int write_compressed(int fd, void *in, unsigned int size)
 
 	size = stream.total_out;
 	write_or_die(fd, out, size);
+	*obj_crc = crc32(*obj_crc, out, size);
 	free(out);
 	return size;
 }
@@ -562,8 +570,10 @@ static void append_obj_to_pack(const unsigned char *sha1, void *buf,
 	}
 	header[n++] = c;
 	write_or_die(output_fd, header, n);
+	obj[0].crc32 = crc32(0, Z_NULL, 0);
+	obj[0].crc32 = crc32(obj[0].crc32, header, n);
 	obj[1].offset = obj[0].offset + n;
-	obj[1].offset += write_compressed(output_fd, buf, size);
+	obj[1].offset += write_compressed(output_fd, buf, size, &obj[0].crc32);
 	hashcpy(obj->sha1, sha1);
 }
 

From c553ca25bd60dc9fd50b8bc7bd329601b81cee66 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:33 -0400
Subject: [PATCH 008/109] pack-objects: learn about pack index version 2

Pack index version 2 goes as follows:

 - 8 bytes of header with signature and version.

 - 256 entries of 4-byte first-level fan-out table.

 - Table of sorted 20-byte SHA1 records for each object in pack.

 - Table of 4-byte CRC32 entries for raw pack object data.

 - Table of 4-byte offset entries for objects in the pack if offset is
   representable with 31 bits or less, otherwise it is an index in the next
   table with top bit set.

 - Table of 8-byte offset entries indexed from previous table for offsets
   which are 32 bits or more (optional).

 - 20-byte SHA1 checksum of sorted object names.

 - 20-byte SHA1 checksum of the above.

The object SHA1 table is all contiguous so future pack format that would
contain this table directly won't require big changes to the code. It is
also tighter for slightly better cache locality when looking up entries.

Support for large packs exceeding 31 bits in size won't impose an index
size bloat for packs within that range that don't need a 64-bit offset.
And because newer objects which are likely to be the most frequently used
are located at the beginning of the pack, they won't pay the 64-bit offset
lookup at run time either even if the pack is large.

Right now an index version 2 is created only when the biggest offset in a
pack reaches 31 bits.  It might be a good idea to always use index version
2 eventually to benefit from the CRC32 it contains when reusing pack data
while repacking.

[jc: with the "oops" fix to keep track of the last offset correctly]

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 99 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 87 insertions(+), 12 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 03e36f0183..8cf2871751 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -169,13 +169,33 @@ static void prepare_pack_revindex(struct pack_revindex *rix)
 	int i;
 	const char *index = p->index_data;
 
-	index += 4 * 256;
 	rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
-	for (i = 0; i < num_ent; i++) {
-		uint32_t hl = *((uint32_t *)(index + 24 * i));
-		rix->revindex[i].offset = ntohl(hl);
-		rix->revindex[i].nr = i;
+	index += 4 * 256;
+
+	if (p->index_version > 1) {
+		const uint32_t *off_32 =
+			(uint32_t *)(index + 8 + p->num_objects * (20 + 4));
+		const uint32_t *off_64 = off_32 + p->num_objects;
+		for (i = 0; i < num_ent; i++) {
+			uint32_t off = ntohl(*off_32++);
+			if (!(off & 0x80000000)) {
+				rix->revindex[i].offset = off;
+			} else {
+				rix->revindex[i].offset =
+					((uint64_t)ntohl(*off_64++)) << 32;
+				rix->revindex[i].offset |=
+					ntohl(*off_64++);
+			}
+			rix->revindex[i].nr = i;
+		}
+	} else {
+		for (i = 0; i < num_ent; i++) {
+			uint32_t hl = *((uint32_t *)(index + 24 * i));
+			rix->revindex[i].offset = ntohl(hl);
+			rix->revindex[i].nr = i;
+		}
 	}
+
 	/* This knows the pack format -- the 20-byte trailer
 	 * follows immediately after the last object data.
 	 */
@@ -528,11 +548,11 @@ static off_t write_one(struct sha1file *f,
 	return offset + size;
 }
 
-static void write_pack_file(void)
+static off_t write_pack_file(void)
 {
 	uint32_t i;
 	struct sha1file *f;
-	off_t offset;
+	off_t offset, last_obj_offset = 0;
 	struct pack_header hdr;
 	unsigned last_percent = 999;
 	int do_progress = progress;
@@ -555,6 +575,7 @@ static void write_pack_file(void)
 	if (!nr_result)
 		goto done;
 	for (i = 0; i < nr_objects; i++) {
+		last_obj_offset = offset;
 		offset = write_one(f, objects + i, offset);
 		if (do_progress) {
 			unsigned percent = written * 100 / nr_result;
@@ -572,9 +593,11 @@ static void write_pack_file(void)
 	if (written != nr_result)
 		die("wrote %u objects while expecting %u", written, nr_result);
 	sha1close(f, pack_file_sha1, 1);
+
+	return last_obj_offset;
 }
 
-static void write_index_file(void)
+static void write_index_file(off_t last_obj_offset)
 {
 	uint32_t i;
 	struct sha1file *f = sha1create("%s-%s.%s", base_name,
@@ -582,6 +605,18 @@ static void write_index_file(void)
 	struct object_entry **list = sorted_by_sha;
 	struct object_entry **last = list + nr_result;
 	uint32_t array[256];
+	uint32_t index_version;
+
+	/* if last object's offset is >= 2^31 we should use index V2 */
+	index_version = (last_obj_offset >> 31) ? 2 : 1;
+
+	/* index versions 2 and above need a header */
+	if (index_version >= 2) {
+		struct pack_idx_header hdr;
+		hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
+		hdr.idx_version = htonl(index_version);
+		sha1write(f, &hdr, sizeof(hdr));
+	}
 
 	/*
 	 * Write the first-level table (the list is sorted,
@@ -607,10 +642,49 @@ static void write_index_file(void)
 	list = sorted_by_sha;
 	for (i = 0; i < nr_result; i++) {
 		struct object_entry *entry = *list++;
-		uint32_t offset = htonl(entry->offset);
-		sha1write(f, &offset, 4);
+		if (index_version < 2) {
+			uint32_t offset = htonl(entry->offset);
+			sha1write(f, &offset, 4);
+		}
 		sha1write(f, entry->sha1, 20);
 	}
+
+	if (index_version >= 2) {
+		unsigned int nr_large_offset = 0;
+
+		/* write the crc32 table */
+		list = sorted_by_sha;
+		for (i = 0; i < nr_objects; i++) {
+			struct object_entry *entry = *list++;
+			uint32_t crc32_val = htonl(entry->crc32);
+			sha1write(f, &crc32_val, 4);
+		}
+
+		/* write the 32-bit offset table */
+		list = sorted_by_sha;
+		for (i = 0; i < nr_objects; i++) {
+			struct object_entry *entry = *list++;
+			uint32_t offset = (entry->offset <= 0x7fffffff) ?
+				entry->offset : (0x80000000 | nr_large_offset++);
+			offset = htonl(offset);
+			sha1write(f, &offset, 4);
+		}
+
+		/* write the large offset table */
+		list = sorted_by_sha;
+		while (nr_large_offset) {
+			struct object_entry *entry = *list++;
+			uint64_t offset = entry->offset;
+			if (offset > 0x7fffffff) {
+				uint32_t split[2];
+				split[0]        = htonl(offset >> 32);
+				split[1] = htonl(offset & 0xffffffff);
+				sha1write(f, split, 8);
+				nr_large_offset--;
+			}
+		}
+	}
+
 	sha1write(f, pack_file_sha1, 20);
 	sha1close(f, NULL, 1);
 }
@@ -1698,6 +1772,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	if (reuse_cached_pack(object_list_sha1))
 		;
 	else {
+		off_t last_obj_offset;
 		if (nr_result)
 			prepare_pack(window, depth);
 		if (progress == 1 && pack_to_stdout) {
@@ -1707,9 +1782,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			signal(SIGALRM, SIG_IGN );
 			progress_update = 0;
 		}
-		write_pack_file();
+		last_obj_offset = write_pack_file();
 		if (!pack_to_stdout) {
-			write_index_file();
+			write_index_file(last_obj_offset);
 			puts(sha1_to_hex(object_list_sha1));
 		}
 	}

From d1a46a9eab2ce136240fe2e8db34b36338052e97 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:34 -0400
Subject: [PATCH 009/109] index-pack: learn about pack index version 2

Like the previous patch, but for index-pack.

[ There is quite some code duplication between pack-objects and index-pack
  for generating a pack index (and fast-import as well I suppose).  This
  should be reworked into a common function eventually. But not now. ]

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 index-pack.c | 66 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 57 insertions(+), 9 deletions(-)

diff --git a/index-pack.c b/index-pack.c
index d33f723365..a833f640f6 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -686,9 +686,10 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1)
 {
 	struct sha1file *f;
 	struct object_entry **sorted_by_sha, **list, **last;
-	unsigned int array[256];
+	uint32_t array[256];
 	int i, fd;
 	SHA_CTX ctx;
+	uint32_t index_version;
 
 	if (nr_objects) {
 		sorted_by_sha =
@@ -699,7 +700,6 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1)
 			sorted_by_sha[i] = &objects[i];
 		qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
 		      sha1_compare);
-
 	}
 	else
 		sorted_by_sha = list = last = NULL;
@@ -718,6 +718,17 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1)
 		die("unable to create %s: %s", index_name, strerror(errno));
 	f = sha1fd(fd, index_name);
 
+	/* if last object's offset is >= 2^31 we should use index V2 */
+	index_version = (objects[nr_objects-1].offset >> 31) ? 2 : 1;
+
+	/* index versions 2 and above need a header */
+	if (index_version >= 2) {
+		struct pack_idx_header hdr;
+		hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
+		hdr.idx_version = htonl(index_version);
+		sha1write(f, &hdr, sizeof(hdr));
+	}
+
 	/*
 	 * Write the first-level table (the list is sorted,
 	 * but we use a 256-entry lookup to be able to avoid
@@ -734,24 +745,61 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1)
 		array[i] = htonl(next - sorted_by_sha);
 		list = next;
 	}
-	sha1write(f, array, 256 * sizeof(int));
+	sha1write(f, array, 256 * 4);
 
-	/* recompute the SHA1 hash of sorted object names.
-	 * currently pack-objects does not do this, but that
-	 * can be fixed.
-	 */
+	/* compute the SHA1 hash of sorted object names. */
 	SHA1_Init(&ctx);
+
 	/*
 	 * Write the actual SHA1 entries..
 	 */
 	list = sorted_by_sha;
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = *list++;
-		unsigned int offset = htonl(obj->offset);
-		sha1write(f, &offset, 4);
+		if (index_version < 2) {
+			uint32_t offset = htonl(obj->offset);
+			sha1write(f, &offset, 4);
+		}
 		sha1write(f, obj->sha1, 20);
 		SHA1_Update(&ctx, obj->sha1, 20);
 	}
+
+	if (index_version >= 2) {
+		unsigned int nr_large_offset = 0;
+
+		/* write the crc32 table */
+		list = sorted_by_sha;
+		for (i = 0; i < nr_objects; i++) {
+			struct object_entry *obj = *list++;
+			uint32_t crc32_val = htonl(obj->crc32);
+			sha1write(f, &crc32_val, 4);
+		}
+
+		/* write the 32-bit offset table */
+		list = sorted_by_sha;
+		for (i = 0; i < nr_objects; i++) {
+			struct object_entry *obj = *list++;
+			uint32_t offset = (obj->offset <= 0x7fffffff) ?
+				obj->offset : (0x80000000 | nr_large_offset++);
+			offset = htonl(offset);
+			sha1write(f, &offset, 4);
+		}
+
+		/* write the large offset table */
+		list = sorted_by_sha;
+		while (nr_large_offset) {
+			struct object_entry *obj = *list++;
+			uint64_t offset = obj->offset;
+			if (offset > 0x7fffffff) {
+				uint32_t split[2];
+				split[0]	= htonl(offset >> 32);
+				split[1] = htonl(offset & 0xffffffff);
+				sha1write(f, split, 8);
+				nr_large_offset--;
+			}
+		}
+	}
+
 	sha1write(f, sha1, 20);
 	sha1close(f, NULL, 1);
 	free(sorted_by_sha);

From 74e34e1fca2ed9998581cc94073bc2dd28bbb8f3 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:35 -0400
Subject: [PATCH 010/109] sha1_file.c: learn about index version 2

With this patch, packs larger than 4GB are usable, even on a 32-bit machine
(at least on Linux).  If off_t is not large enough to deal with a large
pack then die() is called instead of attempting to use the pack and
producing garbage.

This was tested with an 8GB pack specially created for the occasion on
a 32-bit machine.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 sha1_file.c | 118 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 89 insertions(+), 29 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index ebdd497bad..0be9737bd1 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -437,7 +437,7 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
 	void *idx_map;
 	struct pack_idx_header *hdr;
 	size_t idx_size;
-	uint32_t nr, i, *index;
+	uint32_t version, nr, i, *index;
 	int fd = open(path, O_RDONLY);
 	struct stat st;
 
@@ -455,21 +455,23 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
 	idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	close(fd);
 
-	/* a future index format would start with this, as older git
-	 * binaries would fail the non-monotonic index check below.
-	 * give a nicer warning to the user if we can.
-	 */
 	hdr = idx_map;
 	if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
-		munmap(idx_map, idx_size);
-		return error("index file %s is a newer version"
-			" and is not supported by this binary"
-			" (try upgrading GIT to a newer version)",
-			path);
-	}
+		version = ntohl(hdr->idx_version);
+		if (version < 2 || version > 2) {
+			munmap(idx_map, idx_size);
+			return error("index file %s is version %d"
+				     " and is not supported by this binary"
+				     " (try upgrading GIT to a newer version)",
+				     path, version);
+		}
+	} else
+		version = 1;
 
 	nr = 0;
 	index = idx_map;
+	if (version > 1)
+		index += 2;  /* skip index header */
 	for (i = 0; i < 256; i++) {
 		uint32_t n = ntohl(index[i]);
 		if (n < nr) {
@@ -479,19 +481,48 @@ static int check_packed_git_idx(const char *path,  struct packed_git *p)
 		nr = n;
 	}
 
-	/*
-	 * Total size:
-	 *  - 256 index entries 4 bytes each
-	 *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
-	 *  - 20-byte SHA1 of the packfile
-	 *  - 20-byte SHA1 file checksum
-	 */
-	if (idx_size != 4*256 + nr * 24 + 20 + 20) {
-		munmap(idx_map, idx_size);
-		return error("wrong index file size in %s", path);
+	if (version == 1) {
+		/*
+		 * Total size:
+		 *  - 256 index entries 4 bytes each
+		 *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
+		 *  - 20-byte SHA1 of the packfile
+		 *  - 20-byte SHA1 file checksum
+		 */
+		if (idx_size != 4*256 + nr * 24 + 20 + 20) {
+			munmap(idx_map, idx_size);
+			return error("wrong index file size in %s", path);
+		}
+	} else if (version == 2) {
+		/*
+		 * Minimum size:
+		 *  - 8 bytes of header
+		 *  - 256 index entries 4 bytes each
+		 *  - 20-byte sha1 entry * nr
+		 *  - 4-byte crc entry * nr
+		 *  - 4-byte offset entry * nr
+		 *  - 20-byte SHA1 of the packfile
+		 *  - 20-byte SHA1 file checksum
+		 * And after the 4-byte offset table might be a
+		 * variable sized table containing 8-byte entries
+		 * for offsets larger than 2^31.
+		 */
+		unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
+		if (idx_size < min_size || idx_size > min_size + (nr - 1)*8) {
+			munmap(idx_map, idx_size);
+			return error("wrong index file size in %s", path);
+		}
+		if (idx_size != min_size) {
+			/* make sure we can deal with large pack offsets */
+			off_t x = 0x7fffffffUL, y = 0xffffffffUL;
+			if (x > (x + 1) || y > (y + 1)) {
+				munmap(idx_map, idx_size);
+				return error("pack too large for current definition of off_t in %s", path);
+			}
+		}
 	}
 
-	p->index_version = 1;
+	p->index_version = version;
 	p->index_data = idx_map;
 	p->index_size = idx_size;
 	p->num_objects = nr;
@@ -1531,27 +1562,56 @@ const unsigned char *nth_packed_object_sha1(const struct packed_git *p,
 					    uint32_t n)
 {
 	const unsigned char *index = p->index_data;
-	index += 4 * 256;
 	if (n >= p->num_objects)
 		return NULL;
-	return index + 24 * n + 4;
+	index += 4 * 256;
+	if (p->index_version == 1) {
+		return index + 24 * n + 4;
+	} else {
+		index += 8;
+		return index + 20 * n;
+	}
+}
+
+static off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
+{
+	const unsigned char *index = p->index_data;
+	index += 4 * 256;
+	if (p->index_version == 1) {
+		return ntohl(*((uint32_t *)(index + 24 * n)));
+	} else {
+		uint32_t off;
+		index += 8 + p->num_objects * (20 + 4);
+		off = ntohl(*((uint32_t *)(index + 4 * n)));
+		if (!(off & 0x80000000))
+			return off;
+		index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
+		return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
+				   ntohl(*((uint32_t *)(index + 4)));
+	}
 }
 
 off_t find_pack_entry_one(const unsigned char *sha1,
 				  struct packed_git *p)
 {
 	const uint32_t *level1_ofs = p->index_data;
-	int hi = ntohl(level1_ofs[*sha1]);
-	int lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
 	const unsigned char *index = p->index_data;
+	unsigned hi, lo;
 
+	if (p->index_version > 1) {
+		level1_ofs += 2;
+		index += 8;
+	}
 	index += 4 * 256;
+	hi = ntohl(level1_ofs[*sha1]);
+	lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
 
 	do {
-		int mi = (lo + hi) / 2;
-		int cmp = hashcmp(index + 24 * mi + 4, sha1);
+		unsigned mi = (lo + hi) / 2;
+		unsigned x = (p->index_version > 1) ? (mi * 20) : (mi * 24 + 4);
+		int cmp = hashcmp(index + x, sha1);
 		if (!cmp)
-			return ntohl(*((uint32_t *)((char *)index + (24 * mi))));
+			return nth_packed_object_offset(p, mi);
 		if (cmp > 0)
 			hi = mi;
 		else

From 32637cdf4a6a8d806feaa6b018bc4e4c7e2eb366 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:36 -0400
Subject: [PATCH 011/109] show-index.c: learn about index v2

When index v2 is encountered, the CRC32 of each object is also displayed
in parentheses at the end of the line.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 show-index.c | 68 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 59 insertions(+), 9 deletions(-)

diff --git a/show-index.c b/show-index.c
index a30a2de5d1..57ed9e87b7 100644
--- a/show-index.c
+++ b/show-index.c
@@ -1,14 +1,26 @@
 #include "cache.h"
+#include "pack.h"
 
 int main(int argc, char **argv)
 {
 	int i;
 	unsigned nr;
-	unsigned int entry[6];
+	unsigned int version;
 	static unsigned int top_index[256];
 
-	if (fread(top_index, sizeof(top_index), 1, stdin) != 1)
-		die("unable to read index");
+	if (fread(top_index, 2 * 4, 1, stdin) != 1)
+		die("unable to read header");
+	if (top_index[0] == htonl(PACK_IDX_SIGNATURE)) {
+		version = ntohl(top_index[1]);
+		if (version < 2 || version > 2)
+			die("unknown index version");
+		if (fread(top_index, 256 * 4, 1, stdin) != 1)
+			die("unable to read index");
+	} else {
+		version = 1;
+		if (fread(&top_index[2], 254 * 4, 1, stdin) != 1)
+			die("unable to read index");
+	}
 	nr = 0;
 	for (i = 0; i < 256; i++) {
 		unsigned n = ntohl(top_index[i]);
@@ -16,13 +28,51 @@ int main(int argc, char **argv)
 			die("corrupt index file");
 		nr = n;
 	}
-	for (i = 0; i < nr; i++) {
-		unsigned offset;
+	if (version == 1) {
+		for (i = 0; i < nr; i++) {
+			unsigned int offset, entry[6];
 
-		if (fread(entry, 24, 1, stdin) != 1)
-			die("unable to read entry %u/%u", i, nr);
-		offset = ntohl(entry[0]);
-		printf("%u %s\n", offset, sha1_to_hex((void *)(entry+1)));
+			if (fread(entry, 4 + 20, 1, stdin) != 1)
+				die("unable to read entry %u/%u", i, nr);
+			offset = ntohl(entry[0]);
+			printf("%u %s\n", offset, sha1_to_hex((void *)(entry+1)));
+		}
+	} else {
+		unsigned off64_nr = 0;
+		struct {
+			unsigned char sha1[20];
+			uint32_t crc;
+			uint32_t off;
+		} *entries = xmalloc(nr * sizeof(entries[0]));
+		for (i = 0; i < nr; i++)
+			if (fread(entries[i].sha1, 20, 1, stdin) != 1)
+				die("unable to read sha1 %u/%u", i, nr);
+		for (i = 0; i < nr; i++)
+			if (fread(&entries[i].crc, 4, 1, stdin) != 1)
+				die("unable to read crc %u/%u", i, nr);
+		for (i = 0; i < nr; i++)
+			if (fread(&entries[i].off, 4, 1, stdin) != 1)
+				die("unable to read 32b offset %u/%u", i, nr);
+		for (i = 0; i < nr; i++) {
+			uint64_t offset;
+			uint32_t off = ntohl(entries[i].off);
+			if (!(off & 0x80000000)) {
+				offset = off;
+			} else {
+				uint32_t off64[2];
+				if ((off & 0x7fffffff) != off64_nr)
+					die("inconsistent 64b offset index");
+				if (fread(off64, 8, 1, stdin) != 1)
+					die("unable to read 64b offset %u", off64_nr);
+				offset = (((uint64_t)ntohl(off64[0])) << 32) |
+						     ntohl(off64[1]);
+				off64_nr++;
+			}
+			printf("%llu %s (%08x)\n", (unsigned long long) offset,
+			       sha1_to_hex(entries[i].sha1),
+			       ntohl(entries[i].crc));
+		}
+		free(entries);
 	}
 	return 0;
 }

From 8c681e07c91b544756e2631493ebc15abd1e8589 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 01:06:37 -0400
Subject: [PATCH 012/109] pack-redundant.c: learn about index v2

Initially the conversion was made using nth_packed_object_sha1() which
made this file completely index version agnostic. Unfortunately the
overhead was quite significant so I went back to raw index walking but
with selectable base and step values, which brought performance back to
roughly that of the original.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 pack-redundant.c | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/pack-redundant.c b/pack-redundant.c
index 40e579b2d9..87077e150c 100644
--- a/pack-redundant.c
+++ b/pack-redundant.c
@@ -247,16 +247,19 @@ static struct pack_list * pack_list_difference(const struct pack_list *A,
 
 static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 {
-	int p1_off, p2_off;
+	unsigned long p1_off = 0, p2_off = 0, p1_step, p2_step;
 	const unsigned char *p1_base, *p2_base;
 	struct llist_item *p1_hint = NULL, *p2_hint = NULL;
 
-	p1_off = p2_off = 256 * 4 + 4;
 	p1_base = p1->pack->index_data;
 	p2_base = p2->pack->index_data;
+	p1_base += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
+	p2_base += 256 * 4 + ((p2->pack->index_version < 2) ? 4 : 8);
+	p1_step = (p1->pack->index_version < 2) ? 24 : 20;
+	p2_step = (p2->pack->index_version < 2) ? 24 : 20;
 
-	while (p1_off <= p1->pack->index_size - 3 * 20 &&
-	       p2_off <= p2->pack->index_size - 3 * 20)
+	while (p1_off < p1->pack->num_objects * p1_step &&
+	       p2_off < p2->pack->num_objects * p2_step)
 	{
 		int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off);
 		/* cmp ~ p1 - p2 */
@@ -265,14 +268,14 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 					p1_base + p1_off, p1_hint);
 			p2_hint = llist_sorted_remove(p2->unique_objects,
 					p1_base + p1_off, p2_hint);
-			p1_off+=24;
-			p2_off+=24;
+			p1_off += p1_step;
+			p2_off += p2_step;
 			continue;
 		}
 		if (cmp < 0) { /* p1 has the object, p2 doesn't */
-			p1_off+=24;
+			p1_off += p1_step;
 		} else { /* p2 has the object, p1 doesn't */
-			p2_off+=24;
+			p2_off += p2_step;
 		}
 	}
 }
@@ -352,28 +355,31 @@ static int is_superset(struct pack_list *pl, struct llist *list)
 static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 {
 	size_t ret = 0;
-	int p1_off, p2_off;
+	unsigned long p1_off = 0, p2_off = 0, p1_step, p2_step;
 	const unsigned char *p1_base, *p2_base;
 
-	p1_off = p2_off = 256 * 4 + 4;
 	p1_base = p1->index_data;
 	p2_base = p2->index_data;
+	p1_base += 256 * 4 + ((p1->index_version < 2) ? 4 : 8);
+	p2_base += 256 * 4 + ((p2->index_version < 2) ? 4 : 8);
+	p1_step = (p1->index_version < 2) ? 24 : 20;
+	p2_step = (p2->index_version < 2) ? 24 : 20;
 
-	while (p1_off <= p1->index_size - 3 * 20 &&
-	       p2_off <= p2->index_size - 3 * 20)
+	while (p1_off < p1->num_objects * p1_step &&
+	       p2_off < p2->num_objects * p2_step)
 	{
 		int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off);
 		/* cmp ~ p1 - p2 */
 		if (cmp == 0) {
 			ret++;
-			p1_off+=24;
-			p2_off+=24;
+			p1_off += p1_step;
+			p2_off += p2_step;
 			continue;
 		}
 		if (cmp < 0) { /* p1 has the object, p2 doesn't */
-			p1_off+=24;
+			p1_off += p1_step;
 		} else { /* p2 has the object, p1 doesn't */
-			p2_off+=24;
+			p2_off += p2_step;
 		}
 	}
 	return ret;
@@ -535,7 +541,7 @@ static void scan_alt_odb_packs(void)
 static struct pack_list * add_pack(struct packed_git *p)
 {
 	struct pack_list l;
-	size_t off;
+	unsigned long off = 0, step;
 	const unsigned char *base;
 
 	if (!p->pack_local && !(alt_odb || verbose))
@@ -544,11 +550,12 @@ static struct pack_list * add_pack(struct packed_git *p)
 	l.pack = p;
 	llist_init(&l.all_objects);
 
-	off = 256 * 4 + 4;
 	base = p->index_data;
-	while (off <= p->index_size - 3 * 20) {
+	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
+	step = (p->index_version < 2) ? 24 : 20;
+	while (off < p->num_objects * step) {
 		llist_insert_back(l.all_objects, base + off);
-		off += 24;
+		off += step;
 	}
 	/* this list will be pruned in cmp_two_packs later */
 	l.unique_objects = llist_copy(l.all_objects);

From 4ba7d711539122c21bd44af06b6cab4fc4f65a74 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 9 Apr 2007 17:32:03 -0400
Subject: [PATCH 013/109] allow forcing index v2 and 64-bit offset threshold

This is necessary for testing the new capabilities in some automated
way without having an actual 4GB+ pack.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 20 +++++++++++++++++---
 index-pack.c           | 18 +++++++++++++++---
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 8cf2871751..099dea0e1e 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -597,6 +597,9 @@ static off_t write_pack_file(void)
 	return last_obj_offset;
 }
 
+static uint32_t index_default_version = 1;
+static uint32_t index_off32_limit = 0x7fffffff;
+
 static void write_index_file(off_t last_obj_offset)
 {
 	uint32_t i;
@@ -608,7 +611,7 @@ static void write_index_file(off_t last_obj_offset)
 	uint32_t index_version;
 
 	/* if last object's offset is >= 2^31 we should use index V2 */
-	index_version = (last_obj_offset >> 31) ? 2 : 1;
+	index_version = (last_obj_offset >> 31) ? 2 : index_default_version;
 
 	/* index versions 2 and above need a header */
 	if (index_version >= 2) {
@@ -664,7 +667,7 @@ static void write_index_file(off_t last_obj_offset)
 		list = sorted_by_sha;
 		for (i = 0; i < nr_objects; i++) {
 			struct object_entry *entry = *list++;
-			uint32_t offset = (entry->offset <= 0x7fffffff) ?
+			uint32_t offset = (entry->offset <= index_off32_limit) ?
 				entry->offset : (0x80000000 | nr_large_offset++);
 			offset = htonl(offset);
 			sha1write(f, &offset, 4);
@@ -675,7 +678,7 @@ static void write_index_file(off_t last_obj_offset)
 		while (nr_large_offset) {
 			struct object_entry *entry = *list++;
 			uint64_t offset = entry->offset;
-			if (offset > 0x7fffffff) {
+			if (offset > index_off32_limit) {
 				uint32_t split[2];
 				split[0]        = htonl(offset >> 32);
 				split[1] = htonl(offset & 0xffffffff);
@@ -1714,6 +1717,17 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			rp_av[1] = "--objects-edge";
 			continue;
 		}
+		if (!prefixcmp(arg, "--index-version=")) {
+			char *c;
+			index_default_version = strtoul(arg + 16, &c, 10);
+			if (index_default_version > 2)
+				die("bad %s", arg);
+			if (*c == ',')
+				index_off32_limit = strtoul(c+1, &c, 0);
+			if (*c || index_off32_limit & 0x80000000)
+				die("bad %s", arg);
+			continue;
+		}
 		usage(pack_usage);
 	}
 
diff --git a/index-pack.c b/index-pack.c
index a833f640f6..7aad261d48 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -671,6 +671,9 @@ static void readjust_pack_header_and_sha1(unsigned char *sha1)
 	write_or_die(output_fd, sha1, 20);
 }
 
+static uint32_t index_default_version = 1;
+static uint32_t index_off32_limit = 0x7fffffff;
+
 static int sha1_compare(const void *_a, const void *_b)
 {
 	struct object_entry *a = *(struct object_entry **)_a;
@@ -719,7 +722,7 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1)
 	f = sha1fd(fd, index_name);
 
 	/* if last object's offset is >= 2^31 we should use index V2 */
-	index_version = (objects[nr_objects-1].offset >> 31) ? 2 : 1;
+	index_version = (objects[nr_objects-1].offset >> 31) ? 2 : index_default_version;
 
 	/* index versions 2 and above need a header */
 	if (index_version >= 2) {
@@ -779,7 +782,7 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1)
 		list = sorted_by_sha;
 		for (i = 0; i < nr_objects; i++) {
 			struct object_entry *obj = *list++;
-			uint32_t offset = (obj->offset <= 0x7fffffff) ?
+			uint32_t offset = (obj->offset <= index_off32_limit) ?
 				obj->offset : (0x80000000 | nr_large_offset++);
 			offset = htonl(offset);
 			sha1write(f, &offset, 4);
@@ -790,7 +793,7 @@ static const char *write_index_file(const char *index_name, unsigned char *sha1)
 		while (nr_large_offset) {
 			struct object_entry *obj = *list++;
 			uint64_t offset = obj->offset;
-			if (offset > 0x7fffffff) {
+			if (offset > index_off32_limit) {
 				uint32_t split[2];
 				split[0]	= htonl(offset >> 32);
 				split[1] = htonl(offset & 0xffffffff);
@@ -929,6 +932,15 @@ int main(int argc, char **argv)
 				if (index_name || (i+1) >= argc)
 					usage(index_pack_usage);
 				index_name = argv[++i];
+			} else if (!prefixcmp(arg, "--index-version=")) {
+				char *c;
+				index_default_version = strtoul(arg + 16, &c, 10);
+				if (index_default_version > 2)
+					die("bad %s", arg);
+				if (*c == ',')
+					index_off32_limit = strtoul(c+1, &c, 0);
+				if (*c || index_off32_limit & 0x80000000)
+					die("bad %s", arg);
 			} else
 				usage(index_pack_usage);
 			continue;

From 91ecbeca48e675fef8141451edb2c86577f9d63c Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 10 Apr 2007 00:15:41 -0400
Subject: [PATCH 014/109] validate reused pack data with CRC when possible

This replaces the inflate validation with a CRC validation when reusing
data from a pack which uses index version 2.  That makes repacking much
safer against corruptions, and it should be a bit faster too.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 46 +++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 099dea0e1e..687b4b5a99 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -233,12 +233,6 @@ static struct revindex_entry * find_packed_object(struct packed_git *p,
 	die("internal error: pack revindex corrupt");
 }
 
-static off_t find_packed_object_size(struct packed_git *p, off_t ofs)
-{
-	struct revindex_entry *entry = find_packed_object(p, ofs);
-	return entry[1].offset - ofs;
-}
-
 static const unsigned char *find_packed_object_name(struct packed_git *p,
 						    off_t ofs)
 {
@@ -321,6 +315,28 @@ static int check_pack_inflate(struct packed_git *p,
 		stream.total_in == len) ? 0 : -1;
 }
 
+static int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
+			  off_t offset, off_t len, unsigned int nr)
+{
+	const uint32_t *index_crc;
+	uint32_t data_crc = crc32(0, Z_NULL, 0);
+
+	do {
+		unsigned int avail;
+		void *data = use_pack(p, w_curs, offset, &avail);
+		if (avail > len)
+			avail = len;
+		data_crc = crc32(data_crc, data, avail);
+		offset += avail;
+		len -= avail;
+	} while (len);
+
+	index_crc = p->index_data;
+	index_crc += 2 + 256 + p->num_objects * (20/4) + nr;
+
+	return data_crc != ntohl(*index_crc);
+}
+
 static void copy_pack_data(struct sha1file *f,
 		struct packed_git *p,
 		struct pack_window **w_curs,
@@ -485,6 +501,7 @@ static unsigned long write_object(struct sha1file *f,
 	else {
 		struct packed_git *p = entry->in_pack;
 		struct pack_window *w_curs = NULL;
+		struct revindex_entry *revidx;
 		off_t offset;
 
 		if (entry->delta) {
@@ -507,12 +524,17 @@ static unsigned long write_object(struct sha1file *f,
 			hdrlen += 20;
 		}
 
-		offset = entry->in_pack_offset + entry->in_pack_header_size;
-		datalen = find_packed_object_size(p, entry->in_pack_offset)
-				- entry->in_pack_header_size;
-		if (!pack_to_stdout && check_pack_inflate(p, &w_curs,
-				offset, datalen, entry->size))
-			die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
+		offset = entry->in_pack_offset;
+		revidx = find_packed_object(p, offset);
+		datalen = revidx[1].offset - offset;
+		if (!pack_to_stdout && p->index_version > 1 &&
+		    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
+			die("bad packed object CRC for %s", sha1_to_hex(entry->sha1));
+		offset += entry->in_pack_header_size;
+		datalen -= entry->in_pack_header_size;
+		if (!pack_to_stdout && p->index_version == 1 &&
+		    check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
+			die("corrupt packed object for %s", sha1_to_hex(entry->sha1));
 		copy_pack_data(f, p, &w_curs, offset, datalen);
 		unuse_pack(&w_curs);
 		reused++;

From 895a1d1e57c8dfb8470bb742f05f1969b9961e07 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 5 Apr 2007 03:22:54 -0700
Subject: [PATCH 015/109] git-fetch--tool pick-rref

This script helper takes a list of fully qualified refnames and
results from ls-remote and grabs only the lines for the named
refs from the latter.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-fetch--tool.c | 84 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c
index e9d6764550..be341c159f 100644
--- a/builtin-fetch--tool.c
+++ b/builtin-fetch--tool.c
@@ -436,10 +436,87 @@ static int expand_refs_wildcard(const char *ls_remote_result, int numrefs,
 	return 0;
 }
 
+static int pick_rref(int sha1_only, const char *rref, const char *ls_remote_result)
+{
+	int err = 0;
+	int lrr_count = lrr_count, i, pass;
+	const char *cp;
+	struct lrr {
+		const char *line;
+		const char *name;
+		int namelen;
+		int shown;
+	} *lrr_list = lrr_list;
+
+	for (pass = 0; pass < 2; pass++) {
+		/* pass 0 counts and allocates, pass 1 fills... */
+		cp = ls_remote_result;
+		i = 0;
+		while (1) {
+			const char *np;
+			while (*cp && isspace(*cp))
+				cp++;
+			if (!*cp)
+				break;
+			np = strchr(cp, '\n');
+			if (!np)
+				np = cp + strlen(cp);
+			if (pass) {
+				lrr_list[i].line = cp;
+				lrr_list[i].name = cp + 41;
+				lrr_list[i].namelen = np - (cp + 41);
+			}
+			i++;
+			cp = np;
+		}
+		if (!pass) {
+			lrr_count = i;
+			lrr_list = xcalloc(lrr_count, sizeof(*lrr_list));
+		}
+	}
+
+	while (1) {
+		const char *next;
+		int rreflen;
+		int i;
+
+		while (*rref && isspace(*rref))
+			rref++;
+		if (!*rref)
+			break;
+		next = strchr(rref, '\n');
+		if (!next)
+			next = rref + strlen(rref);
+		rreflen = next - rref;
+
+		for (i = 0; i < lrr_count; i++) {
+			struct lrr *lrr = &(lrr_list[i]);
+
+			if (rreflen == lrr->namelen &&
+			    !memcmp(lrr->name, rref, rreflen)) {
+				if (!lrr->shown)
+					printf("%.*s\n",
+					       sha1_only ? 40 : lrr->namelen + 41,
+					       lrr->line);
+				lrr->shown = 1;
+				break;
+			}
+		}
+		if (lrr_count <= i) {
+			error("pick-rref: %.*s not found", rreflen, rref);
+			err = 1;
+		}
+		rref = next;
+	}
+	free(lrr_list);
+	return err;
+}
+
 int cmd_fetch__tool(int argc, const char **argv, const char *prefix)
 {
 	int verbose = 0;
 	int force = 0;
+	int sopt = 0;
 
 	while (1 < argc) {
 		const char *arg = argv[1];
@@ -447,6 +524,8 @@ int cmd_fetch__tool(int argc, const char **argv, const char *prefix)
 			verbose = 1;
 		else if (!strcmp("-f", arg))
 			force = 1;
+		else if (!strcmp("-s", arg))
+			sopt = 1;
 		else
 			break;
 		argc--;
@@ -491,6 +570,11 @@ int cmd_fetch__tool(int argc, const char **argv, const char *prefix)
 			reflist = get_stdin();
 		return parse_reflist(reflist);
 	}
+	if (!strcmp("pick-rref", argv[1])) {
+		if (argc != 4)
+			return error("pick-rref takes 2 args");
+		return pick_rref(sopt, argv[2], argv[3]);
+	}
 	if (!strcmp("expand-refs-wildcard", argv[1])) {
 		const char *reflist;
 		if (argc < 4)

From e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 5 Apr 2007 03:22:55 -0700
Subject: [PATCH 016/109] git-fetch: use fetch--tool pick-rref to avoid local
 fetch from alternate

When we are fetching from a repository that is on a local
filesystem, first check if we have all the objects that we are
going to fetch available locally, by not just checking the tips
of what we are fetching, but with a full reachability analysis
to our existing refs.  In such a case, we do not have to run
git-fetch-pack which would send many needless objects.  This is
especially true when the other repository is an alternate of the
current repository (e.g. perhaps the repository was created by
running "git clone -l -s" from there).

The useless objects transferred used to be discarded when they
were expanded by git-unpack-objects called from git-fetch-pack,
but recent git-fetch-pack prefers to keep the data it receives
from the other end without exploding them into loose objects,
resulting in a pack full of duplicated data when fetching from
your own alternate.

This also uses fetch--tool pick-rref on dumb transport side to
remove a shell loop to do the same.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-fetch.sh | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/git-fetch.sh b/git-fetch.sh
index b04bd553f8..832b20cce6 100755
--- a/git-fetch.sh
+++ b/git-fetch.sh
@@ -177,9 +177,33 @@ fetch_all_at_once () {
 	    git-bundle unbundle "$remote" $rref ||
 	    echo failed "$remote"
 	else
-	  git-fetch-pack --thin $exec $keep $shallow_depth \
-	      $quiet $no_progress "$remote" $rref ||
-	  echo failed "$remote"
+		if	test -d "$remote" &&
+
+			# The remote might be our alternate.  With
+			# this optimization we will bypass fetch-pack
+			# altogether, which means we cannot be doing
+			# the shallow stuff at all.
+			test ! -f "$GIT_DIR/shallow" &&
+			test -z "$shallow_depth" &&
+
+			# See if all of what we are going to fetch are
+			# connected to our repository's tips, in which
+			# case we do not have to do any fetch.
+			theirs=$(git-fetch--tool -s pick-rref \
+					"$rref" "$ls_remote_result") &&
+
+			# This will barf when $theirs reach an object that
+			# we do not have in our repository.  Otherwise,
+			# we already have everything the fetch would bring in.
+			git-rev-list --objects $theirs --not --all \
+				>/dev/null 2>/dev/null
+		then
+			git-fetch--tool pick-rref "$rref" "$ls_remote_result"
+		else
+			git-fetch-pack --thin $exec $keep $shallow_depth \
+				$quiet $no_progress "$remote" $rref ||
+			echo failed "$remote"
+		fi
 	fi
       ) |
       (
@@ -239,16 +263,8 @@ fetch_per_ref () {
 	  fi
 
 	  # Find $remote_name from ls-remote output.
-	  head=$(
-		IFS='	'
-		echo "$ls_remote_result" |
-		while read sha1 name
-		do
-			test "z$name" = "z$remote_name" || continue
-			echo "$sha1"
-			break
-		done
-	  )
+	  head=$(git-fetch--tool -s pick-rref \
+			"$remote_name" "$ls_remote_result")
 	  expr "z$head" : "z$_x40\$" >/dev/null ||
 		die "No such ref $remote_name at $remote"
 	  echo >&2 "Fetching $remote_name from $remote using $proto"

From 0ebde32c87da2efac5985a808e6bd4130831b7a8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 9 Apr 2007 21:14:26 -0700
Subject: [PATCH 017/109] Add 'resolve_gitlink_ref()' helper function

This new function resolves a ref in *another* git repository.  It's
named for its intended use: to look up the git link to a subproject.

It's not actually wired up to anything yet, but we're getting closer to
having fundamental plumbing support for "links" from one git directory
to another, which is the basis of subproject support.

[jc: amended a FILE* leak]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 refs.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 refs.h |  3 +++
 2 files changed, 83 insertions(+)

diff --git a/refs.c b/refs.c
index d2b7b7fb56..11a67a8c86 100644
--- a/refs.c
+++ b/refs.c
@@ -215,6 +215,86 @@ static struct ref_list *get_loose_refs(void)
 
 /* We allow "recursive" symbolic refs. Only within reason, though */
 #define MAXDEPTH 5
+#define MAXREFLEN (1024)
+
+static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refname, unsigned char *result)
+{
+	FILE *f;
+	struct cached_refs refs;
+	struct ref_list *ref;
+	int retval;
+
+	strcpy(name + pathlen, "packed-refs");
+	f = fopen(name, "r");
+	if (!f)
+		return -1;
+	read_packed_refs(f, &refs);
+	fclose(f);
+	ref = refs.packed;
+	retval = -1;
+	while (ref) {
+		if (!strcmp(ref->name, refname)) {
+			retval = 0;
+			memcpy(result, ref->sha1, 20);
+			break;
+		}
+		ref = ref->next;
+	}
+	free_ref_list(refs.packed);
+	return retval;
+}
+
+static int resolve_gitlink_ref_recursive(char *name, int pathlen, const char *refname, unsigned char *result, int recursion)
+{
+	int fd, len = strlen(refname);
+	char buffer[128], *p;
+
+	if (recursion > MAXDEPTH || len > MAXREFLEN)
+		return -1;
+	memcpy(name + pathlen, refname, len+1);
+	fd = open(name, O_RDONLY);
+	if (fd < 0)
+		return resolve_gitlink_packed_ref(name, pathlen, refname, result);
+
+	len = read(fd, buffer, sizeof(buffer)-1);
+	close(fd);
+	if (len < 0)
+		return -1;
+	while (len && isspace(buffer[len-1]))
+		len--;
+	buffer[len] = 0;
+
+	/* Was it a detached head or an old-fashioned symlink? */
+	if (!get_sha1_hex(buffer, result))
+		return 0;
+
+	/* Symref? */
+	if (strncmp(buffer, "ref:", 4))
+		return -1;
+	p = buffer + 4;
+	while (isspace(*p))
+		p++;
+
+	return resolve_gitlink_ref_recursive(name, pathlen, p, result, recursion+1);
+}
+
+int resolve_gitlink_ref(const char *path, const char *refname, unsigned char *result)
+{
+	int len = strlen(path), retval;
+	char *gitdir;
+
+	while (len && path[len-1] == '/')
+		len--;
+	if (!len)
+		return -1;
+	gitdir = xmalloc(len + MAXREFLEN + 8);
+	memcpy(gitdir, path, len);
+	memcpy(gitdir + len, "/.git/", 7);
+
+	retval = resolve_gitlink_ref_recursive(gitdir, len+6, refname, result, 0);
+	free(gitdir);
+	return retval;
+}
 
 const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int *flag)
 {
diff --git a/refs.h b/refs.h
index acedffc0e4..f61f6d934e 100644
--- a/refs.h
+++ b/refs.h
@@ -60,4 +60,7 @@ extern int check_ref_format(const char *target);
 /** rename ref, return 0 on success **/
 extern int rename_ref(const char *oldref, const char *newref, const char *logmsg);
 
+/** resolve ref in nested "gitlink" repository */
+extern int resolve_gitlink_ref(const char *name, const char *refname, unsigned char *result);
+
 #endif /* REFS_H */

From 9eec4795d44439cd170fb52c73827c728252648d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 9 Apr 2007 21:14:58 -0700
Subject: [PATCH 018/109] Add "S_IFDIRLNK" file mode infrastructure for git
 links

This just adds the basic helper functions to recognize and work with git
tree entries that are links to other git repositories ("subprojects").
They still aren't actually connected up to any of the code-paths, but
now all the infrastructure is in place.

The next commit will start actually adding actual subproject support.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 cache.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index eb57507b80..1b3d00ee11 100644
--- a/cache.h
+++ b/cache.h
@@ -24,6 +24,22 @@
 #define DTYPE(de)	DT_UNKNOWN
 #endif
 
+/*
+ * A "directory link" is a link to another git directory.
+ *
+ * The value 0160000 is not normally a valid mode, and
+ * also just happens to be S_IFDIR + S_IFLNK
+ *
+ * NOTE! We *really* shouldn't depend on the S_IFxxx macros
+ * always having the same values everywhere. We should use
+ * our internal git values for these things, and then we can
+ * translate that to the OS-specific value. It just so
+ * happens that everybody shares the same bit representation
+ * in the UNIX world (and apparently wider too..)
+ */
+#define S_IFDIRLNK	0160000
+#define S_ISDIRLNK(m)	(((m) & S_IFMT) == S_IFDIRLNK)
+
 /*
  * Intensive research over the course of many years has shown that
  * port 9418 is totally unused by anything else. Or
@@ -104,6 +120,8 @@ static inline unsigned int create_ce_mode(unsigned int mode)
 {
 	if (S_ISLNK(mode))
 		return htonl(S_IFLNK);
+	if (S_ISDIR(mode) || S_ISDIRLNK(mode))
+		return htonl(S_IFDIRLNK);
 	return htonl(S_IFREG | ce_permissions(mode));
 }
 static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned int mode)
@@ -121,7 +139,7 @@ static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned in
 }
 #define canon_mode(mode) \
 	(S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \
-	S_ISLNK(mode) ? S_IFLNK : S_IFDIR)
+	S_ISLNK(mode) ? S_IFLNK : S_ISDIR(mode) ? S_IFDIR : S_IFDIRLNK)
 
 #define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7)
 

From 8d9721c86b0169c282ad1c5528317eafeb7fb0f7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 9 Apr 2007 21:15:29 -0700
Subject: [PATCH 019/109] Teach "fsck" not to follow subproject links

Since the subprojects don't necessarily even exist in the current tree,
much less in the current git repository (they are totally independent
repositories), we do not want to try to follow the chain from one git
repository to another through a gitlink.

This involves teaching fsck to ignore references to gitlink objects from
a tree and from the current index.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-fsck.c |  9 ++++++++-
 tree.c         | 15 ++++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/builtin-fsck.c b/builtin-fsck.c
index 4d8b66c344..f22de8deaa 100644
--- a/builtin-fsck.c
+++ b/builtin-fsck.c
@@ -253,6 +253,7 @@ static int fsck_tree(struct tree *item)
 		case S_IFREG | 0644:
 		case S_IFLNK:
 		case S_IFDIR:
+		case S_IFDIRLNK:
 			break;
 		/*
 		 * This is nonstandard, but we had a few of these
@@ -695,8 +696,14 @@ int cmd_fsck(int argc, char **argv, const char *prefix)
 		int i;
 		read_cache();
 		for (i = 0; i < active_nr; i++) {
-			struct blob *blob = lookup_blob(active_cache[i]->sha1);
+			unsigned int mode;
+			struct blob *blob;
 			struct object *obj;
+
+			mode = ntohl(active_cache[i]->ce_mode);
+			if (S_ISDIRLNK(mode))
+				continue;
+			blob = lookup_blob(active_cache[i]->sha1);
 			if (!blob)
 				continue;
 			obj = &blob->object;
diff --git a/tree.c b/tree.c
index d188c0fbae..dbb63fc525 100644
--- a/tree.c
+++ b/tree.c
@@ -143,6 +143,14 @@ struct tree *lookup_tree(const unsigned char *sha1)
 	return (struct tree *) obj;
 }
 
+/*
+ * NOTE! Tree refs to external git repositories
+ * (ie gitlinks) do not count as real references.
+ *
+ * You don't have to have those repositories
+ * available at all, much less have the objects
+ * accessible from the current repository.
+ */
 static void track_tree_refs(struct tree *item)
 {
 	int n_refs = 0, i;
@@ -152,8 +160,11 @@ static void track_tree_refs(struct tree *item)
 
 	/* Count how many entries there are.. */
 	init_tree_desc(&desc, item->buffer, item->size);
-	while (tree_entry(&desc, &entry))
+	while (tree_entry(&desc, &entry)) {
+		if (S_ISDIRLNK(entry.mode))
+			continue;
 		n_refs++;
+	}
 
 	/* Allocate object refs and walk it again.. */
 	i = 0;
@@ -162,6 +173,8 @@ static void track_tree_refs(struct tree *item)
 	while (tree_entry(&desc, &entry)) {
 		struct object *obj;
 
+		if (S_ISDIRLNK(entry.mode))
+			continue;
 		if (S_ISDIR(entry.mode))
 			obj = &lookup_tree(entry.sha1)->object;
 		else

From f35a6d3bce79c2995bbf0a3bd9fcad29e54a8d3c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 9 Apr 2007 21:20:29 -0700
Subject: [PATCH 020/109] Teach core object handling functions about gitlinks

This teaches the really fundamental core SHA1 object handling routines
about gitlinks.  We can compare trees with gitlinks in them (although we
can not actually generate patches for them yet - just raw git diffs),
and they show up as commits in "git ls-tree".

We also know to compare gitlinks as if they were directories (ie the
normal "sort as trees" rules apply).

[jc: amended a cut&paste error]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-ls-tree.c | 20 +++++++++++++++++++-
 cache-tree.c      |  2 +-
 read-cache.c      | 35 +++++++++++++++++++++++++++++++----
 sha1_file.c       |  3 +++
 4 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/builtin-ls-tree.c b/builtin-ls-tree.c
index 6472610ac2..1cb4dca277 100644
--- a/builtin-ls-tree.c
+++ b/builtin-ls-tree.c
@@ -6,6 +6,7 @@
 #include "cache.h"
 #include "blob.h"
 #include "tree.h"
+#include "commit.h"
 #include "quote.h"
 #include "builtin.h"
 
@@ -59,7 +60,24 @@ static int show_tree(const unsigned char *sha1, const char *base, int baselen,
 	int retval = 0;
 	const char *type = blob_type;
 
-	if (S_ISDIR(mode)) {
+	if (S_ISDIRLNK(mode)) {
+		/*
+		 * Maybe we want to have some recursive version here?
+		 *
+		 * Something like:
+		 *
+		if (show_subprojects(base, baselen, pathname)) {
+			if (fork()) {
+				chdir(base);
+				exec ls-tree;
+			}
+			waitpid();
+		}
+		 *
+		 * ..or similar..
+		 */
+		type = commit_type;
+	} else if (S_ISDIR(mode)) {
 		if (show_recursive(base, baselen, pathname)) {
 			retval = READ_TREE_RECURSIVE;
 			if (!(ls_options & LS_SHOW_TREES))
diff --git a/cache-tree.c b/cache-tree.c
index 9b73c8669a..6369cc7c53 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -326,7 +326,7 @@ static int update_one(struct cache_tree *it,
 			mode = ntohl(ce->ce_mode);
 			entlen = pathlen - baselen;
 		}
-		if (!missing_ok && !has_sha1_file(sha1))
+		if (mode != S_IFDIRLNK && !missing_ok && !has_sha1_file(sha1))
 			return error("invalid object %s", sha1_to_hex(sha1));
 
 		if (!ce->ce_mode)
diff --git a/read-cache.c b/read-cache.c
index 54573ce2ee..f458f50458 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -5,6 +5,7 @@
  */
 #include "cache.h"
 #include "cache-tree.h"
+#include "refs.h"
 
 /* Index extensions.
  *
@@ -91,6 +92,23 @@ static int ce_compare_link(struct cache_entry *ce, size_t expected_size)
 	return match;
 }
 
+static int ce_compare_gitlink(struct cache_entry *ce)
+{
+	unsigned char sha1[20];
+
+	/*
+	 * We don't actually require that the .git directory
+	 * under DIRLNK directory be a valid git directory. It
+	 * might even be missing (in case nobody populated that
+	 * sub-project).
+	 *
+	 * If so, we consider it always to match.
+	 */
+	if (resolve_gitlink_ref(ce->name, "HEAD", sha1) < 0)
+		return 0;
+	return hashcmp(sha1, ce->sha1);
+}
+
 static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
 {
 	switch (st->st_mode & S_IFMT) {
@@ -102,6 +120,9 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
 		if (ce_compare_link(ce, xsize_t(st->st_size)))
 			return DATA_CHANGED;
 		break;
+	case S_IFDIRLNK:
+		/* No need to do anything, we did the exact compare in "match_stat_basic" */
+		break;
 	default:
 		return TYPE_CHANGED;
 	}
@@ -127,6 +148,12 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
 		    (has_symlinks || !S_ISREG(st->st_mode)))
 			changed |= TYPE_CHANGED;
 		break;
+	case S_IFDIRLNK:
+		if (!S_ISDIR(st->st_mode))
+			changed |= TYPE_CHANGED;
+		else if (ce_compare_gitlink(ce))
+			changed |= DATA_CHANGED;
+		break;
 	default:
 		die("internal error: ce_mode is %o", ntohl(ce->ce_mode));
 	}
@@ -250,9 +277,9 @@ int base_name_compare(const char *name1, int len1, int mode1,
 		return cmp;
 	c1 = name1[len];
 	c2 = name2[len];
-	if (!c1 && S_ISDIR(mode1))
+	if (!c1 && (S_ISDIR(mode1) || S_ISDIRLNK(mode1)))
 		c1 = '/';
-	if (!c2 && S_ISDIR(mode2))
+	if (!c2 && (S_ISDIR(mode2) || S_ISDIRLNK(mode2)))
 		c2 = '/';
 	return (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0;
 }
@@ -334,8 +361,8 @@ int add_file_to_cache(const char *path, int verbose)
 	if (lstat(path, &st))
 		die("%s: unable to stat (%s)", path, strerror(errno));
 
-	if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode))
-		die("%s: can only add regular files or symbolic links", path);
+	if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode) && !S_ISDIR(st.st_mode))
+		die("%s: can only add regular files, symbolic links or git-directories", path);
 
 	namelen = strlen(path);
 	size = cache_entry_size(namelen);
diff --git a/sha1_file.c b/sha1_file.c
index 4304fe9bbc..ab915faa6b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -13,6 +13,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "tree.h"
+#include "refs.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -2332,6 +2333,8 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write
 				     path);
 		free(target);
 		break;
+	case S_IFDIR:
+		return resolve_gitlink_ref(path, "HEAD", sha1);
 	default:
 		return error("%s: unsupported file type", path);
 	}

From 1833a925484675b328d5df04ffca62efa7a0a012 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 11 Apr 2007 14:39:12 -0700
Subject: [PATCH 021/109] Fix thinko in subproject entry sorting

This fixes a total thinko in my original series: subprojects do *not* sort
like directories, because the index is sorted purely by full pathname, and
since a subproject shows up in the index as a normal NUL-terminated
string, it never has the issues with sorting with the '/' at the end.

So if you have a subproject "proj" and a file "proj.c", the subproject
sorts alphabetically before the file in the index (and must thus also sort
that way in a tree object, since trees sort as the index).

In contrast, if you have two files "proj/file" and "proj.c", the "proj.c"
will sort alphabetically before "proj/file" in the index. The index
itself, of course, does not actually contain an entry "proj/", but in the
*tree* that gets written out, the tree entry "proj" will sort after the
file entry "proj.c", which is the only real magic sorting rule.

In other words: the magic sorting rule only affects tree entries, and
*only* affects tree entries that point to other trees (ie are of the type
S_IFDIR).

Anyway, that thinko just means that we should remove the special case to
make S_ISDIRLNK entries sort like S_ISDIR entries. They don't.  They sort
like normal files.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 read-cache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index f458f50458..b8b6d11dba 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -277,9 +277,9 @@ int base_name_compare(const char *name1, int len1, int mode1,
 		return cmp;
 	c1 = name1[len];
 	c2 = name2[len];
-	if (!c1 && (S_ISDIR(mode1) || S_ISDIRLNK(mode1)))
+	if (!c1 && S_ISDIR(mode1))
 		c1 = '/';
-	if (!c2 && (S_ISDIR(mode2) || S_ISDIRLNK(mode2)))
+	if (!c2 && S_ISDIR(mode2))
 		c2 = '/';
 	return (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0;
 }

From 095952585c2a955f45deac69df17a702d7584c80 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 11 Apr 2007 14:49:44 -0700
Subject: [PATCH 022/109] Teach directory traversal about subprojects

This is the promised cleaned-up version of teaching directory traversal
(ie the "read_directory()" logic) about subprojects. That makes "git add"
understand to add/update subprojects.

It now knows to look at the index file to see if a directory is marked as
a subproject, and use that as information as to whether it should be recursed
into or not.

It also generally cleans up the handling of directory entries when
traversing the working tree, by splitting up the decision-making process
into small functions of their own, and adding a fair number of comments.

Finally, it teaches "add_file_to_cache()" that directory names can have
slashes at the end, since the directory traversal adds them to make the
difference between a file and a directory clear (it always did that, but
my previous too-ugly-to-apply subproject patch had a totally different
path for subproject directories and avoided the slash for that case).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 dir.c        | 133 ++++++++++++++++++++++++++++++++++++++++++++-------
 dir.h        |   3 +-
 read-cache.c |   4 ++
 3 files changed, 121 insertions(+), 19 deletions(-)

diff --git a/dir.c b/dir.c
index 4f5a2241e6..7b91501255 100644
--- a/dir.c
+++ b/dir.c
@@ -7,12 +7,17 @@
  */
 #include "cache.h"
 #include "dir.h"
+#include "refs.h"
 
 struct path_simplify {
 	int len;
 	const char *path;
 };
 
+static int read_directory_recursive(struct dir_struct *dir,
+	const char *path, const char *base, int baselen,
+	int check_only, const struct path_simplify *simplify);
+
 int common_prefix(const char **pathspec)
 {
 	const char *path, *slash, *next;
@@ -286,15 +291,109 @@ struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int
 	return ent;
 }
 
-static int dir_exists(const char *dirname, int len)
+enum exist_status {
+	index_nonexistent = 0,
+	index_directory,
+	index_gitdir,
+};
+
+/*
+ * The index sorts alphabetically by entry name, which
+ * means that a gitlink sorts as '\0' at the end, while
+ * a directory (which is defined not as an entry, but as
+ * the files it contains) will sort with the '/' at the
+ * end.
+ */
+static enum exist_status directory_exists_in_index(const char *dirname, int len)
 {
 	int pos = cache_name_pos(dirname, len);
-	if (pos >= 0)
-		return 1;
-	pos = -pos-1;
-	if (pos >= active_nr) /* can't */
-		return 0;
-	return !strncmp(active_cache[pos]->name, dirname, len);
+	if (pos < 0)
+		pos = -pos-1;
+	while (pos < active_nr) {
+		struct cache_entry *ce = active_cache[pos++];
+		unsigned char endchar;
+
+		if (strncmp(ce->name, dirname, len))
+			break;
+		endchar = ce->name[len];
+		if (endchar > '/')
+			break;
+		if (endchar == '/')
+			return index_directory;
+		if (!endchar && S_ISDIRLNK(ntohl(ce->ce_mode)))
+			return index_gitdir;
+	}
+	return index_nonexistent;
+}
+
+/*
+ * When we find a directory when traversing the filesystem, we
+ * have three distinct cases:
+ *
+ *  - ignore it
+ *  - see it as a directory
+ *  - recurse into it
+ *
+ * and which one we choose depends on a combination of existing
+ * git index contents and the flags passed into the directory
+ * traversal routine.
+ *
+ * Case 1: If we *already* have entries in the index under that
+ * directory name, we always recurse into the directory to see
+ * all the files.
+ *
+ * Case 2: If we *already* have that directory name as a gitlink,
+ * we always continue to see it as a gitlink, regardless of whether
+ * there is an actual git directory there or not (it might not
+ * be checked out as a subproject!)
+ *
+ * Case 3: if we didn't have it in the index previously, we
+ * have a few sub-cases:
+ *
+ *  (a) if "show_other_directories" is true, we show it as
+ *      just a directory, unless "hide_empty_directories" is
+ *      also true and the directory is empty, in which case
+ *      we just ignore it entirely.
+ *  (b) if it looks like a git directory, and we don't have
+ *      'no_dirlinks' set we treat it as a gitlink, and show it
+ *      as a directory.
+ *  (c) otherwise, we recurse into it.
+ */
+enum directory_treatment {
+	show_directory,
+	ignore_directory,
+	recurse_into_directory,
+};
+
+static enum directory_treatment treat_directory(struct dir_struct *dir,
+	const char *dirname, int len,
+	const struct path_simplify *simplify)
+{
+	/* The "len-1" is to strip the final '/' */
+	switch (directory_exists_in_index(dirname, len-1)) {
+	case index_directory:
+		return recurse_into_directory;
+
+	case index_gitdir:
+		return show_directory;
+
+	case index_nonexistent:
+		if (dir->show_other_directories)
+			break;
+		if (!dir->no_dirlinks) {
+			unsigned char sha1[20];
+			if (resolve_gitlink_ref(dirname, "HEAD", sha1) == 0)
+				return show_directory;
+		}
+		return recurse_into_directory;
+	}
+
+	/* This is the "show_other_directories" case */
+	if (!dir->hide_empty_directories)
+		return show_directory;
+	if (!read_directory_recursive(dir, dirname, dirname, len, 1, simplify))
+		return ignore_directory;
+	return show_directory;
 }
 
 /*
@@ -380,19 +479,17 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
 			case DT_DIR:
 				memcpy(fullname + baselen + len, "/", 2);
 				len++;
-				if (dir->show_other_directories &&
-				    !dir_exists(fullname, baselen + len)) {
-					if (dir->hide_empty_directories &&
-					    !read_directory_recursive(dir,
-						    fullname, fullname,
-						    baselen + len, 1, simplify))
-						continue;
+				switch (treat_directory(dir, fullname, baselen + len, simplify)) {
+				case show_directory:
 					break;
+				case recurse_into_directory:
+					contents += read_directory_recursive(dir,
+						fullname, fullname, baselen + len, 0, simplify);
+					continue;
+				case ignore_directory:
+					continue;
 				}
-
-				contents += read_directory_recursive(dir,
-					fullname, fullname, baselen + len, 0, simplify);
-				continue;
+				break;
 			case DT_REG:
 			case DT_LNK:
 				break;
diff --git a/dir.h b/dir.h
index 33c31f25fb..817c674da1 100644
--- a/dir.h
+++ b/dir.h
@@ -33,7 +33,8 @@ struct dir_struct {
 	int nr, alloc;
 	unsigned int show_ignored:1,
 		     show_other_directories:1,
-		     hide_empty_directories:1;
+		     hide_empty_directories:1,
+		     no_dirlinks:1;
 	struct dir_entry **entries;
 
 	/* Exclude info */
diff --git a/read-cache.c b/read-cache.c
index b8b6d11dba..e4c628f927 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -365,6 +365,10 @@ int add_file_to_cache(const char *path, int verbose)
 		die("%s: can only add regular files, symbolic links or git-directories", path);
 
 	namelen = strlen(path);
+	if (S_ISDIR(st.st_mode)) {
+		while (namelen && path[namelen-1] == '/')
+			namelen--;
+	}
 	size = cache_entry_size(namelen);
 	ce = xcalloc(1, size);
 	memcpy(ce->name, path, namelen);

From 2dca1af44806924946828e2fdc82ea408353d286 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 11 Apr 2007 13:59:51 -0400
Subject: [PATCH 023/109] simple random data generator for tests

Reliance on /dev/urandom produces test vectors that are, well, random.
This can cause problems impossible to track down when the data is
different from one test invocation to another.

The goal is not to have random data to test, but rather to have a
convenient way to create sets of large files with non compressible and
non deltifiable data in a reproducible way.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 .gitignore       |  1 +
 Makefile         |  7 +++++--
 test-genrandom.c | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)
 create mode 100644 test-genrandom.c

diff --git a/.gitignore b/.gitignore
index 9229e918cd..fa7ac93594 100644
--- a/.gitignore
+++ b/.gitignore
@@ -149,6 +149,7 @@ test-chmtime
 test-date
 test-delta
 test-dump-cache-tree
+test-genrandom
 test-match-trees
 common-cmds.h
 *.tar.gz
diff --git a/Makefile b/Makefile
index a77d31de98..bd0ba95b1a 100644
--- a/Makefile
+++ b/Makefile
@@ -932,7 +932,7 @@ endif
 
 export NO_SVN_TESTS
 
-test: all test-chmtime$X
+test: all test-chmtime$X test-genrandom$X
 	$(MAKE) -C t/ all
 
 test-date$X: test-date.c date.o ctype.o
@@ -953,6 +953,9 @@ test-match-trees$X: test-match-trees.o $(GITLIBS)
 test-chmtime$X: test-chmtime.c
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $<
 
+test-genrandom$X: test-genrandom.c
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $<
+
 check-sha1:: test-sha1$X
 	./test-sha1.sh
 
@@ -1041,7 +1044,7 @@ dist-doc:
 
 clean:
 	rm -f *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \
-		test-chmtime$X $(LIB_FILE) $(XDIFF_LIB)
+		test-chmtime$X test-genrandom$X $(LIB_FILE) $(XDIFF_LIB)
 	rm -f $(ALL_PROGRAMS) $(BUILT_INS) git$X
 	rm -f *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags
 	rm -rf autom4te.cache
diff --git a/test-genrandom.c b/test-genrandom.c
new file mode 100644
index 0000000000..8cefe6cfed
--- /dev/null
+++ b/test-genrandom.c
@@ -0,0 +1,34 @@
+/*
+ * Simple random data generator used to create reproducible test files.
+ * This is inspired from POSIX.1-2001 implementation example for rand().
+ * Copyright (C) 2007 by Nicolas Pitre, licensed under the GPL version 2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[])
+{
+	unsigned long count, next = 0;
+	unsigned char *c;
+
+	if (argc < 2 || argc > 3) {
+		fprintf( stderr, "Usage: %s <seed_string> [<size>]", argv[0]);
+		return 1;
+	}
+
+	c = (unsigned char *) argv[1];
+	do {
+		next = next * 11 + *c;
+	} while (*c++);
+
+	count = (argc == 3) ? strtoul(argv[2], NULL, 0) : -1L;
+
+	while (count--) {
+		next = next * 1103515245 + 12345;
+		if (putchar((next >> 16) & 0xff) == EOF)
+			return -1;
+	}
+
+	return 0;
+}

From 39551b6926ef869e15af41f4eaa9356da98b2b5e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 11 Apr 2007 13:35:13 -0400
Subject: [PATCH 024/109] use test-genrandom in tests instead of /dev/urandom

This way tests are completely deterministic and possibly more portable.

Signed-off-by: Nicolas Pitre <nico@cam.org>
---
 t/t5301-sliding-window.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t5301-sliding-window.sh b/t/t5301-sliding-window.sh
index a6dbb04a86..fce77f1255 100755
--- a/t/t5301-sliding-window.sh
+++ b/t/t5301-sliding-window.sh
@@ -12,7 +12,7 @@ test_expect_success \
      for i in a b c
      do
          echo $i >$i &&
-         dd if=/dev/urandom bs=32k count=1 >>$i &&
+         test-genrandom "$i" 32768 >>$i &&
          git-update-index --add $i || return 1
      done &&
      echo d >d && cat c >>d && git-update-index --add d &&

From 6e5417769c587c8b1f3412e19a1f80645fe82a3c Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 10 Apr 2007 16:26:10 -0400
Subject: [PATCH 025/109] tests for various pack index features

This is a fairly complete list of tests for various aspects of pack
index versions 1 and 2.

Tests on index v2 include 32-bit and 64-bit offsets, as well as a nice
demonstration of the flawed repacking integrity checks that index
version 2 intends to solve over index version 1 with the per object CRC.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t5302-pack-index.sh | 146 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100755 t/t5302-pack-index.sh

diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh
new file mode 100755
index 0000000000..232e5f1964
--- /dev/null
+++ b/t/t5302-pack-index.sh
@@ -0,0 +1,146 @@
+#!/bin/sh
+#
+# Copyright (c) 2007 Nicolas Pitre
+#
+
+test_description='pack index with 64-bit offsets and object CRC'
+. ./test-lib.sh
+
+test_expect_success \
+    'setup' \
+    'rm -rf .git
+     git-init &&
+     for i in `seq -w 100`
+     do
+         echo $i >file_$i &&
+         test-genrandom "$i" 8192 >>file_$i &&
+         git-update-index --add file_$i || return 1
+     done &&
+     echo 101 >file_101 && tail -c 8192 file_100 >>file_101 &&
+     git-update-index --add file_101 &&
+     tree=`git-write-tree` &&
+     commit=`git-commit-tree $tree </dev/null` && {
+	 echo $tree &&
+	 git-ls-tree $tree | sed -e "s/.* \\([0-9a-f]*\\)	.*/\\1/"
+     } >obj-list &&
+     git-update-ref HEAD $commit'
+
+test_expect_success \
+    'pack-objects with index version 1' \
+    'pack1=$(git-pack-objects --index-version=1 test-1 <obj-list) &&
+     git-verify-pack -v "test-1-${pack1}.pack"'
+
+test_expect_success \
+    'pack-objects with index version 2' \
+    'pack2=$(git-pack-objects --index-version=2 test-2 <obj-list) &&
+     git-verify-pack -v "test-2-${pack2}.pack"'
+
+test_expect_success \
+    'both packs should be identical' \
+    'cmp "test-1-${pack1}.pack" "test-2-${pack2}.pack"'
+
+test_expect_failure \
+    'index v1 and index v2 should be different' \
+    'cmp "test-1-${pack1}.idx" "test-2-${pack2}.idx"'
+
+test_expect_success \
+    'index-pack with index version 1' \
+    'git-index-pack --index-version=1 -o 1.idx "test-1-${pack1}.pack"'
+
+test_expect_success \
+    'index-pack with index version 2' \
+    'git-index-pack --index-version=2 -o 2.idx "test-1-${pack1}.pack"'
+
+test_expect_success \
+    'index-pack results should match pack-objects ones' \
+    'cmp "test-1-${pack1}.idx" "1.idx" &&
+     cmp "test-2-${pack2}.idx" "2.idx"'
+
+test_expect_success \
+    'index v2: force some 64-bit offsets with pack-objects' \
+    'pack3=$(git-pack-objects --index-version=2,0x40000 test-3 <obj-list) &&
+     git-verify-pack -v "test-3-${pack3}.pack"'
+
+test_expect_failure \
+    '64-bit offsets: should be different from previous index v2 results' \
+    'cmp "test-2-${pack2}.idx" "test-3-${pack3}.idx"'
+
+test_expect_success \
+    'index v2: force some 64-bit offsets with index-pack' \
+    'git-index-pack --index-version=2,0x40000 -o 3.idx "test-1-${pack1}.pack"'
+
+test_expect_success \
+    '64-bit offsets: index-pack result should match pack-objects one' \
+    'cmp "test-3-${pack3}.idx" "3.idx"'
+
+test_expect_success \
+    '[index v1] 1) stream pack to repository' \
+    'git-index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" &&
+     git-prune-packed &&
+     git-count-objects | ( read nr rest && test "$nr" -eq 1 ) &&
+     cmp "test-1-${pack1}.pack" ".git/objects/pack/pack-${pack1}.pack" &&
+     cmp "test-1-${pack1}.idx"  ".git/objects/pack/pack-${pack1}.idx"'
+
+test_expect_success \
+    '[index v1] 2) create a stealth corruption in a delta base reference' \
+    '# this test assumes a delta smaller than 16 bytes at the end of the pack
+     git-show-index <1.idx | sort -n | tail -n 1 | (
+       read delta_offs delta_sha1 &&
+       git-cat-file blob "$delta_sha1" > blob_1 &&
+       chmod +w ".git/objects/pack/pack-${pack1}.pack" &&
+       dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \
+	  if=".git/objects/pack/pack-${pack1}.idx" skip=$((256 * 4 + 4)) \
+	  bs=1 count=20 conv=notrunc &&
+       git-cat-file blob "$delta_sha1" > blob_2 )'
+
+test_expect_failure \
+    '[index v1] 3) corrupted delta happily returned wrong data' \
+    'cmp blob_1 blob_2'
+
+test_expect_failure \
+    '[index v1] 4) confirm that the pack is actually corrupted' \
+    'git-fsck --full $commit'
+
+test_expect_success \
+    '[index v1] 5) pack-objects happily reuses corrupted data' \
+    'pack4=$(git-pack-objects test-4 <obj-list) &&
+     test -f "test-4-${pack1}.pack"'
+
+test_expect_failure \
+    '[index v1] 6) newly created pack is BAD !' \
+    'git-verify-pack -v "test-4-${pack1}.pack"'
+
+test_expect_success \
+    '[index v2] 1) stream pack to repository' \
+    'rm -f .git/objects/pack/* &&
+     git-index-pack --index-version=2,0x40000 --stdin < "test-1-${pack1}.pack" &&
+     git-prune-packed &&
+     git-count-objects | ( read nr rest && test "$nr" -eq 1 ) &&
+     cmp "test-1-${pack1}.pack" ".git/objects/pack/pack-${pack1}.pack" &&
+     cmp "test-3-${pack1}.idx"  ".git/objects/pack/pack-${pack1}.idx"'
+
+test_expect_success \
+    '[index v2] 2) create a stealth corruption in a delta base reference' \
+    '# this test assumes a delta smaller than 16 bytes at the end of the pack
+     git-show-index <1.idx | sort -n | tail -n 1 | (
+       read delta_offs delta_sha1 delta_crc &&
+       git-cat-file blob "$delta_sha1" > blob_3 &&
+       chmod +w ".git/objects/pack/pack-${pack1}.pack" &&
+       dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \
+	  if=".git/objects/pack/pack-${pack1}.idx" skip=$((8 + 256 * 4)) \
+	  bs=1 count=20 conv=notrunc &&
+       git-cat-file blob "$delta_sha1" > blob_4 )'
+
+test_expect_failure \
+    '[index v2] 3) corrupted delta happily returned wrong data' \
+    'cmp blob_3 blob_4'
+
+test_expect_failure \
+    '[index v2] 4) confirm that the pack is actually corrupted' \
+    'git-fsck --full $commit'
+
+test_expect_failure \
+    '[index v2] 5) pack-objects refuses to reuse corrupted data' \
+    'git-pack-objects test-5 <obj-list'
+
+test_done

From 29b734e4788143603adf8046174219bac67794e0 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 10 Apr 2007 22:54:36 -0400
Subject: [PATCH 026/109] clean up add_object_entry()

This function used to call locate_object_entry_hash() _twice_ per added
object while only once should suffice. Let's reorganize that code a bit.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 52 ++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 687b4b5a99..bc5f2329a8 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -781,12 +781,19 @@ static unsigned name_hash(const char *name)
 
 static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
 {
-	uint32_t idx = nr_objects;
 	struct object_entry *entry;
-	struct packed_git *p;
+	struct packed_git *p, *found_pack = NULL;
 	off_t found_offset = 0;
-	struct packed_git *found_pack = NULL;
-	int ix, status = 0;
+	int ix;
+
+	ix = nr_objects ? locate_object_entry_hash(sha1) : -1;
+	if (ix >= 0) {
+		if (exclude) {
+			entry = objects + object_ix[ix] - 1;
+			entry->preferred_base = 1;
+		}
+		return 0;
+	}
 
 	if (!exclude) {
 		for (p = packed_git; p; p = p->next) {
@@ -803,43 +810,34 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
 			}
 		}
 	}
-	if ((entry = locate_object_entry(sha1)) != NULL)
-		goto already_added;
 
-	if (idx >= nr_alloc) {
-		nr_alloc = (idx + 1024) * 3 / 2;
+	if (nr_objects >= nr_alloc) {
+		nr_alloc = (nr_alloc  + 1024) * 3 / 2;
 		objects = xrealloc(objects, nr_alloc * sizeof(*entry));
 	}
-	entry = objects + idx;
-	nr_objects = idx + 1;
+
+	entry = objects + nr_objects++;
 	memset(entry, 0, sizeof(*entry));
 	hashcpy(entry->sha1, sha1);
 	entry->hash = hash;
+	if (exclude)
+		entry->preferred_base = 1;
+	if (found_pack) {
+		entry->in_pack = found_pack;
+		entry->in_pack_offset = found_offset;
+	}
 
 	if (object_ix_hashsz * 3 <= nr_objects * 4)
 		rehash_objects();
-	else {
-		ix = locate_object_entry_hash(entry->sha1);
-		if (0 <= ix)
-			die("internal error in object hashing.");
-		object_ix[-1 - ix] = idx + 1;
-	}
-	status = 1;
+	else
+		object_ix[-1 - ix] = nr_objects;
 
- already_added:
 	if (progress_update) {
 		fprintf(stderr, "Counting objects...%u\r", nr_objects);
 		progress_update = 0;
 	}
-	if (exclude)
-		entry->preferred_base = 1;
-	else {
-		if (found_pack) {
-			entry->in_pack = found_pack;
-			entry->in_pack_offset = found_offset;
-		}
-	}
-	return status;
+
+	return 1;
 }
 
 struct pbase_tree_cache {

From e011054b0fdcd1e29d85cdde7ffa5d5c125bd753 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 12 Apr 2007 12:29:40 -0700
Subject: [PATCH 027/109] Teach git-update-index about gitlinks

I finally got around to looking at Alex' patch to teach update-index about
gitlinks too, so that "git commit -a" along with any other explicit
update-index scripts can work.

I don't think there was anything wrong with Alex' patch, but the code he
patched I felt was just so ugly that the added cases just pushed it over
the edge. Especially as I don't think that patch necessarily did the right
thing for a gitlink entry that already existed in the index, but that
wasn't actually a real git repository in the working tree (just an empty
subdirectory or a non-git snapshot because it hadn't wanted to track that
particular subproject).

So I ended up deciding to clean up the git-update-index handling the same
way I tackled the directory traversal used by git-add earlier: by
splitting the different cases up into multiple smaller functions, and just
making the code easier to read (and adding more comments about the
different cases).

So this replaces the old "process_file()" with a new "process_path()"
function that then just calls out to different helper functions depending
on what kind of path it is. Processing a nondirectory ends up being just
one of the simpler cases.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-update-index.c | 200 ++++++++++++++++++++++++++++-------------
 1 file changed, 138 insertions(+), 62 deletions(-)

diff --git a/builtin-update-index.c b/builtin-update-index.c
index 47d42ed645..c4eaa94998 100644
--- a/builtin-update-index.c
+++ b/builtin-update-index.c
@@ -9,6 +9,7 @@
 #include "cache-tree.h"
 #include "tree-walk.h"
 #include "builtin.h"
+#include "refs.h"
 
 /*
  * Default to not allowing changes to the list of files. The
@@ -60,76 +61,151 @@ static int mark_valid(const char *path)
 	return -1;
 }
 
-static int process_file(const char *path)
+static int remove_one_path(const char *path)
 {
-	int size, namelen, option, status;
-	struct cache_entry *ce;
-	struct stat st;
+	if (!allow_remove)
+		return error("%s: does not exist and --remove not passed", path);
+	if (remove_file_from_cache(path))
+		return error("%s: cannot remove from the index", path);
+	return 0;
+}
 
-	status = lstat(path, &st);
+/*
+ * Handle a path that couldn't be lstat'ed; "err" is the errno
+ * value passed in by the caller. It's either:
+ *  - missing file (ENOENT or ENOTDIR). That's ok if we're
+ *    supposed to be removing it and the removal succeeds.
+ *  - any other error (e.g. permission). That's never ok.
+ */
+static int process_lstat_error(const char *path, int err)
+{
+	if (err == ENOENT || err == ENOTDIR)
+		return remove_one_path(path);
+	return error("lstat(\"%s\"): %s", path, strerror(err));
+}
+
+static int add_one_path(struct cache_entry *old, const char *path, int len, struct stat *st)
+{
+	int option, size = cache_entry_size(len);
+	struct cache_entry *ce = xcalloc(1, size);
+
+	memcpy(ce->name, path, len);
+	ce->ce_flags = htons(len);
+	fill_stat_cache_info(ce, st);
+	ce->ce_mode = ce_mode_from_stat(old, st->st_mode);
+
+	if (index_path(ce->sha1, path, st, !info_only))
+		return -1;
+	option = allow_add ? ADD_CACHE_OK_TO_ADD : 0;
+	option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0;
+	if (add_cache_entry(ce, option))
+		return error("%s: cannot add to the index - missing --add option?", path);
+	return 0;
+}
+
+/*
+ * Handle a path that was a directory. Four cases:
+ *
+ *  - it's already a gitlink in the index, and we keep it that
+ *    way, and update it if we can (if we cannot find the HEAD,
+ *    we're going to keep it unchanged in the index!)
+ *
+ *  - it's a *file* in the index, in which case it should be
+ *    removed as a file if removal is allowed, since it doesn't
+ *    exist as such any more. If removal isn't allowed, it's
+ *    an error.
+ *
+ *    (NOTE! This is old and arguably fairly strange behaviour.
+ *    We might want to make this an error unconditionally, and
+ *    use "--force-remove" if you actually want to force removal).
+ *
+ *  - it used to exist as a subdirectory (ie multiple files with
+ *    this particular prefix) in the index, in which case it's wrong
+ *    to try to update it as a directory.
+ *
+ *  - it doesn't exist at all in the index, but it is a valid
+ *    git directory, and it should be *added* as a gitlink.
+ */
+static int process_directory(const char *path, int len, struct stat *st)
+{
+	unsigned char sha1[20];
+	int pos = cache_name_pos(path, len);
+
+	/* Exact match: file or existing gitlink */
+	if (pos >= 0) {
+		struct cache_entry *ce = active_cache[pos];
+		if (S_ISDIRLNK(ntohl(ce->ce_mode))) {
+
+			/* Do nothing to the index if there is no HEAD! */
+			if (resolve_gitlink_ref(path, "HEAD", sha1) < 0)
+				return 0;
+
+			return add_one_path(ce, path, len, st);
+		}
+		/* Should this be an unconditional error? */
+		return remove_one_path(path);
+	}
+
+	/* Inexact match: is there perhaps a subdirectory match? */
+	pos = -pos-1;
+	while (pos < active_nr) {
+		struct cache_entry *ce = active_cache[pos++];
+
+		if (strncmp(ce->name, path, len))
+			break;
+		if (ce->name[len] > '/')
+			break;
+		if (ce->name[len] < '/')
+			continue;
+
+		/* Subdirectory match - error out */
+		return error("%s: is a directory - add individual files instead", path);
+	}
+
+	/* No match - should we add it as a gitlink? */
+	if (!resolve_gitlink_ref(path, "HEAD", sha1))
+		return add_one_path(NULL, path, len, st);
+
+	/* Error out. */
+	return error("%s: is a directory - add files inside instead", path);
+}
+
+/*
+ * Process a regular file
+ */
+static int process_file(const char *path, int len, struct stat *st)
+{
+	int pos = cache_name_pos(path, len);
+	struct cache_entry *ce = pos < 0 ? NULL : active_cache[pos];
+
+	if (ce && S_ISDIRLNK(ntohl(ce->ce_mode)))
+		return error("%s is already a gitlink, not replacing", path);
+
+	return add_one_path(ce, path, len, st);
+}
+
+static int process_path(const char *path)
+{
+	int len;
+	struct stat st;
 
 	/* We probably want to do this in remove_file_from_cache() and
 	 * add_cache_entry() instead...
 	 */
 	cache_tree_invalidate_path(active_cache_tree, path);
 
-	if (status < 0 || S_ISDIR(st.st_mode)) {
-		/* When we used to have "path" and now we want to add
-		 * "path/file", we need a way to remove "path" before
-		 * being able to add "path/file".  However,
-		 * "git-update-index --remove path" would not work.
-		 * --force-remove can be used but this is more user
-		 * friendly, especially since we can do the opposite
-		 * case just fine without --force-remove.
-		 */
-		if (status == 0 || (errno == ENOENT || errno == ENOTDIR)) {
-			if (allow_remove) {
-				if (remove_file_from_cache(path))
-					return error("%s: cannot remove from the index",
-					             path);
-				else
-					return 0;
-			} else if (status < 0) {
-				return error("%s: does not exist and --remove not passed",
-				             path);
-			}
-		}
-		if (0 == status)
-			return error("%s: is a directory - add files inside instead",
-			             path);
-		else
-			return error("lstat(\"%s\"): %s", path,
-				     strerror(errno));
-	}
+	/*
+	 * First things first: get the stat information, to decide
+	 * what to do about the pathname!
+	 */
+	if (lstat(path, &st) < 0)
+		return process_lstat_error(path, errno);
 
-	namelen = strlen(path);
-	size = cache_entry_size(namelen);
-	ce = xcalloc(1, size);
-	memcpy(ce->name, path, namelen);
-	ce->ce_flags = htons(namelen);
-	fill_stat_cache_info(ce, &st);
+	len = strlen(path);
+	if (S_ISDIR(st.st_mode))
+		return process_directory(path, len, &st);
 
-	if (trust_executable_bit && has_symlinks)
-		ce->ce_mode = create_ce_mode(st.st_mode);
-	else {
-		/* If there is an existing entry, pick the mode bits and type
-		 * from it, otherwise assume unexecutable regular file.
-		 */
-		struct cache_entry *ent;
-		int pos = cache_name_pos(path, namelen);
-
-		ent = (0 <= pos) ? active_cache[pos] : NULL;
-		ce->ce_mode = ce_mode_from_stat(ent, st.st_mode);
-	}
-
-	if (index_path(ce->sha1, path, &st, !info_only))
-		return -1;
-	option = allow_add ? ADD_CACHE_OK_TO_ADD : 0;
-	option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0;
-	if (add_cache_entry(ce, option))
-		return error("%s: cannot add to the index - missing --add option?",
-			     path);
-	return 0;
+	return process_file(path, len, &st);
 }
 
 static int add_cacheinfo(unsigned int mode, const unsigned char *sha1,
@@ -210,8 +286,8 @@ static void update_one(const char *path, const char *prefix, int prefix_length)
 		report("remove '%s'", path);
 		goto free_return;
 	}
-	if (process_file(p))
-		die("Unable to process file %s", path);
+	if (process_path(p))
+		die("Unable to process path %s", path);
 	report("add '%s'", path);
  free_return:
 	if (p < path || p > path + strlen(path))

From ab22aed3b7517c6390cb622b368bfcf503b7a37a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 12 Apr 2007 14:32:21 -0700
Subject: [PATCH 028/109] Don't show gitlink directories when we want "other"
 files

When "show_other_directories" is set, that implies that we are looking
for untracked files, which obviously means that we should ignore
directories that are marked as gitlinks in the index.

This fixes "git status" in a superproject, that would otherwise always
report that subprojects were "Untracked files:"

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 dir.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dir.c b/dir.c
index 7b91501255..6564a929ff 100644
--- a/dir.c
+++ b/dir.c
@@ -375,6 +375,8 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
 		return recurse_into_directory;
 
 	case index_gitdir:
+		if (dir->show_other_directories)
+			return ignore_directory;
 		return show_directory;
 
 	case index_nonexistent:

From ea376fa7f2c209593bcc63008a2831fe013d4e39 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 12 Apr 2007 21:03:39 -0700
Subject: [PATCH 029/109] Teach git list-objects logic not to follow gitlinks

This allows us to pack superprojects and thus clone them (but not yet
check them out on the receiving side - that's the next patch)

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 list-objects.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/list-objects.c b/list-objects.c
index 2ba2c958e0..310f8d3908 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -25,6 +25,37 @@ static void process_blob(struct rev_info *revs,
 	add_object(obj, p, path, name);
 }
 
+/*
+ * Processing a gitlink entry currently does nothing, since
+ * we do not recurse into the subproject.
+ *
+ * We *could* eventually add a flag that actually does that,
+ * which would involve:
+ *  - is the subproject actually checked out?
+ *  - if so, see if the subproject has already been added
+ *    to the alternates list, and add it if not.
+ *  - process the commit (or tag) the gitlink points to
+ *    recursively.
+ *
+ * However, it's unclear whether there is really ever any
+ * reason to see superprojects and subprojects as such a
+ * "unified" object pool (potentially resulting in a totally
+ * humongous pack - avoiding which was the whole point of
+ * having gitlinks in the first place!).
+ *
+ * So for now, there is just a note that we *could* follow
+ * the link, and how to do it. Whether it necessarily makes
+ * any sense what-so-ever to ever do that is another issue.
+ */
+static void process_gitlink(struct rev_info *revs,
+			    const unsigned char *sha1,
+			    struct object_array *p,
+			    struct name_path *path,
+			    const char *name)
+{
+	/* Nothing to do */
+}
+
 static void process_tree(struct rev_info *revs,
 			 struct tree *tree,
 			 struct object_array *p,
@@ -56,6 +87,9 @@ static void process_tree(struct rev_info *revs,
 			process_tree(revs,
 				     lookup_tree(entry.sha1),
 				     p, &me, entry.path);
+		else if (S_ISDIRLNK(entry.mode))
+			process_gitlink(revs, entry.sha1,
+					p, &me, entry.path);
 		else
 			process_blob(revs,
 				     lookup_blob(entry.sha1),

From 9129e056fb021df45d98c9472b6029456941a508 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 12 Apr 2007 21:08:52 -0700
Subject: [PATCH 030/109] Teach "git-read-tree -u" to check out submodules as a
 directory

This actually allows us to check out a supermodule after cloning, although
the submodules will obviously not be checked out, and will just be an
empty subdirectory.

[ Side note: this also shows that we currently don't correctly handle
  such subprojects that aren't checked out correctly yet.  They should
  always show up as not being modified, but failing to resolve the
  gitlink HEAD does not properly trigger the "not modified" logic in all
  places it needs to..

  So more work to be done, but that's a separate issue, unrelated to
  the action of checking out the superproject. ]

The bulk of this patch is simply because we need to check the type of the
index entry *before* we try to read the object it points to, and that
meant that the code needed some re-organization. So I moved some of the
code in common to both symlinks and files to be a trivial helper function.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 entry.c | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/entry.c b/entry.c
index d72f811580..9545e895d0 100644
--- a/entry.c
+++ b/entry.c
@@ -62,26 +62,33 @@ static int create_file(const char *path, unsigned int mode)
 	return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
 }
 
+static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned long *size)
+{
+	enum object_type type;
+	void *new = read_sha1_file(ce->sha1, &type, size);
+
+	if (new) {
+		if (type == OBJ_BLOB)
+			return new;
+		free(new);
+	}
+	return NULL;
+}
+
 static int write_entry(struct cache_entry *ce, char *path, struct checkout *state, int to_tempfile)
 {
 	int fd;
-	void *new;
-	unsigned long size;
 	long wrote;
-	enum object_type type;
 
-	new = read_sha1_file(ce->sha1, &type, &size);
-	if (!new || type != OBJ_BLOB) {
-		if (new)
-			free(new);
-		return error("git-checkout-index: unable to read sha1 file of %s (%s)",
-			path, sha1_to_hex(ce->sha1));
-	}
 	switch (ntohl(ce->ce_mode) & S_IFMT) {
-		char *buf;
-		unsigned long nsize;
+		char *buf, *new;
+		unsigned long size, nsize;
 
 	case S_IFREG:
+		new = read_blob_entry(ce, path, &size);
+		if (!new)
+			return error("git-checkout-index: unable to read sha1 file of %s (%s)",
+				path, sha1_to_hex(ce->sha1));
 		if (to_tempfile) {
 			strcpy(path, ".merge_file_XXXXXX");
 			fd = mkstemp(path);
@@ -111,6 +118,10 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
 			return error("git-checkout-index: unable to write file %s", path);
 		break;
 	case S_IFLNK:
+		new = read_blob_entry(ce, path, &size);
+		if (!new)
+			return error("git-checkout-index: unable to read sha1 file of %s (%s)",
+				path, sha1_to_hex(ce->sha1));
 		if (to_tempfile || !has_symlinks) {
 			if (to_tempfile) {
 				strcpy(path, ".merge_link_XXXXXX");
@@ -136,8 +147,13 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
 						 "symlink %s (%s)", path, strerror(errno));
 		}
 		break;
+	case S_IFDIRLNK:
+		if (to_tempfile)
+			return error("git-checkout-index: cannot create temporary subproject %s", path);
+		if (mkdir(path, 0777) < 0)
+			return error("git-checkout-index: cannot create subproject directory %s", path);
+		break;
 	default:
-		free(new);
 		return error("git-checkout-index: unknown file mode for %s", path);
 	}
 

From a8ee75bc7a2ddbb10e0a4303b21bb5c300f98cc2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 13 Apr 2007 09:24:13 -0700
Subject: [PATCH 031/109] Fix gitlink index entry filesystem matching

The code to match up index entries with the filesystem was stupidly
broken.  We shouldn't compare the filesystem stat() information with
S_IFDIRLNK, since that's purely a git-internal value, and not what the
filesystem uses (on the filesystem, it's just a regular directory).

Also, don't bother to make the stat() time comparisons etc for DIRLNK
entries in ce_match_stat_basic(), since we do an exact match for these
things, and the hints in the stat data simply don't matter.

This fixes "git status" with submodules that haven't been checked out in
the supermodule.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 read-cache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index e4c628f927..d2f332a622 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -120,9 +120,9 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st)
 		if (ce_compare_link(ce, xsize_t(st->st_size)))
 			return DATA_CHANGED;
 		break;
-	case S_IFDIRLNK:
-		/* No need to do anything, we did the exact compare in "match_stat_basic" */
-		break;
+	case S_IFDIR:
+		if (S_ISDIRLNK(ntohl(ce->ce_mode)))
+			return 0;
 	default:
 		return TYPE_CHANGED;
 	}
@@ -153,7 +153,7 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
 			changed |= TYPE_CHANGED;
 		else if (ce_compare_gitlink(ce))
 			changed |= DATA_CHANGED;
-		break;
+		return changed;
 	default:
 		die("internal error: ce_mode is %o", ntohl(ce->ce_mode));
 	}

From 6e2f441bd42c55c73b5e7ac1fdc2aded07901cb3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 13 Apr 2007 09:25:01 -0700
Subject: [PATCH 032/109] Teach git list-objects logic to not follow gitlinks

This allows us to pack superprojects and thus clone them (but not yet
check them out on the receiving side.. That's the next patch)

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 list-objects.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/list-objects.c b/list-objects.c
index 2ba2c958e0..310f8d3908 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -25,6 +25,37 @@ static void process_blob(struct rev_info *revs,
 	add_object(obj, p, path, name);
 }
 
+/*
+ * Processing a gitlink entry currently does nothing, since
+ * we do not recurse into the subproject.
+ *
+ * We *could* eventually add a flag that actually does that,
+ * which would involve:
+ *  - is the subproject actually checked out?
+ *  - if so, see if the subproject has already been added
+ *    to the alternates list, and add it if not.
+ *  - process the commit (or tag) the gitlink points to
+ *    recursively.
+ *
+ * However, it's unclear whether there is really ever any
+ * reason to see superprojects and subprojects as such a
+ * "unified" object pool (potentially resulting in a totally
+ * humongous pack - avoiding which was the whole point of
+ * having gitlinks in the first place!).
+ *
+ * So for now, there is just a note that we *could* follow
+ * the link, and how to do it. Whether it necessarily makes
+ * any sense what-so-ever to ever do that is another issue.
+ */
+static void process_gitlink(struct rev_info *revs,
+			    const unsigned char *sha1,
+			    struct object_array *p,
+			    struct name_path *path,
+			    const char *name)
+{
+	/* Nothing to do */
+}
+
 static void process_tree(struct rev_info *revs,
 			 struct tree *tree,
 			 struct object_array *p,
@@ -56,6 +87,9 @@ static void process_tree(struct rev_info *revs,
 			process_tree(revs,
 				     lookup_tree(entry.sha1),
 				     p, &me, entry.path);
+		else if (S_ISDIRLNK(entry.mode))
+			process_gitlink(revs, entry.sha1,
+					p, &me, entry.path);
 		else
 			process_blob(revs,
 				     lookup_blob(entry.sha1),

From f0807e62b42df51a079c730dcf4868de9ad44ea5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 13 Apr 2007 09:26:04 -0700
Subject: [PATCH 033/109] Teach "git-read-tree -u" to check out submodules as a
 directory

This actually allows us to check out a supermodule after cloning, although
the submodules themselves will obviously not be checked out, and will just
be empty directories.

Checking out the submodules will be up to higher levels - we may not even
want to!

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 entry.c | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/entry.c b/entry.c
index d72f811580..50ffae40c7 100644
--- a/entry.c
+++ b/entry.c
@@ -62,26 +62,33 @@ static int create_file(const char *path, unsigned int mode)
 	return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
 }
 
+static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned long *size)
+{
+	enum object_type type;
+	void *new = read_sha1_file(ce->sha1, &type, size);
+
+	if (new) {
+		if (type == OBJ_BLOB)
+			return new;
+		free(new);
+	}
+	return NULL;
+}
+
 static int write_entry(struct cache_entry *ce, char *path, struct checkout *state, int to_tempfile)
 {
 	int fd;
-	void *new;
-	unsigned long size;
 	long wrote;
-	enum object_type type;
 
-	new = read_sha1_file(ce->sha1, &type, &size);
-	if (!new || type != OBJ_BLOB) {
-		if (new)
-			free(new);
-		return error("git-checkout-index: unable to read sha1 file of %s (%s)",
-			path, sha1_to_hex(ce->sha1));
-	}
 	switch (ntohl(ce->ce_mode) & S_IFMT) {
-		char *buf;
-		unsigned long nsize;
+		char *buf, *new;
+		unsigned long size, nsize;
 
 	case S_IFREG:
+		new = read_blob_entry(ce, path, &size);
+		if (!new)
+			return error("git-checkout-index: unable to read sha1 file of %s (%s)",
+				path, sha1_to_hex(ce->sha1));
 		if (to_tempfile) {
 			strcpy(path, ".merge_file_XXXXXX");
 			fd = mkstemp(path);
@@ -111,6 +118,10 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
 			return error("git-checkout-index: unable to write file %s", path);
 		break;
 	case S_IFLNK:
+		new = read_blob_entry(ce, path, &size);
+		if (!new)
+			return error("git-checkout-index: unable to read sha1 file of %s (%s)",
+				path, sha1_to_hex(ce->sha1));
 		if (to_tempfile || !has_symlinks) {
 			if (to_tempfile) {
 				strcpy(path, ".merge_link_XXXXXX");
@@ -136,8 +147,13 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
 						 "symlink %s (%s)", path, strerror(errno));
 		}
 		break;
+	case S_IFDIRLNK:
+		if (to_tempfile)
+			return error("git-checkout-index: cannot create temporary subproject %s", path);
+		if (mkdir(path, 0777) < 0)
+			return error("git-checkout-index: cannot create subproject directory %s", path);
+		break;
 	default:
-		free(new);
 		return error("git-checkout-index: unknown file mode for %s", path);
 	}
 
@@ -179,6 +195,9 @@ int checkout_entry(struct cache_entry *ce, struct checkout *state, char *topath)
 		 */
 		unlink(path);
 		if (S_ISDIR(st.st_mode)) {
+			/* If it is a gitlink, leave it alone! */
+			if (S_ISDIRLNK(ntohl(ce->ce_mode)))
+				return 0;
 			if (!state->force)
 				return error("%s is a directory", path);
 			remove_subtree(path);

From d0bfd026a8241d544c339944976927b388d61a5e Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 12 Apr 2007 01:07:32 -0700
Subject: [PATCH 034/109] Add basic infrastructure to assign attributes to
 paths

This adds the basic infrastructure to assign attributes to
paths, in a way similar to what the exclusion mechanism does
based on $GIT_DIR/info/exclude and .gitignore files.

An attribute is just a simple string that does not contain any
whitespace.  They can be specified in $GIT_DIR/info/attributes
file, and .gitattributes file in each directory.

Each line in these files defines a pattern matching rule.
Similar to the exclusion mechanism, a later match overrides an
earlier match in the same file, and entries from .gitattributes
file in the same directory take precedence over the ones from
parent directories.  Lines in $GIT_DIR/info/attributes file are
used as the lowest precedence default rules.

A line is either a comment (an empty line, or a line that begins
with a '#'), or a rule, which is a whitespace separated list of
tokens.  The first token on the line is a shell glob pattern.
The rest are names of attributes, each of which can optionally
be prefixed with '!'.  Such a line means "if a path matches this
glob, this attribute is set (or unset -- if the attribute name
is prefixed with '!')".  For glob matching, the same "if the
pattern does not have a slash in it, the basename of the path is
matched with fnmatch(3) against the pattern, otherwise, the path
is matched with the pattern with FNM_PATHNAME" rule as the
exclusion mechanism is used.

This does not define what an attribute means.  Tying an
attribute to various effects it has on git operation for paths
that have it will be specified separately.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 .gitignore           |   1 +
 Makefile             |   5 +-
 attr.c               | 380 +++++++++++++++++++++++++++++++++++++++++++
 attr.h               |  16 ++
 builtin-check-attr.c |  49 ++++++
 builtin.h            |   1 +
 cache.h              |   2 +
 git.c                |   1 +
 8 files changed, 453 insertions(+), 2 deletions(-)
 create mode 100644 attr.c
 create mode 100644 attr.h
 create mode 100644 builtin-check-attr.c

diff --git a/.gitignore b/.gitignore
index 9229e918cd..d96f4f0c50 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ git-blame
 git-branch
 git-bundle
 git-cat-file
+git-check-attr
 git-check-ref-format
 git-checkout
 git-checkout-index
diff --git a/Makefile b/Makefile
index b8e6030940..ac89d1ba36 100644
--- a/Makefile
+++ b/Makefile
@@ -283,7 +283,7 @@ LIB_H = \
 	diff.h object.h pack.h pkt-line.h quote.h refs.h list-objects.h sideband.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
 	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
-	utf8.h reflog-walk.h patch-ids.h
+	utf8.h reflog-walk.h attr.h
 
 DIFF_OBJS = \
 	diff.o diff-lib.o diffcore-break.o diffcore-order.o \
@@ -305,7 +305,7 @@ LIB_OBJS = \
 	write_or_die.o trace.o list-objects.o grep.o match-trees.o \
 	alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
 	color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
-	convert.o
+	convert.o attr.o
 
 BUILTIN_OBJS = \
 	builtin-add.o \
@@ -316,6 +316,7 @@ BUILTIN_OBJS = \
 	builtin-branch.o \
 	builtin-bundle.o \
 	builtin-cat-file.o \
+	builtin-check-attr.o \
 	builtin-checkout-index.o \
 	builtin-check-ref-format.o \
 	builtin-commit-tree.o \
diff --git a/attr.c b/attr.c
new file mode 100644
index 0000000000..7435d927a9
--- /dev/null
+++ b/attr.c
@@ -0,0 +1,380 @@
+#include "cache.h"
+#include "attr.h"
+
+/*
+ * The basic design decision here is that we are not going to have
+ * insanely large number of attributes.
+ *
+ * This is a randomly chosen prime.
+ */
+#define HASHSIZE 257
+
+#ifndef DEBUG_ATTR
+#define DEBUG_ATTR 0
+#endif
+
+struct git_attr {
+	struct git_attr *next;
+	unsigned h;
+	char name[FLEX_ARRAY];
+};
+
+static struct git_attr *(git_attr_hash[HASHSIZE]);
+
+static unsigned hash_name(const char *name, int namelen)
+{
+	unsigned val = 0;
+	unsigned char c;
+
+	while (namelen--) {
+		c = *name++;
+		val = ((val << 7) | (val >> 22)) ^ c;
+	}
+	return val;
+}
+
+struct git_attr *git_attr(const char *name, int len)
+{
+	unsigned hval = hash_name(name, len);
+	unsigned pos = hval % HASHSIZE;
+	struct git_attr *a;
+
+	for (a = git_attr_hash[pos]; a; a = a->next) {
+		if (a->h == hval &&
+		    !memcmp(a->name, name, len) && !a->name[len])
+			return a;
+	}
+
+	a = xmalloc(sizeof(*a) + len + 1);
+	memcpy(a->name, name, len);
+	a->name[len] = 0;
+	a->h = hval;
+	a->next = git_attr_hash[pos];
+	git_attr_hash[pos] = a;
+	return a;
+}
+
+/*
+ * .gitattributes file is one line per record, each of which is
+ *
+ * (1) glob pattern.
+ * (2) whitespace
+ * (3) whitespace separated list of attribute names, each of which
+ *     could be prefixed with '!' to mean "not set".
+ */
+
+struct attr_state {
+	int unset;
+	struct git_attr *attr;
+};
+
+struct match_attr {
+	char *pattern;
+	unsigned num_attr;
+	struct attr_state state[FLEX_ARRAY];
+};
+
+static const char blank[] = " \t\r\n";
+
+static struct match_attr *parse_attr_line(const char *line)
+{
+	int namelen;
+	int num_attr;
+	const char *cp, *name;
+	struct match_attr *res = res;
+	int pass;
+
+	cp = line + strspn(line, blank);
+	if (!*cp || *cp == '#')
+		return NULL;
+	name = cp;
+	namelen = strcspn(name, blank);
+
+	for (pass = 0; pass < 2; pass++) {
+		/* pass 0 counts and allocates, pass 1 fills */
+		num_attr = 0;
+		cp = name + namelen;
+		cp = cp + strspn(cp, blank);
+		while (*cp) {
+			const char *ep;
+			ep = cp + strcspn(cp, blank);
+			if (pass) {
+				struct attr_state *e;
+
+				e = &(res->state[num_attr]);
+				if (*cp == '!') {
+					e->unset = 1;
+					cp++;
+				}
+				e->attr = git_attr(cp, ep - cp);
+			}
+			num_attr++;
+			cp = ep + strspn(ep, blank);
+		}
+		if (pass)
+			break;
+		res = xcalloc(1,
+			      sizeof(*res) +
+			      sizeof(struct attr_state) * num_attr +
+			      namelen + 1);
+		res->pattern = (char*)&(res->state[num_attr]);
+		memcpy(res->pattern, name, namelen);
+		res->pattern[namelen] = 0;
+		res->num_attr = num_attr;
+	}
+	return res;
+}
+
+/*
+ * Like info/exclude and .gitignore, the attribute information can
+ * come from many places.
+ *
+ * (1) .gitattribute file of the same directory;
+ * (2) .gitattribute file of the parent directory if (1) does not have any match;
+ *     this goes recursively upwards, just like .gitignore
+ * (3) perhaps $GIT_DIR/info/attributes, as the final fallback.
+ *
+ * In the same file, later entries override the earlier match, so in the
+ * global list, we would have entries from info/attributes the earliest
+ * (reading the file from top to bottom), .gitattribute of the root
+ * directory (again, reading the file from top to bottom) down to the
+ * current directory, and then scan the list backwards to find the first match.
+ * This is exactly the same as what excluded() does in dir.c to deal with
+ * .gitignore
+ */
+
+static struct attr_stack {
+	struct attr_stack *prev;
+	char *origin;
+	unsigned num_matches;
+	struct match_attr **attrs;
+} *attr_stack;
+
+static void free_attr_elem(struct attr_stack *e)
+{
+	int i;
+	free(e->origin);
+	for (i = 0; i < e->num_matches; i++)
+		free(e->attrs[i]);
+	free(e);
+}
+
+static const char *builtin_attr[] = {
+	NULL,
+};
+
+static struct attr_stack *read_attr_from_array(const char **list)
+{
+	struct attr_stack *res;
+	const char *line;
+
+	res = xcalloc(1, sizeof(*res));
+	while ((line = *(list++)) != NULL) {
+		struct match_attr *a = parse_attr_line(line);
+		if (!a)
+			continue;
+		res->attrs = xrealloc(res->attrs, res->num_matches + 1);
+		res->attrs[res->num_matches++] = a;
+	}
+	return res;
+}
+
+static struct attr_stack *read_attr_from_file(const char *path)
+{
+	FILE *fp;
+	struct attr_stack *res;
+	char buf[2048];
+
+	res = xcalloc(1, sizeof(*res));
+	fp = fopen(path, "r");
+	if (!fp)
+		return res;
+
+	while (fgets(buf, sizeof(buf), fp)) {
+		struct match_attr *a = parse_attr_line(buf);
+		if (!a)
+			continue;
+		res->attrs = xrealloc(res->attrs, res->num_matches + 1);
+		res->attrs[res->num_matches++] = a;
+	}
+	fclose(fp);
+	return res;
+}
+
+#if DEBUG_ATTR
+static void debug_info(const char *what, struct attr_stack *elem)
+{
+	fprintf(stderr, "%s: %s\n", what, elem->origin ? elem->origin : "()");
+}
+#define debug_push(a) debug_info("push", (a))
+#define debug_pop(a) debug_info("pop", (a))
+#else
+#define debug_push(a) do { ; } while (0)
+#define debug_pop(a) do { ; } while (0)
+#endif
+
+static void prepare_attr_stack(const char *path, int dirlen)
+{
+	struct attr_stack *elem, *info;
+	int len;
+	char pathbuf[PATH_MAX];
+
+	/*
+	 * At the bottom of the attribute stack is the built-in
+	 * set of attribute definitions.  Then, contents from
+	 * .gitattribute files from directories closer to the
+	 * root to the ones in deeper directories are pushed
+	 * to the stack.  Finally, at the very top of the stack
+	 * we always keep the contents of $GIT_DIR/info/attributes.
+	 *
+	 * When checking, we use entries from near the top of the
+	 * stack, preferring $GIT_DIR/info/attributes, then
+	 * .gitattributes in deeper directories to shallower ones,
+	 * and finally use the built-in set as the default.
+	 */
+	if (!attr_stack) {
+		elem = read_attr_from_array(builtin_attr);
+		elem->origin = NULL;
+		elem->prev = attr_stack;
+		attr_stack = elem;
+
+		elem = read_attr_from_file(GITATTRIBUTES_FILE);
+		elem->origin = strdup("");
+		elem->prev = attr_stack;
+		attr_stack = elem;
+		debug_push(elem);
+
+		elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE));
+		elem->origin = NULL;
+		elem->prev = attr_stack;
+		attr_stack = elem;
+	}
+
+	/*
+	 * Pop the "info" one that is always at the top of the stack.
+	 */
+	info = attr_stack;
+	attr_stack = info->prev;
+
+	/*
+	 * Pop the ones from directories that are not the prefix of
+	 * the path we are checking.
+	 */
+	while (attr_stack && attr_stack->origin) {
+		int namelen = strlen(attr_stack->origin);
+
+		elem = attr_stack;
+		if (namelen <= dirlen &&
+		    !strncmp(elem->origin, path, namelen))
+			break;
+
+		debug_pop(elem);
+		attr_stack = elem->prev;
+		free_attr_elem(elem);
+	}
+
+	/*
+	 * Read from parent directories and push them down
+	 */
+	while (1) {
+		char *cp;
+
+		len = strlen(attr_stack->origin);
+		if (dirlen <= len)
+			break;
+		memcpy(pathbuf, path, dirlen);
+		memcpy(pathbuf + dirlen, "/", 2);
+		cp = strchr(pathbuf + len + 1, '/');
+		strcpy(cp + 1, GITATTRIBUTES_FILE);
+		elem = read_attr_from_file(pathbuf);
+		*cp = '\0';
+		elem->origin = strdup(pathbuf);
+		elem->prev = attr_stack;
+		attr_stack = elem;
+		debug_push(elem);
+	}
+
+	/*
+	 * Finally push the "info" one at the top of the stack.
+	 */
+	info->prev = attr_stack;
+	attr_stack = info;
+}
+
+static int path_matches(const char *pathname, int pathlen,
+			const char *pattern,
+			const char *base, int baselen)
+{
+	if (!strchr(pattern, '/')) {
+		/* match basename */
+		const char *basename = strrchr(pathname, '/');
+		basename = basename ? basename + 1 : pathname;
+		return (fnmatch(pattern, basename, 0) == 0);
+	}
+	/*
+	 * match with FNM_PATHNAME; the pattern has base implicitly
+	 * in front of it.
+	 */
+	if (*pattern == '/')
+		pattern++;
+	if (pathlen < baselen ||
+	    (baselen && pathname[baselen - 1] != '/') ||
+	    strncmp(pathname, base, baselen))
+		return 0;
+	return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0;
+}
+
+/*
+ * I do not like this at all.  Only because we allow individual
+ * attribute to be set or unset incrementally by individual
+ * lines in .gitattribute files, we need to do this triple
+ * loop which looks quite wasteful.
+ */
+static int fill(const char *path, int pathlen,
+		struct attr_stack *stk, struct git_attr_check *check,
+		int num, int rem)
+{
+	int i, j, k;
+	const char *base = stk->origin ? stk->origin : "";
+
+	for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
+		struct match_attr *a = stk->attrs[i];
+		if (path_matches(path, pathlen,
+				 a->pattern, base, strlen(base))) {
+			for (j = 0; j < a->num_attr; j++) {
+				struct git_attr *attr = a->state[j].attr;
+				int set = !a->state[j].unset;
+				for (k = 0; k < num; k++) {
+					if (0 <= check[k].isset ||
+					    check[k].attr != attr)
+						continue;
+					check[k].isset = set;
+					rem--;
+				}
+			}
+		}
+	}
+	return rem;
+}
+
+int git_checkattr(const char *path, int num, struct git_attr_check *check)
+{
+	struct attr_stack *stk;
+	const char *cp;
+	int dirlen, pathlen, i, rem;
+
+	for (i = 0; i < num; i++)
+		check[i].isset = -1;
+
+	pathlen = strlen(path);
+	cp = strrchr(path, '/');
+	if (!cp)
+		dirlen = 0;
+	else
+		dirlen = cp - path;
+	prepare_attr_stack(path, dirlen);
+	rem = num;
+	for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
+		rem = fill(path, pathlen, stk, check, num, rem);
+	return 0;
+}
diff --git a/attr.h b/attr.h
new file mode 100644
index 0000000000..1e5ab40694
--- /dev/null
+++ b/attr.h
@@ -0,0 +1,16 @@
+#ifndef ATTR_H
+#define ATTR_H
+
+/* An attribute is a pointer to this opaque structure */
+struct git_attr;
+
+struct git_attr *git_attr(const char *, int);
+
+struct git_attr_check {
+	struct git_attr *attr;
+	int isset;
+};
+
+int git_checkattr(const char *path, int, struct git_attr_check *);
+
+#endif /* ATTR_H */
diff --git a/builtin-check-attr.c b/builtin-check-attr.c
new file mode 100644
index 0000000000..47b07210d6
--- /dev/null
+++ b/builtin-check-attr.c
@@ -0,0 +1,49 @@
+#include "builtin.h"
+#include "attr.h"
+#include "quote.h"
+
+static const char check_attr_usage[] =
+"git-check-attr attr... [--] pathname...";
+
+int cmd_check_attr(int argc, const char **argv, const char *prefix)
+{
+	struct git_attr_check *check;
+	int cnt, i, doubledash;
+
+	doubledash = -1;
+	for (i = 1; doubledash < 0 && i < argc; i++) {
+		if (!strcmp(argv[i], "--"))
+			doubledash = i;
+	}
+
+	/* If there is no double dash, we handle only one attribute */
+	if (doubledash < 0) {
+		cnt = 1;
+		doubledash = 1;
+	} else
+		cnt = doubledash - 1;
+	doubledash++;
+
+	if (cnt <= 0 || argc < doubledash)
+		usage(check_attr_usage);
+	check = xcalloc(cnt, sizeof(*check));
+	for (i = 0; i < cnt; i++) {
+		const char *name;
+		name = argv[i + 1];
+		check[i].attr = git_attr(name, strlen(name));
+	}
+
+	for (i = doubledash; i < argc; i++) {
+		int j;
+		if (git_checkattr(argv[i], cnt, check))
+			die("git_checkattr died");
+		for (j = 0; j < cnt; j++) {
+			write_name_quoted("", 0, argv[i], 1, stdout);
+			printf(": %s: %s\n", argv[j+1],
+			       (check[j].isset < 0) ? "unspecified" :
+			       (check[j].isset == 0) ? "unset" :
+			       "set");
+		}
+	}
+	return 0;
+}
diff --git a/builtin.h b/builtin.h
index af203e9e36..d3f3a7496e 100644
--- a/builtin.h
+++ b/builtin.h
@@ -22,6 +22,7 @@ extern int cmd_branch(int argc, const char **argv, const char *prefix);
 extern int cmd_bundle(int argc, const char **argv, const char *prefix);
 extern int cmd_cat_file(int argc, const char **argv, const char *prefix);
 extern int cmd_checkout_index(int argc, const char **argv, const char *prefix);
+extern int cmd_check_attr(int argc, const char **argv, const char *prefix);
 extern int cmd_check_ref_format(int argc, const char **argv, const char *prefix);
 extern int cmd_cherry(int argc, const char **argv, const char *prefix);
 extern int cmd_cherry_pick(int argc, const char **argv, const char *prefix);
diff --git a/cache.h b/cache.h
index b1bd9e46c2..63af43fe5c 100644
--- a/cache.h
+++ b/cache.h
@@ -151,6 +151,8 @@ enum object_type {
 #define CONFIG_ENVIRONMENT "GIT_CONFIG"
 #define CONFIG_LOCAL_ENVIRONMENT "GIT_CONFIG_LOCAL"
 #define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
+#define GITATTRIBUTES_FILE ".gitattributes"
+#define INFOATTRIBUTES_FILE "info/attributes"
 
 extern int is_bare_repository_cfg;
 extern int is_bare_repository(void);
diff --git a/git.c b/git.c
index 7def319e60..f20090721a 100644
--- a/git.c
+++ b/git.c
@@ -234,6 +234,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp)
 		{ "cat-file", cmd_cat_file, RUN_SETUP },
 		{ "checkout-index", cmd_checkout_index, RUN_SETUP },
 		{ "check-ref-format", cmd_check_ref_format },
+		{ "check-attr", cmd_check_attr, RUN_SETUP | NOT_BARE },
 		{ "cherry", cmd_cherry, RUN_SETUP },
 		{ "cherry-pick", cmd_cherry_pick, RUN_SETUP | NOT_BARE },
 		{ "commit-tree", cmd_commit_tree, RUN_SETUP },

From 35ebfd6a0cd71795c4fa510b99e55ad89fb654f1 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 12 Apr 2007 22:30:05 -0700
Subject: [PATCH 035/109] Define 'crlf' attribute.

This defines the semantics of 'crlf' attribute as an example.
When a path has this attribute unset (i.e. '!crlf'), autocrlf
line-end conversion is not applied.

Eventually we would want to let users build a pipeline of
processing to munge blob data to filesystem format (and in the
other direction) based on combination of attributes, and at that
point the mechanism in convert_to_{git,working_tree}() that
looks at 'crlf' attribute needs to be enhanced.  Perhaps the
existing 'crlf' would become the first step in the input chain,
and the last step in the output chain.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c          | 18 ++++++++++++++++++
 convert.c       | 49 ++++++++++++++++++++++++++++++++++++++++++-------
 t/t0020-crlf.sh | 24 ++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/attr.c b/attr.c
index 7435d927a9..ed4db01a89 100644
--- a/attr.c
+++ b/attr.c
@@ -378,3 +378,21 @@ int git_checkattr(const char *path, int num, struct git_attr_check *check)
 		rem = fill(path, pathlen, stk, check, num, rem);
 	return 0;
 }
+
+static void setup_binary_check(struct git_attr_check *check)
+{
+	static struct git_attr *attr_binary;
+
+	if (!attr_binary)
+		attr_binary = git_attr("binary", 6);
+	check->attr = attr_binary;
+}
+
+int git_path_is_binary(const char *path)
+{
+	struct git_attr_check attr_binary_check;
+
+	setup_binary_check(&attr_binary_check);
+	return (!git_checkattr(path, 1, &attr_binary_check) &&
+		(0 < attr_binary_check.isset));
+}
diff --git a/convert.c b/convert.c
index 898bfe3eb2..20c744aa23 100644
--- a/convert.c
+++ b/convert.c
@@ -1,4 +1,6 @@
 #include "cache.h"
+#include "attr.h"
+
 /*
  * convert.c - convert a file when checking it out and checking it in.
  *
@@ -72,17 +74,12 @@ static int is_binary(unsigned long size, struct text_stat *stats)
 	return 0;
 }
 
-int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
+static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
 {
 	char *buffer, *nbuf;
 	unsigned long size, nsize;
 	struct text_stat stats;
 
-	/*
-	 * FIXME! Other pluggable conversions should go here,
-	 * based on filename patterns. Right now we just do the
-	 * stupid auto-CRLF one.
-	 */
 	if (!auto_crlf)
 		return 0;
 
@@ -128,7 +125,7 @@ int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
 	return 1;
 }
 
-int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 {
 	char *buffer, *nbuf;
 	unsigned long size, nsize;
@@ -184,3 +181,41 @@ int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 
 	return 1;
 }
+
+static void setup_crlf_check(struct git_attr_check *check)
+{
+	static struct git_attr *attr_crlf;
+
+	if (!attr_crlf)
+		attr_crlf = git_attr("crlf", 4);
+	check->attr = attr_crlf;
+}
+
+static int git_path_is_binary(const char *path)
+{
+	struct git_attr_check attr_crlf_check;
+
+	setup_crlf_check(&attr_crlf_check);
+
+	/*
+	 * If crlf is not mentioned, default to autocrlf;
+	 * disable autocrlf only when crlf attribute is explicitly
+	 * unset.
+	 */
+	return (!git_checkattr(path, 1, &attr_crlf_check) &&
+		(0 == attr_crlf_check.isset));
+}
+
+int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
+{
+	if (git_path_is_binary(path))
+		return 0;
+	return autocrlf_to_git(path, bufp, sizep);
+}
+
+int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+{
+	if (git_path_is_binary(path))
+		return 0;
+	return autocrlf_to_working_tree(path, bufp, sizep);
+}
diff --git a/t/t0020-crlf.sh b/t/t0020-crlf.sh
index 723b29ad17..600dcd30a0 100755
--- a/t/t0020-crlf.sh
+++ b/t/t0020-crlf.sh
@@ -214,4 +214,28 @@ test_expect_success 'apply patch --index (autocrlf=true)' '
 	}
 '
 
+test_expect_success '.gitattributes says two is binary' '
+
+	echo "two !crlf" >.gitattributes &&
+	rm -f tmp one dir/two &&
+	git repo-config core.autocrlf true &&
+	git read-tree --reset -u HEAD &&
+
+	if remove_cr dir/two >/dev/null
+	then
+		echo "Huh?"
+		false
+	else
+		: happy
+	fi &&
+
+	if remove_cr one >/dev/null
+	then
+		: happy
+	else
+		echo "Huh?"
+		false
+	fi
+'
+
 test_done

From 8c701249d2257699c19822b528c101668abc55b9 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 12 Apr 2007 23:05:29 -0700
Subject: [PATCH 036/109] Teach 'diff' about 'diff' attribute.

This makes paths that explicitly unset the 'diff' attribute not
produce "textual" diffs from the 'git-diff' family.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c | 18 ------------------
 diff.c | 42 ++++++++++++++++++++++++++++++++----------
 2 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/attr.c b/attr.c
index ed4db01a89..7435d927a9 100644
--- a/attr.c
+++ b/attr.c
@@ -378,21 +378,3 @@ int git_checkattr(const char *path, int num, struct git_attr_check *check)
 		rem = fill(path, pathlen, stk, check, num, rem);
 	return 0;
 }
-
-static void setup_binary_check(struct git_attr_check *check)
-{
-	static struct git_attr *attr_binary;
-
-	if (!attr_binary)
-		attr_binary = git_attr("binary", 6);
-	check->attr = attr_binary;
-}
-
-int git_path_is_binary(const char *path)
-{
-	struct git_attr_check attr_binary_check;
-
-	setup_binary_check(&attr_binary_check);
-	return (!git_checkattr(path, 1, &attr_binary_check) &&
-		(0 < attr_binary_check.isset));
-}
diff --git a/diff.c b/diff.c
index fbb79d70a9..e4efb657e8 100644
--- a/diff.c
+++ b/diff.c
@@ -8,6 +8,7 @@
 #include "delta.h"
 #include "xdiff-interface.h"
 #include "color.h"
+#include "attr.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -1051,13 +1052,34 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
 	emit_binary_diff_body(two, one);
 }
 
-#define FIRST_FEW_BYTES 8000
-static int mmfile_is_binary(mmfile_t *mf)
+static void setup_diff_attr_check(struct git_attr_check *check)
 {
-	long sz = mf->size;
+	static struct git_attr *attr_diff;
+
+	if (!attr_diff)
+		attr_diff = git_attr("diff", 4);
+	check->attr = attr_diff;
+}
+
+#define FIRST_FEW_BYTES 8000
+static int file_is_binary(struct diff_filespec *one)
+{
+	unsigned long sz;
+	struct git_attr_check attr_diff_check;
+
+	setup_diff_attr_check(&attr_diff_check);
+	if (!git_checkattr(one->path, 1, &attr_diff_check) &&
+	    (0 == attr_diff_check.isset))
+		return 1;
+	if (!one->data) {
+		if (!DIFF_FILE_VALID(one))
+			return 0;
+		diff_populate_filespec(one, 0);
+	}
+	sz = one->size;
 	if (FIRST_FEW_BYTES < sz)
 		sz = FIRST_FEW_BYTES;
-	return !!memchr(mf->ptr, 0, sz);
+	return !!memchr(one->data, 0, sz);
 }
 
 static void builtin_diff(const char *name_a,
@@ -1114,7 +1136,7 @@ static void builtin_diff(const char *name_a,
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
-	if (!o->text && (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2))) {
+	if (!o->text && (file_is_binary(one) || file_is_binary(two))) {
 		/* Quite common confusing case */
 		if (mf1.size == mf2.size &&
 		    !memcmp(mf1.ptr, mf2.ptr, mf1.size))
@@ -1190,7 +1212,7 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
-	if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) {
+	if (file_is_binary(one) || file_is_binary(two)) {
 		data->is_binary = 1;
 		data->added = mf2.size;
 		data->deleted = mf1.size;
@@ -1228,7 +1250,7 @@ static void builtin_checkdiff(const char *name_a, const char *name_b,
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
-	if (mmfile_is_binary(&mf2))
+	if (file_is_binary(two))
 		return;
 	else {
 		/* Crazy xdl interfaces.. */
@@ -1805,8 +1827,8 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
 
 		if (o->binary) {
 			mmfile_t mf;
-			if ((!fill_mmfile(&mf, one) && mmfile_is_binary(&mf)) ||
-			    (!fill_mmfile(&mf, two) && mmfile_is_binary(&mf)))
+			if ((!fill_mmfile(&mf, one) && file_is_binary(one)) ||
+			    (!fill_mmfile(&mf, two) && file_is_binary(two)))
 				abbrev = 40;
 		}
 		len += snprintf(msg + len, sizeof(msg) - len,
@@ -2701,7 +2723,7 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
 			return error("unable to read files to diff");
 
 		/* Maybe hash p->two? into the patch id? */
-		if (mmfile_is_binary(&mf2))
+		if (file_is_binary(p->two))
 			continue;
 
 		len1 = remove_space(p->one->path, strlen(p->one->path));

From 5698454ea052369dc98d38b45c21307b07a6c4a3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 14 Apr 2007 16:22:08 -0700
Subject: [PATCH 037/109] Fix some "git ls-files -o" fallout from gitlinks

Since "git ls-files" doesn't really pass down any details on what it
really wants done to the directory walking code, the directory walking
code doesn't really know whether the caller wants to know about gitlink
directories, or whether it wants to just know about ignored files.

So the directory walking code will return those gitlink directories unless
the caller has explicitly told it not to ("dir->show_other_directories"
tells the directory walker to only show "other" directories).

This kind of confuses "git ls-files -o", because
 - it didn't really expect to see entries listed that were already in the
   index, unless they were unmerged, and would die on that unexpected
   setup, rather than just "continue".
 - it didn't know how to match directory entries with the final "/"

This trivial change updates the "show_other_files()" function to handle
both of these issues gracefully. There really was no reason to die, when
the obviously correct thing for the function was to just ignore files it
already knew about (that's what "other" means here!).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-ls-files.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/builtin-ls-files.c b/builtin-ls-files.c
index 74a6acacc1..f7c066b24b 100644
--- a/builtin-ls-files.c
+++ b/builtin-ls-files.c
@@ -89,20 +89,38 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
 static void show_other_files(struct dir_struct *dir)
 {
 	int i;
+
+
+	/*
+	 * Skip matching and unmerged entries for the paths,
+	 * since we want just "others".
+	 *
+	 * (Matching entries are normally pruned during
+	 * the directory tree walk, but will show up for
+	 * gitlinks because we don't necessarily have
+	 * dir->show_other_directories set to suppress
+	 * them).
+	 */
 	for (i = 0; i < dir->nr; i++) {
-		/* We should not have a matching entry, but we
-		 * may have an unmerged entry for this path.
-		 */
 		struct dir_entry *ent = dir->entries[i];
-		int pos = cache_name_pos(ent->name, ent->len);
+		int len, pos;
 		struct cache_entry *ce;
+
+		/*
+		 * Remove the '/' at the end that directory
+		 * walking adds for directory entries.
+		 */
+		len = ent->len;
+		if (len && ent->name[len-1] == '/')
+			len--;
+		pos = cache_name_pos(ent->name, len);
 		if (0 <= pos)
-			die("bug in show-other-files");
+			continue;	/* exact match */
 		pos = -pos - 1;
 		if (pos < active_nr) { 
 			ce = active_cache[pos];
-			if (ce_namelen(ce) == ent->len &&
-			    !memcmp(ce->name, ent->name, ent->len))
+			if (ce_namelen(ce) == len &&
+			    !memcmp(ce->name, ent->name, len))
 				continue; /* Yup, this one exists unmerged */
 		}
 		show_dir_entry(tag_other, ent);

From 04786756f90a734445cc300a5acf9fcfc9fd4c04 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 15 Apr 2007 11:14:28 -0700
Subject: [PATCH 038/109] Expose subprojects as special files to "git diff"
 machinery

The same way we generate diffs on symlinks as the diff of text of the
symlink, we can generate subproject diffs (when not recursing into them!)
as the diff of the text that describes the subproject.

Of course, since what describes a subproject is just the SHA1, that's what
we'll use. Add some pretty-printing to make it a bit more obvious what is
going on, and we're done.

So with this, we can get both raw diffs and "textual" diffs of subproject
changes:

 - git diff --raw:

	:160000 160000 2de597b5ad348b7db04bd10cdd38cd81cbc93ab5 0000000... M    sub-A

 - git diff:

	diff --git a/sub-A b/sub-A
	index 2de597b..e8f11a4 160000
	--- a/sub-A
	+++ b/sub-A
	@@ -1 +1 @@
	-Subproject commit 2de597b5ad348b7db04bd10cdd38cd81cbc93ab5
	+Subproject commit e8f11a45c5c6b9e2fec6d136d3fb5aff75393d42

NOTE! We'll also want to have the ability to recurse into the subproject
and actually diff it recursively, but that will involve a new command line
option (I'd suggest "--subproject" and "-S", but the latter is in use by
pickaxe), and some very different code.

But regardless of any future recursive behaviour, we need the non-recursive
version too (and it should be the default, at least in the absence of
config options, so that large superprojects don't default to something
extremely expensive).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 diff.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/diff.c b/diff.c
index fbb79d70a9..1e8e689be2 100644
--- a/diff.c
+++ b/diff.c
@@ -1397,6 +1397,22 @@ static int populate_from_stdin(struct diff_filespec *s)
 	return 0;
 }
 
+static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
+{
+	int len;
+	char *data = xmalloc(100);
+	len = snprintf(data, 100,
+		"Subproject commit %s\n", sha1_to_hex(s->sha1));
+	s->data = data;
+	s->size = len;
+	s->should_free = 1;
+	if (size_only) {
+		s->data = NULL;
+		free(data);
+	}
+	return 0;
+}
+
 /*
  * While doing rename detection and pickaxe operation, we may need to
  * grab the data for the blob (or file) for our own in-core comparison.
@@ -1415,6 +1431,10 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 
 	if (s->data)
 		return err;
+
+	if (S_ISDIRLNK(s->mode))
+		return diff_populate_gitlink(s, size_only);
+
 	if (!s->sha1_valid ||
 	    reuse_worktree_file(s->path, s->sha1, 0)) {
 		struct stat st;

From 201ac8efc79668353281583629aa15ac7f36e843 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 15 Apr 2007 13:35:45 -0700
Subject: [PATCH 039/109] Fix 'crlf' attribute semantics.

Earlier we said 'crlf lets the path go through core.autocrlf
process while !crlf disables it altogether'.  This fixes the
semantics to:

 - Lack of 'crlf' attribute makes core.autocrlf apply
   (i.e. we guess based on the contents and if platform
   expresses its desire to have CRLF line endings via
   core.autocrlf, we do so).

 - Setting 'crlf' attribute to true forces CRLF line endings in
   working tree files, even if blob does not look like text
   (e.g. contains NUL or other bytes we consider binary).

 - Setting 'crlf' attribute to false disables conversion.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 convert.c | 120 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 77 insertions(+), 43 deletions(-)

diff --git a/convert.c b/convert.c
index 20c744aa23..d0d4b81871 100644
--- a/convert.c
+++ b/convert.c
@@ -74,13 +74,13 @@ static int is_binary(unsigned long size, struct text_stat *stats)
 	return 0;
 }
 
-static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
+static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int guess)
 {
 	char *buffer, *nbuf;
 	unsigned long size, nsize;
 	struct text_stat stats;
 
-	if (!auto_crlf)
+	if (guess && !auto_crlf)
 		return 0;
 
 	size = *sizep;
@@ -94,19 +94,21 @@ static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
 	if (!stats.cr)
 		return 0;
 
-	/*
-	 * We're currently not going to even try to convert stuff
-	 * that has bare CR characters. Does anybody do that crazy
-	 * stuff?
-	 */
-	if (stats.cr != stats.crlf)
-		return 0;
+	if (guess) {
+		/*
+		 * We're currently not going to even try to convert stuff
+		 * that has bare CR characters. Does anybody do that crazy
+		 * stuff?
+		 */
+		if (stats.cr != stats.crlf)
+			return 0;
 
-	/*
-	 * And add some heuristics for binary vs text, of course...
-	 */
-	if (is_binary(size, &stats))
-		return 0;
+		/*
+		 * And add some heuristics for binary vs text, of course...
+		 */
+		if (is_binary(size, &stats))
+			return 0;
+	}
 
 	/*
 	 * Ok, allocate a new buffer, fill it in, and return true
@@ -116,28 +118,42 @@ static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
 	nbuf = xmalloc(nsize);
 	*bufp = nbuf;
 	*sizep = nsize;
-	do {
-		unsigned char c = *buffer++;
-		if (c != '\r')
-			*nbuf++ = c;
-	} while (--size);
+
+	if (guess) {
+		do {
+			unsigned char c = *buffer++;
+			if (c != '\r')
+				*nbuf++ = c;
+		} while (--size);
+	} else {
+		do {
+			unsigned char c = *buffer++;
+			if (! (c == '\r' && (1 < size && *buffer == '\n')))
+				*nbuf++ = c;
+		} while (--size);
+	}
 
 	return 1;
 }
 
-static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
+{
+	return crlf_to_git(path, bufp, sizep, 1);
+}
+
+static int forcecrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
+{
+	return crlf_to_git(path, bufp, sizep, 0);
+}
+
+static int crlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep, int guess)
 {
 	char *buffer, *nbuf;
 	unsigned long size, nsize;
 	struct text_stat stats;
 	unsigned char last;
 
-	/*
-	 * FIXME! Other pluggable conversions should go here,
-	 * based on filename patterns. Right now we just do the
-	 * stupid auto-CRLF one.
-	 */
-	if (auto_crlf <= 0)
+	if (guess && auto_crlf <= 0)
 		return 0;
 
 	size = *sizep;
@@ -155,12 +171,14 @@ static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long
 	if (stats.lf == stats.crlf)
 		return 0;
 
-	/* If we have any bare CR characters, we're not going to touch it */
-	if (stats.cr != stats.crlf)
-		return 0;
+	if (guess) {
+		/* If we have any bare CR characters, we're not going to touch it */
+		if (stats.cr != stats.crlf)
+			return 0;
 
-	if (is_binary(size, &stats))
-		return 0;
+		if (is_binary(size, &stats))
+			return 0;
+	}
 
 	/*
 	 * Ok, allocate a new buffer, fill it in, and return true
@@ -182,6 +200,16 @@ static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long
 	return 1;
 }
 
+static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+{
+	return crlf_to_working_tree(path, bufp, sizep, 1);
+}
+
+static int forcecrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+{
+	return crlf_to_working_tree(path, bufp, sizep, 0);
+}
+
 static void setup_crlf_check(struct git_attr_check *check)
 {
 	static struct git_attr *attr_crlf;
@@ -191,31 +219,37 @@ static void setup_crlf_check(struct git_attr_check *check)
 	check->attr = attr_crlf;
 }
 
-static int git_path_is_binary(const char *path)
+static int git_path_check_crlf(const char *path)
 {
 	struct git_attr_check attr_crlf_check;
 
 	setup_crlf_check(&attr_crlf_check);
 
-	/*
-	 * If crlf is not mentioned, default to autocrlf;
-	 * disable autocrlf only when crlf attribute is explicitly
-	 * unset.
-	 */
-	return (!git_checkattr(path, 1, &attr_crlf_check) &&
-		(0 == attr_crlf_check.isset));
+	if (git_checkattr(path, 1, &attr_crlf_check))
+		return -1;
+	return attr_crlf_check.isset;
 }
 
 int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
 {
-	if (git_path_is_binary(path))
+	switch (git_path_check_crlf(path)) {
+	case 0:
 		return 0;
-	return autocrlf_to_git(path, bufp, sizep);
+	case 1:
+		return forcecrlf_to_git(path, bufp, sizep);
+	default:
+		return autocrlf_to_git(path, bufp, sizep);
+	}
 }
 
 int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 {
-	if (git_path_is_binary(path))
+	switch (git_path_check_crlf(path)) {
+	case 0:
 		return 0;
-	return autocrlf_to_working_tree(path, bufp, sizep);
+	case 1:
+		return forcecrlf_to_working_tree(path, bufp, sizep);
+	default:
+		return autocrlf_to_working_tree(path, bufp, sizep);
+	}
 }

From 40250af411f33afa0c39a5d461829b676453ce3b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 15 Apr 2007 14:35:11 -0700
Subject: [PATCH 040/109] Fix 'diff' attribute semantics.

This is in the same spirit as the previous one.  Earlier 'diff'
meant 'do the built-in binary heuristics and disable patch text
generation based on it' while '!diff' meant 'do not guess, do
not generate patch text'.  There was no way to say 'do generate
patch text even when the heuristics say it has NUL in it'.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 diff.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index e4efb657e8..dcea405ed3 100644
--- a/diff.c
+++ b/diff.c
@@ -1069,8 +1069,9 @@ static int file_is_binary(struct diff_filespec *one)
 
 	setup_diff_attr_check(&attr_diff_check);
 	if (!git_checkattr(one->path, 1, &attr_diff_check) &&
-	    (0 == attr_diff_check.isset))
-		return 1;
+	    (0 <= attr_diff_check.isset))
+		return !attr_diff_check.isset;
+
 	if (!one->data) {
 		if (!DIFF_FILE_VALID(one))
 			return 0;

From 6d4da3dea0ba01bb17a9001efd3ab2bbce531b6f Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 15 Apr 2007 13:39:32 -0700
Subject: [PATCH 041/109] Makefile: add patch-ids.h back in.

I lost it by mistake while shuffling the gitattributes series which
originally was on top of the subproject topic onto the master branch.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ac89d1ba36..4a399dda81 100644
--- a/Makefile
+++ b/Makefile
@@ -283,7 +283,7 @@ LIB_H = \
 	diff.h object.h pack.h pkt-line.h quote.h refs.h list-objects.h sideband.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
 	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
-	utf8.h reflog-walk.h attr.h
+	utf8.h reflog-walk.h patch-ids.h attr.h
 
 DIFF_OBJS = \
 	diff.o diff-lib.o diffcore-break.o diffcore-order.o \

From f48fd68887a03756658a46486a5dd1301c5a655f Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 14 Apr 2007 08:54:37 -0700
Subject: [PATCH 042/109] attribute macro support

This adds "attribute macros" (for lack of better name).  So far,
we have low-level attributes such as crlf and diff, which are
defined in operational terms --- setting or unsetting them on a
particular path directly affects what is done to the path.  For
example, in order to decline diffs or crlf conversions on a
binary blob, no diffs on PostScript files, and treat all other
files normally, you would have something like these:

	*		diff crlf
	*.ps		!diff
	proprietary.o	!diff !crlf

That is fine as the operation goes, but gets unwieldy rather
rapidly, when we start adding more low-level attributes that are
defined in operational terms.  A near-term example of such an
attribute would be 'merge-3way' which would control if git
should attempt the usual 3-way file-level merge internally, or
leave merging to a specialized external program of user's
choice.  When it is added, we do _not_ want to force the users
to update the above to:

	*		diff crlf merge-3way
	*.ps		!diff
	proprietary.o	!diff !crlf !merge-3way

The way this patch solves this issue is to realize that the
attributes the user is assigning to paths are not defined in
terms of operations but in terms of what they are.

All of the three low-level attributes usually make sense for
most of the files that sane SCM users have git operate on (these
files are typically called "text").  Only a few cases, such as
binary blob, need exception to decline the "usual treatment
given to text files" -- and people mark them as "binary".

So this allows the $GIT_DIR/info/attributes and .gitattributes
at the toplevel of the project to also specify attributes that
assign other attributes.  The syntax is '[attr]' followed by an
attribute name followed by a list of attribute names:

	[attr] binary	!diff !crlf !merge-3way

When "binary" attribute is set to a path, if the path has not
got diff/crlf/merge-3way attribute set or unset by other rules,
this rule unsets the three low-level attributes.

It is expected that the user level .gitattributes will be
expressed mostly in terms of attributes based on what the files
are, and the above sample would become like this:

	(built-in attribute configuration)
	[attr] binary	!diff !crlf !merge-3way
	*		diff crlf merge-3way

	(project specific .gitattributes)
	proprietary.o	binary

	(user preference $GIT_DIR/info/attributes)
	*.ps		!diff

There are a few caveats.

 * As described above, you can define these macros only in
   $GIT_DIR/info/attributes and toplevel .gitattributes.

 * There is no attempt to detect circular definition of macro
   attributes, and definitions are evaluated from bottom to top
   as usual to fill in other attributes that have not yet got
   values.  The following would work as expected:

	[attr] text	diff crlf
	[attr] ps	text !diff
	*.ps	ps

   while this would most likely not (I haven't tried):

	[attr] ps	text !diff
	[attr] text	diff crlf
	*.ps	ps

 * When a macro says "[attr] A B !C", saying that a path does
   not have attribute A does not let you tell anything about
   attributes B or C.  That is, given this:

	[attr] text	diff crlf
	[attr] ps	text !diff
	*.txt !ps

  path hello.txt, which would match "*.txt" pattern, would have
  "ps" attribute set to zero, but that does not make text
  attribute of hello.txt set to false (nor diff attribute set to
  true).

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c  | 179 +++++++++++++++++++++++++++++++++++++++++---------------
 cache.h |   1 +
 2 files changed, 132 insertions(+), 48 deletions(-)

diff --git a/attr.c b/attr.c
index 7435d927a9..a306144204 100644
--- a/attr.c
+++ b/attr.c
@@ -16,9 +16,12 @@
 struct git_attr {
 	struct git_attr *next;
 	unsigned h;
+	int attr_nr;
 	char name[FLEX_ARRAY];
 };
+static int attr_nr;
 
+static struct git_attr_check *check_all_attr;
 static struct git_attr *(git_attr_hash[HASHSIZE]);
 
 static unsigned hash_name(const char *name, int namelen)
@@ -50,7 +53,12 @@ struct git_attr *git_attr(const char *name, int len)
 	a->name[len] = 0;
 	a->h = hval;
 	a->next = git_attr_hash[pos];
+	a->attr_nr = attr_nr++;
 	git_attr_hash[pos] = a;
+
+	check_all_attr = xrealloc(check_all_attr,
+				  sizeof(*check_all_attr) * attr_nr);
+	check_all_attr[a->attr_nr].attr = a;
 	return a;
 }
 
@@ -69,26 +77,46 @@ struct attr_state {
 };
 
 struct match_attr {
-	char *pattern;
+	union {
+		char *pattern;
+		struct git_attr *attr;
+	} u;
+	char is_macro;
 	unsigned num_attr;
 	struct attr_state state[FLEX_ARRAY];
 };
 
 static const char blank[] = " \t\r\n";
 
-static struct match_attr *parse_attr_line(const char *line)
+static struct match_attr *parse_attr_line(const char *line, const char *src,
+					  int lineno, int macro_ok)
 {
 	int namelen;
 	int num_attr;
 	const char *cp, *name;
 	struct match_attr *res = res;
 	int pass;
+	int is_macro;
 
 	cp = line + strspn(line, blank);
 	if (!*cp || *cp == '#')
 		return NULL;
 	name = cp;
 	namelen = strcspn(name, blank);
+	if (strlen(ATTRIBUTE_MACRO_PREFIX) < namelen &&
+	    !prefixcmp(name, ATTRIBUTE_MACRO_PREFIX)) {
+		if (!macro_ok) {
+			fprintf(stderr, "%s not allowed: %s:%d\n",
+				name, src, lineno);
+			return NULL;
+		}
+		is_macro = 1;
+		name += strlen(ATTRIBUTE_MACRO_PREFIX);
+		name += strspn(name, blank);
+		namelen = strcspn(name, blank);
+	}
+	else
+		is_macro = 0;
 
 	for (pass = 0; pass < 2; pass++) {
 		/* pass 0 counts and allocates, pass 1 fills */
@@ -113,13 +141,19 @@ static struct match_attr *parse_attr_line(const char *line)
 		}
 		if (pass)
 			break;
+
 		res = xcalloc(1,
 			      sizeof(*res) +
 			      sizeof(struct attr_state) * num_attr +
-			      namelen + 1);
-		res->pattern = (char*)&(res->state[num_attr]);
-		memcpy(res->pattern, name, namelen);
-		res->pattern[namelen] = 0;
+			      (is_macro ? 0 : namelen + 1));
+		if (is_macro)
+			res->u.attr = git_attr(name, namelen);
+		else {
+			res->u.pattern = (char*)&(res->state[num_attr]);
+			memcpy(res->u.pattern, name, namelen);
+			res->u.pattern[namelen] = 0;
+		}
+		res->is_macro = is_macro;
 		res->num_attr = num_attr;
 	}
 	return res;
@@ -167,10 +201,13 @@ static struct attr_stack *read_attr_from_array(const char **list)
 {
 	struct attr_stack *res;
 	const char *line;
+	int lineno = 0;
 
 	res = xcalloc(1, sizeof(*res));
 	while ((line = *(list++)) != NULL) {
-		struct match_attr *a = parse_attr_line(line);
+		struct match_attr *a;
+
+		a = parse_attr_line(line, "[builtin]", ++lineno, 1);
 		if (!a)
 			continue;
 		res->attrs = xrealloc(res->attrs, res->num_matches + 1);
@@ -179,11 +216,12 @@ static struct attr_stack *read_attr_from_array(const char **list)
 	return res;
 }
 
-static struct attr_stack *read_attr_from_file(const char *path)
+static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
 {
 	FILE *fp;
 	struct attr_stack *res;
 	char buf[2048];
+	int lineno = 0;
 
 	res = xcalloc(1, sizeof(*res));
 	fp = fopen(path, "r");
@@ -191,7 +229,9 @@ static struct attr_stack *read_attr_from_file(const char *path)
 		return res;
 
 	while (fgets(buf, sizeof(buf), fp)) {
-		struct match_attr *a = parse_attr_line(buf);
+		struct match_attr *a;
+
+		a = parse_attr_line(buf, path, ++lineno, macro_ok);
 		if (!a)
 			continue;
 		res->attrs = xrealloc(res->attrs, res->num_matches + 1);
@@ -206,13 +246,42 @@ static void debug_info(const char *what, struct attr_stack *elem)
 {
 	fprintf(stderr, "%s: %s\n", what, elem->origin ? elem->origin : "()");
 }
+static void debug_set(const char *what, const char *match, struct git_attr *attr, int set)
+{
+	fprintf(stderr, "%s: %s => %d (%s)\n",
+		what, attr->name, set, match);
+}
 #define debug_push(a) debug_info("push", (a))
 #define debug_pop(a) debug_info("pop", (a))
 #else
 #define debug_push(a) do { ; } while (0)
 #define debug_pop(a) do { ; } while (0)
+#define debug_set(a,b,c,d) do { ; } while (0)
 #endif
 
+static void bootstrap_attr_stack(void)
+{
+	if (!attr_stack) {
+		struct attr_stack *elem;
+
+		elem = read_attr_from_array(builtin_attr);
+		elem->origin = NULL;
+		elem->prev = attr_stack;
+		attr_stack = elem;
+
+		elem = read_attr_from_file(GITATTRIBUTES_FILE, 1);
+		elem->origin = strdup("");
+		elem->prev = attr_stack;
+		attr_stack = elem;
+		debug_push(elem);
+
+		elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE), 1);
+		elem->origin = NULL;
+		elem->prev = attr_stack;
+		attr_stack = elem;
+	}
+}
+
 static void prepare_attr_stack(const char *path, int dirlen)
 {
 	struct attr_stack *elem, *info;
@@ -232,23 +301,8 @@ static void prepare_attr_stack(const char *path, int dirlen)
 	 * .gitattributes in deeper directories to shallower ones,
 	 * and finally use the built-in set as the default.
 	 */
-	if (!attr_stack) {
-		elem = read_attr_from_array(builtin_attr);
-		elem->origin = NULL;
-		elem->prev = attr_stack;
-		attr_stack = elem;
-
-		elem = read_attr_from_file(GITATTRIBUTES_FILE);
-		elem->origin = strdup("");
-		elem->prev = attr_stack;
-		attr_stack = elem;
-		debug_push(elem);
-
-		elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE));
-		elem->origin = NULL;
-		elem->prev = attr_stack;
-		attr_stack = elem;
-	}
+	if (!attr_stack)
+		bootstrap_attr_stack();
 
 	/*
 	 * Pop the "info" one that is always at the top of the stack.
@@ -286,7 +340,7 @@ static void prepare_attr_stack(const char *path, int dirlen)
 		memcpy(pathbuf + dirlen, "/", 2);
 		cp = strchr(pathbuf + len + 1, '/');
 		strcpy(cp + 1, GITATTRIBUTES_FILE);
-		elem = read_attr_from_file(pathbuf);
+		elem = read_attr_from_file(pathbuf, 0);
 		*cp = '\0';
 		elem->origin = strdup(pathbuf);
 		elem->prev = attr_stack;
@@ -324,31 +378,26 @@ static int path_matches(const char *pathname, int pathlen,
 	return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0;
 }
 
-/*
- * I do not like this at all.  Only because we allow individual
- * attribute to be set or unset incrementally by individual
- * lines in .gitattribute files, we need to do this triple
- * loop which looks quite wasteful.
- */
-static int fill(const char *path, int pathlen,
-		struct attr_stack *stk, struct git_attr_check *check,
-		int num, int rem)
+static int fill(const char *path, int pathlen, struct attr_stack *stk, int rem)
 {
-	int i, j, k;
 	const char *base = stk->origin ? stk->origin : "";
+	int i, j;
+	struct git_attr_check *check = check_all_attr;
 
 	for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
 		struct match_attr *a = stk->attrs[i];
+		if (a->is_macro)
+			continue;
 		if (path_matches(path, pathlen,
-				 a->pattern, base, strlen(base))) {
-			for (j = 0; j < a->num_attr; j++) {
+				 a->u.pattern, base, strlen(base))) {
+			for (j = 0; 0 < rem && j < a->num_attr; j++) {
 				struct git_attr *attr = a->state[j].attr;
 				int set = !a->state[j].unset;
-				for (k = 0; k < num; k++) {
-					if (0 <= check[k].isset ||
-					    check[k].attr != attr)
-						continue;
-					check[k].isset = set;
+				int *n = &(check[attr->attr_nr].isset);
+
+				if (*n < 0) {
+					debug_set("fill", a->u.pattern, attr, set);
+					*n = set;
 					rem--;
 				}
 			}
@@ -357,14 +406,41 @@ static int fill(const char *path, int pathlen,
 	return rem;
 }
 
+static int macroexpand(struct attr_stack *stk, int rem)
+{
+	int i, j;
+	struct git_attr_check *check = check_all_attr;
+
+	for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
+		struct match_attr *a = stk->attrs[i];
+		if (!a->is_macro)
+			continue;
+		if (check[a->u.attr->attr_nr].isset < 0)
+			continue;
+		for (j = 0; 0 < rem && j < a->num_attr; j++) {
+			struct git_attr *attr = a->state[j].attr;
+			int set = !a->state[j].unset;
+			int *n = &(check[attr->attr_nr].isset);
+
+			if (*n < 0) {
+				debug_set("expand", a->u.attr->name, attr, set);
+				*n = set;
+				rem--;
+			}
+		}
+	}
+	return rem;
+}
+
 int git_checkattr(const char *path, int num, struct git_attr_check *check)
 {
 	struct attr_stack *stk;
 	const char *cp;
 	int dirlen, pathlen, i, rem;
 
-	for (i = 0; i < num; i++)
-		check[i].isset = -1;
+	bootstrap_attr_stack();
+	for (i = 0; i < attr_nr; i++)
+		check_all_attr[i].isset = -1;
 
 	pathlen = strlen(path);
 	cp = strrchr(path, '/');
@@ -373,8 +449,15 @@ int git_checkattr(const char *path, int num, struct git_attr_check *check)
 	else
 		dirlen = cp - path;
 	prepare_attr_stack(path, dirlen);
-	rem = num;
+	rem = attr_nr;
 	for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
-		rem = fill(path, pathlen, stk, check, num, rem);
+		rem = fill(path, pathlen, stk, rem);
+
+	for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
+		rem = macroexpand(stk, rem);
+
+	for (i = 0; i < num; i++)
+		check[i].isset = check_all_attr[check[i].attr->attr_nr].isset;
+
 	return 0;
 }
diff --git a/cache.h b/cache.h
index 63af43fe5c..38ad00661d 100644
--- a/cache.h
+++ b/cache.h
@@ -153,6 +153,7 @@ enum object_type {
 #define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
 #define GITATTRIBUTES_FILE ".gitattributes"
 #define INFOATTRIBUTES_FILE "info/attributes"
+#define ATTRIBUTE_MACRO_PREFIX "[attr]"
 
 extern int is_bare_repository_cfg;
 extern int is_bare_repository(void);

From fc2d07b05fb691f98b3a55c1499fae6fb25a7d31 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 14 Apr 2007 08:56:35 -0700
Subject: [PATCH 043/109] Define a built-in attribute macro "binary".

For binary files we would want to disable textual diff
generation and automatic crlf conversion.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/attr.c b/attr.c
index a306144204..410bca613c 100644
--- a/attr.c
+++ b/attr.c
@@ -194,6 +194,7 @@ static void free_attr_elem(struct attr_stack *e)
 }
 
 static const char *builtin_attr[] = {
+	"[attr]binary !diff !crlf",
 	NULL,
 };
 

From e4aee10a2eaf0937d86d046f85ee569a75cae9ac Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 15 Apr 2007 14:56:09 -0700
Subject: [PATCH 044/109] Change attribute negation marker from '!' to '-'.

At the same time, we do not want to allow arbitrary strings for
attribute names, as we are likely to want to extend the syntax
later.  Allow only alnum, dash, underscore and dot for now.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c               | 51 +++++++++++++++++++++++++++++++++++++++-----
 builtin-check-attr.c |  6 +++++-
 t/t0020-crlf.sh      |  2 +-
 3 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/attr.c b/attr.c
index 410bca613c..60fe48f3b8 100644
--- a/attr.c
+++ b/attr.c
@@ -36,6 +36,27 @@ static unsigned hash_name(const char *name, int namelen)
 	return val;
 }
 
+static int invalid_attr_name(const char *name, int namelen)
+{
+	/*
+	 * Attribute name cannot begin with '-' and from
+	 * [-A-Za-z0-9_.].  We'd specifically exclude '=' for now,
+	 * as we might later want to allow non-binary value for
+	 * attributes, e.g. "*.svg	merge=special-merge-program-for-svg"
+	 */
+	if (*name == '-')
+		return -1;
+	while (namelen--) {
+		char ch = *name++;
+		if (! (ch == '-' || ch == '.' || ch == '_' ||
+		       ('0' <= ch && ch <= '9') ||
+		       ('a' <= ch && ch <= 'z') ||
+		       ('A' <= ch && ch <= 'Z')) )
+			return -1;
+	}
+	return 0;
+}
+
 struct git_attr *git_attr(const char *name, int len)
 {
 	unsigned hval = hash_name(name, len);
@@ -48,6 +69,9 @@ struct git_attr *git_attr(const char *name, int len)
 			return a;
 	}
 
+	if (invalid_attr_name(name, len))
+		return NULL;
+
 	a = xmalloc(sizeof(*a) + len + 1);
 	memcpy(a->name, name, len);
 	a->name[len] = 0;
@@ -68,7 +92,7 @@ struct git_attr *git_attr(const char *name, int len)
  * (1) glob pattern.
  * (2) whitespace
  * (3) whitespace separated list of attribute names, each of which
- *     could be prefixed with '!' to mean "not set".
+ *     could be prefixed with '-' to mean "not set".
  */
 
 struct attr_state {
@@ -114,6 +138,12 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		name += strlen(ATTRIBUTE_MACRO_PREFIX);
 		name += strspn(name, blank);
 		namelen = strcspn(name, blank);
+		if (invalid_attr_name(name, namelen)) {
+			fprintf(stderr,
+				"%.*s is not a valid attribute name: %s:%d\n",
+				namelen, name, src, lineno);
+			return NULL;
+		}
 	}
 	else
 		is_macro = 0;
@@ -126,11 +156,21 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		while (*cp) {
 			const char *ep;
 			ep = cp + strcspn(cp, blank);
-			if (pass) {
+			if (!pass) {
+				if (*cp == '-')
+					cp++;
+				if (invalid_attr_name(cp, ep - cp)) {
+					fprintf(stderr,
+						"%.*s is not a valid attribute name: %s:%d\n",
+						(int)(ep - cp), cp,
+						src, lineno);
+					return NULL;
+				}
+			} else {
 				struct attr_state *e;
 
 				e = &(res->state[num_attr]);
-				if (*cp == '!') {
+				if (*cp == '-') {
 					e->unset = 1;
 					cp++;
 				}
@@ -146,8 +186,9 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 			      sizeof(*res) +
 			      sizeof(struct attr_state) * num_attr +
 			      (is_macro ? 0 : namelen + 1));
-		if (is_macro)
+		if (is_macro) {
 			res->u.attr = git_attr(name, namelen);
+		}
 		else {
 			res->u.pattern = (char*)&(res->state[num_attr]);
 			memcpy(res->u.pattern, name, namelen);
@@ -194,7 +235,7 @@ static void free_attr_elem(struct attr_stack *e)
 }
 
 static const char *builtin_attr[] = {
-	"[attr]binary !diff !crlf",
+	"[attr]binary -diff -crlf",
 	NULL,
 };
 
diff --git a/builtin-check-attr.c b/builtin-check-attr.c
index 47b07210d6..634be9ed2e 100644
--- a/builtin-check-attr.c
+++ b/builtin-check-attr.c
@@ -29,8 +29,12 @@ int cmd_check_attr(int argc, const char **argv, const char *prefix)
 	check = xcalloc(cnt, sizeof(*check));
 	for (i = 0; i < cnt; i++) {
 		const char *name;
+		struct git_attr *a;
 		name = argv[i + 1];
-		check[i].attr = git_attr(name, strlen(name));
+		a = git_attr(name, strlen(name));
+		if (!a)
+			return error("%s: not a valid attribute name", name);
+		check[i].attr = a;
 	}
 
 	for (i = doubledash; i < argc; i++) {
diff --git a/t/t0020-crlf.sh b/t/t0020-crlf.sh
index 600dcd30a0..cf84f0a1ab 100755
--- a/t/t0020-crlf.sh
+++ b/t/t0020-crlf.sh
@@ -216,7 +216,7 @@ test_expect_success 'apply patch --index (autocrlf=true)' '
 
 test_expect_success '.gitattributes says two is binary' '
 
-	echo "two !crlf" >.gitattributes &&
+	echo "two -crlf" >.gitattributes &&
 	rm -f tmp one dir/two &&
 	git repo-config core.autocrlf true &&
 	git read-tree --reset -u HEAD &&

From b568a503def81f49704ba94f5a822d523022102a Mon Sep 17 00:00:00 2001
From: James Bowes <jbowes@dangerouslyinc.com>
Date: Sat, 14 Apr 2007 21:27:20 -0400
Subject: [PATCH 045/109] Document git-check-attr

Signed-off-by: James Bowes <jbowes@dangerouslyinc.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/cmd-list.perl      |  1 +
 Documentation/git-check-attr.txt | 37 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 Documentation/git-check-attr.txt

diff --git a/Documentation/cmd-list.perl b/Documentation/cmd-list.perl
index 0381590d38..443802a9a3 100755
--- a/Documentation/cmd-list.perl
+++ b/Documentation/cmd-list.perl
@@ -84,6 +84,7 @@ git-bundle                              mainporcelain
 git-cat-file                            plumbinginterrogators
 git-checkout-index                      plumbingmanipulators
 git-checkout                            mainporcelain
+git-check-attr                          purehelpers
 git-check-ref-format                    purehelpers
 git-cherry                              ancillaryinterrogators
 git-cherry-pick                         mainporcelain
diff --git a/Documentation/git-check-attr.txt b/Documentation/git-check-attr.txt
new file mode 100644
index 0000000000..ceb51959b1
--- /dev/null
+++ b/Documentation/git-check-attr.txt
@@ -0,0 +1,37 @@
+git-check-attr(1)
+=================
+
+NAME
+----
+git-check-attr - Display gitattributes information.
+
+
+SYNOPSIS
+--------
+'git-check-attr' attr... [--] pathname...
+
+DESCRIPTION
+-----------
+For every pathname, this command will list if each attr is 'unspecified',
+'set', or 'unset' as a gitattribute on that pathname.
+
+OPTIONS
+-------
+\--::
+	Interpret all preceding arguments as attributes, and all following
+	arguments as path names. If not supplied, only the first argument will
+	be treated as an attribute.
+
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by James Bowes.
+
+GIT
+---
+Part of the gitlink:git[7] suite
+

From 8a5a8d6c97e36dbd95361eab1109e4380fe45df4 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:28:10 -0400
Subject: [PATCH 046/109] pack-objects: optimize preferred base handling a bit

Let's avoid some cycles when there is no base to test against, and avoid
unnecessary object lookups.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index bc5f2329a8..62a011e2e9 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -959,22 +959,21 @@ static void add_pbase_object(struct tree_desc *tree,
 			     const char *fullname)
 {
 	struct name_entry entry;
+	int cmp;
 
 	while (tree_entry(tree,&entry)) {
-		unsigned long size;
-		enum object_type type;
-
-		if (tree_entry_len(entry.path, entry.sha1) != cmplen ||
-		    memcmp(entry.path, name, cmplen) ||
-		    !has_sha1_file(entry.sha1) ||
-		    (type = sha1_object_info(entry.sha1, &size)) < 0)
+		cmp = tree_entry_len(entry.path, entry.sha1) != cmplen ? 1 :
+		      memcmp(name, entry.path, cmplen);
+		if (cmp > 0)
 			continue;
+		if (cmp < 0)
+			return;
 		if (name[cmplen] != '/') {
 			unsigned hash = name_hash(fullname);
 			add_object_entry(entry.sha1, hash, 1);
 			return;
 		}
-		if (type == OBJ_TREE) {
+		if (S_ISDIR(entry.mode)) {
 			struct tree_desc sub;
 			struct pbase_tree_cache *tree;
 			const char *down = name+cmplen+1;
@@ -1034,15 +1033,15 @@ static int check_pbase_path(unsigned hash)
 static void add_preferred_base_object(const char *name, unsigned hash)
 {
 	struct pbase_tree *it;
-	int cmplen = name_cmp_len(name);
+	int cmplen;
 
-	if (check_pbase_path(hash))
+	if (!num_preferred_base || check_pbase_path(hash))
 		return;
 
+	cmplen = name_cmp_len(name);
 	for (it = pbase_tree; it; it = it->next) {
 		if (cmplen == 0) {
-			hash = name_hash("");
-			add_object_entry(it->pcache.sha1, hash, 1);
+			add_object_entry(it->pcache.sha1, 0, 1);
 		}
 		else {
 			struct tree_desc tree;
@@ -1587,9 +1586,7 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit)
 {
-	unsigned hash = name_hash("");
-	add_preferred_base_object("", hash);
-	add_object_entry(commit->object.sha1, hash, 0);
+	add_object_entry(commit->object.sha1, 0, 0);
 }
 
 static void show_object(struct object_array_entry *p)

From adcc70950e594065050c375ace8a039678d2e31f Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:28:52 -0400
Subject: [PATCH 047/109] pack-objects: equal objects in size should delta
 against newer objects

Before finding best delta combinations, we sort objects by name hash,
then by size, then by their position in memory.  Then we walk the list
backwards to test delta candidates.

We hope that a bigger size usually means a newer object.  But a bigger
address in memory does not mean a newer object.  So the last comparison
must be reversed.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 62a011e2e9..869ca1ab26 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1276,7 +1276,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
 		return -1;
 	if (a->size > b->size)
 		return 1;
-	return a < b ? -1 : (a > b);
+	return a > b ? -1 : (a < b);  /* newest last */
 }
 
 struct unpacked {

From 898b14cedc353de95945fcc56e14f463c3066bf0 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:29:16 -0400
Subject: [PATCH 048/109] pack-objects: rework check_delta_limit usage

Objects that have delta "children" from pack data reuse must consider the
depth of their deepest child when they try to deltify themselves for those
children not to become too deep.

However, in the context of a "thin" pack, the delta children depth was
skipped entirely on the presumption that the pack was always going to be
exploded on the receiving end, hence the delta length wasn't an issue.

Now that we keep received packs as is and reuse pack data when repacking,
those packs do contain delta chains that are longer than expected. Worse,
those delta chains may even grow longer when the pack is further repacked
into another thin pack for a subsequent transmission.

So this patch restores strict delta length even for thin packs, and it
moves check_delta_limit() usage directly in the delta loop where it is
needed.  This way the delta_limit can be removed from struct object_entry
as well.  Oh and the initial value was wrong too.

The progress_interval() function was moved to a more logical location in
the process.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 76 ++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 44 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 869ca1ab26..d44b8f4c01 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -27,7 +27,6 @@ struct object_entry {
 				 * nonzero if already written.
 				 */
 	unsigned int depth;	/* delta depth */
-	unsigned int delta_limit;	/* base adjustment for in-pack delta */
 	unsigned int hash;	/* name hint hash */
 	enum object_type type;
 	enum object_type in_pack_type;	/* could be delta */
@@ -1172,19 +1171,6 @@ static void check_object(struct object_entry *entry)
 		    sha1_to_hex(entry->sha1));
 }
 
-static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
-{
-	struct object_entry *child = me->delta_child;
-	unsigned int m = n;
-	while (child) {
-		unsigned int c = check_delta_limit(child, n + 1);
-		if (m < c)
-			m = c;
-		child = child->delta_sibling;
-	}
-	return m;
-}
-
 static void get_object_details(void)
 {
 	uint32_t i;
@@ -1193,23 +1179,6 @@ static void get_object_details(void)
 	prepare_pack_ix();
 	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
 		check_object(entry);
-
-	if (nr_objects == nr_result) {
-		/*
-		 * Depth of objects that depend on the entry -- this
-		 * is subtracted from depth-max to break too deep
-		 * delta chain because of delta data reusing.
-		 * However, we loosen this restriction when we know we
-		 * are creating a thin pack -- it will have to be
-		 * expanded on the other end anyway, so do not
-		 * artificially cut the delta chain and let it go as
-		 * deep as it wants.
-		 */
-		for (i = 0, entry = objects; i < nr_objects; i++, entry++)
-			if (!entry->delta && entry->delta_child)
-				entry->delta_limit =
-					check_delta_limit(entry, 1);
-	}
 }
 
 typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
@@ -1322,16 +1291,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
 		return 0;
 
-	/*
-	 * If the current object is at pack edge, take the depth the
-	 * objects that depend on the current object into account --
-	 * otherwise they would become too deep.
-	 */
-	if (trg_entry->delta_child) {
-		if (max_depth <= trg_entry->delta_limit)
-			return 0;
-		max_depth -= trg_entry->delta_limit;
-	}
+	/* Let's not bust the allowed depth. */
 	if (src_entry->depth >= max_depth)
 		return 0;
 
@@ -1378,9 +1338,17 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	return 1;
 }
 
-static void progress_interval(int signum)
+static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
 {
-	progress_update = 1;
+	struct object_entry *child = me->delta_child;
+	unsigned int m = n;
+	while (child) {
+		unsigned int c = check_delta_limit(child, n + 1);
+		if (m < c)
+			m = c;
+		child = child->delta_sibling;
+	}
+	return m;
 }
 
 static void find_deltas(struct object_entry **list, int window, int depth)
@@ -1389,6 +1357,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 	unsigned int array_size = window * sizeof(struct unpacked);
 	struct unpacked *array;
 	unsigned last_percent = 999;
+	int max_depth;
 
 	if (!nr_objects)
 		return;
@@ -1429,6 +1398,18 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		n->data = NULL;
 		n->entry = entry;
 
+		/*
+		 * If the current object is at pack edge, take the depth the
+		 * objects that depend on the current object into account
+		 * otherwise they would become too deep.
+		 */
+		max_depth = depth;
+		if (entry->delta_child) {
+			max_depth -= check_delta_limit(entry, 0);
+			if (max_depth <= 0)
+				goto next;
+		}
+
 		j = window;
 		while (--j > 0) {
 			uint32_t other_idx = idx + j;
@@ -1438,9 +1419,10 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 			m = array + other_idx;
 			if (!m->entry)
 				break;
-			if (try_delta(n, m, depth) < 0)
+			if (try_delta(n, m, max_depth) < 0)
 				break;
 		}
+
 		/* if we made n a delta, and if n is already at max
 		 * depth, leaving it in the window is pointless.  we
 		 * should evict it first.
@@ -1448,6 +1430,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		if (entry->delta && depth <= entry->depth)
 			continue;
 
+		next:
 		idx++;
 		if (idx >= window)
 			idx = 0;
@@ -1525,6 +1508,11 @@ static int reuse_cached_pack(unsigned char *sha1)
 	return 1;
 }
 
+static void progress_interval(int signum)
+{
+	progress_update = 1;
+}
+
 static void setup_progress_signal(void)
 {
 	struct sigaction sa;

From 9668cf59a83d1aa881036818abf29cc2ea9e291b Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:29:54 -0400
Subject: [PATCH 049/109] pack-objects: clean up list sorting

Get rid of sort_comparator() as it imposes a run time double indirect
function call for little compile time type checking gain.

Also get rid of create_sorted_list() as it only has one user which would
as well be just fine doing its sorting locally.  Eventually the list of
deltifiable objects might be shorter than the whole object list.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 53 ++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index d44b8f4c01..15119d63d9 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -66,7 +66,7 @@ static int local;
 static int incremental;
 static int allow_ofs_delta;
 
-static struct object_entry **sorted_by_sha, **sorted_by_type;
+static struct object_entry **sorted_by_sha;
 static struct object_entry *objects;
 static uint32_t nr_objects, nr_alloc, nr_result;
 static const char *base_name;
@@ -1181,31 +1181,10 @@ static void get_object_details(void)
 		check_object(entry);
 }
 
-typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
-
-static entry_sort_t current_sort;
-
-static int sort_comparator(const void *_a, const void *_b)
-{
-	struct object_entry *a = *(struct object_entry **)_a;
-	struct object_entry *b = *(struct object_entry **)_b;
-	return current_sort(a,b);
-}
-
-static struct object_entry **create_sorted_list(entry_sort_t sort)
-{
-	struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
-	uint32_t i;
-
-	for (i = 0; i < nr_objects; i++)
-		list[i] = objects + i;
-	current_sort = sort;
-	qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
-	return list;
-}
-
-static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
+static int sha1_sort(const void *_a, const void *_b)
 {
+	const struct object_entry *a = *(struct object_entry **)_a;
+	const struct object_entry *b = *(struct object_entry **)_b;
 	return hashcmp(a->sha1, b->sha1);
 }
 
@@ -1222,13 +1201,15 @@ static struct object_entry **create_final_object_list(void)
 		if (!objects[i].preferred_base)
 			list[j++] = objects + i;
 	}
-	current_sort = sha1_sort;
-	qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator);
+	qsort(list, nr_result, sizeof(struct object_entry *), sha1_sort);
 	return list;
 }
 
-static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
+static int type_size_sort(const void *_a, const void *_b)
 {
+	const struct object_entry *a = *(struct object_entry **)_a;
+	const struct object_entry *b = *(struct object_entry **)_b;
+
 	if (a->type < b->type)
 		return -1;
 	if (a->type > b->type)
@@ -1448,10 +1429,20 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 
 static void prepare_pack(int window, int depth)
 {
+	struct object_entry **delta_list;
+	uint32_t i;
+
 	get_object_details();
-	sorted_by_type = create_sorted_list(type_size_sort);
-	if (window && depth)
-		find_deltas(sorted_by_type, window+1, depth);
+
+	if (!window || !depth)
+		return;
+
+	delta_list = xmalloc(nr_objects * sizeof(*delta_list));
+	for (i = 0; i < nr_objects; i++)
+		delta_list[i] = objects + i;
+	qsort(delta_list, nr_objects, sizeof(*delta_list), type_size_sort);
+	find_deltas(delta_list, window+1, depth);
+	free(delta_list);
 }
 
 static int reuse_cached_pack(unsigned char *sha1)

From f7ae6a930a3c5d501439cdba98417af747350738 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:30:15 -0400
Subject: [PATCH 050/109] pack-objects: get rid of reuse_cached_pack

This capability is practically never useful, and therefore never tested,
because it is fairly unlikely that the requested pack will be already
available.  Furthermore it is of little gain over the ability to reuse
existing pack data.

In fact the ability to change delta type on the fly when reusing delta
data is a nice thing that has almost no cost and allows greater backward
compatibility with a client's capabilities than if the client is blindly
sent a whole pack without any discrimination.

And this "feature" is simply in the way of other cleanups.
Let's get rid of it.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 86 +++++++-----------------------------------
 1 file changed, 14 insertions(+), 72 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 15119d63d9..c2f7c30817 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1445,60 +1445,6 @@ static void prepare_pack(int window, int depth)
 	free(delta_list);
 }
 
-static int reuse_cached_pack(unsigned char *sha1)
-{
-	static const char cache[] = "pack-cache/pack-%s.%s";
-	char *cached_pack, *cached_idx;
-	int ifd, ofd, ifd_ix = -1;
-
-	cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
-	ifd = open(cached_pack, O_RDONLY);
-	if (ifd < 0)
-		return 0;
-
-	if (!pack_to_stdout) {
-		cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
-		ifd_ix = open(cached_idx, O_RDONLY);
-		if (ifd_ix < 0) {
-			close(ifd);
-			return 0;
-		}
-	}
-
-	if (progress)
-		fprintf(stderr, "Reusing %u objects pack %s\n", nr_objects,
-			sha1_to_hex(sha1));
-
-	if (pack_to_stdout) {
-		if (copy_fd(ifd, 1))
-			exit(1);
-		close(ifd);
-	}
-	else {
-		char name[PATH_MAX];
-		snprintf(name, sizeof(name),
-			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
-		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
-		if (ofd < 0)
-			die("unable to open %s (%s)", name, strerror(errno));
-		if (copy_fd(ifd, ofd))
-			exit(1);
-		close(ifd);
-
-		snprintf(name, sizeof(name),
-			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
-		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
-		if (ofd < 0)
-			die("unable to open %s (%s)", name, strerror(errno));
-		if (copy_fd(ifd_ix, ofd))
-			exit(1);
-		close(ifd_ix);
-		puts(sha1_to_hex(sha1));
-	}
-
-	return 1;
-}
-
 static void progress_interval(int signum)
 {
 	progress_update = 1;
@@ -1618,6 +1564,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	SHA_CTX ctx;
 	int depth = 10;
 	struct object_entry **list;
+	off_t last_obj_offset;
 	int use_internal_rev_list = 0;
 	int thin = 0;
 	uint32_t i;
@@ -1779,24 +1726,19 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	if (progress && (nr_objects != nr_result))
 		fprintf(stderr, "Result has %u objects.\n", nr_result);
 
-	if (reuse_cached_pack(object_list_sha1))
-		;
-	else {
-		off_t last_obj_offset;
-		if (nr_result)
-			prepare_pack(window, depth);
-		if (progress == 1 && pack_to_stdout) {
-			/* the other end usually displays progress itself */
-			struct itimerval v = {{0,},};
-			setitimer(ITIMER_REAL, &v, NULL);
-			signal(SIGALRM, SIG_IGN );
-			progress_update = 0;
-		}
-		last_obj_offset = write_pack_file();
-		if (!pack_to_stdout) {
-			write_index_file(last_obj_offset);
-			puts(sha1_to_hex(object_list_sha1));
-		}
+	if (nr_result)
+		prepare_pack(window, depth);
+	if (progress == 1 && pack_to_stdout) {
+		/* the other end usually displays progress itself */
+		struct itimerval v = {{0,},};
+		setitimer(ITIMER_REAL, &v, NULL);
+		signal(SIGALRM, SIG_IGN );
+		progress_update = 0;
+	}
+	last_obj_offset = write_pack_file();
+	if (!pack_to_stdout) {
+		write_index_file(last_obj_offset);
+		puts(sha1_to_hex(object_list_sha1));
 	}
 	if (progress)
 		fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",

From 81a216a5d6a12976b20d9a39829562f280ae96f2 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:31:05 -0400
Subject: [PATCH 051/109] pack-objects: get rid of create_final_object_list()

Because we don't have to know the SHA1 (hence the name) of the pack
up front anymore, let's get rid of yet another global sorted object list
and sort them only in write_index_file(), then compute the object list
SHA1 on the fly.

This has the advantage of saving another chunk of memory, and the sorted
list SHA1 won't be computed needlessly on servers during a fetch.

Of course the cunning plan is also to make write_index_file() much like
the function with the same name in index-pack.c for an eventual easy
sharing.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 127 +++++++++++++++++++++++------------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index c2f7c30817..7af1776673 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -59,17 +59,16 @@ struct object_entry {
  * heuristics.
  */
 
-static unsigned char object_list_sha1[20];
 static int non_empty;
 static int no_reuse_delta;
 static int local;
 static int incremental;
 static int allow_ofs_delta;
 
-static struct object_entry **sorted_by_sha;
 static struct object_entry *objects;
 static uint32_t nr_objects, nr_alloc, nr_result;
-static const char *base_name;
+static const char *pack_tmp_name, *idx_tmp_name;
+static char tmpname[PATH_MAX];
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
 static volatile sig_atomic_t progress_update;
@@ -578,13 +577,19 @@ static off_t write_pack_file(void)
 	unsigned last_percent = 999;
 	int do_progress = progress;
 
-	if (!base_name) {
+	if (pack_to_stdout) {
 		f = sha1fd(1, "<stdout>");
 		do_progress >>= 1;
+	} else {
+		int fd;
+		snprintf(tmpname, sizeof(tmpname), "tmp_pack_XXXXXX");
+		fd = mkstemp(tmpname);
+		if (fd < 0)
+			die("unable to create %s: %s\n", tmpname, strerror(errno));
+		pack_tmp_name = xstrdup(tmpname);
+		f = sha1fd(fd, pack_tmp_name);
 	}
-	else
-		f = sha1create("%s-%s.%s", base_name,
-			       sha1_to_hex(object_list_sha1), "pack");
+
 	if (do_progress)
 		fprintf(stderr, "Writing %u objects.\n", nr_result);
 
@@ -618,18 +623,46 @@ static off_t write_pack_file(void)
 	return last_obj_offset;
 }
 
+static int sha1_sort(const void *_a, const void *_b)
+{
+	const struct object_entry *a = *(struct object_entry **)_a;
+	const struct object_entry *b = *(struct object_entry **)_b;
+	return hashcmp(a->sha1, b->sha1);
+}
+
 static uint32_t index_default_version = 1;
 static uint32_t index_off32_limit = 0x7fffffff;
 
-static void write_index_file(off_t last_obj_offset)
+static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
 {
-	uint32_t i;
-	struct sha1file *f = sha1create("%s-%s.%s", base_name,
-					sha1_to_hex(object_list_sha1), "idx");
-	struct object_entry **list = sorted_by_sha;
-	struct object_entry **last = list + nr_result;
+	struct sha1file *f;
+	struct object_entry **sorted_by_sha, **list, **last;
 	uint32_t array[256];
-	uint32_t index_version;
+	uint32_t i, index_version;
+	SHA_CTX ctx;
+	int fd;
+
+	snprintf(tmpname, sizeof(tmpname), "tmp_idx_XXXXXX");
+	fd = mkstemp(tmpname);
+	if (fd < 0)
+		die("unable to create %s: %s\n", tmpname, strerror(errno));
+	idx_tmp_name = xstrdup(tmpname);
+	f = sha1fd(fd, idx_tmp_name);
+
+	if (nr_result) {
+		uint32_t j = 0;
+		sorted_by_sha =
+			xcalloc(nr_result, sizeof(struct object_entry *));
+		for (i = 0; i < nr_objects; i++)
+			if (!objects[i].preferred_base)
+				sorted_by_sha[j++] = objects + i;
+		if (j != nr_result)
+			die("listed %u objects while expecting %u", j, nr_result);
+		qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
+		list = sorted_by_sha;
+		last = sorted_by_sha + nr_result;
+	} else
+		sorted_by_sha = list = last = NULL;
 
 	/* if last object's offset is >= 2^31 we should use index V2 */
 	index_version = (last_obj_offset >> 31) ? 2 : index_default_version;
@@ -660,9 +693,10 @@ static void write_index_file(off_t last_obj_offset)
 	}
 	sha1write(f, array, 256 * 4);
 
-	/*
-	 * Write the actual SHA1 entries..
-	 */
+	/* Compute the SHA1 hash of sorted object names. */
+	SHA1_Init(&ctx);
+
+	/* Write the actual SHA1 entries. */
 	list = sorted_by_sha;
 	for (i = 0; i < nr_result; i++) {
 		struct object_entry *entry = *list++;
@@ -671,6 +705,7 @@ static void write_index_file(off_t last_obj_offset)
 			sha1write(f, &offset, 4);
 		}
 		sha1write(f, entry->sha1, 20);
+		SHA1_Update(&ctx, entry->sha1, 20);
 	}
 
 	if (index_version >= 2) {
@@ -711,6 +746,8 @@ static void write_index_file(off_t last_obj_offset)
 
 	sha1write(f, pack_file_sha1, 20);
 	sha1close(f, NULL, 1);
+	free(sorted_by_sha);
+	SHA1_Final(sha1, &ctx);
 }
 
 static int locate_object_entry_hash(const unsigned char *sha1)
@@ -789,6 +826,8 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
 	if (ix >= 0) {
 		if (exclude) {
 			entry = objects + object_ix[ix] - 1;
+			if (!entry->preferred_base)
+				nr_result--;
 			entry->preferred_base = 1;
 		}
 		return 0;
@@ -821,6 +860,8 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
 	entry->hash = hash;
 	if (exclude)
 		entry->preferred_base = 1;
+	else
+		nr_result++;
 	if (found_pack) {
 		entry->in_pack = found_pack;
 		entry->in_pack_offset = found_offset;
@@ -1181,30 +1222,6 @@ static void get_object_details(void)
 		check_object(entry);
 }
 
-static int sha1_sort(const void *_a, const void *_b)
-{
-	const struct object_entry *a = *(struct object_entry **)_a;
-	const struct object_entry *b = *(struct object_entry **)_b;
-	return hashcmp(a->sha1, b->sha1);
-}
-
-static struct object_entry **create_final_object_list(void)
-{
-	struct object_entry **list;
-	uint32_t i, j;
-
-	for (i = nr_result = 0; i < nr_objects; i++)
-		if (!objects[i].preferred_base)
-			nr_result++;
-	list = xmalloc(nr_result * sizeof(struct object_entry *));
-	for (i = j = 0; i < nr_objects; i++) {
-		if (!objects[i].preferred_base)
-			list[j++] = objects + i;
-	}
-	qsort(list, nr_result, sizeof(struct object_entry *), sha1_sort);
-	return list;
-}
-
 static int type_size_sort(const void *_a, const void *_b)
 {
 	const struct object_entry *a = *(struct object_entry **)_a;
@@ -1561,13 +1578,12 @@ static void get_object_list(int ac, const char **av)
 
 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 {
-	SHA_CTX ctx;
 	int depth = 10;
-	struct object_entry **list;
-	off_t last_obj_offset;
 	int use_internal_rev_list = 0;
 	int thin = 0;
 	uint32_t i;
+	off_t last_obj_offset;
+	const char *base_name = NULL;
 	const char **rp_av;
 	int rp_ac_alloc = 64;
 	int rp_ac;
@@ -1712,20 +1728,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 
 	if (progress)
 		fprintf(stderr, "Done counting %u objects.\n", nr_objects);
-	sorted_by_sha = create_final_object_list();
 	if (non_empty && !nr_result)
 		return 0;
-
-	SHA1_Init(&ctx);
-	list = sorted_by_sha;
-	for (i = 0; i < nr_result; i++) {
-		struct object_entry *entry = *list++;
-		SHA1_Update(&ctx, entry->sha1, 20);
-	}
-	SHA1_Final(object_list_sha1, &ctx);
 	if (progress && (nr_objects != nr_result))
 		fprintf(stderr, "Result has %u objects.\n", nr_result);
-
 	if (nr_result)
 		prepare_pack(window, depth);
 	if (progress == 1 && pack_to_stdout) {
@@ -1737,7 +1743,18 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	}
 	last_obj_offset = write_pack_file();
 	if (!pack_to_stdout) {
-		write_index_file(last_obj_offset);
+		unsigned char object_list_sha1[20];
+		write_index_file(last_obj_offset, object_list_sha1);
+		snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
+			 base_name, sha1_to_hex(object_list_sha1));
+		if (rename(pack_tmp_name, tmpname))
+			die("unable to rename temporary pack file: %s",
+			    strerror(errno));
+		snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
+			 base_name, sha1_to_hex(object_list_sha1));
+		if (rename(idx_tmp_name, tmpname))
+			die("unable to rename temporary index file: %s",
+			    strerror(errno));
 		puts(sha1_to_hex(object_list_sha1));
 	}
 	if (progress)

From a3fbf4dfe1bf2386add261dc7c2809b652b5f9ae Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:31:31 -0400
Subject: [PATCH 052/109] pack-objects: make in_pack_header_size a variable of
 its own

It currently aliases delta_size on the principle that reused deltas won't
go through the whole delta matching loop hence delta_size was unused.
This is not true if a given delta doesn't find its base in the pack though.
But we need that information even for whole object data reuse.

Well in short the current state looks awful and is prone to bugs.  It just
works fine now because try_delta() tests trg_entry->delta before using
trg_entry->delta_size, but that is a bit subtle and I was wondering for a
while why things just worked fine... even if I'm guilty of having
introduced this abomination myself in the first place.

Let's do the sensible thing instead with no ambiguity, which is to have
a separate variable for in_pack_header_size.  This might even help future
optimizations.

While at it, let's reorder some struct object_entry members so they all
align well with their own width, regardless of the architecture or the
size of off_t.  Some memory saving is to be expected with this alone.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 7af1776673..7100a76cd2 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -22,28 +22,26 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\
 
 struct object_entry {
 	unsigned char sha1[20];
+	uint32_t crc32;		/* crc of raw pack data for this object */
+	off_t offset;		/* offset into the final pack file */
 	unsigned long size;	/* uncompressed size */
-	off_t offset;	/* offset into the final pack file;
-				 * nonzero if already written.
-				 */
-	unsigned int depth;	/* delta depth */
 	unsigned int hash;	/* name hint hash */
-	enum object_type type;
-	enum object_type in_pack_type;	/* could be delta */
-	unsigned long delta_size;	/* delta data size (uncompressed) */
-#define in_pack_header_size delta_size	/* only when reusing pack data */
-	struct object_entry *delta;	/* delta base object */
+	unsigned int depth;	/* delta depth */
 	struct packed_git *in_pack; 	/* already in pack */
 	off_t in_pack_offset;
+	struct object_entry *delta;	/* delta base object */
 	struct object_entry *delta_child; /* deltified objects who bases me */
 	struct object_entry *delta_sibling; /* other deltified objects who
 					     * uses the same base as me
 					     */
-	int preferred_base;	/* we do not pack this, but is encouraged to
-				 * be used as the base objectto delta huge
-				 * objects against.
-				 */
-	uint32_t crc32;		/* crc of raw pack data for this object */
+	unsigned long delta_size;	/* delta data size (uncompressed) */
+	enum object_type type;
+	enum object_type in_pack_type;	/* could be delta */
+	unsigned char in_pack_header_size;
+	unsigned char preferred_base; /* we do not pack this, but is available
+				       * to be used as the base objectto delta
+				       * objects against.
+				       */
 };
 
 /*

From 54dab52ae8518da67e271b5b3a1f91af1fd5e314 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:31:56 -0400
Subject: [PATCH 053/109] add get_size_from_delta()

... which consists of existing code split out of packed_delta_info()
for other callers to use it as well.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 cache.h     |  1 +
 sha1_file.c | 73 ++++++++++++++++++++++++++++-------------------------
 2 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/cache.h b/cache.h
index 5b67f4c989..63399c7c6b 100644
--- a/cache.h
+++ b/cache.h
@@ -436,6 +436,7 @@ extern const unsigned char *nth_packed_object_sha1(const struct packed_git *, ui
 extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
 extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
 extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
+extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
 
 /* Dumb servers support */
diff --git a/sha1_file.c b/sha1_file.c
index 0be9737bd1..5dac4666b6 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1160,6 +1160,43 @@ static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type
 	return unpack_sha1_rest(&stream, hdr, *size, sha1);
 }
 
+unsigned long get_size_from_delta(struct packed_git *p,
+				  struct pack_window **w_curs,
+			          off_t curpos)
+{
+	const unsigned char *data;
+	unsigned char delta_head[20], *in;
+	z_stream stream;
+	int st;
+
+	memset(&stream, 0, sizeof(stream));
+	stream.next_out = delta_head;
+	stream.avail_out = sizeof(delta_head);
+
+	inflateInit(&stream);
+	do {
+		in = use_pack(p, w_curs, curpos, &stream.avail_in);
+		stream.next_in = in;
+		st = inflate(&stream, Z_FINISH);
+		curpos += stream.next_in - in;
+	} while ((st == Z_OK || st == Z_BUF_ERROR) &&
+		 stream.total_out < sizeof(delta_head));
+	inflateEnd(&stream);
+	if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head))
+		die("delta data unpack-initial failed");
+
+	/* Examine the initial part of the delta to figure out
+	 * the result size.
+	 */
+	data = delta_head;
+
+	/* ignore base size */
+	get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
+
+	/* Read the result size */
+	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
+}
+
 static off_t get_delta_base(struct packed_git *p,
 				    struct pack_window **w_curs,
 				    off_t *curpos,
@@ -1223,40 +1260,8 @@ static int packed_delta_info(struct packed_git *p,
 	 * based on a base with a wrong size.  This saves tons of
 	 * inflate() calls.
 	 */
-	if (sizep) {
-		const unsigned char *data;
-		unsigned char delta_head[20], *in;
-		z_stream stream;
-		int st;
-
-		memset(&stream, 0, sizeof(stream));
-		stream.next_out = delta_head;
-		stream.avail_out = sizeof(delta_head);
-
-		inflateInit(&stream);
-		do {
-			in = use_pack(p, w_curs, curpos, &stream.avail_in);
-			stream.next_in = in;
-			st = inflate(&stream, Z_FINISH);
-			curpos += stream.next_in - in;
-		} while ((st == Z_OK || st == Z_BUF_ERROR)
-			&& stream.total_out < sizeof(delta_head));
-		inflateEnd(&stream);
-		if ((st != Z_STREAM_END) &&
-		    stream.total_out != sizeof(delta_head))
-			die("delta data unpack-initial failed");
-
-		/* Examine the initial part of the delta to figure out
-		 * the result size.
-		 */
-		data = delta_head;
-
-		/* ignore base size */
-		get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
-
-		/* Read the result size */
-		*sizep = get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
-	}
+	if (sizep)
+		*sizep = get_size_from_delta(p, w_curs, curpos);
 
 	return type;
 }

From 5c49c11686df9d1c27a194349d0b2092e6446f42 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 16 Apr 2007 12:32:13 -0400
Subject: [PATCH 054/109] pack-objects: better check_object() performances

With a large number of objects, check_object() is really thrashing the
pack sliding map and the filesystem cache.  It has a completely random
access pattern especially with old objects where delta replay jumps back
and forth all over the pack.

This patch improves things by:

 1) sorting objects by their offset in pack before calling check_object()
    so the pack access pattern is linear;

 2) recording the object type at add_object_entry() time since it is
    already known in most cases;

 3) recording the pack offset even for preferred_base objects;

 4) avoiding calls to sha1_object_info() if at all possible.

This limits pack accesses to the bare minimum and makes them perfectly
linear.

In the process check_object() was made more clear (to me at least).

Note: I thought about walking the sorted_by_offset list backward in
get_object_details() so if a pack happens to be larger than the available
file cache, then the cache would have been populated with useful data from
the beginning of the pack already when find_deltas() is called.  Strangely,
testing (on Linux) showed absolutely no performance difference.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 216 +++++++++++++++++++++++++----------------
 1 file changed, 131 insertions(+), 85 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 7100a76cd2..19fae4c917 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -813,7 +813,8 @@ static unsigned name_hash(const char *name)
 	return hash;
 }
 
-static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
+static int add_object_entry(const unsigned char *sha1, enum object_type type,
+			    unsigned hash, int exclude)
 {
 	struct object_entry *entry;
 	struct packed_git *p, *found_pack = NULL;
@@ -831,19 +832,19 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
 		return 0;
 	}
 
-	if (!exclude) {
-		for (p = packed_git; p; p = p->next) {
-			off_t offset = find_pack_entry_one(sha1, p);
-			if (offset) {
-				if (incremental)
-					return 0;
-				if (local && !p->pack_local)
-					return 0;
-				if (!found_pack) {
-					found_offset = offset;
-					found_pack = p;
-				}
+	for (p = packed_git; p; p = p->next) {
+		off_t offset = find_pack_entry_one(sha1, p);
+		if (offset) {
+			if (!found_pack) {
+				found_offset = offset;
+				found_pack = p;
 			}
+			if (exclude)
+				break;
+			if (incremental)
+				return 0;
+			if (local && !p->pack_local)
+				return 0;
 		}
 	}
 
@@ -856,6 +857,8 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
 	memset(entry, 0, sizeof(*entry));
 	hashcpy(entry->sha1, sha1);
 	entry->hash = hash;
+	if (type)
+		entry->type = type;
 	if (exclude)
 		entry->preferred_base = 1;
 	else
@@ -1008,7 +1011,9 @@ static void add_pbase_object(struct tree_desc *tree,
 			return;
 		if (name[cmplen] != '/') {
 			unsigned hash = name_hash(fullname);
-			add_object_entry(entry.sha1, hash, 1);
+			add_object_entry(entry.sha1,
+					 S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB,
+					 hash, 1);
 			return;
 		}
 		if (S_ISDIR(entry.mode)) {
@@ -1079,7 +1084,7 @@ static void add_preferred_base_object(const char *name, unsigned hash)
 	cmplen = name_cmp_len(name);
 	for (it = pbase_tree; it; it = it->next) {
 		if (cmplen == 0) {
-			add_object_entry(it->pcache.sha1, 0, 1);
+			add_object_entry(it->pcache.sha1, OBJ_TREE, 0, 1);
 		}
 		else {
 			struct tree_desc tree;
@@ -1121,87 +1126,105 @@ static void add_preferred_base(unsigned char *sha1)
 
 static void check_object(struct object_entry *entry)
 {
-	if (entry->in_pack && !entry->preferred_base) {
+	if (entry->in_pack) {
 		struct packed_git *p = entry->in_pack;
 		struct pack_window *w_curs = NULL;
-		unsigned long size, used;
+		const unsigned char *base_ref = NULL;
+		struct object_entry *base_entry;
+		unsigned long used, used_0;
 		unsigned int avail;
-		unsigned char *buf;
-		struct object_entry *base_entry = NULL;
+		off_t ofs;
+		unsigned char *buf, c;
 
 		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
 
-		/* We want in_pack_type even if we do not reuse delta.
+		/*
+		 * We want in_pack_type even if we do not reuse delta.
 		 * There is no point not reusing non-delta representations.
 		 */
 		used = unpack_object_header_gently(buf, avail,
-						   &entry->in_pack_type, &size);
+						   &entry->in_pack_type,
+						   &entry->size);
 
-		/* Check if it is delta, and the base is also an object
-		 * we are going to pack.  If so we will reuse the existing
-		 * delta.
+		/*
+		 * Determine if this is a delta and if so whether we can
+		 * reuse it or not.  Otherwise let's find out as cheaply as
+		 * possible what the actual type and size for this object is.
 		 */
-		if (!no_reuse_delta) {
-			unsigned char c;
-			const unsigned char *base_name;
-			off_t ofs;
-			unsigned long used_0;
-			/* there is at least 20 bytes left in the pack */
-			switch (entry->in_pack_type) {
-			case OBJ_REF_DELTA:
-				base_name = use_pack(p, &w_curs,
-					entry->in_pack_offset + used, NULL);
-				used += 20;
-				break;
-			case OBJ_OFS_DELTA:
-				buf = use_pack(p, &w_curs,
-					entry->in_pack_offset + used, NULL);
-				used_0 = 0;
-				c = buf[used_0++];
-				ofs = c & 127;
-				while (c & 128) {
-					ofs += 1;
-					if (!ofs || MSB(ofs, 7))
-						die("delta base offset overflow in pack for %s",
-						    sha1_to_hex(entry->sha1));
-					c = buf[used_0++];
-					ofs = (ofs << 7) + (c & 127);
-				}
-				if (ofs >= entry->in_pack_offset)
-					die("delta base offset out of bound for %s",
-					    sha1_to_hex(entry->sha1));
-				ofs = entry->in_pack_offset - ofs;
-				base_name = find_packed_object_name(p, ofs);
-				used += used_0;
-				break;
-			default:
-				base_name = NULL;
-			}
-			if (base_name)
-				base_entry = locate_object_entry(base_name);
-		}
-		unuse_pack(&w_curs);
-		entry->in_pack_header_size = used;
-
-		if (base_entry) {
-
-			/* Depth value does not matter - find_deltas()
-			 * will never consider reused delta as the
-			 * base object to deltify other objects
-			 * against, in order to avoid circular deltas.
-			 */
-
-			/* uncompressed size of the delta data */
-			entry->size = size;
-			entry->delta = base_entry;
+		switch (entry->in_pack_type) {
+		default:
+			/* Not a delta hence we've already got all we need. */
 			entry->type = entry->in_pack_type;
+			entry->in_pack_header_size = used;
+			unuse_pack(&w_curs);
+			return;
+		case OBJ_REF_DELTA:
+			if (!no_reuse_delta && !entry->preferred_base)
+				base_ref = use_pack(p, &w_curs,
+						entry->in_pack_offset + used, NULL);
+			entry->in_pack_header_size = used + 20;
+			break;
+		case OBJ_OFS_DELTA:
+			buf = use_pack(p, &w_curs,
+				       entry->in_pack_offset + used, NULL);
+			used_0 = 0;
+			c = buf[used_0++];
+			ofs = c & 127;
+			while (c & 128) {
+				ofs += 1;
+				if (!ofs || MSB(ofs, 7))
+					die("delta base offset overflow in pack for %s",
+					    sha1_to_hex(entry->sha1));
+				c = buf[used_0++];
+				ofs = (ofs << 7) + (c & 127);
+			}
+			if (ofs >= entry->in_pack_offset)
+				die("delta base offset out of bound for %s",
+				    sha1_to_hex(entry->sha1));
+			ofs = entry->in_pack_offset - ofs;
+			if (!no_reuse_delta && !entry->preferred_base)
+				base_ref = find_packed_object_name(p, ofs);
+			entry->in_pack_header_size = used + used_0;
+			break;
+		}
 
+		if (base_ref && (base_entry = locate_object_entry(base_ref))) {
+			/*
+			 * If base_ref was set above that means we wish to
+			 * reuse delta data, and we even found that base
+			 * in the list of objects we want to pack. Goodie!
+			 *
+			 * Depth value does not matter - find_deltas() will
+			 * never consider reused delta as the base object to
+			 * deltify other objects against, in order to avoid
+			 * circular deltas.
+			 */
+			entry->type = entry->in_pack_type;
+			entry->delta = base_entry;
 			entry->delta_sibling = base_entry->delta_child;
 			base_entry->delta_child = entry;
-
+			unuse_pack(&w_curs);
 			return;
 		}
-		/* Otherwise we would do the usual */
+
+		if (entry->type) {
+			/*
+			 * This must be a delta and we already know what the
+			 * final object type is.  Let's extract the actual
+			 * object size from the delta header.
+			 */
+			entry->size = get_size_from_delta(p, &w_curs,
+					entry->in_pack_offset + entry->in_pack_header_size);
+			unuse_pack(&w_curs);
+			return;
+		}
+
+		/*
+		 * No choice but to fall back to the recursive delta walk
+		 * with sha1_object_info() to find about the object type
+		 * at this point...
+		 */
+		unuse_pack(&w_curs);
 	}
 
 	entry->type = sha1_object_info(entry->sha1, &entry->size);
@@ -1210,14 +1233,37 @@ static void check_object(struct object_entry *entry)
 		    sha1_to_hex(entry->sha1));
 }
 
+static int pack_offset_sort(const void *_a, const void *_b)
+{
+	const struct object_entry *a = *(struct object_entry **)_a;
+	const struct object_entry *b = *(struct object_entry **)_b;
+
+	/* avoid filesystem trashing with loose objects */
+	if (!a->in_pack && !b->in_pack)
+		return hashcmp(a->sha1, b->sha1);
+
+	if (a->in_pack < b->in_pack)
+		return -1;
+	if (a->in_pack > b->in_pack)
+		return 1;
+	return a->in_pack_offset < b->in_pack_offset ? -1 :
+			(a->in_pack_offset > b->in_pack_offset);
+}
+
 static void get_object_details(void)
 {
 	uint32_t i;
-	struct object_entry *entry;
+	struct object_entry **sorted_by_offset;
+
+	sorted_by_offset = xcalloc(nr_objects, sizeof(struct object_entry *));
+	for (i = 0; i < nr_objects; i++)
+		sorted_by_offset[i] = objects + i;
+	qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
 
 	prepare_pack_ix();
-	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
-		check_object(entry);
+	for (i = 0; i < nr_objects; i++)
+		check_object(sorted_by_offset[i]);
+	free(sorted_by_offset);
 }
 
 static int type_size_sort(const void *_a, const void *_b)
@@ -1520,20 +1566,20 @@ static void read_object_list_from_stdin(void)
 
 		hash = name_hash(line+41);
 		add_preferred_base_object(line+41, hash);
-		add_object_entry(sha1, hash, 0);
+		add_object_entry(sha1, 0, hash, 0);
 	}
 }
 
 static void show_commit(struct commit *commit)
 {
-	add_object_entry(commit->object.sha1, 0, 0);
+	add_object_entry(commit->object.sha1, OBJ_COMMIT, 0, 0);
 }
 
 static void show_object(struct object_array_entry *p)
 {
 	unsigned hash = name_hash(p->name);
 	add_preferred_base_object(p->name, hash);
-	add_object_entry(p->item->sha1, hash, 0);
+	add_object_entry(p->item->sha1, p->item->type, hash, 0);
 }
 
 static void show_edge(struct commit *commit)

From 2c1cbec1e2f0bd7b15fe5e921d287babfd91c7d3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 16 Apr 2007 22:10:19 -0700
Subject: [PATCH 055/109] Use proper object allocators for unknown object nodes
 too

We used to use a different allocator scheme for when we didn't know the
object type.  That meant that objects that were created without any
up-front knowledge of the type would not go through the same allocation
paths as normal object allocations, and would miss out on the statistics.

But perhaps more importantly than the statistics (that are useful when
looking at memory usage but not much else), if we want to make the
object hash tables use a denser object pointer representation, we need
to make sure that they all go through the same blocking allocator.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 alloc.c  | 28 ++++++++++++++++++++--------
 cache.h  |  1 +
 object.c | 15 +++------------
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/alloc.c b/alloc.c
index 460db192d5..53eba373db 100644
--- a/alloc.c
+++ b/alloc.c
@@ -18,26 +18,38 @@
 
 #define BLOCKING 1024
 
-#define DEFINE_ALLOCATOR(name)					\
+#define DEFINE_ALLOCATOR(name, type)				\
 static unsigned int name##_allocs;				\
 struct name *alloc_##name##_node(void)				\
 {								\
 	static int nr;						\
-	static struct name *block;				\
+	static type *block;					\
+	void *ret;						\
 								\
 	if (!nr) {						\
 		nr = BLOCKING;					\
-		block = xcalloc(BLOCKING, sizeof(struct name));	\
+		block = xmalloc(BLOCKING * sizeof(type));	\
 	}							\
 	nr--;							\
 	name##_allocs++;					\
-	return block++;						\
+	ret = block++;						\
+	memset(ret, 0, sizeof(type));				\
+	return ret;						\
 }
 
-DEFINE_ALLOCATOR(blob)
-DEFINE_ALLOCATOR(tree)
-DEFINE_ALLOCATOR(commit)
-DEFINE_ALLOCATOR(tag)
+union any_object {
+	struct object object;
+	struct blob blob;
+	struct tree tree;
+	struct commit commit;
+	struct tag tag;
+};
+
+DEFINE_ALLOCATOR(blob, struct blob)
+DEFINE_ALLOCATOR(tree, struct tree)
+DEFINE_ALLOCATOR(commit, struct commit)
+DEFINE_ALLOCATOR(tag, struct tag)
+DEFINE_ALLOCATOR(object, union any_object)
 
 #ifdef NO_C99_FORMAT
 #define SZ_FMT "%u"
diff --git a/cache.h b/cache.h
index b1bd9e46c2..aa72791947 100644
--- a/cache.h
+++ b/cache.h
@@ -484,6 +484,7 @@ extern struct blob *alloc_blob_node(void);
 extern struct tree *alloc_tree_node(void);
 extern struct commit *alloc_commit_node(void);
 extern struct tag *alloc_tag_node(void);
+extern struct object *alloc_object_node(void);
 extern void alloc_report(void);
 
 /* trace.c */
diff --git a/object.c b/object.c
index 78a44a6ef4..153ebac66d 100644
--- a/object.c
+++ b/object.c
@@ -120,22 +120,13 @@ void created_object(const unsigned char *sha1, struct object *obj)
 	nr_objs++;
 }
 
-union any_object {
-	struct object object;
-	struct commit commit;
-	struct tree tree;
-	struct blob blob;
-	struct tag tag;
-};
-
 struct object *lookup_unknown_object(const unsigned char *sha1)
 {
 	struct object *obj = lookup_object(sha1);
 	if (!obj) {
-		union any_object *ret = xcalloc(1, sizeof(*ret));
-		created_object(sha1, &ret->object);
-		ret->object.type = OBJ_NONE;
-		return &ret->object;
+		obj = alloc_object_node();
+		created_object(sha1, obj);
+		obj->type = OBJ_NONE;
 	}
 	return obj;
 }

From 100c5f3b0b27ec6617de1a785c4ff481e92636c1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 16 Apr 2007 22:11:43 -0700
Subject: [PATCH 056/109] Clean up object creation to use more common code

This replaces the fairly odd "created_object()" function that did _most_
of the object setup with a more complete "create_object()" function that
also has a more natural calling convention.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 alloc.c  |  2 +-
 blob.c   |  8 ++------
 cache.h  | 14 +++++---------
 commit.c |  8 ++------
 object.c | 14 +++++++-------
 object.h |  2 +-
 tag.c    | 10 +++-------
 tree.c   |  8 ++------
 8 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/alloc.c b/alloc.c
index 53eba373db..216c23a6f8 100644
--- a/alloc.c
+++ b/alloc.c
@@ -20,7 +20,7 @@
 
 #define DEFINE_ALLOCATOR(name, type)				\
 static unsigned int name##_allocs;				\
-struct name *alloc_##name##_node(void)				\
+void *alloc_##name##_node(void)					\
 {								\
 	static int nr;						\
 	static type *block;					\
diff --git a/blob.c b/blob.c
index 0a9ea417b8..bd7d078e1a 100644
--- a/blob.c
+++ b/blob.c
@@ -6,12 +6,8 @@ const char *blob_type = "blob";
 struct blob *lookup_blob(const unsigned char *sha1)
 {
 	struct object *obj = lookup_object(sha1);
-	if (!obj) {
-		struct blob *ret = alloc_blob_node();
-		created_object(sha1, &ret->object);
-		ret->object.type = OBJ_BLOB;
-		return ret;
-	}
+	if (!obj)
+		return create_object(sha1, OBJ_BLOB, alloc_blob_node());
 	if (!obj->type)
 		obj->type = OBJ_BLOB;
 	if (obj->type != OBJ_BLOB) {
diff --git a/cache.h b/cache.h
index aa72791947..4de25cc4b2 100644
--- a/cache.h
+++ b/cache.h
@@ -476,15 +476,11 @@ int decode_85(char *dst, const char *line, int linelen);
 void encode_85(char *buf, const unsigned char *data, int bytes);
 
 /* alloc.c */
-struct blob;
-struct tree;
-struct commit;
-struct tag;
-extern struct blob *alloc_blob_node(void);
-extern struct tree *alloc_tree_node(void);
-extern struct commit *alloc_commit_node(void);
-extern struct tag *alloc_tag_node(void);
-extern struct object *alloc_object_node(void);
+extern void *alloc_blob_node(void);
+extern void *alloc_tree_node(void);
+extern void *alloc_commit_node(void);
+extern void *alloc_tag_node(void);
+extern void *alloc_object_node(void);
 extern void alloc_report(void);
 
 /* trace.c */
diff --git a/commit.c b/commit.c
index 952095faa7..10466c4ae0 100644
--- a/commit.c
+++ b/commit.c
@@ -98,12 +98,8 @@ struct commit *lookup_commit_reference(const unsigned char *sha1)
 struct commit *lookup_commit(const unsigned char *sha1)
 {
 	struct object *obj = lookup_object(sha1);
-	if (!obj) {
-		struct commit *ret = alloc_commit_node();
-		created_object(sha1, &ret->object);
-		ret->object.type = OBJ_COMMIT;
-		return ret;
-	}
+	if (!obj)
+		return create_object(sha1, OBJ_COMMIT, alloc_commit_node());
 	if (!obj->type)
 		obj->type = OBJ_COMMIT;
 	return check_commit(obj, sha1, 0);
diff --git a/object.c b/object.c
index 153ebac66d..7bd3fec556 100644
--- a/object.c
+++ b/object.c
@@ -105,11 +105,13 @@ static void grow_object_hash(void)
 	obj_hash_size = new_hash_size;
 }
 
-void created_object(const unsigned char *sha1, struct object *obj)
+void *create_object(const unsigned char *sha1, int type, void *o)
 {
+	struct object *obj = o;
+
 	obj->parsed = 0;
 	obj->used = 0;
-	obj->type = OBJ_NONE;
+	obj->type = type;
 	obj->flags = 0;
 	hashcpy(obj->sha1, sha1);
 
@@ -118,16 +120,14 @@ void created_object(const unsigned char *sha1, struct object *obj)
 
 	insert_obj_hash(obj, obj_hash, obj_hash_size);
 	nr_objs++;
+	return obj;
 }
 
 struct object *lookup_unknown_object(const unsigned char *sha1)
 {
 	struct object *obj = lookup_object(sha1);
-	if (!obj) {
-		obj = alloc_object_node();
-		created_object(sha1, obj);
-		obj->type = OBJ_NONE;
-	}
+	if (!obj)
+		obj = create_object(sha1, OBJ_NONE, alloc_object_node());
 	return obj;
 }
 
diff --git a/object.h b/object.h
index bdbbc1889c..3e26a0e8b9 100644
--- a/object.h
+++ b/object.h
@@ -46,7 +46,7 @@ extern struct object_refs *lookup_object_refs(struct object *);
 /** Internal only **/
 struct object *lookup_object(const unsigned char *sha1);
 
-void created_object(const unsigned char *sha1, struct object *obj);
+extern void *create_object(const unsigned char *sha1, int type, void *obj);
 
 /** Returns the object, having parsed it to find out what it is. **/
 struct object *parse_object(const unsigned char *sha1);
diff --git a/tag.c b/tag.c
index 56a49f4fe1..330d287924 100644
--- a/tag.c
+++ b/tag.c
@@ -20,13 +20,9 @@ struct object *deref_tag(struct object *o, const char *warn, int warnlen)
 
 struct tag *lookup_tag(const unsigned char *sha1)
 {
-        struct object *obj = lookup_object(sha1);
-        if (!obj) {
-                struct tag *ret = alloc_tag_node();
-                created_object(sha1, &ret->object);
-                ret->object.type = OBJ_TAG;
-                return ret;
-        }
+	struct object *obj = lookup_object(sha1);
+	if (!obj)
+		return create_object(sha1, OBJ_TAG, alloc_tag_node());
 	if (!obj->type)
 		obj->type = OBJ_TAG;
         if (obj->type != OBJ_TAG) {
diff --git a/tree.c b/tree.c
index d188c0fbae..e5bfbceb22 100644
--- a/tree.c
+++ b/tree.c
@@ -127,12 +127,8 @@ int read_tree(struct tree *tree, int stage, const char **match)
 struct tree *lookup_tree(const unsigned char *sha1)
 {
 	struct object *obj = lookup_object(sha1);
-	if (!obj) {
-		struct tree *ret = alloc_tree_node();
-		created_object(sha1, &ret->object);
-		ret->object.type = OBJ_TREE;
-		return ret;
-	}
+	if (!obj)
+		return create_object(sha1, OBJ_TREE, alloc_tree_node());
 	if (!obj->type)
 		obj->type = OBJ_TREE;
 	if (obj->type != OBJ_TREE) {

From b9849a1ab63143c3b70e339491a897ef62a4173b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 16 Apr 2007 00:42:29 -0700
Subject: [PATCH 057/109] Make sure quickfetch is not fooled with a previous,
 incomplete fetch.

This updates git-rev-list --objects to be a bit more careful
when listing a blob object to make sure the blob actually
exists, and uses it to make sure the quick-fetch optimization we
introduced earlier is not fooled by a previous incomplete fetch.

The quick-fetch optimization works by running this command:

	git rev-list --objects <<commit-list>> --not --all

where <<commit-list>> is a list of commits that we are going to
fetch from the other side.  If there is any object missing to
complete the <<commit-list>>, the rev-list would fail and die
(say, the commit was in our repository, but its tree wasn't --
then it will barf while trying to list the blobs the tree
contains because it cannot read that tree).

Usually we do not have the objects (otherwise why would we be
fetching?), but in one important special case we do: when the
remote repository is used as an alternate object store
(i.e. pointed by .git/objects/info/alternates).  We could check
.git/objects/info/alternates to see if the remote we are
interacting with is one of them (or is used as an alternate,
recursively, by one of them), but that check is more cumbersome
than it is worth.

The above check, however, did not catch a missing blob, because the
object listing code did not read or check blob objects, knowing
that blobs do not contain any further references to other
objects.  This commit fixes it with practically unmeasurable
overhead.

I've benched this with

	git rev-list --objects --all >/dev/null

in the kernel repository, with three different implementations
of the "check-blob".

 - Checking with has_sha1_file() has negligible (unmeasurable)
   performance penalty.

 - Checking with sha1_object_info() makes it somewhat slower,
   perhaps by 5%.

 - Checking with read_sha1_file() to cause a full re-validation
   is prohibitively expensive (about 4 times as much runtime).

In my original patch, I had this as a command line option, but
the overhead is small enough that it is not really worth it.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-rev-list.c    |  4 ++
 t/t5502-quickfetch.sh | 89 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100755 t/t5502-quickfetch.sh

diff --git a/builtin-rev-list.c b/builtin-rev-list.c
index 09774f9559..c0329dcecd 100644
--- a/builtin-rev-list.c
+++ b/builtin-rev-list.c
@@ -113,6 +113,10 @@ static void show_object(struct object_array_entry *p)
 	 * confuse downstream git-pack-objects very badly.
 	 */
 	const char *ep = strchr(p->name, '\n');
+
+	if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
+		die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
+
 	if (ep) {
 		printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
 		       (int) (ep - p->name),
diff --git a/t/t5502-quickfetch.sh b/t/t5502-quickfetch.sh
new file mode 100755
index 0000000000..b4760f2dc0
--- /dev/null
+++ b/t/t5502-quickfetch.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+
+test_description='test quickfetch from local'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+
+	test_tick &&
+	echo ichi >file &&
+	git add file &&
+	git commit -m initial &&
+
+	cnt=$( (
+		git count-objects | sed -e "s/ *objects,.*//"
+	) ) &&
+	test $cnt -eq 3
+'
+
+test_expect_success 'clone without alternate' '
+
+	(
+		mkdir cloned &&
+		cd cloned &&
+		git init-db &&
+		git remote add -f origin ..
+	) &&
+	cnt=$( (
+		cd cloned &&
+		git count-objects | sed -e "s/ *objects,.*//"
+	) ) &&
+	test $cnt -eq 3
+'
+
+test_expect_success 'further commits in the original' '
+
+	test_tick &&
+	echo ni >file &&
+	git commit -a -m second &&
+
+	cnt=$( (
+		git count-objects | sed -e "s/ *objects,.*//"
+	) ) &&
+	test $cnt -eq 6
+'
+
+test_expect_success 'copy commit and tree but not blob by hand' '
+
+	git rev-list --objects HEAD |
+	git pack-objects --stdout |
+	(
+		cd cloned &&
+		git unpack-objects
+	) &&
+
+	cnt=$( (
+		cd cloned &&
+		git count-objects | sed -e "s/ *objects,.*//"
+	) ) &&
+	test $cnt -eq 6
+
+	blob=$(git rev-parse HEAD:file | sed -e "s|..|&/|") &&
+	test -f "cloned/.git/objects/$blob" &&
+	rm -f "cloned/.git/objects/$blob" &&
+
+	cnt=$( (
+		cd cloned &&
+		git count-objects | sed -e "s/ *objects,.*//"
+	) ) &&
+	test $cnt -eq 5
+
+'
+
+test_expect_success 'quickfetch should not leave a corrupted repository' '
+
+	(
+		cd cloned &&
+		git fetch
+	) &&
+
+	cnt=$( (
+		cd cloned &&
+		git count-objects | sed -e "s/ *objects,.*//"
+	) ) &&
+	test $cnt -eq 6
+
+'
+
+test_done

From 515106fa1335462393c08fa8712dddd767dc147a Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 16 Apr 2007 21:33:31 -0700
Subject: [PATCH 058/109] Allow more than true/false to attributes.

This allows you to define three values (and possibly more) for
each attribute: true, false, and unset.

Typically the handlers that notice and act on attribute values
treat an "unset" attribute to mean "do your default thing"
(e.g. crlf that is unset would trigger "guess from contents"),
so being able to override a setting to an unset state is
actually useful.

 - If you want to set the attribute value to true, have an entry
   in .gitattributes file that mentions the attribute name; e.g.

	*.o	binary

 - If you want to set the attribute value explicitly to false,
   use '-'; e.g.

	*.a	-diff

 - If you want to make the attribute value _unset_, perhaps to
   override an earlier entry, use '!'; e.g.

	*.a	-diff
	c.i.a	!diff

This also allows string values to attributes, with the natural
syntax:

	attrname=attrvalue

but you cannot use it, as nobody takes notice and acts on
it yet.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c               | 194 +++++++++++++++++++++++++++----------------
 attr.h               |  12 ++-
 builtin-check-attr.c |  14 +++-
 convert.c            |  16 +++-
 diff.c               |  15 +++-
 5 files changed, 169 insertions(+), 82 deletions(-)

diff --git a/attr.c b/attr.c
index 60fe48f3b8..b3496a6eb5 100644
--- a/attr.c
+++ b/attr.c
@@ -1,6 +1,8 @@
 #include "cache.h"
 #include "attr.h"
 
+#define ATTR__UNKNOWN	((void *) -2)
+
 /*
  * The basic design decision here is that we are not going to have
  * insanely large number of attributes.
@@ -83,6 +85,7 @@ struct git_attr *git_attr(const char *name, int len)
 	check_all_attr = xrealloc(check_all_attr,
 				  sizeof(*check_all_attr) * attr_nr);
 	check_all_attr[a->attr_nr].attr = a;
+	check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
 	return a;
 }
 
@@ -92,12 +95,14 @@ struct git_attr *git_attr(const char *name, int len)
  * (1) glob pattern.
  * (2) whitespace
  * (3) whitespace separated list of attribute names, each of which
- *     could be prefixed with '-' to mean "not set".
+ *     could be prefixed with '-' to mean "set to false", '!' to mean
+ *     "unset".
  */
 
+/* What does a matched pattern decide? */
 struct attr_state {
-	int unset;
 	struct git_attr *attr;
+	void *setto;
 };
 
 struct match_attr {
@@ -112,13 +117,63 @@ struct match_attr {
 
 static const char blank[] = " \t\r\n";
 
+static const char *parse_attr(const char *src, int lineno, const char *cp,
+			      int *num_attr, struct match_attr *res)
+{
+	const char *ep, *equals;
+	int len;
+
+	ep = cp + strcspn(cp, blank);
+	equals = strchr(cp, '=');
+	if (equals && ep < equals)
+		equals = NULL;
+	if (equals)
+		len = equals - cp;
+	else
+		len = ep - cp;
+	if (!res) {
+		if (*cp == '-' || *cp == '!') {
+			cp++;
+			len--;
+		}
+		if (invalid_attr_name(cp, len)) {
+			fprintf(stderr,
+				"%.*s is not a valid attribute name: %s:%d\n",
+				len, cp, src, lineno);
+			return NULL;
+		}
+	} else {
+		struct attr_state *e;
+
+		e = &(res->state[*num_attr]);
+		if (*cp == '-' || *cp == '!') {
+			e->setto = (*cp == '-') ? ATTR__FALSE : ATTR__UNSET;
+			cp++;
+			len--;
+		}
+		else if (!equals)
+			e->setto = ATTR__TRUE;
+		else {
+			char *value;
+			int vallen = ep - equals;
+			value = xmalloc(vallen);
+			memcpy(value, equals+1, vallen-1);
+			value[vallen-1] = 0;
+			e->setto = value;
+		}
+		e->attr = git_attr(cp, len);
+	}
+	(*num_attr)++;
+	return ep + strspn(ep, blank);
+}
+
 static struct match_attr *parse_attr_line(const char *line, const char *src,
 					  int lineno, int macro_ok)
 {
 	int namelen;
 	int num_attr;
 	const char *cp, *name;
-	struct match_attr *res = res;
+	struct match_attr *res = NULL;
 	int pass;
 	int is_macro;
 
@@ -153,42 +208,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		num_attr = 0;
 		cp = name + namelen;
 		cp = cp + strspn(cp, blank);
-		while (*cp) {
-			const char *ep;
-			ep = cp + strcspn(cp, blank);
-			if (!pass) {
-				if (*cp == '-')
-					cp++;
-				if (invalid_attr_name(cp, ep - cp)) {
-					fprintf(stderr,
-						"%.*s is not a valid attribute name: %s:%d\n",
-						(int)(ep - cp), cp,
-						src, lineno);
-					return NULL;
-				}
-			} else {
-				struct attr_state *e;
-
-				e = &(res->state[num_attr]);
-				if (*cp == '-') {
-					e->unset = 1;
-					cp++;
-				}
-				e->attr = git_attr(cp, ep - cp);
-			}
-			num_attr++;
-			cp = ep + strspn(ep, blank);
-		}
+		while (*cp)
+			cp = parse_attr(src, lineno, cp, &num_attr, res);
 		if (pass)
 			break;
-
 		res = xcalloc(1,
 			      sizeof(*res) +
 			      sizeof(struct attr_state) * num_attr +
 			      (is_macro ? 0 : namelen + 1));
-		if (is_macro) {
+		if (is_macro)
 			res->u.attr = git_attr(name, namelen);
-		}
 		else {
 			res->u.pattern = (char*)&(res->state[num_attr]);
 			memcpy(res->u.pattern, name, namelen);
@@ -205,9 +234,9 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
  * come from many places.
  *
  * (1) .gitattribute file of the same directory;
- * (2) .gitattribute file of the parent directory if (1) does not have any match;
- *     this goes recursively upwards, just like .gitignore
- * (3) perhaps $GIT_DIR/info/attributes, as the final fallback.
+ * (2) .gitattribute file of the parent directory if (1) does not have
+ *      any match; this goes recursively upwards, just like .gitignore.
+ * (3) $GIT_DIR/info/attributes, which overrides both of the above.
  *
  * In the same file, later entries override the earlier match, so in the
  * global list, we would have entries from info/attributes the earliest
@@ -229,8 +258,21 @@ static void free_attr_elem(struct attr_stack *e)
 {
 	int i;
 	free(e->origin);
-	for (i = 0; i < e->num_matches; i++)
-		free(e->attrs[i]);
+	for (i = 0; i < e->num_matches; i++) {
+		struct match_attr *a = e->attrs[i];
+		int j;
+		for (j = 0; j < a->num_attr; j++) {
+			void *setto = a->state[j].setto;
+			if (setto == ATTR__TRUE ||
+			    setto == ATTR__FALSE ||
+			    setto == ATTR__UNSET ||
+			    setto == ATTR__UNKNOWN)
+				;
+			else
+				free(setto);
+		}
+		free(a);
+	}
 	free(e);
 }
 
@@ -288,10 +330,19 @@ static void debug_info(const char *what, struct attr_stack *elem)
 {
 	fprintf(stderr, "%s: %s\n", what, elem->origin ? elem->origin : "()");
 }
-static void debug_set(const char *what, const char *match, struct git_attr *attr, int set)
+static void debug_set(const char *what, const char *match, struct git_attr *attr, void *v)
 {
-	fprintf(stderr, "%s: %s => %d (%s)\n",
-		what, attr->name, set, match);
+	const char *value = v;
+
+	if (ATTR_TRUE(value))
+		value = "set";
+	else if (ATTR_FALSE(value))
+		value = "unset";
+	else if (ATTR_UNSET(value))
+		value = "unspecified";
+
+	fprintf(stderr, "%s: %s => %s (%s)\n",
+		what, attr->name, (char *) value, match);
 }
 #define debug_push(a) debug_info("push", (a))
 #define debug_pop(a) debug_info("pop", (a))
@@ -420,56 +471,53 @@ static int path_matches(const char *pathname, int pathlen,
 	return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0;
 }
 
+static int fill_one(const char *what, struct match_attr *a, int rem)
+{
+	struct git_attr_check *check = check_all_attr;
+	int i;
+
+	for (i = 0; 0 < rem && i < a->num_attr; i++) {
+		struct git_attr *attr = a->state[i].attr;
+		void **n = &(check[attr->attr_nr].value);
+		void *v = a->state[i].setto;
+
+		if (*n == ATTR__UNKNOWN) {
+			debug_set(what, a->u.pattern, attr, v);
+			*n = v;
+			rem--;
+		}
+	}
+	return rem;
+}
+
 static int fill(const char *path, int pathlen, struct attr_stack *stk, int rem)
 {
+	int i;
 	const char *base = stk->origin ? stk->origin : "";
-	int i, j;
-	struct git_attr_check *check = check_all_attr;
 
 	for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
 		struct match_attr *a = stk->attrs[i];
 		if (a->is_macro)
 			continue;
 		if (path_matches(path, pathlen,
-				 a->u.pattern, base, strlen(base))) {
-			for (j = 0; 0 < rem && j < a->num_attr; j++) {
-				struct git_attr *attr = a->state[j].attr;
-				int set = !a->state[j].unset;
-				int *n = &(check[attr->attr_nr].isset);
-
-				if (*n < 0) {
-					debug_set("fill", a->u.pattern, attr, set);
-					*n = set;
-					rem--;
-				}
-			}
-		}
+				 a->u.pattern, base, strlen(base)))
+			rem = fill_one("fill", a, rem);
 	}
 	return rem;
 }
 
 static int macroexpand(struct attr_stack *stk, int rem)
 {
-	int i, j;
+	int i;
 	struct git_attr_check *check = check_all_attr;
 
 	for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) {
 		struct match_attr *a = stk->attrs[i];
 		if (!a->is_macro)
 			continue;
-		if (check[a->u.attr->attr_nr].isset < 0)
+		if (check[a->u.attr->attr_nr].value != ATTR__TRUE)
 			continue;
-		for (j = 0; 0 < rem && j < a->num_attr; j++) {
-			struct git_attr *attr = a->state[j].attr;
-			int set = !a->state[j].unset;
-			int *n = &(check[attr->attr_nr].isset);
-
-			if (*n < 0) {
-				debug_set("expand", a->u.attr->name, attr, set);
-				*n = set;
-				rem--;
-			}
-		}
+		rem = fill_one("expand", a, rem);
 	}
 	return rem;
 }
@@ -482,7 +530,7 @@ int git_checkattr(const char *path, int num, struct git_attr_check *check)
 
 	bootstrap_attr_stack();
 	for (i = 0; i < attr_nr; i++)
-		check_all_attr[i].isset = -1;
+		check_all_attr[i].value = ATTR__UNKNOWN;
 
 	pathlen = strlen(path);
 	cp = strrchr(path, '/');
@@ -498,8 +546,12 @@ int git_checkattr(const char *path, int num, struct git_attr_check *check)
 	for (stk = attr_stack; 0 < rem && stk; stk = stk->prev)
 		rem = macroexpand(stk, rem);
 
-	for (i = 0; i < num; i++)
-		check[i].isset = check_all_attr[check[i].attr->attr_nr].isset;
+	for (i = 0; i < num; i++) {
+		void *value = check_all_attr[check[i].attr->attr_nr].value;
+		if (value == ATTR__UNKNOWN)
+			value = ATTR__UNSET;
+		check[i].value = value;
+	}
 
 	return 0;
 }
diff --git a/attr.h b/attr.h
index 1e5ab40694..8ec2d3d35c 100644
--- a/attr.h
+++ b/attr.h
@@ -6,9 +6,19 @@ struct git_attr;
 
 struct git_attr *git_attr(const char *, int);
 
+/* Internal use */
+#define ATTR__TRUE	((void *) 1)
+#define ATTR__FALSE	((void *) 0)
+#define ATTR__UNSET	((void *) -1)
+
+/* For public to check git_attr_check results */
+#define ATTR_TRUE(v) ((v) == ATTR__TRUE)
+#define ATTR_FALSE(v) ((v) == ATTR__FALSE)
+#define ATTR_UNSET(v) ((v) == ATTR__UNSET)
+
 struct git_attr_check {
 	struct git_attr *attr;
-	int isset;
+	void *value;
 };
 
 int git_checkattr(const char *path, int, struct git_attr_check *);
diff --git a/builtin-check-attr.c b/builtin-check-attr.c
index 634be9ed2e..6983a73c1b 100644
--- a/builtin-check-attr.c
+++ b/builtin-check-attr.c
@@ -42,11 +42,17 @@ int cmd_check_attr(int argc, const char **argv, const char *prefix)
 		if (git_checkattr(argv[i], cnt, check))
 			die("git_checkattr died");
 		for (j = 0; j < cnt; j++) {
+			void *value = check[j].value;
+
+			if (ATTR_TRUE(value))
+				value = "set";
+			else if (ATTR_FALSE(value))
+				value = "unset";
+			else if (ATTR_UNSET(value))
+				value = "unspecified";
+
 			write_name_quoted("", 0, argv[i], 1, stdout);
-			printf(": %s: %s\n", argv[j+1],
-			       (check[j].isset < 0) ? "unspecified" :
-			       (check[j].isset == 0) ? "unset" :
-			       "set");
+			printf(": %s: %s\n", argv[j+1], (char *) value);
 		}
 	}
 	return 0;
diff --git a/convert.c b/convert.c
index d0d4b81871..68bb70f12f 100644
--- a/convert.c
+++ b/convert.c
@@ -225,9 +225,19 @@ static int git_path_check_crlf(const char *path)
 
 	setup_crlf_check(&attr_crlf_check);
 
-	if (git_checkattr(path, 1, &attr_crlf_check))
-		return -1;
-	return attr_crlf_check.isset;
+	if (!git_checkattr(path, 1, &attr_crlf_check)) {
+		void *value = attr_crlf_check.value;
+		if (ATTR_TRUE(value))
+			return 1;
+		else if (ATTR_FALSE(value))
+			return 0;
+		else if (ATTR_UNSET(value))
+			;
+		else
+			die("unknown value %s given to 'crlf' attribute",
+			    (char *)value);
+	}
+	return -1;
 }
 
 int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
diff --git a/diff.c b/diff.c
index dcea405ed3..a32078ea01 100644
--- a/diff.c
+++ b/diff.c
@@ -1068,9 +1068,18 @@ static int file_is_binary(struct diff_filespec *one)
 	struct git_attr_check attr_diff_check;
 
 	setup_diff_attr_check(&attr_diff_check);
-	if (!git_checkattr(one->path, 1, &attr_diff_check) &&
-	    (0 <= attr_diff_check.isset))
-		return !attr_diff_check.isset;
+	if (!git_checkattr(one->path, 1, &attr_diff_check)) {
+		void *value = attr_diff_check.value;
+		if (ATTR_TRUE(value))
+			return 0;
+		else if (ATTR_FALSE(value))
+			return 1;
+		else if (ATTR_UNSET(value))
+			;
+		else
+			die("unknown value %s given to 'diff' attribute",
+			    (char *)value);
+	}
 
 	if (!one->data) {
 		if (!DIFF_FILE_VALID(one))

From 3e5261a24071ca23c3514c0ebd5ee55f1e79d9cc Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 16 Apr 2007 21:58:01 -0700
Subject: [PATCH 059/109] merge-recursive: separate out xdl_merge() interface.

This just moves code around to make the actual call to
xdl_merge() into a separate function.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c | 56 +++++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 3096594b3e..4eb62cf64a 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -659,6 +659,39 @@ static void fill_mm(const unsigned char *sha1, mmfile_t *mm)
 	mm->size = size;
 }
 
+static int ll_merge(mmbuffer_t *result_buf,
+		    struct diff_filespec *o,
+		    struct diff_filespec *a,
+		    struct diff_filespec *b,
+		    const char *branch1,
+		    const char *branch2)
+{
+	mmfile_t orig, src1, src2;
+	xpparam_t xpp;
+	char *name1, *name2;
+	int merge_status;
+
+	name1 = xstrdup(mkpath("%s:%s", branch1, a->path));
+	name2 = xstrdup(mkpath("%s:%s", branch2, b->path));
+
+	fill_mm(o->sha1, &orig);
+	fill_mm(a->sha1, &src1);
+	fill_mm(b->sha1, &src2);
+
+	memset(&xpp, 0, sizeof(xpp));
+	merge_status = xdl_merge(&orig,
+				 &src1, name1,
+				 &src2, name2,
+				 &xpp, XDL_MERGE_ZEALOUS,
+				 result_buf);
+	free(name1);
+	free(name2);
+	free(orig.ptr);
+	free(src1.ptr);
+	free(src2.ptr);
+	return merge_status;
+}
+
 static struct merge_file_info merge_file(struct diff_filespec *o,
 		struct diff_filespec *a, struct diff_filespec *b,
 		const char *branch1, const char *branch2)
@@ -687,30 +720,11 @@ static struct merge_file_info merge_file(struct diff_filespec *o,
 		else if (sha_eq(b->sha1, o->sha1))
 			hashcpy(result.sha, a->sha1);
 		else if (S_ISREG(a->mode)) {
-			mmfile_t orig, src1, src2;
 			mmbuffer_t result_buf;
-			xpparam_t xpp;
-			char *name1, *name2;
 			int merge_status;
 
-			name1 = xstrdup(mkpath("%s:%s", branch1, a->path));
-			name2 = xstrdup(mkpath("%s:%s", branch2, b->path));
-
-			fill_mm(o->sha1, &orig);
-			fill_mm(a->sha1, &src1);
-			fill_mm(b->sha1, &src2);
-
-			memset(&xpp, 0, sizeof(xpp));
-			merge_status = xdl_merge(&orig,
-						 &src1, name1,
-						 &src2, name2,
-						 &xpp, XDL_MERGE_ZEALOUS,
-						 &result_buf);
-			free(name1);
-			free(name2);
-			free(orig.ptr);
-			free(src1.ptr);
-			free(src2.ptr);
+			merge_status = ll_merge(&result_buf, o, a, b,
+						branch1, branch2);
 
 			if ((merge_status < 0) || !result_buf.ptr)
 				die("Failed to execute internal merge");

From a129d96f4144215711e379565af97f6a82197f4f Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 16 Apr 2007 22:59:18 -0700
Subject: [PATCH 060/109] Allow specifying specialized merge-backend per path.

This allows 'merge' attribute to control how the file-level
three-way merge is done per path.

 - If you set 'merge' to true, leave it unspecified, or set it
   to "text", we use the built-in 3-way xdl-merge.

 - If you set 'merge' to false, or set it to "binary", the
   "binary" merge is done.  The merge result is the blob from
   'our' tree, but this still leaves the path conflicted, so
   that the mess can be sorted out by the user.  This is
   obviously meant to be useful for binary files.

 - 'merge=union' (this is the first example of a string valued
   attribute, introduced in the previous one) uses the "union"
   merge.  The "union" merge takes lines in conflicted hunks
   from both sides, which is useful for line-oriented files such
   as .gitignore.

Instead of setting merge to 'true' or 'false' by using 'merge'
or '-merge', setting it explicitly to "text" or "binary" will
become useful once we start allowing custom per-path backends to
be added, and allow them to be activated for the default
(i.e. 'merge' attribute specified to 'true' or 'false') case,
using some other mechanisms.  Setting merge attribute to "text"
or "binary" will be a way to explicitly request to override such
a custom default for selected paths.

Currently there is no way to specify random programs but it
should be trivial for motivated contributors to add later.

There is one caveat, though.  ll_merge() is called for both
internal ancestor merge and the outer "final" merge.  I think an
interactive custom per-path merge backend should refrain from
going interactive when performing an internal merge (you can
tell it by checking call_depth) and instead just call either
ll_xdl_merge() if the content is text, or call ll_binary_merge()
otherwise.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c | 136 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 129 insertions(+), 7 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 4eb62cf64a..3b34401d0b 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -15,6 +15,7 @@
 #include "unpack-trees.h"
 #include "path-list.h"
 #include "xdiff-interface.h"
+#include "attr.h"
 
 static int subtree_merge;
 
@@ -659,6 +660,127 @@ static void fill_mm(const unsigned char *sha1, mmfile_t *mm)
 	mm->size = size;
 }
 
+/* Low-level merge functions */
+typedef int (*ll_merge_fn)(mmfile_t *orig,
+			   mmfile_t *src1, const char *name1,
+			   mmfile_t *src2, const char *name2,
+			   mmbuffer_t *result);
+
+static int ll_xdl_merge(mmfile_t *orig,
+			mmfile_t *src1, const char *name1,
+			mmfile_t *src2, const char *name2,
+			mmbuffer_t *result)
+{
+	xpparam_t xpp;
+
+	memset(&xpp, 0, sizeof(xpp));
+	return xdl_merge(orig,
+			 src1, name1,
+			 src2, name2,
+			 &xpp, XDL_MERGE_ZEALOUS,
+			 result);
+}
+
+static int ll_union_merge(mmfile_t *orig,
+			  mmfile_t *src1, const char *name1,
+			  mmfile_t *src2, const char *name2,
+			  mmbuffer_t *result)
+{
+	char *src, *dst;
+	long size;
+	const int marker_size = 7;
+
+	int status = ll_xdl_merge(orig, src1, NULL, src2, NULL, result);
+	if (status <= 0)
+		return status;
+	size = result->size;
+	src = dst = result->ptr;
+	while (size) {
+		char ch;
+		if ((marker_size < size) &&
+		    (*src == '<' || *src == '=' || *src == '>')) {
+			int i;
+			ch = *src;
+			for (i = 0; i < marker_size; i++)
+				if (src[i] != ch)
+					goto not_a_marker;
+			if (src[marker_size] != '\n')
+				goto not_a_marker;
+			src += marker_size + 1;
+			size -= marker_size + 1;
+			continue;
+		}
+	not_a_marker:
+		do {
+			ch = *src++;
+			*dst++ = ch;
+			size--;
+		} while (ch != '\n' && size);
+	}
+	result->size = dst - result->ptr;
+	return 0;
+}
+
+static int ll_binary_merge(mmfile_t *orig,
+			   mmfile_t *src1, const char *name1,
+			   mmfile_t *src2, const char *name2,
+			   mmbuffer_t *result)
+{
+	/*
+	 * The tentative merge result is "ours" for the final round,
+	 * or common ancestor for an internal merge.  Still return
+	 * "conflicted merge" status.
+	 */
+	mmfile_t *stolen = index_only ? orig : src1;
+
+	result->ptr = stolen->ptr;
+	result->size = stolen->size;
+	stolen->ptr = NULL;
+	return 1;
+}
+
+static struct {
+	const char *name;
+	ll_merge_fn fn;
+} ll_merge_fns[] = {
+	{ "text", ll_xdl_merge },
+	{ "binary", ll_binary_merge },
+	{ "union", ll_union_merge },
+	{ NULL, NULL },
+};
+
+static ll_merge_fn find_ll_merge_fn(void *merge_attr)
+{
+	const char *name;
+	int i;
+
+	if (ATTR_TRUE(merge_attr) || ATTR_UNSET(merge_attr))
+		return ll_xdl_merge;
+	else if (ATTR_FALSE(merge_attr))
+		return ll_binary_merge;
+
+	/* Otherwise merge_attr may name the merge function */
+	name = merge_attr;
+	for (i = 0; ll_merge_fns[i].name; i++)
+		if (!strcmp(ll_merge_fns[i].name, name))
+			return ll_merge_fns[i].fn;
+
+	/* default to the 3-way */
+	return ll_xdl_merge;
+}
+
+static void *git_path_check_merge(const char *path)
+{
+	static struct git_attr_check attr_merge_check;
+
+	if (!attr_merge_check.attr)
+		attr_merge_check.attr = git_attr("merge", 5);
+
+	if (git_checkattr(path, 1, &attr_merge_check))
+		return ATTR__UNSET;
+	return attr_merge_check.value;
+}
+
 static int ll_merge(mmbuffer_t *result_buf,
 		    struct diff_filespec *o,
 		    struct diff_filespec *a,
@@ -667,9 +789,10 @@ static int ll_merge(mmbuffer_t *result_buf,
 		    const char *branch2)
 {
 	mmfile_t orig, src1, src2;
-	xpparam_t xpp;
 	char *name1, *name2;
 	int merge_status;
+	void *merge_attr;
+	ll_merge_fn fn;
 
 	name1 = xstrdup(mkpath("%s:%s", branch1, a->path));
 	name2 = xstrdup(mkpath("%s:%s", branch2, b->path));
@@ -678,12 +801,11 @@ static int ll_merge(mmbuffer_t *result_buf,
 	fill_mm(a->sha1, &src1);
 	fill_mm(b->sha1, &src2);
 
-	memset(&xpp, 0, sizeof(xpp));
-	merge_status = xdl_merge(&orig,
-				 &src1, name1,
-				 &src2, name2,
-				 &xpp, XDL_MERGE_ZEALOUS,
-				 result_buf);
+	merge_attr = git_path_check_merge(a->path);
+	fn = find_ll_merge_fn(merge_attr);
+
+	merge_status = fn(&orig, &src1, name1, &src2, name2, result_buf);
+
 	free(name1);
 	free(name2);
 	free(orig.ptr);

From 47579efc009c6f7afaf31be107eb92395a4f10db Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Tue, 17 Apr 2007 00:05:00 -0700
Subject: [PATCH 061/109] Add a demonstration/test of customized merge.

This demonstrates how the new low-level per-path merge backends,
union and binary, work, and shows how they are controlled by the
gitattribute mechanism.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t6026-merge-attr.sh | 72 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100755 t/t6026-merge-attr.sh

diff --git a/t/t6026-merge-attr.sh b/t/t6026-merge-attr.sh
new file mode 100755
index 0000000000..5daa2236de
--- /dev/null
+++ b/t/t6026-merge-attr.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+#
+# Copyright (c) 2007 Junio C Hamano
+#
+
+test_description='per path merge controlled by merge attribute'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+
+	for f in text binary union
+	do
+		echo Initial >$f && git add $f || break
+	done &&
+	test_tick &&
+	git commit -m Initial &&
+
+	git branch side &&
+	for f in text binary union
+	do
+		echo Master >>$f && git add $f || break
+	done &&
+	test_tick &&
+	git commit -m Master &&
+
+	git checkout side &&
+	for f in text binary union
+	do
+		echo Side >>$f && git add $f || break
+	done &&
+	test_tick &&
+	git commit -m Side
+
+'
+
+test_expect_success merge '
+
+	{
+		echo "binary -merge"
+		echo "union merge=union"
+	} >.gitattributes &&
+
+	if git merge master
+	then
+		echo Gaah, should have conflicted
+		false
+	else
+		echo Ok, conflicted.
+	fi
+'
+
+test_expect_success 'check merge result in index' '
+
+	git ls-files -u | grep binary &&
+	git ls-files -u | grep text &&
+	! (git ls-files -u | grep union)
+
+'
+
+test_expect_success 'check merge result in working tree' '
+
+	git cat-file -p HEAD:binary >binary-orig &&
+	grep "<<<<<<<" text &&
+	cmp binary-orig binary &&
+	! grep "<<<<<<<" union &&
+	grep Master union &&
+	grep Side union
+
+'
+
+test_done

From f3ef6b6bbe9bfd3d09130f7e26b87dbe11b93c5b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Tue, 17 Apr 2007 22:51:45 -0700
Subject: [PATCH 062/109] Custom low-level merge driver support.

This allows users to specify custom low-level merge driver per
path, using the attributes mechanism.  Just like you can specify
one of built-in "text", "binary", "union" low-level merge
drivers by saying:

	*		merge=text
	.gitignore	merge=union
	*.jpg		merge=binary

pick a name of your favorite merge driver, and assign it as the
value of the 'merge' attribute.

A custom low-level merge driver is defined via the config
mechanism.  This patch introduces 'merge.driver', a multi-valued
configuration.  Its value is the name (i.e. the one you use as
the value of 'merge' attribute) followed by a command line
specification.  The command line can contain %O, %A, and %B to
be interpolated with the names of temporary files that hold the
common ancestor version, the version from your branch, and the
version from the other branch, and the resulting command is
spawned.

The low-level merge driver is expected to update the temporary
file for your branch (i.e. %A) with the result and exit with
status 0 for a clean merge, and non-zero status for a conflicted
merge.

A new test in t6026 demonstrates a sample usage.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c     | 177 +++++++++++++++++++++++++++++++++++++++---
 t/t6026-merge-attr.sh |  71 ++++++++++++++++-
 2 files changed, 235 insertions(+), 13 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 3b34401d0b..8ec18ad577 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -15,6 +15,7 @@
 #include "unpack-trees.h"
 #include "path-list.h"
 #include "xdiff-interface.h"
+#include "interpolate.h"
 #include "attr.h"
 
 static int subtree_merge;
@@ -661,12 +662,14 @@ static void fill_mm(const unsigned char *sha1, mmfile_t *mm)
 }
 
 /* Low-level merge functions */
-typedef int (*ll_merge_fn)(mmfile_t *orig,
+typedef int (*ll_merge_fn)(const char *cmd,
+			   mmfile_t *orig,
 			   mmfile_t *src1, const char *name1,
 			   mmfile_t *src2, const char *name2,
 			   mmbuffer_t *result);
 
-static int ll_xdl_merge(mmfile_t *orig,
+static int ll_xdl_merge(const char *cmd__unused,
+			mmfile_t *orig,
 			mmfile_t *src1, const char *name1,
 			mmfile_t *src2, const char *name2,
 			mmbuffer_t *result)
@@ -681,7 +684,8 @@ static int ll_xdl_merge(mmfile_t *orig,
 			 result);
 }
 
-static int ll_union_merge(mmfile_t *orig,
+static int ll_union_merge(const char *cmd__unused,
+			  mmfile_t *orig,
 			  mmfile_t *src1, const char *name1,
 			  mmfile_t *src2, const char *name2,
 			  mmbuffer_t *result)
@@ -690,7 +694,8 @@ static int ll_union_merge(mmfile_t *orig,
 	long size;
 	const int marker_size = 7;
 
-	int status = ll_xdl_merge(orig, src1, NULL, src2, NULL, result);
+	int status = ll_xdl_merge(cmd__unused, orig,
+				  src1, NULL, src2, NULL, result);
 	if (status <= 0)
 		return status;
 	size = result->size;
@@ -721,7 +726,8 @@ static int ll_union_merge(mmfile_t *orig,
 	return 0;
 }
 
-static int ll_binary_merge(mmfile_t *orig,
+static int ll_binary_merge(const char *cmd__unused,
+			   mmfile_t *orig,
 			   mmfile_t *src1, const char *name1,
 			   mmfile_t *src2, const char *name2,
 			   mmbuffer_t *result)
@@ -743,24 +749,169 @@ static struct {
 	const char *name;
 	ll_merge_fn fn;
 } ll_merge_fns[] = {
-	{ "text", ll_xdl_merge },
 	{ "binary", ll_binary_merge },
+	{ "text", ll_xdl_merge },
 	{ "union", ll_union_merge },
 	{ NULL, NULL },
 };
 
-static ll_merge_fn find_ll_merge_fn(void *merge_attr)
+static void create_temp(mmfile_t *src, char *path)
 {
+	int fd;
+
+	strcpy(path, ".merge_file_XXXXXX");
+	fd = mkstemp(path);
+	if (fd < 0)
+		die("unable to create temp-file");
+	if (write_in_full(fd, src->ptr, src->size) != src->size)
+		die("unable to write temp-file");
+	close(fd);
+}
+
+static int ll_ext_merge(const char *cmd,
+			mmfile_t *orig,
+			mmfile_t *src1, const char *name1,
+			mmfile_t *src2, const char *name2,
+			mmbuffer_t *result)
+{
+	char temp[3][50];
+	char cmdbuf[2048];
+	struct interp table[] = {
+		{ "%O" },
+		{ "%A" },
+		{ "%B" },
+	};
+	struct child_process child;
+	const char *args[20];
+	int status, fd, i;
+	struct stat st;
+
+	result->ptr = NULL;
+	result->size = 0;
+	create_temp(orig, temp[0]);
+	create_temp(src1, temp[1]);
+	create_temp(src2, temp[2]);
+
+	interp_set_entry(table, 0, temp[0]);
+	interp_set_entry(table, 1, temp[1]);
+	interp_set_entry(table, 2, temp[2]);
+
+	interpolate(cmdbuf, sizeof(cmdbuf), cmd, table, 3);
+
+	memset(&child, 0, sizeof(child));
+	child.argv = args;
+	args[0] = "sh";
+	args[1] = "-c";
+	args[2] = cmdbuf;
+	args[3] = NULL;
+
+	status = run_command(&child);
+	if (status < -ERR_RUN_COMMAND_FORK)
+		; /* failure in run-command */
+	else
+		status = -status;
+	fd = open(temp[1], O_RDONLY);
+	if (fd < 0)
+		goto bad;
+	if (fstat(fd, &st))
+		goto close_bad;
+	result->size = st.st_size;
+	result->ptr = xmalloc(result->size + 1);
+	if (read_in_full(fd, result->ptr, result->size) != result->size) {
+		free(result->ptr);
+		result->ptr = NULL;
+		result->size = 0;
+	}
+ close_bad:
+	close(fd);
+ bad:
+	for (i = 0; i < 3; i++)
+		unlink(temp[i]);
+	return status;
+}
+
+/*
+ * merge.default and merge.driver configuration items
+ */
+static struct user_merge_fn {
+	struct user_merge_fn *next;
+	const char *name;
+	char *cmdline;
+	char b_[1];
+} *ll_user_merge_fns, **ll_user_merge_fns_tail;
+
+static int read_merge_config(const char *var, const char *value)
+{
+	struct user_merge_fn *fn;
+	int blen, nlen;
+
+	if (strcmp(var, "merge.driver"))
+		return 0;
+	if (!value)
+		return error("%s: lacks value", var);
+	/*
+	 * merge.driver is a multi-valued configuration, whose value is
+	 * of form:
+	 *
+	 *	name command-line
+	 *
+	 * The command-line will be interpolated with the following
+	 * tokens and is given to the shell:
+	 *
+	 *    %O - temporary file name for the merge base.
+	 *    %A - temporary file name for our version.
+	 *    %B - temporary file name for the other branches' version.
+	 *
+	 * The external merge driver should write the results in the file
+	 * named by %A, and signal that it has done with exit status 0.
+	 */
+	for (nlen = -1, blen = 0; value[blen]; blen++)
+		if (nlen < 0 && isspace(value[blen]))
+			nlen = blen;
+	if (nlen < 0)
+		return error("%s '%s': lacks command line", var, value);
+	fn = xcalloc(1, sizeof(struct user_merge_fn) + blen + 1);
+	memcpy(fn->b_, value, blen + 1);
+	fn->name = fn->b_;
+	fn->b_[nlen] = 0;
+	fn->cmdline = fn->b_ + nlen + 1;
+	fn->next = *ll_user_merge_fns_tail;
+	*ll_user_merge_fns_tail = fn;
+	return 0;
+}
+
+static void initialize_ll_merge(void)
+{
+	if (ll_user_merge_fns_tail)
+		return;
+	ll_user_merge_fns_tail = &ll_user_merge_fns;
+	git_config(read_merge_config);
+}
+
+static ll_merge_fn find_ll_merge_fn(void *merge_attr, const char **cmdline)
+{
+	struct user_merge_fn *fn;
 	const char *name;
 	int i;
 
-	if (ATTR_TRUE(merge_attr) || ATTR_UNSET(merge_attr))
+	initialize_ll_merge();
+
+	if (ATTR_TRUE(merge_attr))
 		return ll_xdl_merge;
 	else if (ATTR_FALSE(merge_attr))
 		return ll_binary_merge;
+	else if (ATTR_UNSET(merge_attr))
+		return ll_xdl_merge;
+	else
+		name = merge_attr;
+
+	for (fn = ll_user_merge_fns; fn; fn = fn->next) {
+		if (!strcmp(fn->name, name)) {
+			*cmdline = fn->cmdline;
+			return ll_ext_merge;
+		}
+	}
 
-	/* Otherwise merge_attr may name the merge function */
-	name = merge_attr;
 	for (i = 0; ll_merge_fns[i].name; i++)
 		if (!strcmp(ll_merge_fns[i].name, name))
 			return ll_merge_fns[i].fn;
@@ -793,6 +944,7 @@ static int ll_merge(mmbuffer_t *result_buf,
 	int merge_status;
 	void *merge_attr;
 	ll_merge_fn fn;
+	const char *driver = NULL;
 
 	name1 = xstrdup(mkpath("%s:%s", branch1, a->path));
 	name2 = xstrdup(mkpath("%s:%s", branch2, b->path));
@@ -802,9 +954,10 @@ static int ll_merge(mmbuffer_t *result_buf,
 	fill_mm(b->sha1, &src2);
 
 	merge_attr = git_path_check_merge(a->path);
-	fn = find_ll_merge_fn(merge_attr);
+	fn = find_ll_merge_fn(merge_attr, &driver);
 
-	merge_status = fn(&orig, &src1, name1, &src2, name2, result_buf);
+	merge_status = fn(driver, &orig,
+			  &src1, name1, &src2, name2, result_buf);
 
 	free(name1);
 	free(name2);
diff --git a/t/t6026-merge-attr.sh b/t/t6026-merge-attr.sh
index 5daa2236de..1732b60ed8 100755
--- a/t/t6026-merge-attr.sh
+++ b/t/t6026-merge-attr.sh
@@ -30,8 +30,9 @@ test_expect_success setup '
 		echo Side >>$f && git add $f || break
 	done &&
 	test_tick &&
-	git commit -m Side
+	git commit -m Side &&
 
+	git tag anchor
 '
 
 test_expect_success merge '
@@ -69,4 +70,72 @@ test_expect_success 'check merge result in working tree' '
 
 '
 
+cat >./custom-merge <<\EOF
+#!/bin/sh
+
+orig="$1" ours="$2" theirs="$3" exit="$4"
+(
+	echo "orig is $orig"
+	echo "ours is $ours"
+	echo "theirs is $theirs"
+	echo "=== orig ==="
+	cat "$orig"
+	echo "=== ours ==="
+	cat "$ours"
+	echo "=== theirs ==="
+	cat "$theirs"
+) >"$ours+"
+cat "$ours+" >"$ours"
+rm -f "$ours+"
+exit "$exit"
+EOF
+chmod +x ./custom-merge
+
+test_expect_success 'custom merge backend' '
+
+	echo "* merge=union" >.gitattributes &&
+	echo "text merge=custom" >>.gitattributes &&
+
+	git reset --hard anchor &&
+	git config --replace-all \
+	merge.driver "custom ./custom-merge %O %A %B 0" &&
+
+	git merge master &&
+
+	cmp binary union &&
+	sed -e 1,3d text >check-1 &&
+	o=$(git-unpack-file master^:text) &&
+	a=$(git-unpack-file side^:text) &&
+	b=$(git-unpack-file master:text) &&
+	sh -c "./custom-merge $o $a $b 0" &&
+	sed -e 1,3d $a >check-2 &&
+	cmp check-1 check-2 &&
+	rm -f $o $a $b
+'
+
+test_expect_success 'custom merge backend' '
+
+	git reset --hard anchor &&
+	git config --replace-all \
+	merge.driver "custom ./custom-merge %O %A %B 1" &&
+
+	if git merge master
+	then
+		echo "Eh? should have conflicted"
+		false
+	else
+		echo "Ok, conflicted"
+	fi &&
+
+	cmp binary union &&
+	sed -e 1,3d text >check-1 &&
+	o=$(git-unpack-file master^:text) &&
+	a=$(git-unpack-file anchor:text) &&
+	b=$(git-unpack-file master:text) &&
+	sh -c "./custom-merge $o $a $b 0" &&
+	sed -e 1,3d $a >check-2 &&
+	cmp check-1 check-2 &&
+	rm -f $o $a $b
+'
+
 test_done

From be89cb239e8ec02e23015675cc8b2d60992a6cfc Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 18 Apr 2007 01:47:21 -0700
Subject: [PATCH 063/109] Allow the default low-level merge driver to be
 configured.

When no 'merge' attribute is given to a path, merge-recursive
uses the built-in xdl-merge as the low-level merge driver.

A new configuration item 'merge.default' can name a low-level
merge driver of user's choice to be used instead.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 8ec18ad577..5983000971 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -839,12 +839,18 @@ static struct user_merge_fn {
 	char *cmdline;
 	char b_[1];
 } *ll_user_merge_fns, **ll_user_merge_fns_tail;
+static const char *default_ll_merge;
 
 static int read_merge_config(const char *var, const char *value)
 {
 	struct user_merge_fn *fn;
 	int blen, nlen;
 
+	if (!strcmp(var, "merge.default")) {
+		default_ll_merge = strdup(value);
+		return 0;
+	}
+
 	if (strcmp(var, "merge.driver"))
 		return 0;
 	if (!value)
@@ -900,8 +906,12 @@ static ll_merge_fn find_ll_merge_fn(void *merge_attr, const char **cmdline)
 		return ll_xdl_merge;
 	else if (ATTR_FALSE(merge_attr))
 		return ll_binary_merge;
-	else if (ATTR_UNSET(merge_attr))
-		return ll_xdl_merge;
+	else if (ATTR_UNSET(merge_attr)) {
+		if (!default_ll_merge)
+			return ll_xdl_merge;
+		else
+			name = default_ll_merge;
+	}
 	else
 		name = merge_attr;
 

From 153920da5b62024c0aceef23252b82ad18e5fe22 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 18 Apr 2007 11:27:32 -0700
Subject: [PATCH 064/109] Custom low-level merge driver: change the
 configuration scheme.

This changes the configuration syntax for defining a low-level
merge driver to be:

	[merge "<<drivername>>"]
		driver = "<<command line>>"
		name = "<<driver description>>"

which is much nicer to read and is extensible.  Credit goes to
Martin Waitz and Linus.

In addition, when we use an external low-level merge driver, it
is reported as an extra output from merge-recursive, using the
value of merge.<<drivername>>.name variable.

The demonstration in t6026 has also been updated.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c     | 204 ++++++++++++++++++++++++++----------------
 t/t6026-merge-attr.sh |   8 +-
 2 files changed, 132 insertions(+), 80 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 5983000971..0f5c28eaff 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -661,14 +661,31 @@ static void fill_mm(const unsigned char *sha1, mmfile_t *mm)
 	mm->size = size;
 }
 
-/* Low-level merge functions */
-typedef int (*ll_merge_fn)(const char *cmd,
+/*
+ * Customizable low-level merge drivers support.
+ */
+
+struct ll_merge_driver;
+typedef int (*ll_merge_fn)(const struct ll_merge_driver *,
+			   const char *path,
 			   mmfile_t *orig,
 			   mmfile_t *src1, const char *name1,
 			   mmfile_t *src2, const char *name2,
 			   mmbuffer_t *result);
 
-static int ll_xdl_merge(const char *cmd__unused,
+struct ll_merge_driver {
+	const char *name;
+	const char *description;
+	ll_merge_fn fn;
+	struct ll_merge_driver *next;
+	char *cmdline;
+};
+
+/*
+ * Built-in low-levels
+ */
+static int ll_xdl_merge(const struct ll_merge_driver *drv_unused,
+			const char *path_unused,
 			mmfile_t *orig,
 			mmfile_t *src1, const char *name1,
 			mmfile_t *src2, const char *name2,
@@ -684,7 +701,8 @@ static int ll_xdl_merge(const char *cmd__unused,
 			 result);
 }
 
-static int ll_union_merge(const char *cmd__unused,
+static int ll_union_merge(const struct ll_merge_driver *drv_unused,
+			  const char *path_unused,
 			  mmfile_t *orig,
 			  mmfile_t *src1, const char *name1,
 			  mmfile_t *src2, const char *name2,
@@ -694,8 +712,8 @@ static int ll_union_merge(const char *cmd__unused,
 	long size;
 	const int marker_size = 7;
 
-	int status = ll_xdl_merge(cmd__unused, orig,
-				  src1, NULL, src2, NULL, result);
+	int status = ll_xdl_merge(drv_unused, path_unused,
+				  orig, src1, NULL, src2, NULL, result);
 	if (status <= 0)
 		return status;
 	size = result->size;
@@ -726,7 +744,8 @@ static int ll_union_merge(const char *cmd__unused,
 	return 0;
 }
 
-static int ll_binary_merge(const char *cmd__unused,
+static int ll_binary_merge(const struct ll_merge_driver *drv_unused,
+			   const char *path_unused,
 			   mmfile_t *orig,
 			   mmfile_t *src1, const char *name1,
 			   mmfile_t *src2, const char *name2,
@@ -745,14 +764,13 @@ static int ll_binary_merge(const char *cmd__unused,
 	return 1;
 }
 
-static struct {
-	const char *name;
-	ll_merge_fn fn;
-} ll_merge_fns[] = {
-	{ "binary", ll_binary_merge },
-	{ "text", ll_xdl_merge },
-	{ "union", ll_union_merge },
-	{ NULL, NULL },
+#define LL_BINARY_MERGE 0
+#define LL_TEXT_MERGE 1
+#define LL_UNION_MERGE 2
+static struct ll_merge_driver ll_merge_drv[] = {
+	{ "binary", "built-in binary merge", ll_binary_merge },
+	{ "text", "built-in 3-way text merge", ll_xdl_merge },
+	{ "union", "built-in union merge", ll_union_merge },
 };
 
 static void create_temp(mmfile_t *src, char *path)
@@ -768,7 +786,11 @@ static void create_temp(mmfile_t *src, char *path)
 	close(fd);
 }
 
-static int ll_ext_merge(const char *cmd,
+/*
+ * User defined low-level merge driver support.
+ */
+static int ll_ext_merge(const struct ll_merge_driver *fn,
+			const char *path,
 			mmfile_t *orig,
 			mmfile_t *src1, const char *name1,
 			mmfile_t *src2, const char *name2,
@@ -796,7 +818,10 @@ static int ll_ext_merge(const char *cmd,
 	interp_set_entry(table, 1, temp[1]);
 	interp_set_entry(table, 2, temp[2]);
 
-	interpolate(cmdbuf, sizeof(cmdbuf), cmd, table, 3);
+	output(1, "merging %s using %s", path,
+	       fn->description ? fn->description : fn->name);
+
+	interpolate(cmdbuf, sizeof(cmdbuf), fn->cmdline, table, 3);
 
 	memset(&child, 0, sizeof(child));
 	child.argv = args;
@@ -833,101 +858,124 @@ static int ll_ext_merge(const char *cmd,
 /*
  * merge.default and merge.driver configuration items
  */
-static struct user_merge_fn {
-	struct user_merge_fn *next;
-	const char *name;
-	char *cmdline;
-	char b_[1];
-} *ll_user_merge_fns, **ll_user_merge_fns_tail;
+static struct ll_merge_driver *ll_user_merge, **ll_user_merge_tail;
 static const char *default_ll_merge;
 
 static int read_merge_config(const char *var, const char *value)
 {
-	struct user_merge_fn *fn;
-	int blen, nlen;
+	struct ll_merge_driver *fn;
+	const char *ep, *name;
+	int namelen;
 
 	if (!strcmp(var, "merge.default")) {
-		default_ll_merge = strdup(value);
+		if (value)
+			default_ll_merge = strdup(value);
 		return 0;
 	}
 
-	if (strcmp(var, "merge.driver"))
-		return 0;
-	if (!value)
-		return error("%s: lacks value", var);
 	/*
-	 * merge.driver is a multi-valued configuration, whose value is
-	 * of form:
-	 *
-	 *	name command-line
-	 *
-	 * The command-line will be interpolated with the following
-	 * tokens and is given to the shell:
-	 *
-	 *    %O - temporary file name for the merge base.
-	 *    %A - temporary file name for our version.
-	 *    %B - temporary file name for the other branches' version.
-	 *
-	 * The external merge driver should write the results in the file
-	 * named by %A, and signal that it has done with exit status 0.
+	 * We are not interested in anything but "merge.<name>.variable";
+	 * especially, we do not want to look at variables such as
+	 * "merge.summary", "merge.tool", and "merge.verbosity".
 	 */
-	for (nlen = -1, blen = 0; value[blen]; blen++)
-		if (nlen < 0 && isspace(value[blen]))
-			nlen = blen;
-	if (nlen < 0)
-		return error("%s '%s': lacks command line", var, value);
-	fn = xcalloc(1, sizeof(struct user_merge_fn) + blen + 1);
-	memcpy(fn->b_, value, blen + 1);
-	fn->name = fn->b_;
-	fn->b_[nlen] = 0;
-	fn->cmdline = fn->b_ + nlen + 1;
-	fn->next = *ll_user_merge_fns_tail;
-	*ll_user_merge_fns_tail = fn;
+	if (prefixcmp(var, "merge.") || (ep = strrchr(var, '.')) == var + 6)
+		return 0;
+
+	/*
+	 * Find existing one as we might be processing merge.<name>.var2
+	 * after seeing merge.<name>.var1.
+	 */
+	name = var + 6;
+	namelen = ep - name;
+	for (fn = ll_user_merge; fn; fn = fn->next)
+		if (!strncmp(fn->name, name, namelen) && !fn->name[namelen])
+			break;
+	if (!fn) {
+		char *namebuf;
+		fn = xcalloc(1, sizeof(struct ll_merge_driver));
+		namebuf = xmalloc(namelen + 1);
+		memcpy(namebuf, name, namelen);
+		namebuf[namelen] = 0;
+		fn->name = namebuf;
+		fn->fn = ll_ext_merge;
+		fn->next = *ll_user_merge_tail;
+		*ll_user_merge_tail = fn;
+	}
+
+	ep++;
+
+	if (!strcmp("name", ep)) {
+		if (!value)
+			return error("%s: lacks value", var);
+		fn->description = strdup(value);
+		return 0;
+	}
+
+	if (!strcmp("driver", ep)) {
+		if (!value)
+			return error("%s: lacks value", var);
+		/*
+		 * merge.<name>.driver specifies the command line:
+		 *
+		 *	command-line
+		 *
+		 * The command-line will be interpolated with the following
+		 * tokens and is given to the shell:
+		 *
+		 *    %O - temporary file name for the merge base.
+		 *    %A - temporary file name for our version.
+		 *    %B - temporary file name for the other branches' version.
+		 *
+		 * The external merge driver should write the results in the
+		 * file named by %A, and signal that it has done with zero exit
+		 * status.
+		 */
+		fn->cmdline = strdup(value);
+		return 0;
+	}
+
 	return 0;
 }
 
 static void initialize_ll_merge(void)
 {
-	if (ll_user_merge_fns_tail)
+	if (ll_user_merge_tail)
 		return;
-	ll_user_merge_fns_tail = &ll_user_merge_fns;
+	ll_user_merge_tail = &ll_user_merge;
 	git_config(read_merge_config);
 }
 
-static ll_merge_fn find_ll_merge_fn(void *merge_attr, const char **cmdline)
+static const struct ll_merge_driver *find_ll_merge_driver(void *merge_attr)
 {
-	struct user_merge_fn *fn;
+	struct ll_merge_driver *fn;
 	const char *name;
 	int i;
 
 	initialize_ll_merge();
 
 	if (ATTR_TRUE(merge_attr))
-		return ll_xdl_merge;
+		return &ll_merge_drv[LL_TEXT_MERGE];
 	else if (ATTR_FALSE(merge_attr))
-		return ll_binary_merge;
+		return &ll_merge_drv[LL_BINARY_MERGE];
 	else if (ATTR_UNSET(merge_attr)) {
 		if (!default_ll_merge)
-			return ll_xdl_merge;
+			return &ll_merge_drv[LL_TEXT_MERGE];
 		else
 			name = default_ll_merge;
 	}
 	else
 		name = merge_attr;
 
-	for (fn = ll_user_merge_fns; fn; fn = fn->next) {
-		if (!strcmp(fn->name, name)) {
-			*cmdline = fn->cmdline;
-			return ll_ext_merge;
-		}
-	}
+	for (fn = ll_user_merge; fn; fn = fn->next)
+		if (!strcmp(fn->name, name))
+			return fn;
 
-	for (i = 0; ll_merge_fns[i].name; i++)
-		if (!strcmp(ll_merge_fns[i].name, name))
-			return ll_merge_fns[i].fn;
+	for (i = 0; i < ARRAY_SIZE(ll_merge_drv); i++)
+		if (!strcmp(ll_merge_drv[i].name, name))
+			return &ll_merge_drv[i];
 
 	/* default to the 3-way */
-	return ll_xdl_merge;
+	return &ll_merge_drv[LL_TEXT_MERGE];
 }
 
 static void *git_path_check_merge(const char *path)
@@ -953,8 +1001,7 @@ static int ll_merge(mmbuffer_t *result_buf,
 	char *name1, *name2;
 	int merge_status;
 	void *merge_attr;
-	ll_merge_fn fn;
-	const char *driver = NULL;
+	const struct ll_merge_driver *driver;
 
 	name1 = xstrdup(mkpath("%s:%s", branch1, a->path));
 	name2 = xstrdup(mkpath("%s:%s", branch2, b->path));
@@ -964,10 +1011,11 @@ static int ll_merge(mmbuffer_t *result_buf,
 	fill_mm(b->sha1, &src2);
 
 	merge_attr = git_path_check_merge(a->path);
-	fn = find_ll_merge_fn(merge_attr, &driver);
+	driver = find_ll_merge_driver(merge_attr);
 
-	merge_status = fn(driver, &orig,
-			  &src1, name1, &src2, name2, result_buf);
+	merge_status = driver->fn(driver, a->path,
+				  &orig, &src1, name1, &src2, name2,
+				  result_buf);
 
 	free(name1);
 	free(name2);
diff --git a/t/t6026-merge-attr.sh b/t/t6026-merge-attr.sh
index 1732b60ed8..56fc341768 100755
--- a/t/t6026-merge-attr.sh
+++ b/t/t6026-merge-attr.sh
@@ -98,7 +98,9 @@ test_expect_success 'custom merge backend' '
 
 	git reset --hard anchor &&
 	git config --replace-all \
-	merge.driver "custom ./custom-merge %O %A %B 0" &&
+	merge.custom.driver "./custom-merge %O %A %B 0" &&
+	git config --replace-all \
+	merge.custom.name "custom merge driver for testing" &&
 
 	git merge master &&
 
@@ -117,7 +119,9 @@ test_expect_success 'custom merge backend' '
 
 	git reset --hard anchor &&
 	git config --replace-all \
-	merge.driver "custom ./custom-merge %O %A %B 1" &&
+	merge.custom.driver "./custom-merge %O %A %B 1" &&
+	git config --replace-all \
+	merge.custom.name "custom merge driver for testing" &&
 
 	if git merge master
 	then

From 3086486d326b00ce308208e62e0e0de831f3563b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 18 Apr 2007 12:18:25 -0700
Subject: [PATCH 065/109] Allow low-level driver to specify different behaviour
 during internal merge.

This allows [merge "drivername"] to have a variable "recursive"
that names a different low-level merge driver to be used when
merging common ancestors to come up with a virtual ancestor.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/merge-recursive.c b/merge-recursive.c
index 0f5c28eaff..7b5ca8e717 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -677,6 +677,7 @@ struct ll_merge_driver {
 	const char *name;
 	const char *description;
 	ll_merge_fn fn;
+	const char *recursive;
 	struct ll_merge_driver *next;
 	char *cmdline;
 };
@@ -934,6 +935,13 @@ static int read_merge_config(const char *var, const char *value)
 		return 0;
 	}
 
+	if (!strcmp("recursive", ep)) {
+		if (!value)
+			return error("%s: lacks value", var);
+		fn->recursive = strdup(value);
+		return 0;
+	}
+
 	return 0;
 }
 
@@ -1013,6 +1021,10 @@ static int ll_merge(mmbuffer_t *result_buf,
 	merge_attr = git_path_check_merge(a->path);
 	driver = find_ll_merge_driver(merge_attr);
 
+	if (index_only && driver->recursive) {
+		merge_attr = git_attr(driver->recursive, strlen(driver->recursive));
+		driver = find_ll_merge_driver(merge_attr);
+	}
 	merge_status = driver->fn(driver, a->path,
 				  &orig, &src1, name1, &src2, name2,
 				  result_buf);

From a5e92abde61d59a8612c5b87d0bae681e90f7fdb Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 18 Apr 2007 16:16:37 -0700
Subject: [PATCH 066/109] Fix funny types used in attribute value
 representation

It was bothering me a lot that I abused small integer values
cast to (void *) to represent non-string values in
gitattributes.  This corrects it by making the type of attribute
values (const char *), and using the addresses of a few statically
allocated character buffers to denote true/false.  Unset attributes
are represented as having NULLs as their values.

Added in-header documentation to explain how git_checkattr()
routine should be called.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c               | 20 +++++++++++++-------
 attr.h               | 22 +++++++++++++++-------
 builtin-check-attr.c |  4 ++--
 convert.c            |  2 +-
 diff.c               |  4 ++--
 merge-recursive.c    | 17 ++++++++++-------
 6 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/attr.c b/attr.c
index b3496a6eb5..285e689e5a 100644
--- a/attr.c
+++ b/attr.c
@@ -1,7 +1,13 @@
 #include "cache.h"
 #include "attr.h"
 
-#define ATTR__UNKNOWN	((void *) -2)
+const char git_attr__true[] = "(builtin)true";
+const char git_attr__false[] = "\0(builtin)false";
+static const char git_attr__unknown[] = "(builtin)unknown";
+#define ATTR__TRUE git_attr__true
+#define ATTR__FALSE git_attr__false
+#define ATTR__UNSET NULL
+#define ATTR__UNKNOWN git_attr__unknown
 
 /*
  * The basic design decision here is that we are not going to have
@@ -102,7 +108,7 @@ struct git_attr *git_attr(const char *name, int len)
 /* What does a matched pattern decide? */
 struct attr_state {
 	struct git_attr *attr;
-	void *setto;
+	const char *setto;
 };
 
 struct match_attr {
@@ -262,14 +268,14 @@ static void free_attr_elem(struct attr_stack *e)
 		struct match_attr *a = e->attrs[i];
 		int j;
 		for (j = 0; j < a->num_attr; j++) {
-			void *setto = a->state[j].setto;
+			const char *setto = a->state[j].setto;
 			if (setto == ATTR__TRUE ||
 			    setto == ATTR__FALSE ||
 			    setto == ATTR__UNSET ||
 			    setto == ATTR__UNKNOWN)
 				;
 			else
-				free(setto);
+				free((char*) setto);
 		}
 		free(a);
 	}
@@ -478,8 +484,8 @@ static int fill_one(const char *what, struct match_attr *a, int rem)
 
 	for (i = 0; 0 < rem && i < a->num_attr; i++) {
 		struct git_attr *attr = a->state[i].attr;
-		void **n = &(check[attr->attr_nr].value);
-		void *v = a->state[i].setto;
+		const char **n = &(check[attr->attr_nr].value);
+		const char *v = a->state[i].setto;
 
 		if (*n == ATTR__UNKNOWN) {
 			debug_set(what, a->u.pattern, attr, v);
@@ -547,7 +553,7 @@ int git_checkattr(const char *path, int num, struct git_attr_check *check)
 		rem = macroexpand(stk, rem);
 
 	for (i = 0; i < num; i++) {
-		void *value = check_all_attr[check[i].attr->attr_nr].value;
+		const char *value = check_all_attr[check[i].attr->attr_nr].value;
 		if (value == ATTR__UNKNOWN)
 			value = ATTR__UNSET;
 		check[i].value = value;
diff --git a/attr.h b/attr.h
index 8ec2d3d35c..f1c2038b09 100644
--- a/attr.h
+++ b/attr.h
@@ -4,21 +4,29 @@
 /* An attribute is a pointer to this opaque structure */
 struct git_attr;
 
+/*
+ * Given a string, return the gitattribute object that
+ * corresponds to it.
+ */
 struct git_attr *git_attr(const char *, int);
 
 /* Internal use */
-#define ATTR__TRUE	((void *) 1)
-#define ATTR__FALSE	((void *) 0)
-#define ATTR__UNSET	((void *) -1)
+extern const char git_attr__true[];
+extern const char git_attr__false[];
 
 /* For public to check git_attr_check results */
-#define ATTR_TRUE(v) ((v) == ATTR__TRUE)
-#define ATTR_FALSE(v) ((v) == ATTR__FALSE)
-#define ATTR_UNSET(v) ((v) == ATTR__UNSET)
+#define ATTR_TRUE(v) ((v) == git_attr__true)
+#define ATTR_FALSE(v) ((v) == git_attr__false)
+#define ATTR_UNSET(v) ((v) == NULL)
 
+/*
+ * Send one or more git_attr_check to git_checkattr(), and
+ * each 'value' member tells what its value is.
+ * Unset one is returned as NULL.
+ */
 struct git_attr_check {
 	struct git_attr *attr;
-	void *value;
+	const char *value;
 };
 
 int git_checkattr(const char *path, int, struct git_attr_check *);
diff --git a/builtin-check-attr.c b/builtin-check-attr.c
index 6983a73c1b..9d77f76ff1 100644
--- a/builtin-check-attr.c
+++ b/builtin-check-attr.c
@@ -42,7 +42,7 @@ int cmd_check_attr(int argc, const char **argv, const char *prefix)
 		if (git_checkattr(argv[i], cnt, check))
 			die("git_checkattr died");
 		for (j = 0; j < cnt; j++) {
-			void *value = check[j].value;
+			const char *value = check[j].value;
 
 			if (ATTR_TRUE(value))
 				value = "set";
@@ -52,7 +52,7 @@ int cmd_check_attr(int argc, const char **argv, const char *prefix)
 				value = "unspecified";
 
 			write_name_quoted("", 0, argv[i], 1, stdout);
-			printf(": %s: %s\n", argv[j+1], (char *) value);
+			printf(": %s: %s\n", argv[j+1], value);
 		}
 	}
 	return 0;
diff --git a/convert.c b/convert.c
index 68bb70f12f..a5f60c7c6b 100644
--- a/convert.c
+++ b/convert.c
@@ -226,7 +226,7 @@ static int git_path_check_crlf(const char *path)
 	setup_crlf_check(&attr_crlf_check);
 
 	if (!git_checkattr(path, 1, &attr_crlf_check)) {
-		void *value = attr_crlf_check.value;
+		const char *value = attr_crlf_check.value;
 		if (ATTR_TRUE(value))
 			return 1;
 		else if (ATTR_FALSE(value))
diff --git a/diff.c b/diff.c
index a32078ea01..5f501864e6 100644
--- a/diff.c
+++ b/diff.c
@@ -1069,7 +1069,7 @@ static int file_is_binary(struct diff_filespec *one)
 
 	setup_diff_attr_check(&attr_diff_check);
 	if (!git_checkattr(one->path, 1, &attr_diff_check)) {
-		void *value = attr_diff_check.value;
+		const char *value = attr_diff_check.value;
 		if (ATTR_TRUE(value))
 			return 0;
 		else if (ATTR_FALSE(value))
@@ -1078,7 +1078,7 @@ static int file_is_binary(struct diff_filespec *one)
 			;
 		else
 			die("unknown value %s given to 'diff' attribute",
-			    (char *)value);
+			    value);
 	}
 
 	if (!one->data) {
diff --git a/merge-recursive.c b/merge-recursive.c
index 7b5ca8e717..ec8438b463 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -953,7 +953,7 @@ static void initialize_ll_merge(void)
 	git_config(read_merge_config);
 }
 
-static const struct ll_merge_driver *find_ll_merge_driver(void *merge_attr)
+static const struct ll_merge_driver *find_ll_merge_driver(const char *merge_attr)
 {
 	struct ll_merge_driver *fn;
 	const char *name;
@@ -986,7 +986,7 @@ static const struct ll_merge_driver *find_ll_merge_driver(void *merge_attr)
 	return &ll_merge_drv[LL_TEXT_MERGE];
 }
 
-static void *git_path_check_merge(const char *path)
+static const char *git_path_check_merge(const char *path)
 {
 	static struct git_attr_check attr_merge_check;
 
@@ -994,7 +994,7 @@ static void *git_path_check_merge(const char *path)
 		attr_merge_check.attr = git_attr("merge", 5);
 
 	if (git_checkattr(path, 1, &attr_merge_check))
-		return ATTR__UNSET;
+		return NULL;
 	return attr_merge_check.value;
 }
 
@@ -1008,7 +1008,7 @@ static int ll_merge(mmbuffer_t *result_buf,
 	mmfile_t orig, src1, src2;
 	char *name1, *name2;
 	int merge_status;
-	void *merge_attr;
+	const char *ll_driver_name;
 	const struct ll_merge_driver *driver;
 
 	name1 = xstrdup(mkpath("%s:%s", branch1, a->path));
@@ -1018,11 +1018,14 @@ static int ll_merge(mmbuffer_t *result_buf,
 	fill_mm(a->sha1, &src1);
 	fill_mm(b->sha1, &src2);
 
-	merge_attr = git_path_check_merge(a->path);
-	driver = find_ll_merge_driver(merge_attr);
+	ll_driver_name = git_path_check_merge(a->path);
+	driver = find_ll_merge_driver(ll_driver_name);
 
 	if (index_only && driver->recursive) {
-		merge_attr = git_attr(driver->recursive, strlen(driver->recursive));
+		void *merge_attr;
+
+		ll_driver_name = driver->recursive;
+		merge_attr = git_attr(ll_driver_name, strlen(ll_driver_name));
 		driver = find_ll_merge_driver(merge_attr);
 	}
 	merge_status = driver->fn(driver, a->path,

From c774aab98ce6c5ef7aaacbef38da0a501eb671d4 Mon Sep 17 00:00:00 2001
From: Julian Phillips <julian@quantumfyre.co.uk>
Date: Tue, 17 Apr 2007 02:42:50 +0100
Subject: [PATCH 067/109] refs.c: add a function to sort a ref list, rather
 than sorting on add

Rather than sorting the refs list while building it, sort in one
go after it is built using a merge sort.  This has a large
performance boost with large numbers of refs.

It shouldn't happen that we read duplicate entries into the same
list, but just in case, sort_ref_list drops the duplicate if the
SHA1s are the same, and dies otherwise, as we have no way of knowing
which one is the correct one.

Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 refs.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 89 insertions(+), 21 deletions(-)

diff --git a/refs.c b/refs.c
index d7be2841c5..f9b8802003 100644
--- a/refs.c
+++ b/refs.c
@@ -47,22 +47,7 @@ static struct ref_list *add_ref(const char *name, const unsigned char *sha1,
 				struct ref_list **new_entry)
 {
 	int len;
-	struct ref_list **p = &list, *entry;
-
-	/* Find the place to insert the ref into.. */
-	while ((entry = *p) != NULL) {
-		int cmp = strcmp(entry->name, name);
-		if (cmp > 0)
-			break;
-
-		/* Same as existing entry? */
-		if (!cmp) {
-			if (new_entry)
-				*new_entry = entry;
-			return list;
-		}
-		p = &entry->next;
-	}
+	struct ref_list *entry;
 
 	/* Allocate it and add it in.. */
 	len = strlen(name) + 1;
@@ -71,11 +56,94 @@ static struct ref_list *add_ref(const char *name, const unsigned char *sha1,
 	hashclr(entry->peeled);
 	memcpy(entry->name, name, len);
 	entry->flag = flag;
-	entry->next = *p;
-	*p = entry;
+	entry->next = list;
 	if (new_entry)
 		*new_entry = entry;
-	return list;
+	return entry;
+}
+
+/* merge sort the ref list */
+static struct ref_list *sort_ref_list(struct ref_list *list)
+{
+	int psize, qsize, last_merge_count, cmp;
+	struct ref_list *p, *q, *l, *e;
+	struct ref_list *new_list = list;
+	int k = 1;
+	int merge_count = 0;
+
+	if (!list)
+		return list;
+
+	do {
+		last_merge_count = merge_count;
+		merge_count = 0;
+
+		psize = 0;
+
+		p = new_list;
+		q = new_list;
+		new_list = NULL;
+		l = NULL;
+
+		while (p) {
+			merge_count++;
+
+			while (psize < k && q->next) {
+				q = q->next;
+				psize++;
+			}
+			qsize = k;
+
+			while ((psize > 0) || (qsize > 0 && q)) {
+				if (qsize == 0 || !q) {
+					e = p;
+					p = p->next;
+					psize--;
+				} else if (psize == 0) {
+					e = q;
+					q = q->next;
+					qsize--;
+				} else {
+					cmp = strcmp(q->name, p->name);
+					if (cmp < 0) {
+						e = q;
+						q = q->next;
+						qsize--;
+					} else if (cmp > 0) {
+						e = p;
+						p = p->next;
+						psize--;
+					} else {
+						if (hashcmp(q->sha1, p->sha1))
+							die("Duplicated ref, and SHA1s don't match: %s",
+							    q->name);
+						warning("Duplicated ref: %s", q->name);
+						e = q;
+						q = q->next;
+						qsize--;
+						free(e);
+						e = p;
+						p = p->next;
+						psize--;
+					}
+				}
+
+				e->next = NULL;
+
+				if (l)
+					l->next = e;
+				if (!new_list)
+					new_list = e;
+				l = e;
+			}
+
+			p = q;
+		};
+
+		k = k * 2;
+	} while ((last_merge_count != merge_count) || (last_merge_count != 1));
+
+	return new_list;
 }
 
 /*
@@ -142,7 +210,7 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
 		    !get_sha1_hex(refline + 1, sha1))
 			hashcpy(last->peeled, sha1);
 	}
-	cached_refs->packed = list;
+	cached_refs->packed = sort_ref_list(list);
 }
 
 static struct ref_list *get_packed_refs(void)
@@ -201,7 +269,7 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list)
 		free(ref);
 		closedir(dir);
 	}
-	return list;
+	return sort_ref_list(list);
 }
 
 static struct ref_list *get_loose_refs(void)

From 1c3e5c4ebc326c5c70350d3f4dc7f2b29e813480 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Thu, 19 Apr 2007 01:55:45 +0200
Subject: [PATCH 068/109] Tests for core subproject support

The following tests are available:

- create subprojects: create a directory in the superproject,
  initialize a git repo in it, and try adding it in super project.
  Make a commit in superproject

- check if fsck ignores the subprojects: it just should give no errors

- check if commit in a subproject detected: make a commit in
  subproject, git-diff-files in superproject should detect it

- check if a changed subproject HEAD can be committed: try
  "git-commit -a" in superproject. It should commit changed
  HEAD of a subproject

- check if diff-index works for subproject elements: compare the index
  (changed by previous tests) with the initial commit (which created
  two subprojects). Should show a change for the recently changed subproject

- check if diff-tree works for subproject elements: do the same, just use
  git-diff-tree. This test is somewhat redundant, I just added it for
  completeness (diff, diff-files, and diff-index are already used)

- check if git diff works for subproject elements: try to limit
  the diff for the name of a subproject in superproject:
     git diff HEAD^ HEAD -- subproject

- check if clone works: try a clone of superproject and compare
  "git ls-files -s" output in superproject and cloned repo

- removing and adding subproject: rename test. Currently implemented
  as "git-update-index --force-remove", "mv" and "git-add".

- checkout in superproject: try to checkout the initial commit

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t3040-subprojects-basic.sh | 85 ++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100755 t/t3040-subprojects-basic.sh

diff --git a/t/t3040-subprojects-basic.sh b/t/t3040-subprojects-basic.sh
new file mode 100755
index 0000000000..79b9f23654
--- /dev/null
+++ b/t/t3040-subprojects-basic.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+
+test_description='Basic subproject functionality'
+. ./test-lib.sh
+
+test_expect_success 'Super project creation' \
+    ': >Makefile &&
+    git add Makefile &&
+    git commit -m "Superproject created"'
+
+
+cat >expected <<EOF
+:000000 160000 00000... A	sub1
+:000000 160000 00000... A	sub2
+EOF
+test_expect_success 'create subprojects' \
+    'mkdir sub1 &&
+    ( cd sub1 && git init && : >Makefile && git add * &&
+    git commit -q -m "subproject 1" ) &&
+    mkdir sub2 &&
+    ( cd sub2 && git init && : >Makefile && git add * &&
+    git commit -q -m "subproject 2" ) &&
+    git update-index --add sub1 &&
+    git add sub2 &&
+    git commit -q -m "subprojects added" &&
+    git diff-tree --abbrev=5 HEAD^ HEAD |cut -d" " -f-3,5- >current &&
+    git diff expected current'
+
+git branch save HEAD
+
+test_expect_success 'check if fsck ignores the subprojects' \
+    'git fsck --full'
+
+test_expect_success 'check if commit in a subproject detected' \
+    '( cd sub1 &&
+    echo "all:" >>Makefile &&
+    echo "	true" >>Makefile &&
+    git commit -q -a -m "make all" ) && {
+        git diff-files --exit-code
+	test $? = 1
+    }'
+
+test_expect_success 'check if a changed subproject HEAD can be committed' \
+    'git commit -q -a -m "sub1 changed" && {
+	git diff-tree --exit-code HEAD^ HEAD
+	test $? = 1
+    }'
+
+test_expect_success 'check if diff-index works for subproject elements' \
+    'git diff-index --exit-code --cached save -- sub1
+    test $? = 1'
+
+test_expect_success 'check if diff-tree works for subproject elements' \
+    'git diff-tree --exit-code HEAD^ HEAD -- sub1
+    test $? = 1'
+
+test_expect_success 'check if git diff works for subproject elements' \
+    'git diff --exit-code HEAD^ HEAD
+    test $? = 1'
+
+test_expect_success 'check if clone works' \
+    'git ls-files -s >expected &&
+    git clone -l -s . cloned &&
+    ( cd cloned && git ls-files -s ) >current &&
+    git diff expected current'
+
+test_expect_success 'removing and adding subproject' \
+    'git update-index --force-remove -- sub2 &&
+    mv sub2 sub3 &&
+    git add sub3 &&
+    git commit -q -m "renaming a subproject" && {
+	git diff -M --name-status --exit-code HEAD^ HEAD
+	test $? = 1
+    }'
+
+# the index must contain the object name the HEAD of the
+# subproject sub1 was at the point "save"
+test_expect_success 'checkout in superproject' \
+    'git checkout save &&
+    git diff-index --exit-code --raw --cached save -- sub1'
+
+# just interesting what happened...
+# git diff --name-status -M save master
+
+test_done

From 15ba3af2d5056313fa19ceb0cb7f7cb3cdd54f16 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 18 Apr 2007 19:05:57 -0700
Subject: [PATCH 069/109] Counto-fix in merge-recursive

When the configuration has variables unrelated to low-level
merge drivers (e.g. merge.summary), the code failed to ignore
them but did something totally senseless.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index ec8438b463..65c018b3ea 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -809,6 +809,9 @@ static int ll_ext_merge(const struct ll_merge_driver *fn,
 	int status, fd, i;
 	struct stat st;
 
+	if (fn->cmdline == NULL)
+		die("custom merge driver %s lacks command line.", fn->name);
+
 	result->ptr = NULL;
 	result->size = 0;
 	create_temp(orig, temp[0]);
@@ -879,7 +882,7 @@ static int read_merge_config(const char *var, const char *value)
 	 * especially, we do not want to look at variables such as
 	 * "merge.summary", "merge.tool", and "merge.verbosity".
 	 */
-	if (prefixcmp(var, "merge.") || (ep = strrchr(var, '.')) == var + 6)
+	if (prefixcmp(var, "merge.") || (ep = strrchr(var, '.')) == var + 5)
 		return 0;
 
 	/*

From d56dbd67097a84dac1dbdf28c1a254f63f93724a Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 18 Apr 2007 19:22:57 -0700
Subject: [PATCH 070/109] Simplify code to find recursive merge driver.

There is no need to intern the string to git_attr, as we are already
dealing with the name of the driver there.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 65c018b3ea..96e461c737 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -1024,13 +1024,8 @@ static int ll_merge(mmbuffer_t *result_buf,
 	ll_driver_name = git_path_check_merge(a->path);
 	driver = find_ll_merge_driver(ll_driver_name);
 
-	if (index_only && driver->recursive) {
-		void *merge_attr;
-
-		ll_driver_name = driver->recursive;
-		merge_attr = git_attr(ll_driver_name, strlen(ll_driver_name));
-		driver = find_ll_merge_driver(merge_attr);
-	}
+	if (index_only && driver->recursive)
+		driver = find_ll_merge_driver(driver->recursive);
 	merge_status = driver->fn(driver, a->path,
 				  &orig, &src1, name1, &src2, name2,
 				  result_buf);

From e4d58311ba1a6cefa2ac5a3d95918af0d9b43588 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 19 Apr 2007 22:28:02 -0400
Subject: [PATCH 071/109] pack-objects: remove obsolete comments

The sorted-by-sha and sorted-by-type arrays are no more.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 19fae4c917..c72e07a2bb 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -49,22 +49,15 @@ struct object_entry {
  * expanded).  nr_objects & nr_alloc controls this array.  They are stored
  * in the order we see -- typically rev-list --objects order that gives us
  * nice "minimum seek" order.
- *
- * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
- * elements in the objects array.  The former is used to build the pack
- * index (lists object names in the ascending order to help offset lookup),
- * and the latter is used to group similar things together by try_delta()
- * heuristics.
  */
+static struct object_entry *objects;
+static uint32_t nr_objects, nr_alloc, nr_result;
 
 static int non_empty;
 static int no_reuse_delta;
 static int local;
 static int incremental;
 static int allow_ofs_delta;
-
-static struct object_entry *objects;
-static uint32_t nr_objects, nr_alloc, nr_result;
 static const char *pack_tmp_name, *idx_tmp_name;
 static char tmpname[PATH_MAX];
 static unsigned char pack_file_sha1[20];
@@ -76,8 +69,7 @@ static int num_preferred_base;
 
 /*
  * The object names in objects array are hashed with this hashtable,
- * to help looking up the entry by object name.  Binary search from
- * sorted_by_sha is also possible but this was easier to code and faster.
+ * to help looking up the entry by object name.
  * This hashtable is built after all the objects are seen.
  */
 static int *object_ix;

From be18c1fe1291ed839aef817283a880e09e222be5 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 19 Apr 2007 22:16:53 -0400
Subject: [PATCH 072/109] document --index-version for index-pack and
 pack-objects

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/git-index-pack.txt   | 5 +++++
 Documentation/git-pack-objects.txt | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt
index 2229ee86b7..b7a49b9f58 100644
--- a/Documentation/git-index-pack.txt
+++ b/Documentation/git-index-pack.txt
@@ -68,6 +68,11 @@ OPTIONS
 	message can later be searched for within all .keep files to
 	locate any which have outlived their usefulness.
 
+--index-version=<version>[,<offset>]::
+	This is intended to be used by the test suite only. It allows
+	to force the version for the generated pack index, and to force
+	64-bit index entries on objects located above the given offset.
+
 
 Note
 ----
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index fdc6f97289..d9e11c6534 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -138,6 +138,11 @@ base-name::
 	length, this option typically shrinks the resulting
 	packfile by 3-5 per-cent.
 
+--index-version=<version>[,<offset>]::
+	This is intended to be used by the test suite only. It allows
+	to force the version for the generated pack index, and to force
+	64-bit index entries on objects located above the given offset.
+
 
 Author
 ------

From 4392da4d5d7585a9defa6869517cb354f7460f35 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 19 Apr 2007 20:47:04 -0700
Subject: [PATCH 073/109] Documentation: support manual section (5) - file
 formats.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/Makefile | 19 +++++++++++++------
 Makefile               |  3 ++-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index a637d8d559..f4c6a803c3 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -2,9 +2,10 @@ MAN1_TXT= \
 	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
 		$(wildcard git-*.txt)) \
 	gitk.txt
+MAN5_TXT=
 MAN7_TXT=git.txt
 
-DOC_HTML=$(patsubst %.txt,%.html,$(MAN1_TXT) $(MAN7_TXT))
+DOC_HTML=$(patsubst %.txt,%.html,$(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT))
 
 ARTICLES = tutorial
 ARTICLES += tutorial-2
@@ -23,12 +24,14 @@ SP_ARTICLES = howto/revert-branch-rebase user-manual
 DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
 
 DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
 DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
 
 prefix?=$(HOME)
 bindir?=$(prefix)/bin
 mandir?=$(prefix)/man
 man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
 man7dir=$(mandir)/man7
 # DESTDIR=
 
@@ -53,15 +56,19 @@ all: html man
 
 html: $(DOC_HTML)
 
-$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN7): asciidoc.conf
+$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
 
-man: man1 man7
+man: man1 man5 man7
 man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
 man7: $(DOC_MAN7)
 
 install: man
-	$(INSTALL) -d -m755 $(DESTDIR)$(man1dir) $(DESTDIR)$(man7dir)
+	$(INSTALL) -d -m755 $(DESTDIR)$(man1dir)
+	$(INSTALL) -d -m755 $(DESTDIR)$(man5dir)
+	$(INSTALL) -d -m755 $(DESTDIR)$(man7dir)
 	$(INSTALL) -m644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
+	: $(INSTALL) -m644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
 	$(INSTALL) -m644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
 
 
@@ -99,7 +106,7 @@ cmd-list.made: cmd-list.perl $(MAN1_TXT)
 git.7 git.html: git.txt core-intro.txt
 
 clean:
-	rm -f *.xml *.xml+ *.html *.html+ *.1 *.7 howto-index.txt howto/*.html doc.dep
+	rm -f *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 howto-index.txt howto/*.html doc.dep
 	rm -f $(cmds_txt) *.made
 
 %.html : %.txt
@@ -109,7 +116,7 @@ clean:
 		sed -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' >$@+
 	mv $@+ $@
 
-%.1 %.7 : %.xml
+%.1 %.5 %.7 : %.xml
 	xmlto -m callouts.xsl man $<
 
 %.xml : %.txt
diff --git a/Makefile b/Makefile
index 4a399dda81..e14cc10047 100644
--- a/Makefile
+++ b/Makefile
@@ -1030,9 +1030,10 @@ dist-doc:
 	gzip -n -9 -f $(htmldocs).tar
 	:
 	rm -fr .doc-tmp-dir
-	mkdir .doc-tmp-dir .doc-tmp-dir/man1 .doc-tmp-dir/man7
+	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
 	$(MAKE) -C Documentation DESTDIR=./ \
 		man1dir=../.doc-tmp-dir/man1 \
+		man5dir=../.doc-tmp-dir/man5 \
 		man7dir=../.doc-tmp-dir/man7 \
 		install
 	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .

From 163b95919428cd7d782af91296e0b886683f2daa Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 19 Apr 2007 22:37:19 -0700
Subject: [PATCH 074/109] Update 'crlf' attribute semantics.

This updates the semantics of 'crlf' so that .gitattributes file
can say "this is text, even though it may look funny".

Setting the `crlf` attribute on a path is meant to mark the path
as a "text" file.  'core.autocrlf' conversion takes place
without guessing the content type by inspection.

Unsetting the `crlf` attribute on a path is meant to mark the
path as a "binary" file.  The path never goes through line
endings conversion upon checkin/checkout.

Unspecified `crlf` attribute tells git to apply the
`core.autocrlf` conversion when the file content looks like
text.

Setting the `crlf` attribute to string value "input" is similar
to setting the attribute to `true`, but also forces git to act
as if `core.autocrlf` is set to `input` for the path.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 convert.c       | 75 +++++++++++++++++--------------------------------
 t/t0020-crlf.sh | 74 ++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 89 insertions(+), 60 deletions(-)

diff --git a/convert.c b/convert.c
index a5f60c7c6b..da64253a16 100644
--- a/convert.c
+++ b/convert.c
@@ -10,6 +10,11 @@
  * translation when the "auto_crlf" option is set.
  */
 
+#define CRLF_GUESS	(-1)
+#define CRLF_BINARY	0
+#define CRLF_TEXT	1
+#define CRLF_INPUT	2
+
 struct text_stat {
 	/* CR, LF and CRLF counts */
 	unsigned cr, lf, crlf;
@@ -74,13 +79,13 @@ static int is_binary(unsigned long size, struct text_stat *stats)
 	return 0;
 }
 
-static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int guess)
+static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int action)
 {
 	char *buffer, *nbuf;
 	unsigned long size, nsize;
 	struct text_stat stats;
 
-	if (guess && !auto_crlf)
+	if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf))
 		return 0;
 
 	size = *sizep;
@@ -94,7 +99,7 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
 	if (!stats.cr)
 		return 0;
 
-	if (guess) {
+	if (action == CRLF_GUESS) {
 		/*
 		 * We're currently not going to even try to convert stuff
 		 * that has bare CR characters. Does anybody do that crazy
@@ -119,7 +124,12 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
 	*bufp = nbuf;
 	*sizep = nsize;
 
-	if (guess) {
+	if (action == CRLF_GUESS) {
+		/*
+		 * If we guessed, we already know we rejected a file with
+		 * lone CR, and we can strip a CR without looking at what
+		 * follow it.
+		 */
 		do {
 			unsigned char c = *buffer++;
 			if (c != '\r')
@@ -136,24 +146,15 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
 	return 1;
 }
 
-static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
-{
-	return crlf_to_git(path, bufp, sizep, 1);
-}
-
-static int forcecrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
-{
-	return crlf_to_git(path, bufp, sizep, 0);
-}
-
-static int crlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep, int guess)
+static int crlf_to_worktree(const char *path, char **bufp, unsigned long *sizep, int action)
 {
 	char *buffer, *nbuf;
 	unsigned long size, nsize;
 	struct text_stat stats;
 	unsigned char last;
 
-	if (guess && auto_crlf <= 0)
+	if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
+	    (action == CRLF_GUESS && auto_crlf <= 0))
 		return 0;
 
 	size = *sizep;
@@ -171,7 +172,7 @@ static int crlf_to_working_tree(const char *path, char **bufp, unsigned long *si
 	if (stats.lf == stats.crlf)
 		return 0;
 
-	if (guess) {
+	if (action == CRLF_GUESS) {
 		/* If we have any bare CR characters, we're not going to touch it */
 		if (stats.cr != stats.crlf)
 			return 0;
@@ -200,16 +201,6 @@ static int crlf_to_working_tree(const char *path, char **bufp, unsigned long *si
 	return 1;
 }
 
-static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
-{
-	return crlf_to_working_tree(path, bufp, sizep, 1);
-}
-
-static int forcecrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
-{
-	return crlf_to_working_tree(path, bufp, sizep, 0);
-}
-
 static void setup_crlf_check(struct git_attr_check *check)
 {
 	static struct git_attr *attr_crlf;
@@ -228,38 +219,24 @@ static int git_path_check_crlf(const char *path)
 	if (!git_checkattr(path, 1, &attr_crlf_check)) {
 		const char *value = attr_crlf_check.value;
 		if (ATTR_TRUE(value))
-			return 1;
+			return CRLF_TEXT;
 		else if (ATTR_FALSE(value))
-			return 0;
+			return CRLF_BINARY;
 		else if (ATTR_UNSET(value))
 			;
-		else
-			die("unknown value %s given to 'crlf' attribute",
-			    (char *)value);
+		else if (!strcmp(value, "input"))
+			return CRLF_INPUT;
+		/* fallthru */
 	}
-	return -1;
+	return CRLF_GUESS;
 }
 
 int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
 {
-	switch (git_path_check_crlf(path)) {
-	case 0:
-		return 0;
-	case 1:
-		return forcecrlf_to_git(path, bufp, sizep);
-	default:
-		return autocrlf_to_git(path, bufp, sizep);
-	}
+	return crlf_to_git(path, bufp, sizep, git_path_check_crlf(path));
 }
 
 int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 {
-	switch (git_path_check_crlf(path)) {
-	case 0:
-		return 0;
-	case 1:
-		return forcecrlf_to_working_tree(path, bufp, sizep);
-	default:
-		return autocrlf_to_working_tree(path, bufp, sizep);
-	}
+	return crlf_to_worktree(path, bufp, sizep, git_path_check_crlf(path));
 }
diff --git a/t/t0020-crlf.sh b/t/t0020-crlf.sh
index cf84f0a1ab..fe1dfd08a0 100755
--- a/t/t0020-crlf.sh
+++ b/t/t0020-crlf.sh
@@ -4,6 +4,10 @@ test_description='CRLF conversion'
 
 . ./test-lib.sh
 
+q_to_nul () {
+	tr Q '\0'
+}
+
 append_cr () {
 	sed -e 's/$/Q/' | tr Q '\015'
 }
@@ -20,6 +24,7 @@ test_expect_success setup '
 	for w in Hello world how are you; do echo $w; done >one &&
 	mkdir dir &&
 	for w in I am very very fine thank you; do echo $w; done >dir/two &&
+	for w in Oh here is NULQin text here; do echo $w; done | q_to_nul >three &&
 	git add . &&
 
 	git commit -m initial &&
@@ -27,6 +32,7 @@ test_expect_success setup '
 	one=`git rev-parse HEAD:one` &&
 	dir=`git rev-parse HEAD:dir` &&
 	two=`git rev-parse HEAD:dir/two` &&
+	three=`git rev-parse HEAD:three` &&
 
 	for w in Some extra lines here; do echo $w; done >>one &&
 	git diff >patch.file &&
@@ -38,7 +44,7 @@ test_expect_success setup '
 
 test_expect_success 'update with autocrlf=input' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git read-tree --reset -u HEAD &&
 	git repo-config core.autocrlf input &&
 
@@ -62,7 +68,7 @@ test_expect_success 'update with autocrlf=input' '
 
 test_expect_success 'update with autocrlf=true' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git read-tree --reset -u HEAD &&
 	git repo-config core.autocrlf true &&
 
@@ -86,7 +92,7 @@ test_expect_success 'update with autocrlf=true' '
 
 test_expect_success 'checkout with autocrlf=true' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf true &&
 	git read-tree --reset -u HEAD &&
 
@@ -110,7 +116,7 @@ test_expect_success 'checkout with autocrlf=true' '
 
 test_expect_success 'checkout with autocrlf=input' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf input &&
 	git read-tree --reset -u HEAD &&
 
@@ -136,7 +142,7 @@ test_expect_success 'checkout with autocrlf=input' '
 
 test_expect_success 'apply patch (autocrlf=input)' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf input &&
 	git read-tree --reset -u HEAD &&
 
@@ -149,7 +155,7 @@ test_expect_success 'apply patch (autocrlf=input)' '
 
 test_expect_success 'apply patch --cached (autocrlf=input)' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf input &&
 	git read-tree --reset -u HEAD &&
 
@@ -162,7 +168,7 @@ test_expect_success 'apply patch --cached (autocrlf=input)' '
 
 test_expect_success 'apply patch --index (autocrlf=input)' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf input &&
 	git read-tree --reset -u HEAD &&
 
@@ -176,7 +182,7 @@ test_expect_success 'apply patch --index (autocrlf=input)' '
 
 test_expect_success 'apply patch (autocrlf=true)' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf true &&
 	git read-tree --reset -u HEAD &&
 
@@ -189,7 +195,7 @@ test_expect_success 'apply patch (autocrlf=true)' '
 
 test_expect_success 'apply patch --cached (autocrlf=true)' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf true &&
 	git read-tree --reset -u HEAD &&
 
@@ -202,7 +208,7 @@ test_expect_success 'apply patch --cached (autocrlf=true)' '
 
 test_expect_success 'apply patch --index (autocrlf=true)' '
 
-	rm -f tmp one dir/two &&
+	rm -f tmp one dir/two three &&
 	git repo-config core.autocrlf true &&
 	git read-tree --reset -u HEAD &&
 
@@ -216,8 +222,8 @@ test_expect_success 'apply patch --index (autocrlf=true)' '
 
 test_expect_success '.gitattributes says two is binary' '
 
+	rm -f tmp one dir/two three &&
 	echo "two -crlf" >.gitattributes &&
-	rm -f tmp one dir/two &&
 	git repo-config core.autocrlf true &&
 	git read-tree --reset -u HEAD &&
 
@@ -230,6 +236,52 @@ test_expect_success '.gitattributes says two is binary' '
 	fi &&
 
 	if remove_cr one >/dev/null
+	then
+		: happy
+	else
+		echo "Huh?"
+		false
+	fi &&
+
+	if remove_cr three >/dev/null
+	then
+		echo "Huh?"
+		false
+	else
+		: happy
+	fi
+'
+
+test_expect_success '.gitattributes says two is input' '
+
+	rm -f tmp one dir/two three &&
+	echo "two crlf=input" >.gitattributes &&
+	git read-tree --reset -u HEAD &&
+
+	if remove_cr dir/two >/dev/null
+	then
+		echo "Huh?"
+		false
+	else
+		: happy
+	fi
+'
+
+test_expect_success '.gitattributes says two and three are text' '
+
+	rm -f tmp one dir/two three &&
+	echo "t* crlf" >.gitattributes &&
+	git read-tree --reset -u HEAD &&
+
+	if remove_cr dir/two >/dev/null
+	then
+		: happy
+	else
+		echo "Huh?"
+		false
+	fi &&
+
+	if remove_cr three >/dev/null
 	then
 		: happy
 	else

From 88e7fdf2cb436e068434241b0519577293055c19 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 19 Apr 2007 20:48:03 -0700
Subject: [PATCH 075/109] Document gitattributes(5)

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/Makefile          |   4 +-
 Documentation/config.txt        |  13 ++
 Documentation/gitattributes.txt | 285 ++++++++++++++++++++++++++++++++
 3 files changed, 300 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/gitattributes.txt

diff --git a/Documentation/Makefile b/Documentation/Makefile
index f4c6a803c3..8d3617db97 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -2,7 +2,7 @@ MAN1_TXT= \
 	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
 		$(wildcard git-*.txt)) \
 	gitk.txt
-MAN5_TXT=
+MAN5_TXT=gitattributes.txt
 MAN7_TXT=git.txt
 
 DOC_HTML=$(patsubst %.txt,%.html,$(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT))
@@ -68,7 +68,7 @@ install: man
 	$(INSTALL) -d -m755 $(DESTDIR)$(man5dir)
 	$(INSTALL) -d -m755 $(DESTDIR)$(man7dir)
 	$(INSTALL) -m644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
-	: $(INSTALL) -m644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
+	$(INSTALL) -m644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
 	$(INSTALL) -m644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
 
 
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 7e41ca6a0d..a130846883 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -499,6 +499,19 @@ merge.verbosity::
 	conflicts, 2 outputs conflicts and file changes.  Level 5 and
 	above outputs debugging information.  The default is level 2.
 
+merge.<driver>.name::
+	Defines a human readable name for a custom low-level
+	merge driver.  See gitlink:gitattributes[5] for details.
+
+merge.<driver>.driver::
+	Defines the command that implements a custom low-level
+	merge driver.  See gitlink:gitattributes[5] for details.
+
+merge.<driver>.recursive::
+	Names a low-level merge driver to be used when
+	performing an internal merge between common ancestors.
+	See gitlink:gitattributes[5] for details.
+
 pack.window::
 	The size of the window used by gitlink:git-pack-objects[1] when no
 	window size is given on the command line. Defaults to 10.
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
new file mode 100644
index 0000000000..ece58abee2
--- /dev/null
+++ b/Documentation/gitattributes.txt
@@ -0,0 +1,285 @@
+gitattributes(5)
+================
+
+NAME
+----
+gitattributes - defining attributes per path
+
+SYNOPSIS
+--------
+.gitattributes
+
+
+DESCRIPTION
+-----------
+
+A `gitattributes` file is a simple text file that gives
+`attributes` to pathnames.
+
+Each line in `gitattributes` file is of form:
+
+	glob	attr1 attr2 ...
+
+That is, a glob pattern followed by an attributes list,
+separated by whitespaces.  When the glob pattern matches the
+path in question, the attributes listed on the line are given to
+the path.
+
+Each attribute can be in one of these states for a given path:
+
+Set::
+
+	The path has the attribute with special value "true";
+	this is specified by listing only the name of the
+	attribute in the attribute list.
+
+Unset::
+
+	The path has the attribute with special value "false";
+	this is specified by listing the name of the attribute
+	prefixed with a dash `-` in the attribute list.
+
+Set to a value::
+
+	The path has the attribute with specified string value;
+	this is specified by listing the name of the attribute
+	followed by an equal sign `=` and its value in the
+	attribute list.
+
+Unspecified::
+
+	No glob pattern matches the path, and nothing says if
+	the path has or does not have the attribute.
+
+When more than one glob pattern matches the path, a later line
+overrides an earlier line.
+
+When deciding what attributes are assigned to a path, git
+consults `$GIT_DIR/info/attributes` file (which has the highest
+precedence), `.gitattributes` file in the same directory as the
+path in question, and its parent directories (the further the
+directory that contains `.gitattributes` is from the path in
+question, the lower its precedence).
+
+Sometimes you would need to override a setting of an attribute
+for a path to `unspecified` state.  This can be done by listing
+the name of the attribute prefixed with an exclamation point `!`.
+
+
+EFFECTS
+-------
+
+Certain operations by git can be influenced by assigning
+particular attributes to a path.  Currently, three operations
+are attributes-aware.
+
+Checking-out and checking-in
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The attribute `crlf` affects how the contents stored in the
+repository are copied to the working tree files when commands
+such as `git checkout` and `git merge` run.  It also affects how
+git stores the contents you prepare in the working tree in the
+repository upon `git add` and `git commit`.
+
+Set::
+
+	Setting the `crlf` attribute on a path is meant to mark
+	the path as a "text" file.  'core.autocrlf' conversion
+	takes place without guessing the content type by
+	inspection.
+
+Unset::
+
+	Unsetting the `crlf` attribute on a path is meant to
+	mark the path as a "binary" file.  The path never goes
+	through line endings conversion upon checkin/checkout.
+
+Unspecified::
+
+	Unspecified `crlf` attribute tells git to apply the
+	`core.autocrlf` conversion when the file content looks
+	like text.
+
+Set to string value "input"::
+
+	This is similar to setting the attribute to `true`, but
+	also forces git to act as if `core.autocrlf` is set to
+	`input` for the path.
+
+Any other value set to `crlf` attribute is ignored and git acts
+as if the attribute is left unspecified.
+
+
+The `core.autocrlf` conversion
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the configuration variable `core.autocrlf` is false, no
+conversion is done.
+
+When `core.autocrlf` is true, it means that the platform wants
+CRLF line endings for files in the working tree, and you want to
+convert them back to the normal LF line endings when checking
+in to the repository.
+
+When `core.autocrlf` is set to "input", line endings are
+converted to LF upon checkin, but there is no conversion done
+upon checkout.
+
+
+Generating diff text
+~~~~~~~~~~~~~~~~~~~~
+
+The attribute `diff` affects if `git diff` generates textual
+patch for the path or just says `Binary files differ`.
+
+Set::
+
+	A path to which the `diff` attribute is set is treated
+	as text, even when they contain byte values that
+	normally never appear in text files, such as NUL.
+
+Unset::
+
+	A path to which the `diff` attribute is unset will
+	generate `Binary files differ`.
+
+Unspecified::
+
+	A path to which the `diff` attribute is unspecified
+	first gets its contents inspected, and if it looks like
+	text, it is treated as text.  Otherwise it would
+	generate `Binary files differ`.
+
+Any other value set to `diff` attribute is ignored and git acts
+as if the attribute is left unspecified.
+
+
+Performing a three-way merge
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The attribute `merge` affects how three versions of a file are
+merged when a file-level merge is necessary during `git merge`,
+and other programs such as `git revert` and `git cherry-pick`.
+
+Set::
+
+	Built-in 3-way merge driver is used to merge the
+	contents in a way similar to `merge` command of `RCS`
+	suite.  This is suitable for ordinary text files.
+
+Unset::
+
+	Take the version from the current branch as the
+	tentative merge result, and declare that the merge has
+	conflicts.  This is suitable for binary files that do
+	not have well-defined merge semantics.
+
+Unspecified::
+
+	By default, this uses the same built-in 3-way merge
+	driver as is the case when the `merge` attribute is set.
+	However, `merge.default` configuration variable can name
+	different merge driver to be used for paths to which the
+	`merge` attribute is unspecified.
+
+Any other string value::
+
+	3-way merge is performed using the specified custom
+	merge driver.  The built-in 3-way merge driver can be
+	explicitly specified by asking for "text" driver; the
+	built-in "take the current branch" driver can be
+	requested by "binary".
+
+
+Defining a custom merge driver
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The definition of a merge driver is done in `gitconfig` not
+`gitattributes` file, so strictly speaking this manual page is a
+wrong place to talk about it.  However...
+
+To define a custom merge driver `filfre`, add a section to your
+`$GIT_DIR/config` file (or `$HOME/.gitconfig` file) like this:
+
+----------------------------------------------------------------
+[merge "filfre"]
+	name = feel-free merge driver
+	driver = filfre %O %A %B
+	recursive = binary
+----------------------------------------------------------------
+
+The `merge.*.name` variable gives the driver a human-readable
+name.
+
+The `merge.*.driver` variable's value is used to construct a
+command to run to merge ancestor's version (`%O`), current
+version (`%A`) and the other branches' version (`%B`).  These
+three tokens are replaced with the names of temporary files that
+hold the contents of these versions when the command line is
+built.
+
+The merge driver is expected to leave the result of the merge in
+the file named with `%A` by overwriting it, and exit with zero
+status if it managed to merge them cleanly, or non-zero if there
+were conflicts.
+
+The `merge.*.recursive` variable specifies what other merge
+driver to use when the merge driver is called for an internal
+merge between common ancestors, when there are more than one.
+When left unspecified, the driver itself is used for both
+internal merge and the final merge.
+
+
+EXAMPLE
+-------
+
+If you have these three `gitattributes` files:
+
+----------------------------------------------------------------
+(in $GIT_DIR/info/attributes)
+
+a*	foo !bar -baz
+
+(in .gitattributes)
+abc	foo bar baz
+
+(in t/.gitattributes)
+ab*	merge=filfre
+abc	-foo -bar
+*.c	frotz
+----------------------------------------------------------------
+
+the attributes given to path `t/abc` are computed as follows:
+
+1. By examining `t/.gitattributes` (which is in the same
+   directory as the path in question), git finds that the first
+   line matches.  `merge` attribute is set.  It also finds that
+   the second line matches, and attributes `foo` and `bar`
+   are unset.
+
+2. Then it examines `.gitattributes` (which is in the parent
+   directory), and finds that the first line matches, but
+   `t/.gitattributes` file already decided how `merge`, `foo`
+   and `bar` attributes should be given to this path, so it
+   leaves `foo` and `bar` unset.  Attribute `baz` is set.
+
+3. Finally it examines `$GIT_DIR/info/attributes`.  This file
+   is used to override the in-tree settings.  The first line is
+   a match, and `foo` is set, `bar` is reverted to unspecified
+   state, and `baz` is unset.
+
+As the result, the attribute assignment to `t/abc` becomes:
+
+----------------------------------------------------------------
+foo	set to true
+bar	unspecified
+baz	set to false
+merge	set to string value "filfre"
+frotz	unspecified
+----------------------------------------------------------------
+
+
+GIT
+---
+Part of the gitlink:git[7] suite

From dfdac5d9b877641d3aad8ec49f64c2730a3487e3 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Fri, 20 Apr 2007 01:39:39 -0700
Subject: [PATCH 076/109] git-add -u: match the index with working tree.

This is a shorthand of what "git commit -a" does in preparation
for making a commit, which is:

    git diff-files --name-only -z | git update-index --remove -z --stdin

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-add.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/builtin-add.c b/builtin-add.c
index 9ec292590c..5e6748f356 100644
--- a/builtin-add.c
+++ b/builtin-add.c
@@ -8,10 +8,15 @@
 #include "dir.h"
 #include "exec_cmd.h"
 #include "cache-tree.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "commit.h"
+#include "revision.h"
 
 static const char builtin_add_usage[] =
-"git-add [-n] [-v] [-f] [--interactive | -i] [--] <filepattern>...";
+"git-add [-n] [-v] [-f] [--interactive | -i] [-u] [--] <filepattern>...";
 
+static int take_all_worktree_changes;
 static const char *excludes_file;
 
 static void prune_directory(struct dir_struct *dir, const char **pathspec, int prefix)
@@ -92,6 +97,44 @@ static void fill_directory(struct dir_struct *dir, const char **pathspec)
 		prune_directory(dir, pathspec, baselen);
 }
 
+static void update_callback(struct diff_queue_struct *q,
+			    struct diff_options *opt, void *cbdata)
+{
+	int i, verbose;
+
+	verbose = *((int *)cbdata);
+	for (i = 0; i < q->nr; i++) {
+		struct diff_filepair *p = q->queue[i];
+		const char *path = p->one->path;
+		switch (p->status) {
+		default:
+			die("unexpected diff status %c", p->status);
+		case DIFF_STATUS_UNMERGED:
+		case DIFF_STATUS_MODIFIED:
+			add_file_to_cache(path, verbose);
+			break;
+		case DIFF_STATUS_DELETED:
+			remove_file_from_cache(path);
+			if (verbose)
+				printf("remove '%s'\n", path);
+			break;
+		}
+	}
+}
+
+static void update_all(int verbose)
+{
+	struct rev_info rev;
+	init_revisions(&rev, "");
+	setup_revisions(0, NULL, &rev, NULL);
+	rev.diffopt.output_format = DIFF_FORMAT_CALLBACK;
+	rev.diffopt.format_callback = update_callback;
+	rev.diffopt.format_callback_data = &verbose;
+	if (read_cache() < 0)
+		die("index file corrupt");
+	run_diff_files(&rev, 0);
+}
+
 static int git_add_config(const char *var, const char *value)
 {
 	if (!strcmp(var, "core.excludesfile")) {
@@ -156,8 +199,20 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 			verbose = 1;
 			continue;
 		}
+		if (!strcmp(arg, "-u")) {
+			take_all_worktree_changes = 1;
+			continue;
+		}
 		usage(builtin_add_usage);
 	}
+
+	if (take_all_worktree_changes) {
+		if (i < argc)
+			die("-u and explicit paths are incompatible");
+		update_all(verbose);
+		goto finish;
+	}
+
 	if (argc <= i) {
 		fprintf(stderr, "Nothing specified, nothing added.\n");
 		fprintf(stderr, "Maybe you wanted to say 'git add .'?\n");
@@ -207,6 +262,7 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 	for (i = 0; i < dir.nr; i++)
 		add_file_to_cache(dir.entries[i]->name, verbose);
 
+ finish:
 	if (active_cache_changed) {
 		if (write_cache(newfd, active_cache, active_nr) ||
 		    close(newfd) || commit_locked_index(&lock_file))

From ac78e548049f4e86b38368d2c4b4dbb546c64ac6 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Thu, 19 Apr 2007 02:05:03 +0200
Subject: [PATCH 077/109] Simplify calling of CR/LF conversion routines

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-apply.c | 18 +++++--------
 cache.h         |  4 +--
 convert.c       | 71 ++++++++++++++++++++++++-------------------------
 diff.c          |  4 +--
 entry.c         |  7 ++---
 sha1_file.c     |  7 +++--
 6 files changed, 51 insertions(+), 60 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index fd92ef7174..ccd342c1c4 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1475,8 +1475,8 @@ static int read_old_data(struct stat *st, const char *path, char **buf_p, unsign
 		}
 		close(fd);
 		nsize = got;
-		nbuf = buf;
-		if (convert_to_git(path, &nbuf, &nsize)) {
+		nbuf = convert_to_git(path, buf, &nsize);
+		if (nbuf) {
 			free(buf);
 			*buf_p = nbuf;
 			*alloc_p = nsize;
@@ -2355,9 +2355,8 @@ static void add_index_file(const char *path, unsigned mode, void *buf, unsigned
 
 static int try_create_file(const char *path, unsigned int mode, const char *buf, unsigned long size)
 {
-	int fd, converted;
+	int fd;
 	char *nbuf;
-	unsigned long nsize;
 
 	if (has_symlinks && S_ISLNK(mode))
 		/* Although buf:size is counted string, it also is NUL
@@ -2369,13 +2368,10 @@ static int try_create_file(const char *path, unsigned int mode, const char *buf,
 	if (fd < 0)
 		return -1;
 
-	nsize = size;
-	nbuf = (char *) buf;
-	converted = convert_to_working_tree(path, &nbuf, &nsize);
-	if (converted) {
+	nbuf = convert_to_working_tree(path, buf, &size);
+	if (nbuf)
 		buf = nbuf;
-		size = nsize;
-	}
+
 	while (size) {
 		int written = xwrite(fd, buf, size);
 		if (written < 0)
@@ -2387,7 +2383,7 @@ static int try_create_file(const char *path, unsigned int mode, const char *buf,
 	}
 	if (close(fd) < 0)
 		die("closing file %s: %s", path, strerror(errno));
-	if (converted)
+	if (nbuf)
 		free(nbuf);
 	return 0;
 }
diff --git a/cache.h b/cache.h
index 38ad00661d..8c804cb6ee 100644
--- a/cache.h
+++ b/cache.h
@@ -496,8 +496,8 @@ extern void trace_printf(const char *format, ...);
 extern void trace_argv_printf(const char **argv, int count, const char *format, ...);
 
 /* convert.c */
-extern int convert_to_git(const char *path, char **bufp, unsigned long *sizep);
-extern int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep);
+extern char *convert_to_git(const char *path, const char *src, unsigned long *sizep);
+extern char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep);
 
 /* match-trees.c */
 void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
diff --git a/convert.c b/convert.c
index da64253a16..742b895cfa 100644
--- a/convert.c
+++ b/convert.c
@@ -79,25 +79,24 @@ static int is_binary(unsigned long size, struct text_stat *stats)
 	return 0;
 }
 
-static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int action)
+static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action)
 {
-	char *buffer, *nbuf;
+	char *buffer, *dst;
 	unsigned long size, nsize;
 	struct text_stat stats;
 
 	if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf))
-		return 0;
+		return NULL;
 
 	size = *sizep;
 	if (!size)
-		return 0;
-	buffer = *bufp;
+		return NULL;
 
-	gather_stats(buffer, size, &stats);
+	gather_stats(src, size, &stats);
 
 	/* No CR? Nothing to convert, regardless. */
 	if (!stats.cr)
-		return 0;
+		return NULL;
 
 	if (action == CRLF_GUESS) {
 		/*
@@ -106,13 +105,13 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
 		 * stuff?
 		 */
 		if (stats.cr != stats.crlf)
-			return 0;
+			return NULL;
 
 		/*
 		 * And add some heuristics for binary vs text, of course...
 		 */
 		if (is_binary(size, &stats))
-			return 0;
+			return NULL;
 	}
 
 	/*
@@ -120,10 +119,10 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
 	 * to let the caller know that we switched buffers on it.
 	 */
 	nsize = size - stats.crlf;
-	nbuf = xmalloc(nsize);
-	*bufp = nbuf;
+	buffer = xmalloc(nsize);
 	*sizep = nsize;
 
+	dst = buffer;
 	if (action == CRLF_GUESS) {
 		/*
 		 * If we guessed, we already know we rejected a file with
@@ -131,54 +130,53 @@ static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int
 		 * follow it.
 		 */
 		do {
-			unsigned char c = *buffer++;
+			unsigned char c = *src++;
 			if (c != '\r')
-				*nbuf++ = c;
+				*dst++ = c;
 		} while (--size);
 	} else {
 		do {
-			unsigned char c = *buffer++;
+			unsigned char c = *src++;
 			if (! (c == '\r' && (1 < size && *buffer == '\n')))
-				*nbuf++ = c;
+				*dst++ = c;
 		} while (--size);
 	}
 
-	return 1;
+	return buffer;
 }
 
-static int crlf_to_worktree(const char *path, char **bufp, unsigned long *sizep, int action)
+static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action)
 {
-	char *buffer, *nbuf;
+	char *buffer, *dst;
 	unsigned long size, nsize;
 	struct text_stat stats;
 	unsigned char last;
 
 	if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
 	    (action == CRLF_GUESS && auto_crlf <= 0))
-		return 0;
+		return NULL;
 
 	size = *sizep;
 	if (!size)
-		return 0;
-	buffer = *bufp;
+		return NULL;
 
-	gather_stats(buffer, size, &stats);
+	gather_stats(src, size, &stats);
 
 	/* No LF? Nothing to convert, regardless. */
 	if (!stats.lf)
-		return 0;
+		return NULL;
 
 	/* Was it already in CRLF format? */
 	if (stats.lf == stats.crlf)
-		return 0;
+		return NULL;
 
 	if (action == CRLF_GUESS) {
 		/* If we have any bare CR characters, we're not going to touch it */
 		if (stats.cr != stats.crlf)
-			return 0;
+			return NULL;
 
 		if (is_binary(size, &stats))
-			return 0;
+			return NULL;
 	}
 
 	/*
@@ -186,19 +184,20 @@ static int crlf_to_worktree(const char *path, char **bufp, unsigned long *sizep,
 	 * to let the caller know that we switched buffers on it.
 	 */
 	nsize = size + stats.lf - stats.crlf;
-	nbuf = xmalloc(nsize);
-	*bufp = nbuf;
+	buffer = xmalloc(nsize);
 	*sizep = nsize;
 	last = 0;
+
+	dst = buffer;
 	do {
-		unsigned char c = *buffer++;
+		unsigned char c = *src++;
 		if (c == '\n' && last != '\r')
-			*nbuf++ = '\r';
-		*nbuf++ = c;
+			*dst++ = '\r';
+		*dst++ = c;
 		last = c;
 	} while (--size);
 
-	return 1;
+	return buffer;
 }
 
 static void setup_crlf_check(struct git_attr_check *check)
@@ -231,12 +230,12 @@ static int git_path_check_crlf(const char *path)
 	return CRLF_GUESS;
 }
 
-int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
+char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
 {
-	return crlf_to_git(path, bufp, sizep, git_path_check_crlf(path));
+	return crlf_to_git(path, src, sizep, git_path_check_crlf(path));
 }
 
-int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
 {
-	return crlf_to_worktree(path, bufp, sizep, git_path_check_crlf(path));
+	return crlf_to_worktree(path, src, sizep, git_path_check_crlf(path));
 }
diff --git a/diff.c b/diff.c
index 5f501864e6..1cb1230a99 100644
--- a/diff.c
+++ b/diff.c
@@ -1493,9 +1493,9 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		buf = s->data;
 		size = s->size;
-		if (convert_to_git(s->path, &buf, &size)) {
+		buf = convert_to_git(s->path, s->data, &size);
+		if (buf) {
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
 			s->data = buf;
diff --git a/entry.c b/entry.c
index d72f811580..3771209f19 100644
--- a/entry.c
+++ b/entry.c
@@ -79,7 +79,6 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
 	}
 	switch (ntohl(ce->ce_mode) & S_IFMT) {
 		char *buf;
-		unsigned long nsize;
 
 	case S_IFREG:
 		if (to_tempfile) {
@@ -96,12 +95,10 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
 		/*
 		 * Convert from git internal format to working tree format
 		 */
-		buf = new;
-		nsize = size;
-		if (convert_to_working_tree(ce->name, &buf, &nsize)) {
+		buf = convert_to_working_tree(ce->name, new, &size);
+		if (buf) {
 			free(new);
 			new = buf;
-			size = nsize;
 		}
 
 		wrote = write_in_full(fd, new, size);
diff --git a/sha1_file.c b/sha1_file.c
index 4304fe9bbc..1978d5f14e 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2277,10 +2277,9 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 	 */
 	if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) {
 		unsigned long nsize = size;
-		char *nbuf = buf;
-		if (convert_to_git(path, &nbuf, &nsize)) {
-			if (size)
-				munmap(buf, size);
+		char *nbuf = convert_to_git(path, buf, &nsize);
+		if (nbuf) {
+			munmap(buf, size);
 			size = nsize;
 			buf = nbuf;
 			re_allocated = 1;

From e87b1c943a50af9ab51df20b3419cbffa4e75484 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 21 Apr 2007 00:05:31 -0700
Subject: [PATCH 078/109] Fix bogus linked-list management for user defined
 merge drivers.

ll_user_merge_tail is supposed to point at the pointer to be
updated to point at a newly created item.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 merge-recursive.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 96e461c737..3d395895fc 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -902,8 +902,9 @@ static int read_merge_config(const char *var, const char *value)
 		namebuf[namelen] = 0;
 		fn->name = namebuf;
 		fn->fn = ll_ext_merge;
-		fn->next = *ll_user_merge_tail;
+		fn->next = NULL;
 		*ll_user_merge_tail = fn;
+		ll_user_merge_tail = &(fn->next);
 	}
 
 	ep++;

From 6073ee85719be6d959e74aa667024fcbec44a588 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Fri, 20 Apr 2007 23:44:02 -0700
Subject: [PATCH 079/109] convert.c: restructure the attribute checking part.

This separates the checkattr() call and interpretation of the
returned value specific to the 'crlf' attribute into separate
routines, so that we can run a single call to checkattr() to
check for more than one attribute, and then interpret what
the returned settings mean separately.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 convert.c | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/convert.c b/convert.c
index 742b895cfa..37239ace83 100644
--- a/convert.c
+++ b/convert.c
@@ -200,7 +200,7 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long *
 	return buffer;
 }
 
-static void setup_crlf_check(struct git_attr_check *check)
+static void setup_convert_check(struct git_attr_check *check)
 {
 	static struct git_attr *attr_crlf;
 
@@ -209,33 +209,41 @@ static void setup_crlf_check(struct git_attr_check *check)
 	check->attr = attr_crlf;
 }
 
-static int git_path_check_crlf(const char *path)
+static int git_path_check_crlf(const char *path, struct git_attr_check *check)
 {
-	struct git_attr_check attr_crlf_check;
+	const char *value = check->value;
 
-	setup_crlf_check(&attr_crlf_check);
-
-	if (!git_checkattr(path, 1, &attr_crlf_check)) {
-		const char *value = attr_crlf_check.value;
-		if (ATTR_TRUE(value))
-			return CRLF_TEXT;
-		else if (ATTR_FALSE(value))
-			return CRLF_BINARY;
-		else if (ATTR_UNSET(value))
-			;
-		else if (!strcmp(value, "input"))
-			return CRLF_INPUT;
-		/* fallthru */
-	}
+	if (ATTR_TRUE(value))
+		return CRLF_TEXT;
+	else if (ATTR_FALSE(value))
+		return CRLF_BINARY;
+	else if (ATTR_UNSET(value))
+		;
+	else if (!strcmp(value, "input"))
+		return CRLF_INPUT;
 	return CRLF_GUESS;
 }
 
 char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
 {
-	return crlf_to_git(path, src, sizep, git_path_check_crlf(path));
+	struct git_attr_check check[1];
+	int crlf = CRLF_GUESS;
+
+	setup_convert_check(check);
+	if (!git_checkattr(path, 1, check)) {
+		crlf = git_path_check_crlf(path, check);
+	}
+	return crlf_to_git(path, src, sizep, crlf);
 }
 
 char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
 {
-	return crlf_to_worktree(path, src, sizep, git_path_check_crlf(path));
+	struct git_attr_check check[1];
+	int crlf = CRLF_GUESS;
+
+	setup_convert_check(check);
+	if (!git_checkattr(path, 1, check)) {
+		crlf = git_path_check_crlf(path, check);
+	}
+	return crlf_to_worktree(path, src, sizep, crlf);
 }

From 5e635e396020cc08bc21a3e67c20c5294d6d13fd Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 21 Apr 2007 03:11:10 -0700
Subject: [PATCH 080/109] lockfile: record the primary process.

The usual process flow is that the main process opens and holds the lock to
the index, does its thing, perhaps spawning children during the course,
and then writes the resulting index out by releasing the lock.

However, the lockfile interface uses atexit(3) to clean it up, without
regard to who actually created the lock.  This typically leads to a
confusing behaviour of lock being released too early when the child
exits, and then the parent process when it calls commit_lockfile()
finds that it cannot unlock it.

This fixes the problem by recording who created and holds the lock, and
upon atexit(3) handler, child simply ignores the lockfile the parent
created.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 cache.h    | 1 +
 lockfile.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 8c804cb6ee..faddaf6504 100644
--- a/cache.h
+++ b/cache.h
@@ -209,6 +209,7 @@ extern int refresh_cache(unsigned int flags);
 
 struct lock_file {
 	struct lock_file *next;
+	pid_t owner;
 	char on_list;
 	char filename[PATH_MAX];
 };
diff --git a/lockfile.c b/lockfile.c
index bed6b21daf..23db35aff2 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -8,8 +8,11 @@ static const char *alternate_index_output;
 
 static void remove_lock_file(void)
 {
+	pid_t me = getpid();
+
 	while (lock_file_list) {
-		if (lock_file_list->filename[0])
+		if (lock_file_list->owner == me &&
+		    lock_file_list->filename[0])
 			unlink(lock_file_list->filename);
 		lock_file_list = lock_file_list->next;
 	}
@@ -28,6 +31,7 @@ static int lock_file(struct lock_file *lk, const char *path)
 	sprintf(lk->filename, "%s.lock", path);
 	fd = open(lk->filename, O_RDWR | O_CREAT | O_EXCL, 0666);
 	if (0 <= fd) {
+		lk->owner = getpid();
 		if (!lk->on_list) {
 			lk->next = lock_file_list;
 			lock_file_list = lk;

From fdd3e7d95986ffe4c146952bc91d0b46964aeeb8 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 21 Apr 2007 23:51:27 -0700
Subject: [PATCH 081/109] Update documentation links to point at v1.5.1.2

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/git.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/git.txt b/Documentation/git.txt
index 9defc33273..ca1f78f790 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -37,7 +37,11 @@ Documentation for older releases are available here:
 
 * link:RelNotes-1.5.1.txt[release notes for 1.5.1]
 
-* link:v1.5.0.7/git.html[documentation for release 1.5.0.7]
+* link:v1.5.1.2/git.html[documentation for release 1.5.1.2]
+
+* link:RelNotes-1.5.1.2.txt[release notes for 1.5.1.2]
+
+* link:RelNotes-1.5.1.1.txt[release notes for 1.5.1.1]
 
 * link:RelNotes-1.5.0.7.txt[release notes for 1.5.0.7]
 

From 2d76548b6af66c49acaf0af7819c6dfb8fcf3389 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 22 Apr 2007 00:11:41 -0700
Subject: [PATCH 082/109] Documentation/Makefile: fix section (5) installation

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/Makefile          | 2 +-
 Documentation/gitattributes.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 8d3617db97..3f92783d55 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -24,7 +24,7 @@ SP_ARTICLES = howto/revert-branch-rebase user-manual
 DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
 
 DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
-DOC_MAN5=$(patsubst %.txt,%.5,$(MAN1_TXT))
+DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
 DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
 
 prefix?=$(HOME)
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index ece58abee2..126871756d 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -7,7 +7,7 @@ gitattributes - defining attributes per path
 
 SYNOPSIS
 --------
-.gitattributes
+$GIT_DIR/info/attributes, gitattributes
 
 
 DESCRIPTION

From 7392b03aa45846a4450e5990ce1381ed6fd706e2 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 22 Apr 2007 00:26:56 -0700
Subject: [PATCH 083/109] Update draft release notes for v1.5.2

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/RelNotes-1.5.2.txt | 77 +++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/Documentation/RelNotes-1.5.2.txt b/Documentation/RelNotes-1.5.2.txt
index d93da608c7..abecac6de9 100644
--- a/Documentation/RelNotes-1.5.2.txt
+++ b/Documentation/RelNotes-1.5.2.txt
@@ -4,6 +4,38 @@ GIT v1.5.2 Release Notes (draft)
 Updates since v1.5.1
 --------------------
 
+* Plumbing level subproject support.
+
+  You can include a subdirectory that has an independent git
+  repository in your index and tree objects as a
+  "subproject".  This plumbing (i.e. "core") level subproject
+  support explicitly excludes recursive behaviour.
+
+  The "subproject" entries in the index and trees are
+  incompatible with older versions of git.  Experimenting with
+  the plumbing level support is encouraged, but be warned that
+  unless everybody in your project updates to this release or
+  later, using this feature would make your project
+  inaccessible by people with older versions of git.
+
+* Plumbing level gitattributes support.
+
+  The gitattributes mechanism allows you to add 'attributes' to
+  paths in your project, and affect the way certain git
+  operations work.  Currently you can influence if a path is
+  considered a binary or text (the former would be treated by
+  'git diff' not to produce textual output; the latter can go
+  through the line endings conversion process in repositories
+  with core.autocrlf set), and specify a custom 3-way merge
+  driver.
+
+* The packfile format now optionally suports 64-bit index.
+
+  This release supports the "version 2" format of the .idx
+  file.  This is automatically enabled when a huge packfile
+  needs more than 32-bit to express offsets of objects in the
+  pack
+
 * New commands and options.
 
   - "git bisect start" can optionally take a single bad commit and
@@ -17,6 +49,10 @@ Updates since v1.5.1
   - "git format-patch" learned a new --subject-prefix=<string>
     option, to override the built-in "[PATCH]".
 
+  - "git add -u" is a quick way to do the first stage of "git
+    commit -a" (i.e. update the index to match the working
+    tree); it obviously does not make a commit.
+
 * Updated behavior of existing commands.
 
   - "git diff --stat" shows size of preimage and postimage blobs
@@ -41,6 +77,16 @@ Updates since v1.5.1
   - "git archive" does not insist you to give --format parameter
     anymore; it defaults to "tar".
 
+  - "git cvsserver" can use backends other than sqlite.
+
+  - "gitview" (in contrib/ section) learned to better support
+    "git-annotate".
+
+  - Local "git fetch" from a repository whose object store is
+    one of the alternates (e.g. fetching from the origin in a
+    repository created with "git clone -l -s") avoids
+    downloading objects unnecessary.
+
 * Builds
 
   - git-p4import has never been installed; now there is an
@@ -65,34 +111,11 @@ Updates since v1.5.1
 Fixes since v1.5.1
 ------------------
 
-The following are all in v1.5.1.x series, unless otherwise noted.
-
-* Documentation updates
-
-  - Various documentation updates from J. Bruce Fields, Frank
-    Lichtenheld, Alex Riesen and others.  Andrew Ruder started a
-    war on undocumented options.
+All of the fixes in v1.5.1 maintenance series are included in
+this release, unless otherwise noted.
 
 * Bugfixes
 
-  - "git diff a/ b/" incorrectly fell in "diff between two
-    filesystem objects" codepath, when the user most likely
-    wanted to limit the extent of output to two tracked
-    directories.
-
-  - git-quiltimport had the same bug as we fixed for
-    git-applymbox in v1.5.1.1 -- it gave an alarming "did not
-    have any patch" message (but did not actually fail and was
-    harmless).
-
-  - various git-svn fixes.
-
-  - Sample update hook incorrectly always refused requests to
-    delete branches through push.
-
-  - git-blame on a very long working tree path had buffer
-    overrun problem.
-
   - Switching branches with "git checkout" refused to work when
     a path changes from a file to a directory between the
     current branch and the new branch, in order not to lose
@@ -110,10 +133,12 @@ The following are all in v1.5.1.x series, unless otherwise noted.
     will not be backported to 1.5.1.x series, as it is rather an
     intrusive change.
 
+* Documentation updates
+
 * Performance Tweaks
 
 --
 exec >/var/tmp/1
-O=v1.5.1.1-158-g86da9de
+O=v1.5.1.2-242-g2d76548
 echo O=`git describe refs/heads/master`
 git shortlog --no-merges $O..refs/heads/master ^refs/heads/maint

From 2b6854c863ae83dd8b4766a159a45a02e883a41f Mon Sep 17 00:00:00 2001
From: "Shawn O. Pearce" <spearce@spearce.org>
Date: Sat, 21 Apr 2007 21:14:39 -0400
Subject: [PATCH 084/109] Cleanup variables in cat-file

I want to add new command line options to cat-file, but
to do that we need to change how we handle argv[] first.
This is a simple cleanup that assigns names to the two
arguments we currently care about.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-cat-file.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/builtin-cat-file.c b/builtin-cat-file.c
index d61d3d5b74..f132d583d3 100644
--- a/builtin-cat-file.c
+++ b/builtin-cat-file.c
@@ -83,17 +83,21 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	void *buf;
 	unsigned long size;
 	int opt;
+	const char *exp_type, *obj_name;
 
 	git_config(git_default_config);
 	if (argc != 3)
 		usage("git-cat-file [-t|-s|-e|-p|<type>] <sha1>");
-	if (get_sha1(argv[2], sha1))
-		die("Not a valid object name %s", argv[2]);
+	exp_type = argv[1];
+	obj_name = argv[2];
+
+	if (get_sha1(obj_name, sha1))
+		die("Not a valid object name %s", obj_name);
 
 	opt = 0;
-	if ( argv[1][0] == '-' ) {
-		opt = argv[1][1];
-		if ( !opt || argv[1][2] )
+	if ( exp_type[0] == '-' ) {
+		opt = exp_type[1];
+		if ( !opt || exp_type[2] )
 			opt = -1; /* Not a single character option */
 	}
 
@@ -121,15 +125,17 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	case 'p':
 		type = sha1_object_info(sha1, NULL);
 		if (type < 0)
-			die("Not a valid object name %s", argv[2]);
+			die("Not a valid object name %s", obj_name);
 
 		/* custom pretty-print here */
-		if (type == OBJ_TREE)
-			return cmd_ls_tree(2, argv + 1, NULL);
+		if (type == OBJ_TREE) {
+			const char *ls_args[3] = {"ls-tree", obj_name, NULL};
+			return cmd_ls_tree(2, ls_args, NULL);
+		}
 
 		buf = read_sha1_file(sha1, &type, &size);
 		if (!buf)
-			die("Cannot read object %s", argv[2]);
+			die("Cannot read object %s", obj_name);
 		if (type == OBJ_TAG) {
 			pprint_tag(sha1, buf, size);
 			return 0;
@@ -138,15 +144,15 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		/* otherwise just spit out the data */
 		break;
 	case 0:
-		buf = read_object_with_reference(sha1, argv[1], &size, NULL);
+		buf = read_object_with_reference(sha1, exp_type, &size, NULL);
 		break;
 
 	default:
-		die("git-cat-file: unknown option: %s\n", argv[1]);
+		die("git-cat-file: unknown option: %s\n", exp_type);
 	}
 
 	if (!buf)
-		die("git-cat-file %s: bad file", argv[2]);
+		die("git-cat-file %s: bad file", obj_name);
 
 	write_or_die(1, buf, size);
 	return 0;

From 67e22ed58f776372de5ed0a5e6fb329bc24b52b2 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Sun, 22 Apr 2007 16:12:22 +0200
Subject: [PATCH 085/109] Fix a typo in crlf conversion code

Also, noticed by valgrind: the code caused a read out-of-bounds.
Some comments updated as well (they still reflected old calling
conventions).

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 convert.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/convert.c b/convert.c
index 37239ace83..ad106ef35f 100644
--- a/convert.c
+++ b/convert.c
@@ -115,8 +115,8 @@ static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep
 	}
 
 	/*
-	 * Ok, allocate a new buffer, fill it in, and return true
-	 * to let the caller know that we switched buffers on it.
+	 * Ok, allocate a new buffer, fill it in, and return it
+	 * to let the caller know that we switched buffers.
 	 */
 	nsize = size - stats.crlf;
 	buffer = xmalloc(nsize);
@@ -137,7 +137,7 @@ static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep
 	} else {
 		do {
 			unsigned char c = *src++;
-			if (! (c == '\r' && (1 < size && *buffer == '\n')))
+			if (! (c == '\r' && (1 < size && *src == '\n')))
 				*dst++ = c;
 		} while (--size);
 	}
@@ -180,8 +180,8 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long *
 	}
 
 	/*
-	 * Ok, allocate a new buffer, fill it in, and return true
-	 * to let the caller know that we switched buffers on it.
+	 * Ok, allocate a new buffer, fill it in, and return it
+	 * to let the caller know that we switched buffers.
 	 */
 	nsize = size + stats.lf - stats.crlf;
 	buffer = xmalloc(nsize);

From 4629795816bf3f58a02872ec389a92de7efd38c4 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Sun, 22 Apr 2007 16:11:54 +0200
Subject: [PATCH 086/109] Fix crash in t0020 (crlf conversion)

Reallocated wrong size.
Noticed on Ubuntu 7.04 probably because it has some malloc diagnostics in libc:
"git-read-tree --reset -u HEAD" aborted in the test. Valgrind sped up the
debugging greatly: took me 10 minutes.

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 attr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/attr.c b/attr.c
index 285e689e5a..a0712543b2 100644
--- a/attr.c
+++ b/attr.c
@@ -300,7 +300,8 @@ static struct attr_stack *read_attr_from_array(const char **list)
 		a = parse_attr_line(line, "[builtin]", ++lineno, 1);
 		if (!a)
 			continue;
-		res->attrs = xrealloc(res->attrs, res->num_matches + 1);
+		res->attrs = xrealloc(res->attrs,
+			sizeof(struct match_attr *) * (res->num_matches + 1));
 		res->attrs[res->num_matches++] = a;
 	}
 	return res;
@@ -324,7 +325,8 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
 		a = parse_attr_line(buf, path, ++lineno, macro_ok);
 		if (!a)
 			continue;
-		res->attrs = xrealloc(res->attrs, res->num_matches + 1);
+		res->attrs = xrealloc(res->attrs,
+			sizeof(struct match_attr *) * (res->num_matches + 1));
 		res->attrs[res->num_matches++] = a;
 	}
 	fclose(fp);

From aef5aedd8536fddbbd2e19245dfab201d05afb25 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 22 Apr 2007 11:49:35 -0700
Subject: [PATCH 087/109] pack-objects: quickfix for permission modes.

mkstemp() often creates the file in 0600 which means the
resulting packfile is not readable by anybody other than the
repository owner.  Force 0644 for now, even though this is not
strictly correct.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index c72e07a2bb..840d4d697a 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1783,11 +1783,17 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		write_index_file(last_obj_offset, object_list_sha1);
 		snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
 			 base_name, sha1_to_hex(object_list_sha1));
+		if (chmod(pack_tmp_name, 0644))
+			die("unable to make temporary pack file readable: %s",
+			    strerror(errno));
 		if (rename(pack_tmp_name, tmpname))
 			die("unable to rename temporary pack file: %s",
 			    strerror(errno));
 		snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
 			 base_name, sha1_to_hex(object_list_sha1));
+		if (chmod(idx_tmp_name, 0644))
+			die("unable to make temporary index file readable: %s",
+			    strerror(errno));
 		if (rename(idx_tmp_name, tmpname))
 			die("unable to rename temporary index file: %s",
 			    strerror(errno));

From b6b32ccb927de0c631b1aaf27b1664bd4eb300c7 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 22 Apr 2007 12:28:34 -0700
Subject: [PATCH 088/109] Fix 'quickfix' on pack-objects.

The earlier quickfix forced world-readable permission bits.  This
updates it to honor umask and core.sharedrepository settings.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 840d4d697a..34350bf322 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1612,6 +1612,13 @@ static void get_object_list(int ac, const char **av)
 	traverse_commit_list(&revs, show_commit, show_object);
 }
 
+static int adjust_perm(const char *path, mode_t mode)
+{
+	if (chmod(path, mode))
+		return -1;
+	return adjust_shared_perm(path);
+}
+
 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 {
 	int depth = 10;
@@ -1780,10 +1787,15 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	last_obj_offset = write_pack_file();
 	if (!pack_to_stdout) {
 		unsigned char object_list_sha1[20];
+		mode_t mode = umask(0);
+
+		umask(mode);
+		mode = 0666 & ~mode;
+
 		write_index_file(last_obj_offset, object_list_sha1);
 		snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
 			 base_name, sha1_to_hex(object_list_sha1));
-		if (chmod(pack_tmp_name, 0644))
+		if (adjust_perm(pack_tmp_name, mode))
 			die("unable to make temporary pack file readable: %s",
 			    strerror(errno));
 		if (rename(pack_tmp_name, tmpname))
@@ -1791,7 +1803,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			    strerror(errno));
 		snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
 			 base_name, sha1_to_hex(object_list_sha1));
-		if (chmod(idx_tmp_name, 0644))
+		if (adjust_perm(idx_tmp_name, mode))
 			die("unable to make temporary index file readable: %s",
 			    strerror(errno));
 		if (rename(idx_tmp_name, tmpname))

From a5878961b14c1b4f6d1d08c0d3a2f5234659fb8a Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 22 Apr 2007 18:59:34 -0700
Subject: [PATCH 089/109] Update tests not to assume that generated packfiles
 are writable.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t5300-pack-object.sh | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index 083095f7f3..f336769836 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -152,7 +152,7 @@ test_expect_success \
     'use packed deltified (REF_DELTA) objects' \
     'GIT_OBJECT_DIRECTORY=.git2/objects &&
      export GIT_OBJECT_DIRECTORY &&
-     rm .git2/objects/pack/test-* &&
+     rm -f .git2/objects/pack/test-* &&
      cp test-2-${packname_2}.pack test-2-${packname_2}.idx .git2/objects/pack && {
 	 git-diff-tree --root -p $commit &&
 	 while read object
@@ -167,7 +167,7 @@ test_expect_success \
     'use packed deltified (OFS_DELTA) objects' \
     'GIT_OBJECT_DIRECTORY=.git2/objects &&
      export GIT_OBJECT_DIRECTORY &&
-     rm .git2/objects/pack/test-* &&
+     rm -f .git2/objects/pack/test-* &&
      cp test-3-${packname_3}.pack test-3-${packname_3}.idx .git2/objects/pack && {
 	 git-diff-tree --root -p $commit &&
 	 while read object
@@ -188,15 +188,15 @@ test_expect_success \
 
 test_expect_success \
     'corrupt a pack and see if verify catches' \
-    'cp test-1-${packname_1}.idx test-3.idx &&
-     cp test-2-${packname_2}.pack test-3.pack &&
+    'cat test-1-${packname_1}.idx >test-3.idx &&
+     cat test-2-${packname_2}.pack >test-3.pack &&
      if git-verify-pack test-3.idx
      then false
      else :;
      fi &&
 
      : PACK_SIGNATURE &&
-     cp test-1-${packname_1}.pack test-3.pack &&
+     cat test-1-${packname_1}.pack >test-3.pack &&
      dd if=/dev/zero of=test-3.pack count=1 bs=1 conv=notrunc seek=2 &&
      if git-verify-pack test-3.idx
      then false
@@ -204,7 +204,7 @@ test_expect_success \
      fi &&
 
      : PACK_VERSION &&
-     cp test-1-${packname_1}.pack test-3.pack &&
+     cat test-1-${packname_1}.pack >test-3.pack &&
      dd if=/dev/zero of=test-3.pack count=1 bs=1 conv=notrunc seek=7 &&
      if git-verify-pack test-3.idx
      then false
@@ -212,7 +212,7 @@ test_expect_success \
      fi &&
 
      : TYPE/SIZE byte of the first packed object data &&
-     cp test-1-${packname_1}.pack test-3.pack &&
+     cat test-1-${packname_1}.pack >test-3.pack &&
      dd if=/dev/zero of=test-3.pack count=1 bs=1 conv=notrunc seek=12 &&
      if git-verify-pack test-3.idx
      then false
@@ -222,7 +222,7 @@ test_expect_success \
      : sum of the index file itself &&
      l=`wc -c <test-3.idx` &&
      l=`expr $l - 20` &&
-     cp test-1-${packname_1}.pack test-3.pack &&
+     cat test-1-${packname_1}.pack >test-3.pack &&
      dd if=/dev/zero of=test-3.idx count=20 bs=1 conv=notrunc seek=$l &&
      if git-verify-pack test-3.pack
      then false
@@ -233,21 +233,21 @@ test_expect_success \
 
 test_expect_success \
     'build pack index for an existing pack' \
-    'cp test-1-${packname_1}.pack test-3.pack &&
+    'cat test-1-${packname_1}.pack >test-3.pack &&
      git-index-pack -o tmp.idx test-3.pack &&
      cmp tmp.idx test-1-${packname_1}.idx &&
 
      git-index-pack test-3.pack &&
      cmp test-3.idx test-1-${packname_1}.idx &&
 
-     cp test-2-${packname_2}.pack test-3.pack &&
+     cat test-2-${packname_2}.pack >test-3.pack &&
      git-index-pack -o tmp.idx test-2-${packname_2}.pack &&
      cmp tmp.idx test-2-${packname_2}.idx &&
 
      git-index-pack test-3.pack &&
      cmp test-3.idx test-2-${packname_2}.idx &&
 
-     cp test-3-${packname_3}.pack test-3.pack &&
+     cat test-3-${packname_3}.pack >test-3.pack &&
      git-index-pack -o tmp.idx test-3-${packname_3}.pack &&
      cmp tmp.idx test-3-${packname_3}.idx &&
 

From d83c9af5c6a437ddaa9dd2747e83aee642501a44 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 22 Apr 2007 19:00:16 -0700
Subject: [PATCH 090/109] pack-objects: make generated packfile read-only

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 34350bf322..50fc37fa0f 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -1790,7 +1790,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		mode_t mode = umask(0);
 
 		umask(mode);
-		mode = 0666 & ~mode;
+		mode = 0444 & ~mode;
 
 		write_index_file(last_obj_offset, object_list_sha1);
 		snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",

From f1af60bdba465779df92090ed370988f202ff043 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 22 Apr 2007 17:52:55 -0700
Subject: [PATCH 091/109] Support 'diff=pgm' attribute

This enhances the attributes mechanism so that external programs
meant for the existing GIT_EXTERNAL_DIFF interface can be specified
per path.

To configure such a custom diff driver, first define a custom
diff driver in the configuration:

	[diff "my-c-diff"]
		command = <<your command string comes here>>

Then mark the paths that you want to use this custom driver
using the attribute mechanism.

	*.c	diff=my-c-diff

The intent of this separation is that the attribute mechanism is
used for specifying the type of the contents, while the
configuration mechanism is used to define what needs to be done
to that type of the contents, which would be specific to both
platform and personal taste.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-diff.c           |  1 +
 combine-diff.c           |  1 +
 diff.c                   | 87 ++++++++++++++++++++++++++++++++---
 diff.h                   |  1 +
 t/t4020-diff-external.sh | 97 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 182 insertions(+), 5 deletions(-)
 create mode 100755 t/t4020-diff-external.sh

diff --git a/builtin-diff.c b/builtin-diff.c
index 21d13f0b30..2ae60097b8 100644
--- a/builtin-diff.c
+++ b/builtin-diff.c
@@ -225,6 +225,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
 		if (diff_setup_done(&rev.diffopt) < 0)
 			die("diff_setup_done failed");
 	}
+	rev.diffopt.allow_external = 1;
 
 	/* Do we have --cached and not have a pending object, then
 	 * default to HEAD by hand.  Eek.
diff --git a/combine-diff.c b/combine-diff.c
index 3a9b32f6b8..cff9c5dc42 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -943,6 +943,7 @@ void diff_tree_combined(const unsigned char *sha1,
 	diffopts = *opt;
 	diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
 	diffopts.recursive = 1;
+	diffopts.allow_external = 0;
 
 	show_log_first = !!rev->loginfo && !rev->no_commit_id;
 	needsep = 0;
diff --git a/diff.c b/diff.c
index f516664968..9dfded7664 100644
--- a/diff.c
+++ b/diff.c
@@ -52,6 +52,49 @@ static int parse_diff_color_slot(const char *var, int ofs)
 	die("bad config variable '%s'", var);
 }
 
+static struct ll_diff_driver {
+	const char *name;
+	struct ll_diff_driver *next;
+	char *cmd;
+} *user_diff, **user_diff_tail;
+
+/*
+ * Currently there is only "diff.<drivername>.command" variable;
+ * because there are "diff.color.<slot>" variables, we are parsing
+ * this in a bit convoluted way to allow low level diff driver
+ * called "color".
+ */
+static int parse_lldiff_command(const char *var, const char *ep, const char *value)
+{
+	const char *name;
+	int namelen;
+	struct ll_diff_driver *drv;
+
+	name = var + 5;
+	namelen = ep - name;
+	for (drv = user_diff; drv; drv = drv->next)
+		if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
+			break;
+	if (!drv) {
+		char *namebuf;
+		drv = xcalloc(1, sizeof(struct ll_diff_driver));
+		namebuf = xmalloc(namelen + 1);
+		memcpy(namebuf, name, namelen);
+		namebuf[namelen] = 0;
+		drv->name = namebuf;
+		drv->next = NULL;
+		if (!user_diff_tail)
+			user_diff_tail = &user_diff;
+		*user_diff_tail = drv;
+		user_diff_tail = &(drv->next);
+	}
+
+	if (!value)
+		return error("%s: lacks value", var);
+	drv->cmd = strdup(value);
+	return 0;
+}
+
 /*
  * These are to give UI layer defaults.
  * The core-level commands such as git-diff-files should
@@ -78,11 +121,18 @@ int git_diff_ui_config(const char *var, const char *value)
 			diff_detect_rename_default = DIFF_DETECT_RENAME;
 		return 0;
 	}
+	if (!prefixcmp(var, "diff.")) {
+		const char *ep = strrchr(var, '.');
+
+		if (ep != var + 4 && !strcmp(ep, ".command"))
+			return parse_lldiff_command(var, ep, value);
+	}
 	if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) {
 		int slot = parse_diff_color_slot(var, 11);
 		color_parse(value, var, diff_colors[slot]);
 		return 0;
 	}
+
 	return git_default_config(var, value);
 }
 
@@ -1074,11 +1124,6 @@ static int file_is_binary(struct diff_filespec *one)
 			return 0;
 		else if (ATTR_FALSE(value))
 			return 1;
-		else if (ATTR_UNSET(value))
-			;
-		else
-			die("unknown value %s given to 'diff' attribute",
-			    value);
 	}
 
 	if (!one->data) {
@@ -1752,6 +1797,30 @@ static void run_external_diff(const char *pgm,
 	}
 }
 
+static const char *external_diff_attr(const char *name)
+{
+	struct git_attr_check attr_diff_check;
+
+	setup_diff_attr_check(&attr_diff_check);
+	if (!git_checkattr(name, 1, &attr_diff_check)) {
+		const char *value = attr_diff_check.value;
+		if (!ATTR_TRUE(value) &&
+		    !ATTR_FALSE(value) &&
+		    !ATTR_UNSET(value)) {
+			struct ll_diff_driver *drv;
+
+			if (!user_diff_tail) {
+				user_diff_tail = &user_diff;
+				git_config(git_diff_ui_config);
+			}
+			for (drv = user_diff; drv; drv = drv->next)
+				if (!strcmp(drv->name, value))
+					return drv->cmd;
+		}
+	}
+	return NULL;
+}
+
 static void run_diff_cmd(const char *pgm,
 			 const char *name,
 			 const char *other,
@@ -1761,6 +1830,14 @@ static void run_diff_cmd(const char *pgm,
 			 struct diff_options *o,
 			 int complete_rewrite)
 {
+	if (!o->allow_external)
+		pgm = NULL;
+	else {
+		const char *cmd = external_diff_attr(name);
+		if (cmd)
+			pgm = cmd;
+	}
+
 	if (pgm) {
 		run_external_diff(pgm, name, other, one, two, xfrm_msg,
 				  complete_rewrite);
diff --git a/diff.h b/diff.h
index a0d2ce1399..63738c1dd4 100644
--- a/diff.h
+++ b/diff.h
@@ -59,6 +59,7 @@ struct diff_options {
 		 color_diff_words:1,
 		 has_changes:1,
 		 quiet:1,
+		 allow_external:1,
 		 exit_with_status:1;
 	int context;
 	int break_opt;
diff --git a/t/t4020-diff-external.sh b/t/t4020-diff-external.sh
new file mode 100755
index 0000000000..f0045cd788
--- /dev/null
+++ b/t/t4020-diff-external.sh
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+test_description='external diff interface test'
+
+. ./test-lib.sh
+
+_z40=0000000000000000000000000000000000000000
+
+test_expect_success setup '
+
+	test_tick &&
+	echo initial >file &&
+	git add file &&
+	git commit -m initial &&
+
+	test_tick &&
+	echo second >file &&
+	git add file &&
+	git commit -m second &&
+
+	test_tick &&
+	echo third >file
+'
+
+test_expect_success 'GIT_EXTERNAL_DIFF environment' '
+
+	GIT_EXTERNAL_DIFF=echo git diff | {
+		read path oldfile oldhex oldmode newfile newhex newmode &&
+		test "z$path" = zfile &&
+		test "z$oldmode" = z100644 &&
+		test "z$newhex" = "z$_z40" &&
+		test "z$newmode" = z100644 &&
+		oh=$(git rev-parse --verify HEAD:file) &&
+		test "z$oh" = "z$oldhex"
+	}
+
+'
+
+test_expect_success 'GIT_EXTERNAL_DIFF environment should apply only to diff' '
+
+	GIT_EXTERNAL_DIFF=echo git log -p -1 HEAD |
+	grep "^diff --git a/file b/file"
+
+'
+
+test_expect_success 'diff attribute' '
+
+	git config diff.parrot.command echo &&
+
+	echo >.gitattributes "file diff=parrot" &&
+
+	git diff | {
+		read path oldfile oldhex oldmode newfile newhex newmode &&
+		test "z$path" = zfile &&
+		test "z$oldmode" = z100644 &&
+		test "z$newhex" = "z$_z40" &&
+		test "z$newmode" = z100644 &&
+		oh=$(git rev-parse --verify HEAD:file) &&
+		test "z$oh" = "z$oldhex"
+	}
+
+'
+
+test_expect_success 'diff attribute should apply only to diff' '
+
+	git log -p -1 HEAD |
+	grep "^diff --git a/file b/file"
+
+'
+
+test_expect_success 'diff attribute' '
+
+	git config --unset diff.parrot.command &&
+	git config diff.color.command echo &&
+
+	echo >.gitattributes "file diff=color" &&
+
+	git diff | {
+		read path oldfile oldhex oldmode newfile newhex newmode &&
+		test "z$path" = zfile &&
+		test "z$oldmode" = z100644 &&
+		test "z$newhex" = "z$_z40" &&
+		test "z$newmode" = z100644 &&
+		oh=$(git rev-parse --verify HEAD:file) &&
+		test "z$oh" = "z$oldhex"
+	}
+
+'
+
+test_expect_success 'diff attribute should apply only to diff' '
+
+	git log -p -1 HEAD |
+	grep "^diff --git a/file b/file"
+
+'
+
+test_done

From 96a02f8f6d2192d3686cd1c719044082c89e8391 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 18 Apr 2007 14:27:45 -0400
Subject: [PATCH 092/109] common progress display support

Instead of having this code duplicated in multiple places, let's have
a common interface for progress display.  If someday someone wishes to
display a cheezy progress bar instead then only one file will have to
be changed.

Note: I left merge-recursive.c out since it has a strange notion of
progress as it apparently increases the expected total number as it goes.
Someone with more intimate knowledge of what that is supposed to mean
might look at converting it to the common progress interface.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Makefile                 |  4 +-
 builtin-pack-objects.c   | 83 ++++++++++------------------------------
 builtin-unpack-objects.c | 38 ++++++++----------
 index-pack.c             | 78 +++++++++++--------------------------
 progress.c               | 68 ++++++++++++++++++++++++++++++++
 progress.h               | 14 +++++++
 unpack-trees.c           | 54 +++++---------------------
 7 files changed, 154 insertions(+), 185 deletions(-)
 create mode 100644 progress.c
 create mode 100644 progress.h

diff --git a/Makefile b/Makefile
index c9c2a5fb66..65bd2dbf9d 100644
--- a/Makefile
+++ b/Makefile
@@ -283,7 +283,7 @@ LIB_H = \
 	diff.h object.h pack.h pkt-line.h quote.h refs.h list-objects.h sideband.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
 	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
-	utf8.h reflog-walk.h patch-ids.h attr.h decorate.h
+	utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h
 
 DIFF_OBJS = \
 	diff.o diff-lib.o diffcore-break.o diffcore-order.o \
@@ -305,7 +305,7 @@ LIB_OBJS = \
 	write_or_die.o trace.o list-objects.o grep.o match-trees.o \
 	alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
 	color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
-	convert.o attr.o decorate.o
+	convert.o attr.o decorate.o progress.o
 
 BUILTIN_OBJS = \
 	builtin-add.o \
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 50fc37fa0f..d7d9996f5e 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -12,6 +12,7 @@
 #include "diff.h"
 #include "revision.h"
 #include "list-objects.h"
+#include "progress.h"
 
 static const char pack_usage[] = "\
 git-pack-objects [{ -q | --progress | --all-progress }] \n\
@@ -62,10 +63,10 @@ static const char *pack_tmp_name, *idx_tmp_name;
 static char tmpname[PATH_MAX];
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
-static volatile sig_atomic_t progress_update;
 static int window = 10;
 static int pack_to_stdout;
 static int num_preferred_base;
+static struct progress progress_state;
 
 /*
  * The object names in objects array are hashed with this hashtable,
@@ -564,7 +565,6 @@ static off_t write_pack_file(void)
 	struct sha1file *f;
 	off_t offset, last_obj_offset = 0;
 	struct pack_header hdr;
-	unsigned last_percent = 999;
 	int do_progress = progress;
 
 	if (pack_to_stdout) {
@@ -580,8 +580,10 @@ static off_t write_pack_file(void)
 		f = sha1fd(fd, pack_tmp_name);
 	}
 
-	if (do_progress)
+	if (do_progress) {
 		fprintf(stderr, "Writing %u objects.\n", nr_result);
+		start_progress(&progress_state, "", nr_result);
+	}
 
 	hdr.hdr_signature = htonl(PACK_SIGNATURE);
 	hdr.hdr_version = htonl(PACK_VERSION);
@@ -593,18 +595,11 @@ static off_t write_pack_file(void)
 	for (i = 0; i < nr_objects; i++) {
 		last_obj_offset = offset;
 		offset = write_one(f, objects + i, offset);
-		if (do_progress) {
-			unsigned percent = written * 100 / nr_result;
-			if (progress_update || percent != last_percent) {
-				fprintf(stderr, "%4u%% (%u/%u) done\r",
-					percent, written, nr_result);
-				progress_update = 0;
-				last_percent = percent;
-			}
-		}
+		if (do_progress)
+			display_progress(&progress_state, written);
 	}
 	if (do_progress)
-		fputc('\n', stderr);
+		stop_progress(&progress_state);
  done:
 	if (written != nr_result)
 		die("wrote %u objects while expecting %u", written, nr_result);
@@ -865,10 +860,8 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
 	else
 		object_ix[-1 - ix] = nr_objects;
 
-	if (progress_update) {
-		fprintf(stderr, "Counting objects...%u\r", nr_objects);
-		progress_update = 0;
-	}
+	if (progress)
+		display_progress(&progress_state, nr_objects);
 
 	return 1;
 }
@@ -1390,15 +1383,16 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 	uint32_t i = nr_objects, idx = 0, processed = 0;
 	unsigned int array_size = window * sizeof(struct unpacked);
 	struct unpacked *array;
-	unsigned last_percent = 999;
 	int max_depth;
 
 	if (!nr_objects)
 		return;
 	array = xmalloc(array_size);
 	memset(array, 0, array_size);
-	if (progress)
+	if (progress) {
 		fprintf(stderr, "Deltifying %u objects.\n", nr_result);
+		start_progress(&progress_state, "", nr_result);
+	}
 
 	do {
 		struct object_entry *entry = list[--i];
@@ -1408,15 +1402,8 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		if (!entry->preferred_base)
 			processed++;
 
-		if (progress) {
-			unsigned percent = processed * 100 / nr_result;
-			if (percent != last_percent || progress_update) {
-				fprintf(stderr, "%4u%% (%u/%u) done\r",
-					percent, processed, nr_result);
-				progress_update = 0;
-				last_percent = percent;
-			}
-		}
+		if (progress)
+			display_progress(&progress_state, processed);
 
 		if (entry->delta)
 			/* This happens if we decided to reuse existing
@@ -1471,7 +1458,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 	} while (i > 0);
 
 	if (progress)
-		fputc('\n', stderr);
+		stop_progress(&progress_state);
 
 	for (i = 0; i < window; ++i) {
 		free_delta_index(array[i].index);
@@ -1498,28 +1485,6 @@ static void prepare_pack(int window, int depth)
 	free(delta_list);
 }
 
-static void progress_interval(int signum)
-{
-	progress_update = 1;
-}
-
-static void setup_progress_signal(void)
-{
-	struct sigaction sa;
-	struct itimerval v;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = progress_interval;
-	sigemptyset(&sa.sa_mask);
-	sa.sa_flags = SA_RESTART;
-	sigaction(SIGALRM, &sa, NULL);
-
-	v.it_interval.tv_sec = 1;
-	v.it_interval.tv_usec = 0;
-	v.it_value = v.it_interval;
-	setitimer(ITIMER_REAL, &v, NULL);
-}
-
 static int git_pack_config(const char *k, const char *v)
 {
 	if(!strcmp(k, "pack.window")) {
@@ -1759,31 +1724,25 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 
 	if (progress) {
 		fprintf(stderr, "Generating pack...\n");
-		setup_progress_signal();
+		start_progress(&progress_state, "Counting objects: ", 0);
 	}
-
 	if (!use_internal_rev_list)
 		read_object_list_from_stdin();
 	else {
 		rp_av[rp_ac] = NULL;
 		get_object_list(rp_ac, rp_av);
 	}
-
-	if (progress)
+	if (progress) {
+		stop_progress(&progress_state);
 		fprintf(stderr, "Done counting %u objects.\n", nr_objects);
+	}
+
 	if (non_empty && !nr_result)
 		return 0;
 	if (progress && (nr_objects != nr_result))
 		fprintf(stderr, "Result has %u objects.\n", nr_result);
 	if (nr_result)
 		prepare_pack(window, depth);
-	if (progress == 1 && pack_to_stdout) {
-		/* the other end usually displays progress itself */
-		struct itimerval v = {{0,},};
-		setitimer(ITIMER_REAL, &v, NULL);
-		signal(SIGALRM, SIG_IGN );
-		progress_update = 0;
-	}
 	last_obj_offset = write_pack_file();
 	if (!pack_to_stdout) {
 		unsigned char object_list_sha1[20];
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index f821906460..c370c7f834 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -7,6 +7,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "tree.h"
+#include "progress.h"
 
 static int dry_run, quiet, recover, has_errors;
 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
@@ -264,7 +265,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	free(base);
 }
 
-static void unpack_one(unsigned nr, unsigned total)
+static void unpack_one(unsigned nr)
 {
 	unsigned shift;
 	unsigned char *pack, c;
@@ -286,20 +287,7 @@ static void unpack_one(unsigned nr, unsigned total)
 		size += (c & 0x7f) << shift;
 		shift += 7;
 	}
-	if (!quiet) {
-		static unsigned long last_sec;
-		static unsigned last_percent;
-		struct timeval now;
-		unsigned percentage = ((nr+1) * 100) / total;
 
-		gettimeofday(&now, NULL);
-		if (percentage != last_percent || now.tv_sec != last_sec) {
-			last_sec = now.tv_sec;
-			last_percent = percentage;
-			fprintf(stderr, "%4u%% (%u/%u) done\r",
-					percentage, (nr+1), total);
-		}
-	}
 	switch (type) {
 	case OBJ_COMMIT:
 	case OBJ_TREE:
@@ -323,6 +311,7 @@ static void unpack_one(unsigned nr, unsigned total)
 static void unpack_all(void)
 {
 	int i;
+	struct progress progress;
 	struct pack_header *hdr = fill(sizeof(struct pack_header));
 	unsigned nr_objects = ntohl(hdr->hdr_entries);
 
@@ -330,12 +319,21 @@ static void unpack_all(void)
 		die("bad pack file");
 	if (!pack_version_ok(hdr->hdr_version))
 		die("unknown pack file version %d", ntohl(hdr->hdr_version));
-	fprintf(stderr, "Unpacking %d objects\n", nr_objects);
-
-	obj_list = xmalloc(nr_objects * sizeof(*obj_list));
 	use(sizeof(struct pack_header));
-	for (i = 0; i < nr_objects; i++)
-		unpack_one(i, nr_objects);
+
+	if (!quiet) {
+		fprintf(stderr, "Unpacking %d objects\n", nr_objects);
+		start_progress(&progress, "", nr_objects);
+	}
+	obj_list = xmalloc(nr_objects * sizeof(*obj_list));
+	for (i = 0; i < nr_objects; i++) {
+		unpack_one(i);
+		if (!quiet)
+			display_progress(&progress, i + 1);
+	}
+	if (!quiet)
+		stop_progress(&progress);
+
 	if (delta_list)
 		die("unresolved deltas left after unpacking");
 }
@@ -404,7 +402,5 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 	}
 
 	/* All done */
-	if (!quiet)
-		fprintf(stderr, "\n");
 	return has_errors;
 }
diff --git a/index-pack.c b/index-pack.c
index 7aad261d48..a49d03d6de 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -6,6 +6,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "tree.h"
+#include "progress.h"
 
 static const char index_pack_usage[] =
 "git-index-pack [-v] [-o <index-file>] [{ ---keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
@@ -47,40 +48,7 @@ static int nr_resolved_deltas;
 static int from_stdin;
 static int verbose;
 
-static volatile sig_atomic_t progress_update;
-
-static void progress_interval(int signum)
-{
-	progress_update = 1;
-}
-
-static void setup_progress_signal(void)
-{
-	struct sigaction sa;
-	struct itimerval v;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = progress_interval;
-	sigemptyset(&sa.sa_mask);
-	sa.sa_flags = SA_RESTART;
-	sigaction(SIGALRM, &sa, NULL);
-
-	v.it_interval.tv_sec = 1;
-	v.it_interval.tv_usec = 0;
-	v.it_value = v.it_interval;
-	setitimer(ITIMER_REAL, &v, NULL);
-
-}
-
-static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc)
-{
-	unsigned percent = n * 100 / total;
-	if (percent != last_pc || progress_update) {
-		fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total);
-		progress_update = 0;
-	}
-	return percent;
-}
+static struct progress progress;
 
 /* We always read in 4kB chunks. */
 static unsigned char input_buffer[4096];
@@ -428,7 +396,7 @@ static int compare_delta_entry(const void *a, const void *b)
 /* Parse all objects and return the pack content SHA1 hash */
 static void parse_pack_objects(unsigned char *sha1)
 {
-	int i, percent = -1;
+	int i;
 	struct delta_entry *delta = deltas;
 	void *data;
 	struct stat st;
@@ -439,8 +407,10 @@ static void parse_pack_objects(unsigned char *sha1)
 	 * - calculate SHA1 of all non-delta objects;
 	 * - remember base (SHA1 or offset) for all deltas.
 	 */
-	if (verbose)
+	if (verbose) {
 		fprintf(stderr, "Indexing %d objects.\n", nr_objects);
+		start_progress(&progress, "", nr_objects);
+	}
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
 		data = unpack_raw_entry(obj, &delta->base);
@@ -453,11 +423,11 @@ static void parse_pack_objects(unsigned char *sha1)
 			sha1_object(data, obj->size, obj->type, obj->sha1);
 		free(data);
 		if (verbose)
-			percent = display_progress(i+1, nr_objects, percent);
+			display_progress(&progress, i+1);
 	}
 	objects[i].offset = consumed_bytes;
 	if (verbose)
-		fputc('\n', stderr);
+		stop_progress(&progress);
 
 	/* Check pack integrity */
 	flush();
@@ -488,8 +458,10 @@ static void parse_pack_objects(unsigned char *sha1)
 	 *   recursively checking if the resulting object is used as a base
 	 *   for some more deltas.
 	 */
-	if (verbose)
+	if (verbose) {
 		fprintf(stderr, "Resolving %d deltas.\n", nr_deltas);
+		start_progress(&progress, "", nr_deltas);
+	}
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
 		union delta_base base;
@@ -521,11 +493,8 @@ static void parse_pack_objects(unsigned char *sha1)
 			}
 		free(data);
 		if (verbose)
-			percent = display_progress(nr_resolved_deltas,
-						   nr_deltas, percent);
+			display_progress(&progress, nr_resolved_deltas);
 	}
-	if (verbose && nr_resolved_deltas == nr_deltas)
-		fputc('\n', stderr);
 }
 
 static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_crc)
@@ -587,7 +556,7 @@ static int delta_pos_compare(const void *_a, const void *_b)
 static void fix_unresolved_deltas(int nr_unresolved)
 {
 	struct delta_entry **sorted_by_pos;
-	int i, n = 0, percent = -1;
+	int i, n = 0;
 
 	/*
 	 * Since many unresolved deltas may well be themselves base objects
@@ -632,12 +601,9 @@ static void fix_unresolved_deltas(int nr_unresolved)
 		append_obj_to_pack(d->base.sha1, data, size, type);
 		free(data);
 		if (verbose)
-			percent = display_progress(nr_resolved_deltas,
-						   nr_deltas, percent);
+			display_progress(&progress, nr_resolved_deltas);
 	}
 	free(sorted_by_pos);
-	if (verbose)
-		fputc('\n', stderr);
 }
 
 static void readjust_pack_header_and_sha1(unsigned char *sha1)
@@ -980,10 +946,13 @@ int main(int argc, char **argv)
 	parse_pack_header();
 	objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
 	deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
-	if (verbose)
-		setup_progress_signal();
 	parse_pack_objects(sha1);
-	if (nr_deltas != nr_resolved_deltas) {
+	if (nr_deltas == nr_resolved_deltas) {
+		if (verbose)
+			stop_progress(&progress);
+		/* Flush remaining pack final 20-byte SHA1. */
+		flush();
+	} else {
 		if (fix_thin_pack) {
 			int nr_unresolved = nr_deltas - nr_resolved_deltas;
 			int nr_objects_initial = nr_objects;
@@ -993,17 +962,16 @@ int main(int argc, char **argv)
 					   (nr_objects + nr_unresolved + 1)
 					   * sizeof(*objects));
 			fix_unresolved_deltas(nr_unresolved);
-			if (verbose)
+			if (verbose) {
+				stop_progress(&progress);
 				fprintf(stderr, "%d objects were added to complete this thin pack.\n",
 					nr_objects - nr_objects_initial);
+			}
 			readjust_pack_header_and_sha1(sha1);
 		}
 		if (nr_deltas != nr_resolved_deltas)
 			die("pack has %d unresolved deltas",
 			    nr_deltas - nr_resolved_deltas);
-	} else {
-		/* Flush remaining pack final 20-byte SHA1. */
-		flush();
 	}
 	free(deltas);
 	curr_index = write_index_file(index_name, sha1);
diff --git a/progress.c b/progress.c
new file mode 100644
index 0000000000..702e116066
--- /dev/null
+++ b/progress.c
@@ -0,0 +1,68 @@
+#include "git-compat-util.h"
+#include "progress.h"
+
+static volatile sig_atomic_t progress_update;
+
+static void progress_interval(int signum)
+{
+	progress_update = 1;
+}
+
+static void set_progress_signal(void)
+{
+	struct sigaction sa;
+	struct itimerval v;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = progress_interval;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_RESTART;
+	sigaction(SIGALRM, &sa, NULL);
+
+	v.it_interval.tv_sec = 1;
+	v.it_interval.tv_usec = 0;
+	v.it_value = v.it_interval;
+	setitimer(ITIMER_REAL, &v, NULL);
+}
+
+static void clear_progress_signal(void)
+{
+	struct itimerval v = {{0,},};
+	setitimer(ITIMER_REAL, &v, NULL);
+	signal(SIGALRM, SIG_IGN);
+	progress_update = 0;
+}
+
+int display_progress(struct progress *progress, unsigned n)
+{
+	if (progress->total) {
+		unsigned percent = n * 100 / progress->total;
+		if (percent != progress->last_percent || progress_update) {
+			progress->last_percent = percent;
+			fprintf(stderr, "%s%4u%% (%u/%u) done\r",
+				progress->msg, percent, n, progress->total);
+			progress_update = 0;
+			return 1;
+		}
+	} else if (progress_update) {
+		fprintf(stderr, "%s%u\r", progress->msg, n);
+		progress_update = 0;
+		return 1;
+	}
+	return 0;
+}
+
+void start_progress(struct progress *progress, const char *msg, unsigned total)
+{
+	progress->msg = msg;
+	progress->total = total;
+	progress->last_percent = -1;
+	set_progress_signal();
+}
+
+void stop_progress(struct progress *progress)
+{
+	clear_progress_signal();
+	if (progress->total)
+		fputc('\n', stderr);
+}
diff --git a/progress.h b/progress.h
new file mode 100644
index 0000000000..5fa4948117
--- /dev/null
+++ b/progress.h
@@ -0,0 +1,14 @@
+#ifndef __progress_h__
+#define __progress_h__
+
+struct progress {
+	const char *msg;
+	unsigned total;
+	unsigned last_percent;
+};
+
+int display_progress(struct progress *progress, unsigned n);
+void start_progress(struct progress *progress, const char *msg, unsigned total);
+void stop_progress(struct progress *progress);
+
+#endif
diff --git a/unpack-trees.c b/unpack-trees.c
index 5139481358..8a11622ccb 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -4,6 +4,7 @@
 #include "tree-walk.h"
 #include "cache-tree.h"
 #include "unpack-trees.h"
+#include "progress.h"
 
 #define DBRT_DEBUG 1
 
@@ -288,36 +289,13 @@ static void unlink_entry(char *name)
 	}
 }
 
-static volatile sig_atomic_t progress_update;
-
-static void progress_interval(int signum)
-{
-	progress_update = 1;
-}
-
-static void setup_progress_signal(void)
-{
-	struct sigaction sa;
-	struct itimerval v;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = progress_interval;
-	sigemptyset(&sa.sa_mask);
-	sa.sa_flags = SA_RESTART;
-	sigaction(SIGALRM, &sa, NULL);
-
-	v.it_interval.tv_sec = 1;
-	v.it_interval.tv_usec = 0;
-	v.it_value = v.it_interval;
-	setitimer(ITIMER_REAL, &v, NULL);
-}
-
 static struct checkout state;
 static void check_updates(struct cache_entry **src, int nr,
 		struct unpack_trees_options *o)
 {
 	unsigned short mask = htons(CE_UPDATE);
-	unsigned last_percent = 200, cnt = 0, total = 0;
+	unsigned cnt = 0, total = 0;
+	struct progress progress;
 
 	if (o->update && o->verbose_update) {
 		for (total = cnt = 0; cnt < nr; cnt++) {
@@ -332,8 +310,7 @@ static void check_updates(struct cache_entry **src, int nr,
 
 		if (total) {
 			fprintf(stderr, "Checking files out...\n");
-			setup_progress_signal();
-			progress_update = 1;
+			start_progress(&progress, "", total);
 		}
 		cnt = 0;
 	}
@@ -341,20 +318,9 @@ static void check_updates(struct cache_entry **src, int nr,
 	while (nr--) {
 		struct cache_entry *ce = *src++;
 
-		if (total) {
-			if (!ce->ce_mode || ce->ce_flags & mask) {
-				unsigned percent;
-				cnt++;
-				percent = (cnt * 100) / total;
-				if (percent != last_percent ||
-				    progress_update) {
-					fprintf(stderr, "%4u%% (%u/%u) done\r",
-						percent, cnt, total);
-					last_percent = percent;
-					progress_update = 0;
-				}
-			}
-		}
+		if (total)
+			if (!ce->ce_mode || ce->ce_flags & mask)
+				display_progress(&progress, ++cnt);
 		if (!ce->ce_mode) {
 			if (o->update)
 				unlink_entry(ce->name);
@@ -366,10 +332,8 @@ static void check_updates(struct cache_entry **src, int nr,
 				checkout_entry(ce, &state, NULL);
 		}
 	}
-	if (total) {
-		signal(SIGALRM, SIG_IGN);
-		fputc('\n', stderr);
-	}
+	if (total)
+		stop_progress(&progress);;
 }
 
 int unpack_trees(struct object_list *trees, struct unpack_trees_options *o)

From 13aaf148258808437c485d3ef54c7ae7668384d7 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 20 Apr 2007 14:10:07 -0400
Subject: [PATCH 093/109] make progress "title" part of the common progress
 interface

If the progress bar ends up in a box, better provide a title for it too.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-pack-objects.c   | 19 +++++++------------
 builtin-unpack-objects.c |  6 ++----
 index-pack.c             | 12 ++++--------
 progress.c               | 12 ++++++++----
 progress.h               |  5 +++--
 unpack-trees.c           |  7 +++----
 6 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index d7d9996f5e..b827627670 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -580,10 +580,8 @@ static off_t write_pack_file(void)
 		f = sha1fd(fd, pack_tmp_name);
 	}
 
-	if (do_progress) {
-		fprintf(stderr, "Writing %u objects.\n", nr_result);
-		start_progress(&progress_state, "", nr_result);
-	}
+	if (do_progress)
+		start_progress(&progress_state, "Writing %u objects...", "", nr_result);
 
 	hdr.hdr_signature = htonl(PACK_SIGNATURE);
 	hdr.hdr_version = htonl(PACK_VERSION);
@@ -1389,10 +1387,8 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		return;
 	array = xmalloc(array_size);
 	memset(array, 0, array_size);
-	if (progress) {
-		fprintf(stderr, "Deltifying %u objects.\n", nr_result);
-		start_progress(&progress_state, "", nr_result);
-	}
+	if (progress)
+		start_progress(&progress_state, "Deltifying %u objects...", "", nr_result);
 
 	do {
 		struct object_entry *entry = list[--i];
@@ -1722,10 +1718,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 
 	prepare_packed_git();
 
-	if (progress) {
-		fprintf(stderr, "Generating pack...\n");
-		start_progress(&progress_state, "Counting objects: ", 0);
-	}
+	if (progress)
+		start_progress(&progress_state, "Generating pack...",
+			       "Counting objects: ", 0);
 	if (!use_internal_rev_list)
 		read_object_list_from_stdin();
 	else {
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index c370c7f834..2bbda67fab 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -321,10 +321,8 @@ static void unpack_all(void)
 		die("unknown pack file version %d", ntohl(hdr->hdr_version));
 	use(sizeof(struct pack_header));
 
-	if (!quiet) {
-		fprintf(stderr, "Unpacking %d objects\n", nr_objects);
-		start_progress(&progress, "", nr_objects);
-	}
+	if (!quiet)
+		start_progress(&progress, "Unpacking %u objects...", "", nr_objects);
 	obj_list = xmalloc(nr_objects * sizeof(*obj_list));
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
diff --git a/index-pack.c b/index-pack.c
index a49d03d6de..824004f9a2 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -407,10 +407,8 @@ static void parse_pack_objects(unsigned char *sha1)
 	 * - calculate SHA1 of all non-delta objects;
 	 * - remember base (SHA1 or offset) for all deltas.
 	 */
-	if (verbose) {
-		fprintf(stderr, "Indexing %d objects.\n", nr_objects);
-		start_progress(&progress, "", nr_objects);
-	}
+	if (verbose)
+		start_progress(&progress, "Indexing %u objects...", "", nr_objects);
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
 		data = unpack_raw_entry(obj, &delta->base);
@@ -458,10 +456,8 @@ static void parse_pack_objects(unsigned char *sha1)
 	 *   recursively checking if the resulting object is used as a base
 	 *   for some more deltas.
 	 */
-	if (verbose) {
-		fprintf(stderr, "Resolving %d deltas.\n", nr_deltas);
-		start_progress(&progress, "", nr_deltas);
-	}
+	if (verbose)
+		start_progress(&progress, "Resolving %u deltas...", "", nr_deltas);
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
 		union delta_base base;
diff --git a/progress.c b/progress.c
index 702e116066..478134bc69 100644
--- a/progress.c
+++ b/progress.c
@@ -40,23 +40,27 @@ int display_progress(struct progress *progress, unsigned n)
 		if (percent != progress->last_percent || progress_update) {
 			progress->last_percent = percent;
 			fprintf(stderr, "%s%4u%% (%u/%u) done\r",
-				progress->msg, percent, n, progress->total);
+				progress->prefix, percent, n, progress->total);
 			progress_update = 0;
 			return 1;
 		}
 	} else if (progress_update) {
-		fprintf(stderr, "%s%u\r", progress->msg, n);
+		fprintf(stderr, "%s%u\r", progress->prefix, n);
 		progress_update = 0;
 		return 1;
 	}
 	return 0;
 }
 
-void start_progress(struct progress *progress, const char *msg, unsigned total)
+void start_progress(struct progress *progress, const char *title,
+		    const char *prefix, unsigned total)
 {
-	progress->msg = msg;
+	char buf[80];
+	progress->prefix = prefix;
 	progress->total = total;
 	progress->last_percent = -1;
+	if (snprintf(buf, sizeof(buf), title, total))
+		fprintf(stderr, "%s\n", buf);
 	set_progress_signal();
 }
 
diff --git a/progress.h b/progress.h
index 5fa4948117..1f2661e810 100644
--- a/progress.h
+++ b/progress.h
@@ -2,13 +2,14 @@
 #define __progress_h__
 
 struct progress {
-	const char *msg;
+	const char *prefix;
 	unsigned total;
 	unsigned last_percent;
 };
 
 int display_progress(struct progress *progress, unsigned n);
-void start_progress(struct progress *progress, const char *msg, unsigned total);
+void start_progress(struct progress *progress, const char *title,
+		    const char *prefix, unsigned total);
 void stop_progress(struct progress *progress);
 
 #endif
diff --git a/unpack-trees.c b/unpack-trees.c
index 8a11622ccb..cefc4042d5 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -308,10 +308,9 @@ static void check_updates(struct cache_entry **src, int nr,
 		if (total < 250)
 			total = 0;
 
-		if (total) {
-			fprintf(stderr, "Checking files out...\n");
-			start_progress(&progress, "", total);
-		}
+		if (total)
+			start_progress(&progress, "Checking %u files out...",
+				       "", total);
 		cnt = 0;
 	}
 

From 180a9f226860e18cdc2c3987ae89c239b318b408 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 20 Apr 2007 15:05:27 -0400
Subject: [PATCH 094/109] provide a facility for "delayed" progress reporting

This allows for progress to be displayed only if the progress has not
reached a specified percentage threshold within a given delay in seconds.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 progress.c | 34 ++++++++++++++++++++++++++++++++++
 progress.h |  6 ++++++
 2 files changed, 40 insertions(+)

diff --git a/progress.c b/progress.c
index 478134bc69..05f7890314 100644
--- a/progress.c
+++ b/progress.c
@@ -13,6 +13,8 @@ static void set_progress_signal(void)
 	struct sigaction sa;
 	struct itimerval v;
 
+	progress_update = 0;
+
 	memset(&sa, 0, sizeof(sa));
 	sa.sa_handler = progress_interval;
 	sigemptyset(&sa.sa_mask);
@@ -35,6 +37,24 @@ static void clear_progress_signal(void)
 
 int display_progress(struct progress *progress, unsigned n)
 {
+	if (progress->delay) {
+		char buf[80];
+		if (!progress_update || --progress->delay)
+			return 0;
+		if (progress->total) {
+			unsigned percent = n * 100 / progress->total;
+			if (percent > progress->delayed_percent_treshold) {
+				/* inhibit this progress report entirely */
+				clear_progress_signal();
+				progress->delay = -1;
+				progress->total = 0;
+				return 0;
+			}
+		}
+		if (snprintf(buf, sizeof(buf),
+			     progress->delayed_title, progress->total))
+			fprintf(stderr, "%s\n", buf);
+	}
 	if (progress->total) {
 		unsigned percent = n * 100 / progress->total;
 		if (percent != progress->last_percent || progress_update) {
@@ -59,11 +79,25 @@ void start_progress(struct progress *progress, const char *title,
 	progress->prefix = prefix;
 	progress->total = total;
 	progress->last_percent = -1;
+	progress->delay = 0;
 	if (snprintf(buf, sizeof(buf), title, total))
 		fprintf(stderr, "%s\n", buf);
 	set_progress_signal();
 }
 
+void start_progress_delay(struct progress *progress, const char *title,
+			  const char *prefix, unsigned total,
+			  unsigned percent_treshold, unsigned delay)
+{
+	progress->prefix = prefix;
+	progress->total = total;
+	progress->last_percent = -1;
+	progress->delayed_percent_treshold = percent_treshold;
+	progress->delayed_title = title;
+	progress->delay = delay;
+	set_progress_signal();
+}
+
 void stop_progress(struct progress *progress)
 {
 	clear_progress_signal();
diff --git a/progress.h b/progress.h
index 1f2661e810..4ee851acfb 100644
--- a/progress.h
+++ b/progress.h
@@ -5,11 +5,17 @@ struct progress {
 	const char *prefix;
 	unsigned total;
 	unsigned last_percent;
+	unsigned delay;
+	unsigned delayed_percent_treshold;
+	const char *delayed_title;
 };
 
 int display_progress(struct progress *progress, unsigned n);
 void start_progress(struct progress *progress, const char *title,
 		    const char *prefix, unsigned total);
+void start_progress_delay(struct progress *progress, const char *title,
+			  const char *prefix, unsigned total,
+			  unsigned percent_treshold, unsigned delay);
 void stop_progress(struct progress *progress);
 
 #endif

From 55a9137d8a33449425c55104c00bc9189381754a Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 20 Apr 2007 15:27:44 -0400
Subject: [PATCH 095/109] delay progress display when checking out files

Let's start displaying progress only if more than 50% of the total number
of files remain to be checked out after 2 seconds.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 unpack-trees.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index cefc4042d5..675a9998dc 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -304,13 +304,8 @@ static void check_updates(struct cache_entry **src, int nr,
 				total++;
 		}
 
-		/* Don't bother doing this for very small updates */
-		if (total < 250)
-			total = 0;
-
-		if (total)
-			start_progress(&progress, "Checking %u files out...",
-				       "", total);
+		start_progress_delay(&progress, "Checking %u files out...",
+				     "", total, 50, 2);
 		cnt = 0;
 	}
 

From 4280cde95fa4e3fb012eb6d0c239a7777baaf60c Mon Sep 17 00:00:00 2001
From: Martin Koegler <mkoegler@auto.tuwien.ac.at>
Date: Sun, 22 Apr 2007 22:49:25 -0700
Subject: [PATCH 096/109] gitweb: Show "no difference" message for empty diff

Currently, gitweb shows only header and footer, if no differences are
found. This patch adds a "No differences found" message for the html
output.

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 gitweb/gitweb.css  | 4 ++++
 gitweb/gitweb.perl | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/gitweb/gitweb.css b/gitweb/gitweb.css
index 5e40292404..2b023bd98a 100644
--- a/gitweb/gitweb.css
+++ b/gitweb/gitweb.css
@@ -387,6 +387,10 @@ div.diff.incomplete {
 	color: #cccccc;
 }
 
+div.diff.nodifferences {
+	font-weight: bold;
+	color: #600000;
+}
 
 div.index_include {
 	border: solid #d9d8d1;
diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index c48b35aa39..cbd8d03e64 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -2398,6 +2398,7 @@ sub git_patchset_body {
 	my ($fd, $difftree, $hash, $hash_parent) = @_;
 
 	my $patch_idx = 0;
+	my $patch_number = 0;
 	my $patch_line;
 	my $diffinfo;
 	my (%from, %to);
@@ -2419,6 +2420,7 @@ sub git_patchset_body {
 		# git diff header
 		#assert($patch_line =~ m/^diff /) if DEBUG;
 		#assert($patch_line !~ m!$/$!) if DEBUG; # is chomp-ed
+		$patch_number++;
 		push @diff_header, $patch_line;
 
 		# extended diff header
@@ -2581,6 +2583,7 @@ sub git_patchset_body {
 	} continue {
 		print "</div>\n"; # class="patch"
 	}
+	print "<div class=\"diff nodifferences\">No differences found</div>\n" if (!$patch_number);
 
 	print "</div>\n"; # class="patchset"
 }

From 2cc3167c688d1c91bc4cb9b1caa737b9d4971056 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 23 Apr 2007 00:21:02 -0700
Subject: [PATCH 097/109] Document "diff=driver" attribute

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/gitattributes.txt | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 126871756d..d2edb9b14a 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -151,8 +151,34 @@ Unspecified::
 	text, it is treated as text.  Otherwise it would
 	generate `Binary files differ`.
 
-Any other value set to `diff` attribute is ignored and git acts
-as if the attribute is left unspecified.
+String::
+
+	Diff is shown using the specified custom diff driver.
+	The driver program is given its input using the same
+	calling convention as used for GIT_EXTERNAL_DIFF
+	program.
+
+
+Defining a custom diff driver
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The definition of a diff driver is done in `gitconfig`, not
+`gitattributes` file, so strictly speaking this manual page is a
+wrong place to talk about it.  However...
+
+To define a custom diff driver `jcdiff`, add a section to your
+`$GIT_DIR/config` file (or `$HOME/.gitconfig` file) like this:
+
+----------------------------------------------------------------
+[diff "jcdiff"]
+	command = j-c-diff
+----------------------------------------------------------------
+
+When git needs to show you a diff for the path with `diff`
+attribute set to `jcdiff`, it calls the command you specified
+with the above configuration, i.e. `j-c-diff`, with 7
+parameters, just like `GIT_EXTERNAL_DIFF` program is called.
+See gitlink:git[7] for details.
 
 
 Performing a three-way merge
@@ -183,7 +209,7 @@ Unspecified::
 	different merge driver to be used for paths to which the
 	`merge` attribute is unspecified.
 
-Any other string value::
+String::
 
 	3-way merge is performed using the specified custom
 	merge driver.  The built-in 3-way merge driver can be

From c7f34c180b7117cf60ad12a8b180eed33716e390 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Mon, 23 Apr 2007 10:21:25 +0200
Subject: [PATCH 098/109] dir.c(common_prefix): Fix two bugs

The function common_prefix() is used to find the common subdirectory of
a couple of pathnames. When checking if the next pathname matches up with
the prefix, it incorrectly checked the whole path, not just the prefix
(including the slash). Thus, the expensive part of the loop was executed
always.

The other bug is more serious: if the first and the last pathname in the
list have a longer common prefix than the common prefix for _all_ pathnames
in the list, the longer one would be chosen. This bug was probably hidden
by the fact that bash's wildcard expansion sorts the results, and the code
just so happens to work with sorted input.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 dir.c          | 3 ++-
 t/t3700-add.sh | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index b48e19dc09..602282bd1b 100644
--- a/dir.c
+++ b/dir.c
@@ -24,8 +24,9 @@ int common_prefix(const char **pathspec)
 	prefix = slash - path + 1;
 	while ((next = *++pathspec) != NULL) {
 		int len = strlen(next);
-		if (len >= prefix && !memcmp(path, next, len))
+		if (len >= prefix && !memcmp(path, next, prefix))
 			continue;
+		len = prefix - 1;
 		for (;;) {
 			if (!len)
 				return 0;
diff --git a/t/t3700-add.sh b/t/t3700-add.sh
index 08e035220c..ad8cc7d4ae 100755
--- a/t/t3700-add.sh
+++ b/t/t3700-add.sh
@@ -104,4 +104,10 @@ test_expect_success 'add ignored ones with -f' '
 	git-ls-files --error-unmatch d.ig/d.if d.ig/d.ig
 '
 
+mkdir 1 1/2 1/3
+touch 1/2/a 1/3/b 1/2/c
+test_expect_success 'check correct prefix detection' '
+	git add 1/2/a 1/3/b 1/2/c
+'
+
 test_done

From 81178fe48c1466f400741842f9e3da1528cfd124 Mon Sep 17 00:00:00 2001
From: Brian Gernhardt <benji@silverinsanity.com>
Date: Mon, 23 Apr 2007 19:56:45 -0400
Subject: [PATCH 099/109] Reverse the order of -b and --track in the man page.

Using "-b --track newbranch oldbranch" gives the error:

  git checkout: updating paths is incompatible with switching
  branches/forcing

However, "--track -b ..." works just fine.

Signed-off-by: Brian Gernhardt <benji@silverinsanity.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/git-checkout.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/git-checkout.txt b/Documentation/git-checkout.txt
index 4f2e847dc3..918d8ee720 100644
--- a/Documentation/git-checkout.txt
+++ b/Documentation/git-checkout.txt
@@ -8,7 +8,7 @@ git-checkout - Checkout and switch to a branch
 SYNOPSIS
 --------
 [verse]
-'git-checkout' [-q] [-f] [-b [--track | --no-track] <new_branch> [-l]] [-m] [<branch>]
+'git-checkout' [-q] [-f] [[--track | --no-track] -b <new_branch> [-l]] [-m] [<branch>]
 'git-checkout' [<tree-ish>] <paths>...
 
 DESCRIPTION

From 4c474b6f92a2aa0b5aea282fe281e8f65279380a Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 23 Apr 2007 13:17:19 -0400
Subject: [PATCH 100/109] add file checkout progress

It is nice to see what is happening when checking out a large number of
files, either with git-checkout or git-reset.  The new progress code
already decides what is a "significant amount" and displays progress
only in that case.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-checkout.sh | 8 +++++---
 git-reset.sh    | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/git-checkout.sh b/git-checkout.sh
index deb0a9a3c7..ed7c2c5f6a 100755
--- a/git-checkout.sh
+++ b/git-checkout.sh
@@ -17,6 +17,7 @@ newbranch=
 newbranch_log=
 merge=
 quiet=
+v=-v
 LF='
 '
 while [ "$#" != "0" ]; do
@@ -47,6 +48,7 @@ while [ "$#" != "0" ]; do
 		;;
 	"-q")
 		quiet=1
+		v=
 		;;
 	--)
 		break
@@ -197,7 +199,7 @@ fi
 
 if [ "$force" ]
 then
-    git-read-tree --reset -u $new
+    git-read-tree $v --reset -u $new
 else
     git-update-index --refresh >/dev/null
     merge_error=$(git-read-tree -m -u --exclude-per-directory=.gitignore $old $new 2>&1) || (
@@ -210,7 +212,7 @@ else
 	# Match the index to the working tree, and do a three-way.
     	git diff-files --name-only | git update-index --remove --stdin &&
 	work=`git write-tree` &&
-	git read-tree --reset -u $new || exit
+	git read-tree $v --reset -u $new || exit
 
 	eval GITHEAD_$new='${new_name:-${branch:-$new}}' &&
 	eval GITHEAD_$work=local &&
@@ -221,7 +223,7 @@ else
 	# this is not a real merge before committing, but just carrying
 	# the working tree changes along.
 	unmerged=`git ls-files -u`
-	git read-tree --reset $new
+	git read-tree $v --reset $new
 	case "$unmerged" in
 	'')	;;
 	*)
diff --git a/git-reset.sh b/git-reset.sh
index fee6d98d9c..a172d7ce25 100755
--- a/git-reset.sh
+++ b/git-reset.sh
@@ -71,7 +71,7 @@ then
 		die "Cannot do a soft reset in the middle of a merge."
 	fi
 else
-	git-read-tree --reset $update "$rev" || exit
+	git-read-tree -v --reset $update "$rev" || exit
 fi
 
 # Any resets update HEAD to the head being switched to.

From 557b1e0da595cfb61f9500bd8dd74a32f97616ff Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Mon, 23 Apr 2007 20:46:21 +0200
Subject: [PATCH 101/109] t4201: Do not display weird characters on the
 terminal

Now that git-commit got chatty, we have to shut it up again.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t4201-shortlog.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh
index c27e39cb6f..a48733cee0 100755
--- a/t/t4201-shortlog.sh
+++ b/t/t4201-shortlog.sh
@@ -15,19 +15,19 @@ commit=$( (echo "Test"; echo) | git commit-tree $tree )
 git update-ref HEAD $commit
 
 echo 2 > a1
-git commit -m "This is a very, very long first line for the commit message to see if it is wrapped correctly" a1
+git commit --quiet -m "This is a very, very long first line for the commit message to see if it is wrapped correctly" a1
 
 # test if the wrapping is still valid when replacing all i's by treble clefs.
 echo 3 > a1
-git commit -m "$(echo "This is a very, very long first line for the commit message to see if it is wrapped correctly" | sed "s/i/1234/g" | tr 1234 '\360\235\204\236')" a1
+git commit --quiet -m "$(echo "This is a very, very long first line for the commit message to see if it is wrapped correctly" | sed "s/i/1234/g" | tr 1234 '\360\235\204\236')" a1
 
 # now fsck up the utf8
 git repo-config i18n.commitencoding non-utf-8
 echo 4 > a1
-git commit -m "$(echo "This is a very, very long first line for the commit message to see if it is wrapped correctly" | sed "s/i/1234/g" | tr 1234 '\370\235\204\236')" a1
+git commit --quiet -m "$(echo "This is a very, very long first line for the commit message to see if it is wrapped correctly" | sed "s/i/1234/g" | tr 1234 '\370\235\204\236')" a1
 
 echo 5 > a1
-git commit -m "a								12	34	56	78" a1
+git commit --quiet -m "a								12	34	56	78" a1
 
 git shortlog -w HEAD > out
 

From bd4b0aeb1f4db2d7552623f77b62b9a24f8cac9e Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 23 Apr 2007 22:05:22 -0700
Subject: [PATCH 102/109] t5302: avoid using tail -c

A Large Angry SCM (gitzilla) noticed that on an unnamed platform, tail -c
wants its byte count as part of the option, not as a separate argument.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t5302-pack-index.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh
index 232e5f1964..6902fc6d48 100755
--- a/t/t5302-pack-index.sh
+++ b/t/t5302-pack-index.sh
@@ -16,7 +16,7 @@ test_expect_success \
          test-genrandom "$i" 8192 >>file_$i &&
          git-update-index --add file_$i || return 1
      done &&
-     echo 101 >file_101 && tail -c 8192 file_100 >>file_101 &&
+     { echo 101 && test-genrandom 100 8192; } >file_101 &&
      git-update-index --add file_101 &&
      tree=`git-write-tree` &&
      commit=`git-commit-tree $tree </dev/null` && {

From ab69e89c7e95f309564c3967222bd39f0587a74c Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 23 Apr 2007 22:10:47 -0700
Subject: [PATCH 103/109] t6030: grab commit object name as we go

Instead of running rev-list and picking earlier lines using a head/tail
pipeline, grab the commit object name as we build commits.  This also removes
a non-POSIX use of tail with -linenum (the POSIX-correct way to say it is
"-n linenum").

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t6030-bisect-porcelain.sh | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/t/t6030-bisect-porcelain.sh b/t/t6030-bisect-porcelain.sh
index 13e9379236..30f6ade13f 100755
--- a/t/t6030-bisect-porcelain.sh
+++ b/t/t6030-bisect-porcelain.sh
@@ -22,22 +22,25 @@ add_line_into_file()
         MSG="Create file <$_file> with <$_line> inside."
     fi
 
-    git-commit -m "$MSG" $_file
+    test_tick
+    git-commit --quiet -m "$MSG" $_file
 }
 
 HASH1=
+HASH2=
 HASH3=
 HASH4=
 
-test_expect_success \
-    'set up basic repo with 1 file (hello) and 4 commits' \
-    'add_line_into_file "1: Hello World" hello &&
+test_expect_success 'set up basic repo with 1 file (hello) and 4 commits' '
+     add_line_into_file "1: Hello World" hello &&
+     HASH1=$(git rev-parse --verify HEAD) &&
      add_line_into_file "2: A new day for git" hello &&
+     HASH2=$(git rev-parse --verify HEAD) &&
      add_line_into_file "3: Another new day for git" hello &&
+     HASH3=$(git rev-parse --verify HEAD) &&
      add_line_into_file "4: Ciao for now" hello &&
-     HASH1=$(git rev-list HEAD | tail -1) &&
-     HASH3=$(git rev-list HEAD | head -2 | tail -1) &&
-     HASH4=$(git rev-list HEAD | head -1)'
+     HASH4=$(git rev-parse --verify HEAD)
+'
 
 test_expect_success 'bisect starts with only one bad' '
 	git bisect reset &&

From 2122591b3b5c6d93d3052a3151afcfa3146ede84 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@freedesktop.org>
Date: Mon, 23 Apr 2007 17:18:16 -0700
Subject: [PATCH 104/109] Add clean.requireForce option, and add -f option to
 git-clean to override it

Add a new configuration option clean.requireForce.  If set, git-clean will
refuse to run, unless forced with the new -f option, or not acting due to -n.

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/config.txt               |  4 ++++
 Documentation/git-clean.txt            |  6 +++++-
 contrib/completion/git-completion.bash |  1 +
 git-clean.sh                           | 13 ++++++++++++-
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index b13ff3a1bb..e0aff5369f 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -300,6 +300,10 @@ branch.<name>.merge::
 	branch.<name>.merge to the desired branch, and use the special setting
 	`.` (a period) for branch.<name>.remote.
 
+clean.requireForce::
+	A boolean to make git-clean do nothing unless given -f or -n.  Defaults
+	to false.
+
 color.branch::
 	A boolean to enable/disable color in the output of
 	gitlink:git-branch[1]. May be set to `true` (or `always`),
diff --git a/Documentation/git-clean.txt b/Documentation/git-clean.txt
index c61afbcdba..5aff026eb4 100644
--- a/Documentation/git-clean.txt
+++ b/Documentation/git-clean.txt
@@ -8,7 +8,7 @@ git-clean - Remove untracked files from the working tree
 SYNOPSIS
 --------
 [verse]
-'git-clean' [-d] [-n] [-q] [-x | -X] [--] <paths>...
+'git-clean' [-d] [-f] [-n] [-q] [-x | -X] [--] <paths>...
 
 DESCRIPTION
 -----------
@@ -25,6 +25,10 @@ OPTIONS
 -d::
 	Remove untracked directories in addition to untracked files.
 
+-f::
+	If the git configuration specifies clean.requireForce as true,
+	git-clean will refuse to run unless given -f or -n.
+
 -n::
 	Don't actually remove anything, just show what would be done.
 
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 7c03403484..46356e8a27 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -790,6 +790,7 @@ _git_config ()
 		core.legacyHeaders
 		core.packedGitWindowSize
 		core.packedGitLimit
+		clean.requireForce
 		color.branch
 		color.branch.current
 		color.branch.local
diff --git a/git-clean.sh b/git-clean.sh
index db177a7886..299309d971 100755
--- a/git-clean.sh
+++ b/git-clean.sh
@@ -3,9 +3,10 @@
 # Copyright (c) 2005-2006 Pavel Roskin
 #
 
-USAGE="[-d] [-n] [-q] [-x | -X] [--] <paths>..."
+USAGE="[-d] [-f] [-n] [-q] [-x | -X] [--] <paths>..."
 LONG_USAGE='Clean untracked files from the working directory
 	-d	remove directories as well
+	-f	override clean.requireForce and clean anyway
 	-n 	don'\''t remove anything, just show what would be done
 	-q	be quiet, only report errors
 	-x	remove ignored files as well
@@ -19,6 +20,7 @@ require_work_tree
 ignored=
 ignoredonly=
 cleandir=
+disabled="`git-config --bool clean.requireForce`"
 rmf="rm -f --"
 rmrf="rm -rf --"
 rm_refuse="echo Not removing"
@@ -30,7 +32,11 @@ do
 	-d)
 		cleandir=1
 		;;
+	-f)
+		disabled=
+		;;
 	-n)
+		disabled=
 		rmf="echo Would remove"
 		rmrf="echo Would remove"
 		rm_refuse="echo Would not remove"
@@ -58,6 +64,11 @@ do
 	shift
 done
 
+if [ "$disabled" = true ]; then
+	echo "clean.requireForce set and -n or -f not given; refusing to clean"
+	exit 1
+fi
+
 case "$ignored,$ignoredonly" in
 	1,1) usage;;
 esac

From 6777c3806da18d4a3a05e5bcdde0aa9efb9b3b9f Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@freedesktop.org>
Date: Mon, 23 Apr 2007 17:32:04 -0700
Subject: [PATCH 105/109] Fix typo in git-am: s/Was is/Was it/

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-am.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/git-am.sh b/git-am.sh
index e69ecbfdb1..c9f66e2784 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -291,7 +291,7 @@ do
 			<"$dotest/$msgnum" >"$dotest/info" ||
 			stop_here $this
 		test -s $dotest/patch || {
-			echo "Patch is empty.  Was is split wrong?"
+			echo "Patch is empty.  Was it split wrong?"
 			stop_here $this
 		}
 		git-stripspace < "$dotest/msg" > "$dotest/msg-clean"

From ce748f59923b3a3d432d6e8a12366f71284b595f Mon Sep 17 00:00:00 2001
From: Brian Gernhardt <benji@silverinsanity.com>
Date: Mon, 23 Apr 2007 20:02:34 -0400
Subject: [PATCH 106/109] Ignore all man sections as they are generated files.

Signed-off-by: Brian Gernhardt <benji@silverinsanity.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/.gitignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/.gitignore b/Documentation/.gitignore
index b98d21e98e..a37b2152bd 100644
--- a/Documentation/.gitignore
+++ b/Documentation/.gitignore
@@ -1,7 +1,6 @@
 *.xml
 *.html
-*.1
-*.7
+*.[1-8]
 *.made
 howto-index.txt
 doc.dep

From bbc6354171a08e2c43d651f8645bcdb76bf9f92b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 23 Apr 2007 23:17:41 -0700
Subject: [PATCH 107/109] Build RPM with ETC_GITCONFIG=/etc/gitconfig

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git.spec.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/git.spec.in b/git.spec.in
index 46aee88fd1..87197d10e1 100644
--- a/git.spec.in
+++ b/git.spec.in
@@ -86,12 +86,14 @@ Perl interface to Git
 
 %build
 make %{_smp_mflags} CFLAGS="$RPM_OPT_FLAGS" WITH_OWN_SUBPROCESS_PY=YesPlease \
+     ETC_GITCONFIG=/etc/gitconfig \
      prefix=%{_prefix} all %{!?_without_docs: doc}
 
 %install
 rm -rf $RPM_BUILD_ROOT
 make %{_smp_mflags} CFLAGS="$RPM_OPT_FLAGS" DESTDIR=$RPM_BUILD_ROOT \
      WITH_OWN_SUBPROCESS_PY=YesPlease \
+     ETC_GITCONFIG=/etc/gitconfig \
      prefix=%{_prefix} mandir=%{_mandir} INSTALLDIRS=vendor \
      install %{!?_without_docs: install-doc}
 find $RPM_BUILD_ROOT -type f -name .packlist -exec rm -f {} ';'

From 41728d69426dd707d4978929f8f4ac7a16f115f3 Mon Sep 17 00:00:00 2001
From: Gerrit Pape <pape@smarden.org>
Date: Mon, 23 Apr 2007 12:06:29 +0000
Subject: [PATCH 108/109] Documentation/git-reset.txt: suggest git commit
 --amend in example.

In example 'Undo a commit and redo', refer to 'git commit --amend', as
this is the easier alternative.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/git-reset.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt
index 5b55cda512..19c5b9bbda 100644
--- a/Documentation/git-reset.txt
+++ b/Documentation/git-reset.txt
@@ -67,6 +67,8 @@ message, or both.  Leaves working tree as it was before "reset".
 <3> "reset" copies the old head to .git/ORIG_HEAD; redo the
 commit by starting with its log message.  If you do not need to
 edit the message further, you can give -C option instead.
++
+See also the --amend option to gitlink:git-commit[1].
 
 Undo commits permanently::
 +

From afb5f39e24eadc5933b34cfd17c1089e482d6a6b Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Tue, 24 Apr 2007 04:26:26 +0900
Subject: [PATCH 109/109] git-fetch: Fix "argument list too long"

If $ls_remote_result was too long,

    git-fetch--tool -s pick-rref "$rref" "$ls_remote_result"

in git-fetch will fail with "argument list too long".

This patch fixes git-fetch--tool and git-fetch by passing
$ls_remote_result via stdin.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 builtin-fetch--tool.c |  6 +++++-
 git-fetch.sh          | 11 ++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c
index be341c159f..3145c01f7e 100644
--- a/builtin-fetch--tool.c
+++ b/builtin-fetch--tool.c
@@ -571,9 +571,13 @@ int cmd_fetch__tool(int argc, const char **argv, const char *prefix)
 		return parse_reflist(reflist);
 	}
 	if (!strcmp("pick-rref", argv[1])) {
+		const char *ls_remote_result;
 		if (argc != 4)
 			return error("pick-rref takes 2 args");
-		return pick_rref(sopt, argv[2], argv[3]);
+		ls_remote_result = argv[3];
+		if (!strcmp(ls_remote_result, "-"))
+			ls_remote_result = get_stdin();
+		return pick_rref(sopt, argv[2], ls_remote_result);
 	}
 	if (!strcmp("expand-refs-wildcard", argv[1])) {
 		const char *reflist;
diff --git a/git-fetch.sh b/git-fetch.sh
index 832b20cce6..0e05cf1195 100755
--- a/git-fetch.sh
+++ b/git-fetch.sh
@@ -189,8 +189,8 @@ fetch_all_at_once () {
 			# See if all of what we are going to fetch are
 			# connected to our repository's tips, in which
 			# case we do not have to do any fetch.
-			theirs=$(git-fetch--tool -s pick-rref \
-					"$rref" "$ls_remote_result") &&
+			theirs=$(echo "$ls_remote_result" | \
+				git-fetch--tool -s pick-rref "$rref" "-") &&
 
 			# This will barf when $theirs reach an object that
 			# we do not have in our repository.  Otherwise,
@@ -198,7 +198,8 @@ fetch_all_at_once () {
 			git-rev-list --objects $theirs --not --all \
 				>/dev/null 2>/dev/null
 		then
-			git-fetch--tool pick-rref "$rref" "$ls_remote_result"
+			echo "$ls_remote_result" | \
+				git-fetch--tool pick-rref "$rref" "-"
 		else
 			git-fetch-pack --thin $exec $keep $shallow_depth \
 				$quiet $no_progress "$remote" $rref ||
@@ -263,8 +264,8 @@ fetch_per_ref () {
 	  fi
 
 	  # Find $remote_name from ls-remote output.
-	  head=$(git-fetch--tool -s pick-rref \
-			"$remote_name" "$ls_remote_result")
+	  head=$(echo "$ls_remote_result" | \
+		git-fetch--tool -s pick-rref "$remote_name" "-")
 	  expr "z$head" : "z$_x40\$" >/dev/null ||
 		die "No such ref $remote_name at $remote"
 	  echo >&2 "Fetching $remote_name from $remote using $proto"