From 92b7de93fb7801570ddc3195f03f30b9c201a3bd Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Wed, 7 Jan 2009 18:04:09 +0100
Subject: [PATCH 01/40] Implement the patience diff algorithm

The patience diff algorithm produces slightly more intuitive output
than the classic Myers algorithm, as it does not try to minimize the
number of +/- lines first, but tries to preserve the lines that are
unique.

To this end, it first determines lines that are unique in both files,
then the maximal sequence which preserves the order (relative to both
files) is extracted.

Starting from this initial set of common lines, the rest of the lines
is handled recursively, with Myers' algorithm as a fallback when
the patience algorithm fails (due to no common unique lines).

This patch includes memory leak fixes by Pierre Habouzit.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiff.h     |   1 +
 xdiff/xdiffi.c    |   3 +
 xdiff/xdiffi.h    |   2 +
 xdiff/xpatience.c | 381 ++++++++++++++++++++++++++++++++++++++++++++++
 xdiff/xprepare.c  |   3 +-
 5 files changed, 389 insertions(+), 1 deletion(-)
 create mode 100644 xdiff/xpatience.c

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 84fff583e2..d238c8263e 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -32,6 +32,7 @@ extern "C" {
 #define XDF_IGNORE_WHITESPACE (1 << 2)
 #define XDF_IGNORE_WHITESPACE_CHANGE (1 << 3)
 #define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 4)
+#define XDF_PATIENCE_DIFF (1 << 5)
 #define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL)
 
 #define XDL_PATCH_NORMAL '-'
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 9d0324a38c..3e97462bdd 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -329,6 +329,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	xdalgoenv_t xenv;
 	diffdata_t dd1, dd2;
 
+	if (xpp->flags & XDF_PATIENCE_DIFF)
+		return xdl_do_patience_diff(mf1, mf2, xpp, xe);
+
 	if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
 
 		return -1;
diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h
index 3e099dc445..ad033a8e6a 100644
--- a/xdiff/xdiffi.h
+++ b/xdiff/xdiffi.h
@@ -55,5 +55,7 @@ int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr);
 void xdl_free_script(xdchange_t *xscr);
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg);
+int xdl_do_patience_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+		xdfenv_t *env);
 
 #endif /* #if !defined(XDIFFI_H) */
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
new file mode 100644
index 0000000000..e42c16a807
--- /dev/null
+++ b/xdiff/xpatience.c
@@ -0,0 +1,381 @@
+/*
+ *  LibXDiff by Davide Libenzi ( File Differential Library )
+ *  Copyright (C) 2003-2009 Davide Libenzi, Johannes E. Schindelin
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *  Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+#include "xinclude.h"
+#include "xtypes.h"
+#include "xdiff.h"
+
+/*
+ * The basic idea of patience diff is to find lines that are unique in
+ * both files.  These are intuitively the ones that we want to see as
+ * common lines.
+ *
+ * The maximal ordered sequence of such line pairs (where ordered means
+ * that the order in the sequence agrees with the order of the lines in
+ * both files) naturally defines an initial set of common lines.
+ *
+ * Now, the algorithm tries to extend the set of common lines by growing
+ * the line ranges where the files have identical lines.
+ *
+ * Between those common lines, the patience diff algorithm is applied
+ * recursively, until no unique line pairs can be found; these line ranges
+ * are handled by the well-known Myers algorithm.
+ */
+
+#define NON_UNIQUE ULONG_MAX
+
+/*
+ * This is a hash mapping from line hash to line numbers in the first and
+ * second file.
+ */
+struct hashmap {
+	int nr, alloc;
+	struct entry {
+		unsigned long hash;
+		/*
+		 * 0 = unused entry, 1 = first line, 2 = second, etc.
+		 * line2 is NON_UNIQUE if the line is not unique
+		 * in either the first or the second file.
+		 */
+		unsigned long line1, line2;
+		/*
+		 * "next" & "previous" are used for the longest common
+		 * sequence;
+		 * initially, "next" reflects only the order in file1.
+		 */
+		struct entry *next, *previous;
+	} *entries, *first, *last;
+	/* were common records found? */
+	unsigned long has_matches;
+	mmfile_t *file1, *file2;
+	xdfenv_t *env;
+	xpparam_t const *xpp;
+};
+
+/* The argument "pass" is 1 for the first file, 2 for the second. */
+static void insert_record(int line, struct hashmap *map, int pass)
+{
+	xrecord_t **records = pass == 1 ?
+		map->env->xdf1.recs : map->env->xdf2.recs;
+	xrecord_t *record = records[line - 1], *other;
+	/*
+	 * After xdl_prepare_env() (or more precisely, due to
+	 * xdl_classify_record()), the "ha" member of the records (AKA lines)
+	 * is _not_ the hash anymore, but a linearized version of it.  In
+	 * other words, the "ha" member is guaranteed to start with 0 and
+	 * the second record's ha can only be 0 or 1, etc.
+	 *
+	 * So we multiply ha by 2 in the hope that the hashing was
+	 * "unique enough".
+	 */
+	int index = (int)((record->ha << 1) % map->alloc);
+
+	while (map->entries[index].line1) {
+		other = map->env->xdf1.recs[map->entries[index].line1 - 1];
+		if (map->entries[index].hash != record->ha ||
+				!xdl_recmatch(record->ptr, record->size,
+					other->ptr, other->size,
+					map->xpp->flags)) {
+			if (++index >= map->alloc)
+				index = 0;
+			continue;
+		}
+		if (pass == 2)
+			map->has_matches = 1;
+		if (pass == 1 || map->entries[index].line2)
+			map->entries[index].line2 = NON_UNIQUE;
+		else
+			map->entries[index].line2 = line;
+		return;
+	}
+	if (pass == 2)
+		return;
+	map->entries[index].line1 = line;
+	map->entries[index].hash = record->ha;
+	if (!map->first)
+		map->first = map->entries + index;
+	if (map->last) {
+		map->last->next = map->entries + index;
+		map->entries[index].previous = map->last;
+	}
+	map->last = map->entries + index;
+	map->nr++;
+}
+
+/*
+ * This function has to be called for each recursion into the inter-hunk
+ * parts, as previously non-unique lines can become unique when being
+ * restricted to a smaller part of the files.
+ *
+ * It is assumed that env has been prepared using xdl_prepare().
+ */
+static int fill_hashmap(mmfile_t *file1, mmfile_t *file2,
+		xpparam_t const *xpp, xdfenv_t *env,
+		struct hashmap *result,
+		int line1, int count1, int line2, int count2)
+{
+	result->file1 = file1;
+	result->file2 = file2;
+	result->xpp = xpp;
+	result->env = env;
+
+	/* We know exactly how large we want the hash map */
+	result->alloc = count1 * 2;
+	result->entries = (struct entry *)
+		xdl_malloc(result->alloc * sizeof(struct entry));
+	if (!result->entries)
+		return -1;
+	memset(result->entries, 0, result->alloc * sizeof(struct entry));
+
+	/* First, fill with entries from the first file */
+	while (count1--)
+		insert_record(line1++, result, 1);
+
+	/* Then search for matches in the second file */
+	while (count2--)
+		insert_record(line2++, result, 2);
+
+	return 0;
+}
+
+/*
+ * Find the longest sequence with a smaller last element (meaning a smaller
+ * line2, as we construct the sequence with entries ordered by line1).
+ */
+static int binary_search(struct entry **sequence, int longest,
+		struct entry *entry)
+{
+	int left = -1, right = longest;
+
+	while (left + 1 < right) {
+		int middle = (left + right) / 2;
+		/* by construction, no two entries can be equal */
+		if (sequence[middle]->line2 > entry->line2)
+			right = middle;
+		else
+			left = middle;
+	}
+	/* return the index in "sequence", _not_ the sequence length */
+	return left;
+}
+
+/*
+ * The idea is to start with the list of common unique lines sorted by
+ * the order in file1.  For each of these pairs, the longest (partial)
+ * sequence whose last element's line2 is smaller is determined.
+ *
+ * For efficiency, the sequences are kept in a list containing exactly one
+ * item per sequence length: the sequence with the smallest last
+ * element (in terms of line2).
+ */
+static struct entry *find_longest_common_sequence(struct hashmap *map)
+{
+	struct entry **sequence = xdl_malloc(map->nr * sizeof(struct entry *));
+	int longest = 0, i;
+	struct entry *entry;
+
+	for (entry = map->first; entry; entry = entry->next) {
+		if (!entry->line2 || entry->line2 == NON_UNIQUE)
+			continue;
+		i = binary_search(sequence, longest, entry);
+		entry->previous = i < 0 ? NULL : sequence[i];
+		sequence[++i] = entry;
+		if (i == longest)
+			longest++;
+	}
+
+	/* No common unique lines were found */
+	if (!longest) {
+		xdl_free(sequence);
+		return NULL;
+	}
+
+	/* Iterate starting at the last element, adjusting the "next" members */
+	entry = sequence[longest - 1];
+	entry->next = NULL;
+	while (entry->previous) {
+		entry->previous->next = entry;
+		entry = entry->previous;
+	}
+	xdl_free(sequence);
+	return entry;
+}
+
+static int match(struct hashmap *map, int line1, int line2)
+{
+	xrecord_t *record1 = map->env->xdf1.recs[line1 - 1];
+	xrecord_t *record2 = map->env->xdf2.recs[line2 - 1];
+	return xdl_recmatch(record1->ptr, record1->size,
+		record2->ptr, record2->size, map->xpp->flags);
+}
+
+static int patience_diff(mmfile_t *file1, mmfile_t *file2,
+		xpparam_t const *xpp, xdfenv_t *env,
+		int line1, int count1, int line2, int count2);
+
+static int walk_common_sequence(struct hashmap *map, struct entry *first,
+		int line1, int count1, int line2, int count2)
+{
+	int end1 = line1 + count1, end2 = line2 + count2;
+	int next1, next2;
+
+	for (;;) {
+		/* Try to grow the line ranges of common lines */
+		if (first) {
+			next1 = first->line1;
+			next2 = first->line2;
+			while (next1 > line1 && next2 > line2 &&
+					match(map, next1 - 1, next2 - 1)) {
+				next1--;
+				next2--;
+			}
+		} else {
+			next1 = end1;
+			next2 = end2;
+		}
+		while (line1 < next1 && line2 < next2 &&
+				match(map, line1, line2)) {
+			line1++;
+			line2++;
+		}
+
+		/* Recurse */
+		if (next1 > line1 || next2 > line2) {
+			struct hashmap submap;
+
+			memset(&submap, 0, sizeof(submap));
+			if (patience_diff(map->file1, map->file2,
+					map->xpp, map->env,
+					line1, next1 - line1,
+					line2, next2 - line2))
+				return -1;
+		}
+
+		if (!first)
+			return 0;
+
+		while (first->next &&
+				first->next->line1 == first->line1 + 1 &&
+				first->next->line2 == first->line2 + 1)
+			first = first->next;
+
+		line1 = first->line1 + 1;
+		line2 = first->line2 + 1;
+
+		first = first->next;
+	}
+}
+
+static int fall_back_to_classic_diff(struct hashmap *map,
+		int line1, int count1, int line2, int count2)
+{
+	/*
+	 * This probably does not work outside Git, since
+	 * we have a very simple mmfile structure.
+	 *
+	 * Note: ideally, we would reuse the prepared environment, but
+	 * the libxdiff interface does not (yet) allow for diffing only
+	 * ranges of lines instead of the whole files.
+	 */
+	mmfile_t subfile1, subfile2;
+	xpparam_t xpp;
+	xdfenv_t env;
+
+	subfile1.ptr = (char *)map->env->xdf1.recs[line1 - 1]->ptr;
+	subfile1.size = map->env->xdf1.recs[line1 + count1 - 2]->ptr +
+		map->env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr;
+	subfile2.ptr = (char *)map->env->xdf2.recs[line2 - 1]->ptr;
+	subfile2.size = map->env->xdf2.recs[line2 + count2 - 2]->ptr +
+		map->env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr;
+	xpp.flags = map->xpp->flags & ~XDF_PATIENCE_DIFF;
+	if (xdl_do_diff(&subfile1, &subfile2, &xpp, &env) < 0)
+		return -1;
+
+	memcpy(map->env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1);
+	memcpy(map->env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2);
+
+	xdl_free_env(&env);
+
+	return 0;
+}
+
+/*
+ * Recursively find the longest common sequence of unique lines,
+ * and if none was found, ask xdl_do_diff() to do the job.
+ *
+ * This function assumes that env was prepared with xdl_prepare_env().
+ */
+static int patience_diff(mmfile_t *file1, mmfile_t *file2,
+		xpparam_t const *xpp, xdfenv_t *env,
+		int line1, int count1, int line2, int count2)
+{
+	struct hashmap map;
+	struct entry *first;
+	int result = 0;
+
+	/* trivial case: one side is empty */
+	if (!count1) {
+		while(count2--)
+			env->xdf2.rchg[line2++ - 1] = 1;
+		return 0;
+	} else if (!count2) {
+		while(count1--)
+			env->xdf1.rchg[line1++ - 1] = 1;
+		return 0;
+	}
+
+	memset(&map, 0, sizeof(map));
+	if (fill_hashmap(file1, file2, xpp, env, &map,
+			line1, count1, line2, count2))
+		return -1;
+
+	/* are there any matching lines at all? */
+	if (!map.has_matches) {
+		while(count1--)
+			env->xdf1.rchg[line1++ - 1] = 1;
+		while(count2--)
+			env->xdf2.rchg[line2++ - 1] = 1;
+		xdl_free(map.entries);
+		return 0;
+	}
+
+	first = find_longest_common_sequence(&map);
+	if (first)
+		result = walk_common_sequence(&map, first,
+			line1, count1, line2, count2);
+	else
+		result = fall_back_to_classic_diff(&map,
+			line1, count1, line2, count2);
+
+	xdl_free(map.entries);
+	return result;
+}
+
+int xdl_do_patience_diff(mmfile_t *file1, mmfile_t *file2,
+		xpparam_t const *xpp, xdfenv_t *env)
+{
+	if (xdl_prepare_env(file1, file2, xpp, env) < 0)
+		return -1;
+
+	/* environment is cleaned up in xdl_diff() */
+	return patience_diff(file1, file2, xpp, env,
+			1, env->xdf1.nrec, 1, env->xdf2.nrec);
+}
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index a43aa72cd0..1689085235 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -290,7 +290,8 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 
 	xdl_free_classifier(&cf);
 
-	if (xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) {
+	if (!(xpp->flags & XDF_PATIENCE_DIFF) &&
+			xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) {
 
 		xdl_free_ctx(&xe->xdf2);
 		xdl_free_ctx(&xe->xdf1);

From 34292bddb861f3cb52a524fdce67234430a744fe Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Thu, 1 Jan 2009 17:39:17 +0100
Subject: [PATCH 02/40] Introduce the diff option '--patience'

This commit teaches Git to produce diff output using the patience diff
algorithm with the diff option '--patience'.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt |   3 +
 Makefile                       |   2 +-
 diff.c                         |   2 +
 t/t4033-diff-patience.sh       | 168 +++++++++++++++++++++++++++++++++
 4 files changed, 174 insertions(+), 1 deletion(-)
 create mode 100755 t/t4033-diff-patience.sh

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index c62b45cdba..808bf87277 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -40,6 +40,9 @@ endif::git-format-patch[]
 --patch-with-raw::
 	Synonym for "-p --raw".
 
+--patience:
+	Generate a diff using the "patience diff" algorithm.
+
 --stat[=width[,name-width]]::
 	Generate a diffstat.  You can override the default
 	output width for 80-column terminal by "--stat=width".
diff --git a/Makefile b/Makefile
index aabf0130b9..33e6fa403e 100644
--- a/Makefile
+++ b/Makefile
@@ -1287,7 +1287,7 @@ $(LIB_FILE): $(LIB_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
 
 XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
-	xdiff/xmerge.o
+	xdiff/xmerge.o xdiff/xpatience.o
 $(XDIFF_OBJS): xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \
 	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
 
diff --git a/diff.c b/diff.c
index 0484601f42..4aeab7746c 100644
--- a/diff.c
+++ b/diff.c
@@ -2471,6 +2471,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		options->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE;
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		options->xdl_opts |= XDF_IGNORE_WHITESPACE_AT_EOL;
+	else if (!strcmp(arg, "--patience"))
+		options->xdl_opts |= XDF_PATIENCE_DIFF;
 
 	/* flags options */
 	else if (!strcmp(arg, "--binary")) {
diff --git a/t/t4033-diff-patience.sh b/t/t4033-diff-patience.sh
new file mode 100755
index 0000000000..1eb14989df
--- /dev/null
+++ b/t/t4033-diff-patience.sh
@@ -0,0 +1,168 @@
+#!/bin/sh
+
+test_description='patience diff algorithm'
+
+. ./test-lib.sh
+
+cat >file1 <<\EOF
+#include <stdio.h>
+
+// Frobs foo heartily
+int frobnitz(int foo)
+{
+    int i;
+    for(i = 0; i < 10; i++)
+    {
+        printf("Your answer is: ");
+        printf("%d\n", foo);
+    }
+}
+
+int fact(int n)
+{
+    if(n > 1)
+    {
+        return fact(n-1) * n;
+    }
+    return 1;
+}
+
+int main(int argc, char **argv)
+{
+    frobnitz(fact(10));
+}
+EOF
+
+cat >file2 <<\EOF
+#include <stdio.h>
+
+int fib(int n)
+{
+    if(n > 2)
+    {
+        return fib(n-1) + fib(n-2);
+    }
+    return 1;
+}
+
+// Frobs foo heartily
+int frobnitz(int foo)
+{
+    int i;
+    for(i = 0; i < 10; i++)
+    {
+        printf("%d\n", foo);
+    }
+}
+
+int main(int argc, char **argv)
+{
+    frobnitz(fib(10));
+}
+EOF
+
+cat >expect <<\EOF
+diff --git a/file1 b/file2
+index 6faa5a3..e3af329 100644
+--- a/file1
++++ b/file2
+@@ -1,26 +1,25 @@
+ #include <stdio.h>
+ 
++int fib(int n)
++{
++    if(n > 2)
++    {
++        return fib(n-1) + fib(n-2);
++    }
++    return 1;
++}
++
+ // Frobs foo heartily
+ int frobnitz(int foo)
+ {
+     int i;
+     for(i = 0; i < 10; i++)
+     {
+-        printf("Your answer is: ");
+         printf("%d\n", foo);
+     }
+ }
+ 
+-int fact(int n)
+-{
+-    if(n > 1)
+-    {
+-        return fact(n-1) * n;
+-    }
+-    return 1;
+-}
+-
+ int main(int argc, char **argv)
+ {
+-    frobnitz(fact(10));
++    frobnitz(fib(10));
+ }
+EOF
+
+test_expect_success 'patience diff' '
+
+	test_must_fail git diff --no-index --patience file1 file2 > output &&
+	test_cmp expect output
+
+'
+
+test_expect_success 'patience diff output is valid' '
+
+	mv file2 expect &&
+	git apply < output &&
+	test_cmp expect file2
+
+'
+
+cat >uniq1 <<\EOF
+1
+2
+3
+4
+5
+6
+EOF
+
+cat >uniq2 <<\EOF
+a
+b
+c
+d
+e
+f
+EOF
+
+cat >expect <<\EOF
+diff --git a/uniq1 b/uniq2
+index b414108..0fdf397 100644
+--- a/uniq1
++++ b/uniq2
+@@ -1,6 +1,6 @@
+-1
+-2
+-3
+-4
+-5
+-6
++a
++b
++c
++d
++e
++f
+EOF
+
+test_expect_success 'completely different files' '
+
+	test_must_fail git diff --no-index --patience uniq1 uniq2 > output &&
+	test_cmp expect output
+
+'
+
+test_done

From cc545709253fe8440db2648cb5c771e5b126bdb5 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Thu, 1 Jan 2009 17:39:37 +0100
Subject: [PATCH 03/40] bash completions: Add the --patience option

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 contrib/completion/git-completion.bash | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index e00454983e..b98d765deb 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -776,6 +776,7 @@ _git_diff ()
 			--no-ext-diff
 			--no-prefix --src-prefix= --dst-prefix=
 			--base --ours --theirs
+			--patience
 			"
 		return
 		;;
@@ -967,6 +968,7 @@ _git_log ()
 			--color-words --walk-reflogs
 			--parents --children --full-history
 			--merge
+			--patience
 			"
 		return
 		;;

From 1c7c1d179e51f163c014353f33b406f5bae13922 Mon Sep 17 00:00:00 2001
From: Clemens Buchacher <drizzd@aon.at>
Date: Wed, 14 Jan 2009 15:54:34 +0100
Subject: [PATCH 04/40] clean up pathspec matching

If pathspec already matched exactly, it cannot match any more.
Originally, we had to continue anyways, because we did not
differentiate between exact, recursive and globbing matches.

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 0131983dfb..8b0e0fbaf7 100644
--- a/dir.c
+++ b/dir.c
@@ -118,7 +118,7 @@ int match_pathspec(const char **pathspec, const char *name, int namelen, int pre
 
 	for (retval = 0; (match = *pathspec++) != NULL; seen++) {
 		int how;
-		if (retval && *seen == MATCHED_EXACTLY)
+		if (*seen == MATCHED_EXACTLY)
 			continue;
 		match += prefix;
 		how = match_one(match, name, namelen);

From 0b50922abffb82c473182b03eb5bb47a978cceac Mon Sep 17 00:00:00 2001
From: Clemens Buchacher <drizzd@aon.at>
Date: Wed, 14 Jan 2009 15:54:35 +0100
Subject: [PATCH 05/40] remove pathspec_match, use match_pathspec instead

Both versions have the same functionality. This removes any
redundancy.

This also adds makes two extensions to match_pathspec:

- If pathspec is NULL, return 1. This reflects the behavior of git
  commands, for which no paths usually means "match all paths".

- If seen is NULL, do not use it.

Signed-off-by: Clemens Buchacher <drizzd@aon.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin-checkout.c |  6 +++---
 builtin-commit.c   |  2 +-
 builtin-ls-files.c | 40 ++--------------------------------------
 cache.h            |  1 -
 dir.c              | 19 +++++++++++--------
 5 files changed, 17 insertions(+), 51 deletions(-)

diff --git a/builtin-checkout.c b/builtin-checkout.c
index b5dd9c07b4..84a28257b6 100644
--- a/builtin-checkout.c
+++ b/builtin-checkout.c
@@ -240,7 +240,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec,
 
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		pathspec_match(pathspec, ps_matched, ce->name, 0);
+		match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
 	}
 
 	if (report_path_error(ps_matched, pathspec, 0))
@@ -249,7 +249,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec,
 	/* Any unmerged paths? */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (pathspec_match(pathspec, NULL, ce->name, 0)) {
+		if (match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
 			if (!ce_stage(ce))
 				continue;
 			if (opts->force) {
@@ -274,7 +274,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec,
 	state.refresh_cache = 1;
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (pathspec_match(pathspec, NULL, ce->name, 0)) {
+		if (match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
 			if (!ce_stage(ce)) {
 				errs |= checkout_entry(ce, &state, NULL);
 				continue;
diff --git a/builtin-commit.c b/builtin-commit.c
index e88b78f811..3d1867ac17 100644
--- a/builtin-commit.c
+++ b/builtin-commit.c
@@ -166,7 +166,7 @@ static int list_paths(struct string_list *list, const char *with_tree,
 		struct cache_entry *ce = active_cache[i];
 		if (ce->ce_flags & CE_UPDATE)
 			continue;
-		if (!pathspec_match(pattern, m, ce->name, 0))
+		if (!match_pathspec(pattern, ce->name, ce_namelen(ce), 0, m))
 			continue;
 		string_list_insert(ce->name, list);
 	}
diff --git a/builtin-ls-files.c b/builtin-ls-files.c
index f72eb85475..3434031295 100644
--- a/builtin-ls-files.c
+++ b/builtin-ls-files.c
@@ -36,42 +36,6 @@ static const char *tag_other = "";
 static const char *tag_killed = "";
 static const char *tag_modified = "";
 
-
-/*
- * Match a pathspec against a filename. The first "skiplen" characters
- * are the common prefix
- */
-int pathspec_match(const char **spec, char *ps_matched,
-		   const char *filename, int skiplen)
-{
-	const char *m;
-
-	while ((m = *spec++) != NULL) {
-		int matchlen = strlen(m + skiplen);
-
-		if (!matchlen)
-			goto matched;
-		if (!strncmp(m + skiplen, filename + skiplen, matchlen)) {
-			if (m[skiplen + matchlen - 1] == '/')
-				goto matched;
-			switch (filename[skiplen + matchlen]) {
-			case '/': case '\0':
-				goto matched;
-			}
-		}
-		if (!fnmatch(m + skiplen, filename + skiplen, 0))
-			goto matched;
-		if (ps_matched)
-			ps_matched++;
-		continue;
-	matched:
-		if (ps_matched)
-			*ps_matched = 1;
-		return 1;
-	}
-	return 0;
-}
-
 static void show_dir_entry(const char *tag, struct dir_entry *ent)
 {
 	int len = prefix_len;
@@ -80,7 +44,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
 	if (len >= ent->len)
 		die("git ls-files: internal error - directory entry not superset of prefix");
 
-	if (pathspec && !pathspec_match(pathspec, ps_matched, ent->name, len))
+	if (!match_pathspec(pathspec, ent->name, ent->len, len, ps_matched))
 		return;
 
 	fputs(tag, stdout);
@@ -156,7 +120,7 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce)
 	if (len >= ce_namelen(ce))
 		die("git ls-files: internal error - cache entry not superset of prefix");
 
-	if (pathspec && !pathspec_match(pathspec, ps_matched, ce->name, len))
+	if (!match_pathspec(pathspec, ce->name, ce_namelen(ce), len, ps_matched))
 		return;
 
 	if (tag && *tag && show_valid_bit &&
diff --git a/cache.h b/cache.h
index 231c06d772..c60259d492 100644
--- a/cache.h
+++ b/cache.h
@@ -936,7 +936,6 @@ extern int ws_fix_copy(char *, const char *, int, unsigned, int *);
 extern int ws_blank_line(const char *line, int len, unsigned ws_rule);
 
 /* ls-files */
-int pathspec_match(const char **spec, char *matched, const char *filename, int skiplen);
 int report_path_error(const char *ps_matched, const char **pathspec, int prefix_offset);
 void overlay_tree_on_cache(const char *tree_name, const char *prefix);
 
diff --git a/dir.c b/dir.c
index 8b0e0fbaf7..c50ecc8d69 100644
--- a/dir.c
+++ b/dir.c
@@ -108,25 +108,28 @@ static int match_one(const char *match, const char *name, int namelen)
  * and a mark is left in seen[] array for pathspec element that
  * actually matched anything.
  */
-int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen)
+int match_pathspec(const char **pathspec, const char *name, int namelen,
+		int prefix, char *seen)
 {
-	int retval;
-	const char *match;
+	int i, retval = 0;
+
+	if (!pathspec)
+		return 1;
 
 	name += prefix;
 	namelen -= prefix;
 
-	for (retval = 0; (match = *pathspec++) != NULL; seen++) {
+	for (i = 0; pathspec[i] != NULL; i++) {
 		int how;
-		if (*seen == MATCHED_EXACTLY)
+		const char *match = pathspec[i] + prefix;
+		if (seen && seen[i] == MATCHED_EXACTLY)
 			continue;
-		match += prefix;
 		how = match_one(match, name, namelen);
 		if (how) {
 			if (retval < how)
 				retval = how;
-			if (*seen < how)
-				*seen = how;
+			if (seen && seen[i] < how)
+				seen[i] = how;
 		}
 	}
 	return retval;

From 07b57e90f7c852c4fe212ab1d91058f27469a74b Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Sat, 17 Jan 2009 17:29:42 +0100
Subject: [PATCH 06/40] Add color_fwrite_lines(), a function coloring each line
 individually

We have to set the color before every line and reset it before every
newline.  Add a function color_fwrite_lines() which does that for us.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 color.c | 28 ++++++++++++++++++++++++++++
 color.h |  1 +
 2 files changed, 29 insertions(+)

diff --git a/color.c b/color.c
index fc0b72ad59..d4ae83f9b7 100644
--- a/color.c
+++ b/color.c
@@ -191,3 +191,31 @@ int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
 	va_end(args);
 	return r;
 }
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ *
+ * Returns 0 on success.
+ */
+int color_fwrite_lines(FILE *fp, const char *color,
+		size_t count, const char *buf)
+{
+	if (!*color)
+		return fwrite(buf, count, 1, fp) != 1;
+	while (count) {
+		char *p = memchr(buf, '\n', count);
+		if (p != buf && (fputs(color, fp) < 0 ||
+				fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
+				fputs(COLOR_RESET, fp) < 0))
+			return -1;
+		if (!p)
+			return 0;
+		if (fputc('\n', fp) < 0)
+			return -1;
+		count -= p + 1 - buf;
+		buf = p + 1;
+	}
+	return 0;
+}
+
+
diff --git a/color.h b/color.h
index 6cf5c88aaf..cd5c985eb7 100644
--- a/color.h
+++ b/color.h
@@ -19,5 +19,6 @@ int git_config_colorbool(const char *var, const char *value, int stdout_is_tty);
 void color_parse(const char *var, const char *value, char *dst);
 int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
 int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
 
 #endif /* COLOR_H */

From 23c1575f747393f9847874fd1ed72a44557459d1 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Sat, 17 Jan 2009 17:29:43 +0100
Subject: [PATCH 07/40] color-words: refactor word splitting and use
 ALLOC_GROW()

Word splitting is now performed by the function diff_words_fill(),
avoiding having the same code twice.

In the same spirit, avoid duplicating the code of ALLOC_GROW().

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/diff.c b/diff.c
index d23548292a..c111eef13e 100644
--- a/diff.c
+++ b/diff.c
@@ -326,10 +326,7 @@ struct diff_words_buffer {
 static void diff_words_append(char *line, unsigned long len,
 		struct diff_words_buffer *buffer)
 {
-	if (buffer->text.size + len > buffer->alloc) {
-		buffer->alloc = (buffer->text.size + len) * 3 / 2;
-		buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc);
-	}
+	ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc);
 	line++;
 	len--;
 	memcpy(buffer->text.ptr + buffer->text.size, line, len);
@@ -398,6 +395,22 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 	}
 }
 
+/*
+ * This function splits the words in buffer->text, and stores the list with
+ * newline separator into out.
+ */
+static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
+{
+	int i;
+	out->size = buffer->text.size;
+	out->ptr = xmalloc(out->size);
+	memcpy(out->ptr, buffer->text.ptr, out->size);
+	for (i = 0; i < out->size; i++)
+		if (isspace(out->ptr[i]))
+			out->ptr[i] = '\n';
+	buffer->current = 0;
+}
+
 /* this executes the word diff on the accumulated buffers */
 static void diff_words_show(struct diff_words_data *diff_words)
 {
@@ -405,26 +418,11 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	xdemitconf_t xecfg;
 	xdemitcb_t ecb;
 	mmfile_t minus, plus;
-	int i;
 
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
-	minus.size = diff_words->minus.text.size;
-	minus.ptr = xmalloc(minus.size);
-	memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
-	for (i = 0; i < minus.size; i++)
-		if (isspace(minus.ptr[i]))
-			minus.ptr[i] = '\n';
-	diff_words->minus.current = 0;
-
-	plus.size = diff_words->plus.text.size;
-	plus.ptr = xmalloc(plus.size);
-	memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
-	for (i = 0; i < plus.size; i++)
-		if (isspace(plus.ptr[i]))
-			plus.ptr[i] = '\n';
-	diff_words->plus.current = 0;
-
+	diff_words_fill(&diff_words->minus, &minus);
+	diff_words_fill(&diff_words->plus, &plus);
 	xpp.flags = XDF_NEED_MINIMAL;
 	xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,

From 2e5d2003b28820f88296e47a79eb440ca0295000 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Sat, 17 Jan 2009 17:29:44 +0100
Subject: [PATCH 08/40] color-words: change algorithm to allow for 0-character
 word boundaries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Up until now, the color-words code assumed that word boundaries are
identical to white space characters.

Therefore, it could get away with a very simple scheme: it copied the
hunks, substituted newlines for each white space character, called
libxdiff with the processed text, and then identified the text to
output by the offsets (which agreed since the original text had the
same length).

This code was ugly, for a number of reasons:

- it was impossible to introduce 0-character word boundaries,

- we had to print everything word by word, and

- the code needed extra special handling of newlines in the removed part.

Fix all of these issues by processing the text such that

- we build word lists, separated by newlines,

- we remember the original offsets for every word, and

- after calling libxdiff on the wordlists, we parse the hunk headers, and
  find the corresponding offsets, and then

- we print the removed/added parts in one go.

The pre and post samples in the test were provided by Santi Béjar.

Note that there is some strange special handling of hunk headers where
one line range is 0 due to POSIX: in this case, the start is one too
low.  In other words a hunk header '@@ -1,0 +2 @@' actually means that
the line must be added after the _second_ line of the pre text, _not_
the first.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c                | 161 ++++++++++++++++++++++++------------------
 t/t4034-diff-words.sh |  66 +++++++++++++++++
 2 files changed, 159 insertions(+), 68 deletions(-)
 create mode 100755 t/t4034-diff-words.sh

diff --git a/diff.c b/diff.c
index c111eef13e..37c886a815 100644
--- a/diff.c
+++ b/diff.c
@@ -319,8 +319,10 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 struct diff_words_buffer {
 	mmfile_t text;
 	long alloc;
-	long current; /* output pointer */
-	int suppressed_newline;
+	struct diff_words_orig {
+		const char *begin, *end;
+	} *orig;
+	int orig_nr, orig_alloc;
 };
 
 static void diff_words_append(char *line, unsigned long len,
@@ -335,80 +337,89 @@ static void diff_words_append(char *line, unsigned long len,
 
 struct diff_words_data {
 	struct diff_words_buffer minus, plus;
+	const char *current_plus;
 	FILE *file;
 };
 
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
-		int suppress_newline)
-{
-	const char *ptr;
-	int eol = 0;
-
-	if (len == 0)
-		return;
-
-	ptr  = buffer->text.ptr + buffer->current;
-	buffer->current += len;
-
-	if (ptr[len - 1] == '\n') {
-		eol = 1;
-		len--;
-	}
-
-	fputs(diff_get_color(1, color), file);
-	fwrite(ptr, len, 1, file);
-	fputs(diff_get_color(1, DIFF_RESET), file);
-
-	if (eol) {
-		if (suppress_newline)
-			buffer->suppressed_newline = 1;
-		else
-			putc('\n', file);
-	}
-}
-
 static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 {
 	struct diff_words_data *diff_words = priv;
+	int minus_first, minus_len, plus_first, plus_len;
+	const char *minus_begin, *minus_end, *plus_begin, *plus_end;
 
-	if (diff_words->minus.suppressed_newline) {
-		if (line[0] != '+')
-			putc('\n', diff_words->file);
-		diff_words->minus.suppressed_newline = 0;
-	}
+	if (line[0] != '@' || parse_hunk_header(line, len,
+			&minus_first, &minus_len, &plus_first, &plus_len))
+		return;
 
-	len--;
-	switch (line[0]) {
-		case '-':
-			print_word(diff_words->file,
-				   &diff_words->minus, len, DIFF_FILE_OLD, 1);
-			break;
-		case '+':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_FILE_NEW, 0);
-			break;
-		case ' ':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_PLAIN, 0);
-			diff_words->minus.current += len;
-			break;
-	}
+	/* POSIX requires that first be decremented by one if len == 0... */
+	if (minus_len) {
+		minus_begin = diff_words->minus.orig[minus_first].begin;
+		minus_end =
+			diff_words->minus.orig[minus_first + minus_len - 1].end;
+	} else
+		minus_begin = minus_end =
+			diff_words->minus.orig[minus_first].end;
+
+	if (plus_len) {
+		plus_begin = diff_words->plus.orig[plus_first].begin;
+		plus_end = diff_words->plus.orig[plus_first + plus_len - 1].end;
+	} else
+		plus_begin = plus_end = diff_words->plus.orig[plus_first].end;
+
+	if (diff_words->current_plus != plus_begin)
+		fwrite(diff_words->current_plus,
+				plus_begin - diff_words->current_plus, 1,
+				diff_words->file);
+	if (minus_begin != minus_end)
+		color_fwrite_lines(diff_words->file,
+				diff_get_color(1, DIFF_FILE_OLD),
+				minus_end - minus_begin, minus_begin);
+	if (plus_begin != plus_end)
+		color_fwrite_lines(diff_words->file,
+				diff_get_color(1, DIFF_FILE_NEW),
+				plus_end - plus_begin, plus_begin);
+
+	diff_words->current_plus = plus_end;
 }
 
 /*
- * This function splits the words in buffer->text, and stores the list with
- * newline separator into out.
+ * This function splits the words in buffer->text, stores the list with
+ * newline separator into out, and saves the offsets of the original words
+ * in buffer->orig.
  */
 static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 {
-	int i;
-	out->size = buffer->text.size;
-	out->ptr = xmalloc(out->size);
-	memcpy(out->ptr, buffer->text.ptr, out->size);
-	for (i = 0; i < out->size; i++)
-		if (isspace(out->ptr[i]))
-			out->ptr[i] = '\n';
-	buffer->current = 0;
+	int i, j;
+
+	out->size = 0;
+	out->ptr = xmalloc(buffer->text.size);
+
+	/* fake an empty "0th" word */
+	ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
+	buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr;
+	buffer->orig_nr = 1;
+
+	for (i = 0; i < buffer->text.size; i++) {
+		if (isspace(buffer->text.ptr[i]))
+			continue;
+		for (j = i + 1; j < buffer->text.size &&
+				!isspace(buffer->text.ptr[j]); j++)
+			; /* find the end of the word */
+
+		/* store original boundaries */
+		ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
+				buffer->orig_alloc);
+		buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i;
+		buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j;
+		buffer->orig_nr++;
+
+		/* store one word */
+		memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
+		out->ptr[out->size + j - i] = '\n';
+		out->size += j - i + 1;
+
+		i = j - 1;
+	}
 }
 
 /* this executes the word diff on the accumulated buffers */
@@ -419,22 +430,34 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	xdemitcb_t ecb;
 	mmfile_t minus, plus;
 
+	/* special case: only removal */
+	if (!diff_words->plus.text.size) {
+		color_fwrite_lines(diff_words->file,
+			diff_get_color(1, DIFF_FILE_OLD),
+			diff_words->minus.text.size, diff_words->minus.text.ptr);
+		diff_words->minus.text.size = 0;
+		return;
+	}
+
+	diff_words->current_plus = diff_words->plus.text.ptr;
+
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
 	diff_words_fill(&diff_words->minus, &minus);
 	diff_words_fill(&diff_words->plus, &plus);
 	xpp.flags = XDF_NEED_MINIMAL;
-	xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
+	xecfg.ctxlen = 0;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
 		      &xpp, &xecfg, &ecb);
 	free(minus.ptr);
 	free(plus.ptr);
+	if (diff_words->current_plus != diff_words->plus.text.ptr +
+			diff_words->plus.text.size)
+		fwrite(diff_words->current_plus,
+			diff_words->plus.text.ptr + diff_words->plus.text.size
+			- diff_words->current_plus, 1,
+			diff_words->file);
 	diff_words->minus.text.size = diff_words->plus.text.size = 0;
-
-	if (diff_words->minus.suppressed_newline) {
-		putc('\n', diff_words->file);
-		diff_words->minus.suppressed_newline = 0;
-	}
 }
 
 typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
@@ -458,7 +481,9 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 			diff_words_show(ecbdata->diff_words);
 
 		free (ecbdata->diff_words->minus.text.ptr);
+		free (ecbdata->diff_words->minus.orig);
 		free (ecbdata->diff_words->plus.text.ptr);
+		free (ecbdata->diff_words->plus.orig);
 		free(ecbdata->diff_words);
 		ecbdata->diff_words = NULL;
 	}
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
new file mode 100755
index 0000000000..b22195f8bb
--- /dev/null
+++ b/t/t4034-diff-words.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+test_description='word diff colors'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+
+	git config diff.color.old red
+	git config diff.color.new green
+
+'
+
+decrypt_color () {
+	sed \
+		-e 's/.\[1m/<WHITE>/g' \
+		-e 's/.\[31m/<RED>/g' \
+		-e 's/.\[32m/<GREEN>/g' \
+		-e 's/.\[36m/<BROWN>/g' \
+		-e 's/.\[m/<RESET>/g'
+}
+
+word_diff () {
+	test_must_fail git diff --no-index "$@" pre post > output &&
+	decrypt_color < output > output.decrypted &&
+	test_cmp expect output.decrypted
+}
+
+cat > pre <<\EOF
+h(4)
+
+a = b + c
+EOF
+
+cat > post <<\EOF
+h(4),hh[44]
+
+a = b + c
+
+aa = a
+
+aeff = aeff * ( aaa )
+EOF
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+<RED>h(4)<RESET><GREEN>h(4),hh[44]<RESET>
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa )<RESET>
+EOF
+
+test_expect_success 'word diff with runs of whitespace' '
+
+	word_diff --color-words
+
+'
+
+test_done

From 2b6a5417d750d086d1da906e46de2b3ad8df6753 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Sat, 17 Jan 2009 17:29:45 +0100
Subject: [PATCH 09/40] color-words: take an optional regular expression
 describing words

In some applications, words are not delimited by white space.  To
allow for that, you can specify a regular expression describing
what makes a word with

	git diff --color-words='[A-Za-z0-9]+'

Note that words cannot contain newline characters.

As suggested by Thomas Rast, the words are the exact matches of the
regular expression.

Note that a regular expression beginning with a '^' will match only
a word at the beginning of the hunk, not a word at the beginning of
a line, and is probably not what you want.

This commit contains a quoting fix by Thomas Rast.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt |  6 +++-
 diff.c                         | 64 +++++++++++++++++++++++++++++-----
 diff.h                         |  1 +
 t/t4034-diff-words.sh          | 57 ++++++++++++++++++++++++++++++
 4 files changed, 118 insertions(+), 10 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 43793d7500..2c1fa4b102 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -91,8 +91,12 @@ endif::git-format-patch[]
 	Turn off colored diff, even when the configuration file
 	gives the default to color output.
 
---color-words::
+--color-words[=regex]::
 	Show colored word diff, i.e. color words which have changed.
++
+Optionally, you can pass a regular expression that tells Git what the
+words are that you are looking for; The default is to interpret any
+stretch of non-whitespace as a word.
 
 --no-renames::
 	Turn off rename detection, even when the configuration
diff --git a/diff.c b/diff.c
index 37c886a815..9fb3d0df31 100644
--- a/diff.c
+++ b/diff.c
@@ -333,12 +333,14 @@ static void diff_words_append(char *line, unsigned long len,
 	len--;
 	memcpy(buffer->text.ptr + buffer->text.size, line, len);
 	buffer->text.size += len;
+	buffer->text.ptr[buffer->text.size] = '\0';
 }
 
 struct diff_words_data {
 	struct diff_words_buffer minus, plus;
 	const char *current_plus;
 	FILE *file;
+	regex_t *word_regex;
 };
 
 static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
@@ -382,17 +384,49 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 	diff_words->current_plus = plus_end;
 }
 
+/* This function starts looking at *begin, and returns 0 iff a word was found. */
+static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
+		int *begin, int *end)
+{
+	if (word_regex && *begin < buffer->size) {
+		regmatch_t match[1];
+		if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
+			char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
+					'\n', match[0].rm_eo - match[0].rm_so);
+			*end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
+			*begin += match[0].rm_so;
+			return *begin >= *end;
+		}
+		return -1;
+	}
+
+	/* find the next word */
+	while (*begin < buffer->size && isspace(buffer->ptr[*begin]))
+		(*begin)++;
+	if (*begin >= buffer->size)
+		return -1;
+
+	/* find the end of the word */
+	*end = *begin + 1;
+	while (*end < buffer->size && !isspace(buffer->ptr[*end]))
+		(*end)++;
+
+	return 0;
+}
+
 /*
  * This function splits the words in buffer->text, stores the list with
  * newline separator into out, and saves the offsets of the original words
  * in buffer->orig.
  */
-static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
+static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out,
+		regex_t *word_regex)
 {
 	int i, j;
+	long alloc = 0;
 
 	out->size = 0;
-	out->ptr = xmalloc(buffer->text.size);
+	out->ptr = NULL;
 
 	/* fake an empty "0th" word */
 	ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
@@ -400,11 +434,8 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 	buffer->orig_nr = 1;
 
 	for (i = 0; i < buffer->text.size; i++) {
-		if (isspace(buffer->text.ptr[i]))
-			continue;
-		for (j = i + 1; j < buffer->text.size &&
-				!isspace(buffer->text.ptr[j]); j++)
-			; /* find the end of the word */
+		if (find_word_boundaries(&buffer->text, word_regex, &i, &j))
+			return;
 
 		/* store original boundaries */
 		ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
@@ -414,6 +445,7 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 		buffer->orig_nr++;
 
 		/* store one word */
+		ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc);
 		memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
 		out->ptr[out->size + j - i] = '\n';
 		out->size += j - i + 1;
@@ -443,9 +475,10 @@ static void diff_words_show(struct diff_words_data *diff_words)
 
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
-	diff_words_fill(&diff_words->minus, &minus);
-	diff_words_fill(&diff_words->plus, &plus);
+	diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex);
+	diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex);
 	xpp.flags = XDF_NEED_MINIMAL;
+	/* as only the hunk header will be parsed, we need a 0-context */
 	xecfg.ctxlen = 0;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
 		      &xpp, &xecfg, &ecb);
@@ -484,6 +517,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 		free (ecbdata->diff_words->minus.orig);
 		free (ecbdata->diff_words->plus.text.ptr);
 		free (ecbdata->diff_words->plus.orig);
+		free(ecbdata->diff_words->word_regex);
 		free(ecbdata->diff_words);
 		ecbdata->diff_words = NULL;
 	}
@@ -1506,6 +1540,14 @@ static void builtin_diff(const char *name_a,
 			ecbdata.diff_words =
 				xcalloc(1, sizeof(struct diff_words_data));
 			ecbdata.diff_words->file = o->file;
+			if (o->word_regex) {
+				ecbdata.diff_words->word_regex = (regex_t *)
+					xmalloc(sizeof(regex_t));
+				if (regcomp(ecbdata.diff_words->word_regex,
+						o->word_regex, REG_EXTENDED))
+					die ("Invalid regular expression: %s",
+							o->word_regex);
+			}
 		}
 		xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
 			      &xpp, &xecfg, &ecb);
@@ -2517,6 +2559,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_OPT_CLR(options, COLOR_DIFF);
 	else if (!strcmp(arg, "--color-words"))
 		options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+	else if (!prefixcmp(arg, "--color-words=")) {
+		options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+		options->word_regex = arg + 14;
+	}
 	else if (!strcmp(arg, "--exit-code"))
 		DIFF_OPT_SET(options, EXIT_WITH_STATUS);
 	else if (!strcmp(arg, "--quiet"))
diff --git a/diff.h b/diff.h
index 4d5a32781d..23cd90c2e6 100644
--- a/diff.h
+++ b/diff.h
@@ -98,6 +98,7 @@ struct diff_options {
 
 	int stat_width;
 	int stat_name_width;
+	const char *word_regex;
 
 	/* this is set by diffcore for DIFF_FORMAT_PATCH */
 	int found_changes;
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index b22195f8bb..4873486301 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -63,4 +63,61 @@ test_expect_success 'word diff with runs of whitespace' '
 
 '
 
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+h(4),<GREEN>hh<RESET>[44]
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa<RESET> )
+EOF
+
+test_expect_success 'word diff with a regular expression' '
+
+	word_diff --color-words="[a-z]+"
+
+'
+
+echo 'aaa (aaa)' > pre
+echo 'aaa (aaa) aaa' > post
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index c29453b..be22f37 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1 +1 @@<RESET>
+aaa (aaa) <GREEN>aaa<RESET>
+EOF
+
+test_expect_success 'test parsing words for newline' '
+
+	word_diff --color-words="a+"
+
+'
+
+echo '(:' > pre
+echo '(' > post
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 289cb9d..2d06f37 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1 +1 @@<RESET>
+(<RED>:<RESET>
+EOF
+
+test_expect_success 'test when words are only removed at the end' '
+
+	word_diff --color-words=.
+
+'
+
 test_done

From bf82940dbf12f066ba42a2a03a5bb626ba22c067 Mon Sep 17 00:00:00 2001
From: Thomas Rast <trast@student.ethz.ch>
Date: Sat, 17 Jan 2009 17:29:46 +0100
Subject: [PATCH 10/40] color-words: enable REG_NEWLINE to help user

We silently truncate a match at the newline, which may lead to
unexpected behaviour, e.g., when matching "<[^>]*>" against

  <foo
  bar>

since then "<foo" becomes a word (and "bar>" doesn't!) even though the
regex said only angle-bracket-delimited things can be words.

To alleviate the problem slightly, use REG_NEWLINE so that negated
classes can't match a newline.  Of course newlines can still be
matched explicitly.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index 9fb3d0df31..00c661f82e 100644
--- a/diff.c
+++ b/diff.c
@@ -1544,7 +1544,8 @@ static void builtin_diff(const char *name_a,
 				ecbdata.diff_words->word_regex = (regex_t *)
 					xmalloc(sizeof(regex_t));
 				if (regcomp(ecbdata.diff_words->word_regex,
-						o->word_regex, REG_EXTENDED))
+						o->word_regex,
+						REG_EXTENDED | REG_NEWLINE))
 					die ("Invalid regular expression: %s",
 							o->word_regex);
 			}

From c4b252c3d894673968b144d8e10b79ef22c17b0a Mon Sep 17 00:00:00 2001
From: Thomas Rast <trast@student.ethz.ch>
Date: Sat, 17 Jan 2009 17:29:47 +0100
Subject: [PATCH 11/40] color-words: expand docs with precise semantics

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 2c1fa4b102..8689a92d8d 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -91,12 +91,17 @@ endif::git-format-patch[]
 	Turn off colored diff, even when the configuration file
 	gives the default to color output.
 
---color-words[=regex]::
-	Show colored word diff, i.e. color words which have changed.
+--color-words[=<regex>]::
+	Show colored word diff, i.e., color words which have changed.
+	By default, words are separated by whitespace.
 +
-Optionally, you can pass a regular expression that tells Git what the
-words are that you are looking for; The default is to interpret any
-stretch of non-whitespace as a word.
+When a <regex> is specified, every non-overlapping match of the
+<regex> is considered a word.  Anything between these matches is
+considered whitespace and ignored(!) for the purposes of finding
+differences.  You may want to append `|[^[:space:]]` to your regular
+expression to make sure that it matches all non-whitespace characters.
+A match that contains a newline is silently truncated(!) at the
+newline.
 
 --no-renames::
 	Turn off rename detection, even when the configuration

From 80c49c3de2d5a3aa12b0980a65f1163c8aef0c16 Mon Sep 17 00:00:00 2001
From: Thomas Rast <trast@student.ethz.ch>
Date: Sat, 17 Jan 2009 17:29:48 +0100
Subject: [PATCH 12/40] color-words: make regex configurable via attributes

Make the --color-words splitting regular expression configurable via
the diff driver's 'wordregex' attribute.  The user can then set the
driver on a file in .gitattributes.  If a regex is given on the
command line, it overrides the driver's setting.

We also provide built-in regexes for the languages that already had
funcname patterns, and add an appropriate diff driver entry for C/++.
(The patterns are designed to run UTF-8 sequences into a single chunk
to make sure they remain readable.)

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt  |  4 ++
 Documentation/gitattributes.txt | 21 +++++++++
 diff.c                          | 10 +++++
 t/t4034-diff-words.sh           | 36 +++++++++++++++
 userdiff.c                      | 78 ++++++++++++++++++++++++++-------
 userdiff.h                      |  1 +
 6 files changed, 135 insertions(+), 15 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 8689a92d8d..1edb82e8e1 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -102,6 +102,10 @@ differences.  You may want to append `|[^[:space:]]` to your regular
 expression to make sure that it matches all non-whitespace characters.
 A match that contains a newline is silently truncated(!) at the
 newline.
++
+The regex can also be set via a diff driver, see
+linkgit:gitattributes[1]; giving it explicitly overrides any diff
+driver setting.
 
 --no-renames::
 	Turn off rename detection, even when the configuration
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 8af22eccac..ba3ba12730 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -317,6 +317,8 @@ patterns are available:
 
 - `bibtex` suitable for files with BibTeX coded references.
 
+- `cpp` suitable for source code in the C and C++ languages.
+
 - `html` suitable for HTML/XHTML documents.
 
 - `java` suitable for source code in the Java language.
@@ -334,6 +336,25 @@ patterns are available:
 - `tex` suitable for source code for LaTeX documents.
 
 
+Customizing word diff
+^^^^^^^^^^^^^^^^^^^^^
+
+You can customize the rules that `git diff --color-words` uses to
+split words in a line, by specifying an appropriate regular expression
+in the "diff.*.wordregex" configuration variable.  For example, in TeX
+a backslash followed by a sequence of letters forms a command, but
+several such commands can be run together without intervening
+whitespace.  To separate them, use a regular expression such as
+
+------------------------
+[diff "tex"]
+	wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+"
+------------------------
+
+A built-in pattern is provided for all languages listed in the
+previous section.
+
+
 Performing text diffs of binary files
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/diff.c b/diff.c
index 00c661f82e..9fcde963db 100644
--- a/diff.c
+++ b/diff.c
@@ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe
 	return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
 }
 
+static const char *userdiff_word_regex(struct diff_filespec *one)
+{
+	diff_filespec_load_driver(one);
+	return one->driver->word_regex;
+}
+
 void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
 {
 	if (!options->a_prefix)
@@ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a,
 			ecbdata.diff_words =
 				xcalloc(1, sizeof(struct diff_words_data));
 			ecbdata.diff_words->file = o->file;
+			if (!o->word_regex)
+				o->word_regex = userdiff_word_regex(one);
+			if (!o->word_regex)
+				o->word_regex = userdiff_word_regex(two);
 			if (o->word_regex) {
 				ecbdata.diff_words->word_regex = (regex_t *)
 					xmalloc(sizeof(regex_t));
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index 4873486301..744221bef9 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' '
 
 '
 
+test_expect_success 'set a diff driver' '
+	git config diff.testdriver.wordregex "[^[:space:]]" &&
+	cat <<EOF > .gitattributes
+pre diff=testdriver
+post diff=testdriver
+EOF
+'
+
+test_expect_success 'option overrides default' '
+
+	word_diff --color-words="[a-z]+"
+
+'
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+h(4)<GREEN>,hh[44]<RESET>
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa )<RESET>
+EOF
+
+test_expect_success 'use default supplied by driver' '
+
+	word_diff --color-words
+
+'
+
 echo 'aaa (aaa)' > pre
 echo 'aaa (aaa) aaa' > post
 
@@ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' '
 
 	word_diff --color-words="a+"
 
+
 '
 
 echo '(:' > pre
diff --git a/userdiff.c b/userdiff.c
index 3681062ebf..2b55509485 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -6,14 +6,20 @@ static struct userdiff_driver *drivers;
 static int ndrivers;
 static int drivers_alloc;
 
-#define FUNCNAME(name, pattern) \
-	{ name, NULL, -1, { pattern, REG_EXTENDED } }
+#define PATTERNS(name, pattern, wordregex)			\
+	{ name, NULL, -1, { pattern, REG_EXTENDED }, wordregex }
 static struct userdiff_driver builtin_drivers[] = {
-FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
-FUNCNAME("java",
+PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
+	 "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("java",
 	 "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
-	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
-FUNCNAME("objc",
+	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+	 "[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+	 "|[-+*/<>%&^|=!]="
+	 "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"
+	 "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("objc",
 	 /* Negate C statements that can look like functions */
 	 "!^[ \t]*(do|for|if|else|return|switch|while)\n"
 	 /* Objective-C methods */
@@ -21,20 +27,60 @@ FUNCNAME("objc",
 	 /* C functions */
 	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n"
 	 /* Objective-C class/protocol definitions */
-	 "^(@(implementation|interface|protocol)[ \t].*)$"),
-FUNCNAME("pascal",
+	 "^(@(implementation|interface|protocol)[ \t].*)$",
+	 /* -- */
+	 "[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
+	 "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("pascal",
 	 "^((procedure|function|constructor|destructor|interface|"
 		"implementation|initialization|finalization)[ \t]*.*)$"
 	 "\n"
-	 "^(.*=[ \t]*(class|record).*)$"),
-FUNCNAME("php", "^[\t ]*((function|class).*)"),
-FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
-FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
-FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
-FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
+	 "^(.*=[ \t]*(class|record).*)$",
+	 /* -- */
+	 "[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
+	 "|<>|<=|>=|:=|\\.\\."
+	 "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("php", "^[\t ]*((function|class).*)",
+	 /* -- */
+	 "[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
+	 "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"
+	 "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
+	 /* -- */
+	 "[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?"
+	 "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"
+	 "|[^[:space:]|[\x80-\xff]+"),
+	 /* -- */
+PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
+	 /* -- */
+	 "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
+	 "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"
+	 "|[^[:space:]|[\x80-\xff]+"),
+PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+	 "[={}\"]|[^={}\" \t]+"),
+PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
+	 "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"),
+PATTERNS("cpp",
+	 /* Jump targets or access declarations */
+	 "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n"
+	 /* C/++ functions/methods at top level */
+	 "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n"
+	 /* compound type at top level */
+	 "^((struct|class|enum)[^;]*)$",
+	 /* -- */
+	 "[a-zA-Z_][a-zA-Z0-9_]*"
+	 "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+	 "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
+	 "|[^[:space:]]|[\x80-\xff]+"),
 { "default", NULL, -1, { NULL, 0 } },
 };
-#undef FUNCNAME
+#undef PATTERNS
 
 static struct userdiff_driver driver_true = {
 	"diff=true",
@@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v)
 		return parse_string(&drv->external, k, v);
 	if ((drv = parse_driver(k, v, "textconv")))
 		return parse_string(&drv->textconv, k, v);
+	if ((drv = parse_driver(k, v, "wordregex")))
+		return parse_string(&drv->word_regex, k, v);
 
 	return 0;
 }
diff --git a/userdiff.h b/userdiff.h
index ba2945770b..c3151594f5 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -11,6 +11,7 @@ struct userdiff_driver {
 	const char *external;
 	int binary;
 	struct userdiff_funcname funcname;
+	const char *word_regex;
 	const char *textconv;
 };
 

From f0298cf1c6a7b5cc8b79d84a03b0ce07df2d9e6b Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Fri, 16 Jan 2009 13:52:53 +0100
Subject: [PATCH 13/40] revision walker: include a detached HEAD in --all

When HEAD is detached, --all should list it, too, logically, as a
detached HEAD is by definition a temporary, unnamed branch.

It is especially necessary to list it when garbage collecting, as
the detached HEAD would be trashed.

Noticed by Thomas Rast.

Note that this affects creating bundles with --all; I contend that it
is a good change to add the HEAD, so that cloning from such a bundle
will give you a current branch.  However, I had to fix t5701 as it
assumed that --all does not imply HEAD.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 revision.c              |  1 +
 t/t5701-clone-local.sh  |  4 ++--
 t/t6014-rev-list-all.sh | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100755 t/t6014-rev-list-all.sh

diff --git a/revision.c b/revision.c
index 45fd7a3660..f6ccb973a6 100644
--- a/revision.c
+++ b/revision.c
@@ -1223,6 +1223,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
 
 			if (!strcmp(arg, "--all")) {
 				handle_refs(revs, flags, for_each_ref);
+				handle_refs(revs, flags, head_ref);
 				continue;
 			}
 			if (!strcmp(arg, "--branches")) {
diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh
index 8dfaaa456e..14413f851f 100755
--- a/t/t5701-clone-local.sh
+++ b/t/t5701-clone-local.sh
@@ -11,8 +11,8 @@ test_expect_success 'preparing origin repository' '
 	git clone --bare . x &&
 	test "$(GIT_CONFIG=a.git/config git config --bool core.bare)" = true &&
 	test "$(GIT_CONFIG=x/config git config --bool core.bare)" = true
-	git bundle create b1.bundle --all HEAD &&
-	git bundle create b2.bundle --all &&
+	git bundle create b1.bundle master HEAD &&
+	git bundle create b2.bundle master &&
 	mkdir dir &&
 	cp b1.bundle dir/b3
 	cp b1.bundle b4
diff --git a/t/t6014-rev-list-all.sh b/t/t6014-rev-list-all.sh
new file mode 100755
index 0000000000..991ab4a65b
--- /dev/null
+++ b/t/t6014-rev-list-all.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+test_description='--all includes detached HEADs'
+
+. ./test-lib.sh
+
+
+commit () {
+	test_tick &&
+	echo $1 > foo &&
+	git add foo &&
+	git commit -m "$1"
+}
+
+test_expect_success 'setup' '
+
+	commit one &&
+	commit two &&
+	git checkout HEAD^ &&
+	commit detached
+
+'
+
+test_expect_success 'rev-list --all lists detached HEAD' '
+
+	test 3 = $(git rev-list --all | wc -l)
+
+'
+
+test_expect_success 'repack does not lose detached HEAD' '
+
+	git gc &&
+	git prune --expire=now &&
+	git show HEAD
+
+'
+
+test_done

From b2a6d1c6868b6d5e7d2b4fa9129341220a1e848a Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Sat, 17 Jan 2009 22:27:08 -0800
Subject: [PATCH 14/40] bundle: allow the same ref to be given more than once

"git bundle create x master master" used to create a bundle that lists
the same branch (master) twice.  Cloning from such a bundle resulted in
a needless warning "warning: Duplicated ref: refs/remotes/origin/master".

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle.c               |  2 ++
 object.c               | 19 +++++++++++++++++++
 object.h               |  1 +
 t/t5701-clone-local.sh |  2 +-
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/bundle.c b/bundle.c
index daecd8e1ca..b20f2101f2 100644
--- a/bundle.c
+++ b/bundle.c
@@ -240,6 +240,8 @@ int create_bundle(struct bundle_header *header, const char *path,
 		return error("unrecognized argument: %s'", argv[i]);
 	}
 
+	object_array_remove_duplicates(&revs.pending);
+
 	for (i = 0; i < revs.pending.nr; i++) {
 		struct object_array_entry *e = revs.pending.objects + i;
 		unsigned char sha1[20];
diff --git a/object.c b/object.c
index 50b6528001..7e6a92c88e 100644
--- a/object.c
+++ b/object.c
@@ -268,3 +268,22 @@ void add_object_array_with_mode(struct object *obj, const char *name, struct obj
 	objects[nr].mode = mode;
 	array->nr = ++nr;
 }
+
+void object_array_remove_duplicates(struct object_array *array)
+{
+	int ref, src, dst;
+	struct object_array_entry *objects = array->objects;
+
+	for (ref = 0; ref < array->nr - 1; ref++) {
+		for (src = ref + 1, dst = src;
+		     src < array->nr;
+		     src++) {
+			if (!strcmp(objects[ref].name, objects[src].name))
+				continue;
+			if (src != dst)
+				objects[dst] = objects[src];
+			dst++;
+		}
+		array->nr = dst;
+	}
+}
diff --git a/object.h b/object.h
index 036bd66fe9..3193916638 100644
--- a/object.h
+++ b/object.h
@@ -71,5 +71,6 @@ int object_list_contains(struct object_list *list, struct object *obj);
 /* Object array handling .. */
 void add_object_array(struct object *obj, const char *name, struct object_array *array);
 void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode);
+void object_array_remove_duplicates(struct object_array *);
 
 #endif /* OBJECT_H */
diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh
index 14413f851f..fe0fda282c 100755
--- a/t/t5701-clone-local.sh
+++ b/t/t5701-clone-local.sh
@@ -11,7 +11,7 @@ test_expect_success 'preparing origin repository' '
 	git clone --bare . x &&
 	test "$(GIT_CONFIG=a.git/config git config --bool core.bare)" = true &&
 	test "$(GIT_CONFIG=x/config git config --bool core.bare)" = true
-	git bundle create b1.bundle master HEAD &&
+	git bundle create b1.bundle --all &&
 	git bundle create b2.bundle master &&
 	mkdir dir &&
 	cp b1.bundle dir/b3

From 92604b466344b2157efc42ef3521dac22d7906a2 Mon Sep 17 00:00:00 2001
From: Kjetil Barvik <barvik@broadpark.no>
Date: Sun, 18 Jan 2009 16:14:50 +0100
Subject: [PATCH 15/40] lstat_cache(): more cache effective symlink/directory
 detection

Make the cache functionality more effective.  Previously when A/B/C/D
was in the cache and A/B/C/E/file.c was called for, there was no match
at all from the cache.  Now we use the fact that the paths "A", "A/B"
and "A/B/C" are already tested, and we only need to do an lstat() call
on "A/B/C/E".

We only cache/store the last path regardless of its type.  Since the
cache functionality is always used with alphabetically sorted names
(at least it seems so for me), there is no need to store both the last
symlink-leading path and the last real-directory path.  Note that if
the cache is not called with (mostly) alphabetically sorted names,
neither the old, nor this new one, would be very effective.

Previously, when symlink A/B/C/S was cached/stored in the symlink-
leading path, and A/B/C/file.c was called for, it was not easy to use
the fact that we already knew that the paths "A", "A/B" and "A/B/C"
are real directories.

Avoid copying the first path components of the name 2 zillion times
when we test new path components.  Since we always cache/store the
last path, we can copy each component as we test those directly into
the cache.  Previously we ended up doing a memcpy() for the full
path/name right before each lstat() call, and when updating the cache
for each time we have tested a new path component.

We also use less memory, that is, PATH_MAX bytes less memory on the
stack and PATH_MAX bytes less memory on the heap.

Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable
comments to this patch!

Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 symlinks.c | 167 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 126 insertions(+), 41 deletions(-)

diff --git a/symlinks.c b/symlinks.c
index 5a5e781a15..49fb4d8bc2 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -1,64 +1,149 @@
 #include "cache.h"
 
-struct pathname {
+static struct cache_def {
+	char path[PATH_MAX];
 	int len;
-	char path[PATH_MAX];
-};
+	int flags;
+} cache;
 
-/* Return matching pathname prefix length, or zero if not matching */
-static inline int match_pathname(int len, const char *name, struct pathname *match)
+/*
+ * Returns the length (on a path component basis) of the longest
+ * common prefix match of 'name' and the cached path string.
+ */
+static inline int longest_match_lstat_cache(int len, const char *name)
 {
-	int match_len = match->len;
-	return (len > match_len &&
-		name[match_len] == '/' &&
-		!memcmp(name, match->path, match_len)) ? match_len : 0;
-}
+	int max_len, match_len = 0, i = 0;
 
-static inline void set_pathname(int len, const char *name, struct pathname *match)
-{
-	if (len < PATH_MAX) {
-		match->len = len;
-		memcpy(match->path, name, len);
-		match->path[len] = 0;
+	max_len = len < cache.len ? len : cache.len;
+	while (i < max_len && name[i] == cache.path[i]) {
+		if (name[i] == '/')
+			match_len = i;
+		i++;
 	}
+	/* Is the cached path string a substring of 'name'? */
+	if (i == cache.len && cache.len < len && name[cache.len] == '/')
+		match_len = cache.len;
+	/* Is 'name' a substring of the cached path string? */
+	else if ((i == len && len < cache.len && cache.path[len] == '/') ||
+		 (i == len && len == cache.len))
+		match_len = len;
+	return match_len;
 }
 
-int has_symlink_leading_path(int len, const char *name)
+static inline void reset_lstat_cache(void)
 {
-	static struct pathname link, nonlink;
-	char path[PATH_MAX];
+	cache.path[0] = '\0';
+	cache.len = 0;
+	cache.flags = 0;
+}
+
+#define FL_DIR      (1 << 0)
+#define FL_SYMLINK  (1 << 1)
+#define FL_LSTATERR (1 << 2)
+#define FL_ERR      (1 << 3)
+
+/*
+ * Check if name 'name' of length 'len' has a symlink leading
+ * component, or if the directory exists and is real.
+ *
+ * To speed up the check, some information is allowed to be cached.
+ * This can be indicated by the 'track_flags' argument.
+ */
+static int lstat_cache(int len, const char *name,
+		       int track_flags)
+{
+	int match_len, last_slash, last_slash_dir;
+	int match_flags, ret_flags, save_flags, max_len;
 	struct stat st;
-	char *sp;
-	int known_dir;
 
 	/*
-	 * See if the last known symlink cache matches.
+	 * Check to see if we have a match from the cache for the
+	 * symlink path type.
 	 */
-	if (match_pathname(len, name, &link))
-		return 1;
+	match_len = last_slash = longest_match_lstat_cache(len, name);
+	match_flags = cache.flags & track_flags & FL_SYMLINK;
+	if (match_flags && match_len == cache.len)
+		return match_flags;
+	/*
+	 * If we now have match_len > 0, we would know that the
+	 * matched part will always be a directory.
+	 *
+	 * Also, if we are tracking directories and 'name' is a
+	 * substring of the cache on a path component basis, we can
+	 * return immediately.
+	 */
+	match_flags = track_flags & FL_DIR;
+	if (match_flags && len == match_len)
+		return match_flags;
 
 	/*
-	 * Get rid of the last known directory part
+	 * Okay, no match from the cache so far, so now we have to
+	 * check the rest of the path components.
 	 */
-	known_dir = match_pathname(len, name, &nonlink);
+	ret_flags = FL_DIR;
+	last_slash_dir = last_slash;
+	max_len = len < PATH_MAX ? len : PATH_MAX;
+	while (match_len < max_len) {
+		do {
+			cache.path[match_len] = name[match_len];
+			match_len++;
+		} while (match_len < max_len && name[match_len] != '/');
+		if (match_len >= max_len)
+			break;
+		last_slash = match_len;
+		cache.path[last_slash] = '\0';
 
-	while ((sp = strchr(name + known_dir + 1, '/')) != NULL) {
-		int thislen = sp - name ;
-		memcpy(path, name, thislen);
-		path[thislen] = 0;
-
-		if (lstat(path, &st))
-			return 0;
-		if (S_ISDIR(st.st_mode)) {
-			set_pathname(thislen, path, &nonlink);
-			known_dir = thislen;
+		if (lstat(cache.path, &st)) {
+			ret_flags = FL_LSTATERR;
+		} else if (S_ISDIR(st.st_mode)) {
+			last_slash_dir = last_slash;
 			continue;
-		}
-		if (S_ISLNK(st.st_mode)) {
-			set_pathname(thislen, path, &link);
-			return 1;
+		} else if (S_ISLNK(st.st_mode)) {
+			ret_flags = FL_SYMLINK;
+		} else {
+			ret_flags = FL_ERR;
 		}
 		break;
 	}
-	return 0;
+
+	/*
+	 * At the end update the cache.  Note that max 2 different
+	 * path types, FL_SYMLINK and FL_DIR, can be cached for the
+	 * moment!
+	 */
+	save_flags = ret_flags & track_flags & FL_SYMLINK;
+	if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
+		cache.path[last_slash] = '\0';
+		cache.len = last_slash;
+		cache.flags = save_flags;
+	} else if (track_flags & FL_DIR &&
+		   last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
+		/*
+		 * We have a separate test for the directory case,
+		 * since it could be that we have found a symlink and
+		 * the track_flags says that we cannot cache this
+		 * fact, so the cache would then have been left empty
+		 * in this case.
+		 *
+		 * But if we are allowed to track real directories, we
+		 * can still cache the path components before the last
+		 * one (the found symlink component).
+		 */
+		cache.path[last_slash_dir] = '\0';
+		cache.len = last_slash_dir;
+		cache.flags = FL_DIR;
+	} else {
+		reset_lstat_cache();
+	}
+	return ret_flags;
+}
+
+/*
+ * Return non-zero if path 'name' has a leading symlink component
+ */
+int has_symlink_leading_path(int len, const char *name)
+{
+	return lstat_cache(len, name,
+			   FL_SYMLINK|FL_DIR) &
+		FL_SYMLINK;
 }

From 09c9306658f793f2cad1ed7856b55a7c4e758703 Mon Sep 17 00:00:00 2001
From: Kjetil Barvik <barvik@broadpark.no>
Date: Sun, 18 Jan 2009 16:14:51 +0100
Subject: [PATCH 16/40] lstat_cache(): introduce
 has_symlink_or_noent_leading_path() function

In some cases, especially inside the unpack-trees.c file, and inside
the verify_absent() function, we can avoid some unnecessary calls to
lstat(), if the lstat_cache() function can also be told to keep track
of non-existing directories.

So we update the lstat_cache() function to handle this new fact,
introduce a new wrapper function, and the result is that we save lots
of lstat() calls for a removed directory which previously contained
lots of files, when we call this new wrapper of lstat_cache() instead
of the old one.

We do similar changes inside the unlink_entry() function, since if we
can already say that the leading directory component of a pathname
does not exist, it is not necessary to try to remove a pathname below
it!

Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable
comments to this patch!

Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h        |  1 +
 symlinks.c     | 94 ++++++++++++++++++++++++++++++++------------------
 unpack-trees.c |  4 +--
 3 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/cache.h b/cache.h
index 231c06d772..11181aa007 100644
--- a/cache.h
+++ b/cache.h
@@ -720,6 +720,7 @@ struct checkout {
 
 extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
 extern int has_symlink_leading_path(int len, const char *name);
+extern int has_symlink_or_noent_leading_path(int len, const char *name);
 
 extern struct alternate_object_database {
 	struct alternate_object_database *next;
diff --git a/symlinks.c b/symlinks.c
index 49fb4d8bc2..c69556acf8 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -4,6 +4,7 @@ static struct cache_def {
 	char path[PATH_MAX];
 	int len;
 	int flags;
+	int track_flags;
 } cache;
 
 /*
@@ -30,21 +31,23 @@ static inline int longest_match_lstat_cache(int len, const char *name)
 	return match_len;
 }
 
-static inline void reset_lstat_cache(void)
+static inline void reset_lstat_cache(int track_flags)
 {
 	cache.path[0] = '\0';
 	cache.len = 0;
 	cache.flags = 0;
+	cache.track_flags = track_flags;
 }
 
 #define FL_DIR      (1 << 0)
-#define FL_SYMLINK  (1 << 1)
-#define FL_LSTATERR (1 << 2)
-#define FL_ERR      (1 << 3)
+#define FL_NOENT    (1 << 1)
+#define FL_SYMLINK  (1 << 2)
+#define FL_LSTATERR (1 << 3)
+#define FL_ERR      (1 << 4)
 
 /*
  * Check if name 'name' of length 'len' has a symlink leading
- * component, or if the directory exists and is real.
+ * component, or if the directory exists and is real, or not.
  *
  * To speed up the check, some information is allowed to be cached.
  * This can be indicated by the 'track_flags' argument.
@@ -56,25 +59,35 @@ static int lstat_cache(int len, const char *name,
 	int match_flags, ret_flags, save_flags, max_len;
 	struct stat st;
 
-	/*
-	 * Check to see if we have a match from the cache for the
-	 * symlink path type.
-	 */
-	match_len = last_slash = longest_match_lstat_cache(len, name);
-	match_flags = cache.flags & track_flags & FL_SYMLINK;
-	if (match_flags && match_len == cache.len)
-		return match_flags;
-	/*
-	 * If we now have match_len > 0, we would know that the
-	 * matched part will always be a directory.
-	 *
-	 * Also, if we are tracking directories and 'name' is a
-	 * substring of the cache on a path component basis, we can
-	 * return immediately.
-	 */
-	match_flags = track_flags & FL_DIR;
-	if (match_flags && len == match_len)
-		return match_flags;
+	if (cache.track_flags != track_flags) {
+		/*
+		 * As a safeguard we clear the cache if the value of
+		 * track_flags does not match with the last supplied
+		 * value.
+		 */
+		reset_lstat_cache(track_flags);
+		match_len = last_slash = 0;
+	} else {
+		/*
+		 * Check to see if we have a match from the cache for
+		 * the 2 "excluding" path types.
+		 */
+		match_len = last_slash = longest_match_lstat_cache(len, name);
+		match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
+		if (match_flags && match_len == cache.len)
+			return match_flags;
+		/*
+		 * If we now have match_len > 0, we would know that
+		 * the matched part will always be a directory.
+		 *
+		 * Also, if we are tracking directories and 'name' is
+		 * a substring of the cache on a path component basis,
+		 * we can return immediately.
+		 */
+		match_flags = track_flags & FL_DIR;
+		if (match_flags && len == match_len)
+			return match_flags;
+	}
 
 	/*
 	 * Okay, no match from the cache so far, so now we have to
@@ -95,6 +108,8 @@ static int lstat_cache(int len, const char *name,
 
 		if (lstat(cache.path, &st)) {
 			ret_flags = FL_LSTATERR;
+			if (errno == ENOENT)
+				ret_flags |= FL_NOENT;
 		} else if (S_ISDIR(st.st_mode)) {
 			last_slash_dir = last_slash;
 			continue;
@@ -107,11 +122,11 @@ static int lstat_cache(int len, const char *name,
 	}
 
 	/*
-	 * At the end update the cache.  Note that max 2 different
-	 * path types, FL_SYMLINK and FL_DIR, can be cached for the
-	 * moment!
+	 * At the end update the cache.  Note that max 3 different
+	 * path types, FL_NOENT, FL_SYMLINK and FL_DIR, can be cached
+	 * for the moment!
 	 */
-	save_flags = ret_flags & track_flags & FL_SYMLINK;
+	save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
 	if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
 		cache.path[last_slash] = '\0';
 		cache.len = last_slash;
@@ -120,20 +135,20 @@ static int lstat_cache(int len, const char *name,
 		   last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
 		/*
 		 * We have a separate test for the directory case,
-		 * since it could be that we have found a symlink and
-		 * the track_flags says that we cannot cache this
-		 * fact, so the cache would then have been left empty
-		 * in this case.
+		 * since it could be that we have found a symlink or a
+		 * non-existing directory and the track_flags says
+		 * that we cannot cache this fact, so the cache would
+		 * then have been left empty in this case.
 		 *
 		 * But if we are allowed to track real directories, we
 		 * can still cache the path components before the last
-		 * one (the found symlink component).
+		 * one (the found symlink or non-existing component).
 		 */
 		cache.path[last_slash_dir] = '\0';
 		cache.len = last_slash_dir;
 		cache.flags = FL_DIR;
 	} else {
-		reset_lstat_cache();
+		reset_lstat_cache(track_flags);
 	}
 	return ret_flags;
 }
@@ -147,3 +162,14 @@ int has_symlink_leading_path(int len, const char *name)
 			   FL_SYMLINK|FL_DIR) &
 		FL_SYMLINK;
 }
+
+/*
+ * Return non-zero if path 'name' has a leading symlink component or
+ * if some leading path component does not exists.
+ */
+int has_symlink_or_noent_leading_path(int len, const char *name)
+{
+	return lstat_cache(len, name,
+			   FL_SYMLINK|FL_NOENT|FL_DIR) &
+		(FL_SYMLINK|FL_NOENT);
+}
diff --git a/unpack-trees.c b/unpack-trees.c
index 54f301da67..a3fd383afb 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -61,7 +61,7 @@ static void unlink_entry(struct cache_entry *ce)
 	char *cp, *prev;
 	char *name = ce->name;
 
-	if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+	if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
 		return;
 	if (unlink(name))
 		return;
@@ -584,7 +584,7 @@ static int verify_absent(struct cache_entry *ce, const char *action,
 	if (o->index_only || o->reset || !o->update)
 		return 0;
 
-	if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+	if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
 		return 0;
 
 	if (!lstat(ce->name, &st)) {

From bad4a54fa6fe8a9bbdfb5c3e413268eefde69b75 Mon Sep 17 00:00:00 2001
From: Kjetil Barvik <barvik@broadpark.no>
Date: Sun, 18 Jan 2009 16:14:52 +0100
Subject: [PATCH 17/40] lstat_cache(): introduce has_dirs_only_path() function

The create_directories() function in entry.c currently calls stat()
or lstat() for each path component of the pathname 'path' each and every
time.  For the 'git checkout' command, this function is called on each
file for which we must do an update (ce->ce_flags & CE_UPDATE), so we get
lots and lots of calls.

To fix this, we make a new wrapper to the lstat_cache() function, and
call the wrapper function instead of the calls to the stat() or the
lstat() functions.  Since the paths given to the create_directories()
function, is sorted alphabetically, the new wrapper would be very
cache effective in this situation.

To support it we must update the lstat_cache() function to be able to
say that "please test the complete length of 'name'", and also to give
it the length of a prefix, where the cache should use the stat()
function instead of the lstat() function to test each path component.

Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable
comments to this patch!

Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h    |  1 +
 entry.c    | 34 ++++++++++-------------------
 symlinks.c | 64 +++++++++++++++++++++++++++++++++++++++---------------
 3 files changed, 60 insertions(+), 39 deletions(-)

diff --git a/cache.h b/cache.h
index 11181aa007..7c8c8e4842 100644
--- a/cache.h
+++ b/cache.h
@@ -721,6 +721,7 @@ struct checkout {
 extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
 extern int has_symlink_leading_path(int len, const char *name);
 extern int has_symlink_or_noent_leading_path(int len, const char *name);
+extern int has_dirs_only_path(int len, const char *name, int prefix_len);
 
 extern struct alternate_object_database {
 	struct alternate_object_database *next;
diff --git a/entry.c b/entry.c
index aa2ee46a84..01a683ee5d 100644
--- a/entry.c
+++ b/entry.c
@@ -8,35 +8,25 @@ static void create_directories(const char *path, const struct checkout *state)
 	const char *slash = path;
 
 	while ((slash = strchr(slash+1, '/')) != NULL) {
-		struct stat st;
-		int stat_status;
-
 		len = slash - path;
 		memcpy(buf, path, len);
 		buf[len] = 0;
 
-		if (len <= state->base_dir_len)
-			/*
-			 * checkout-index --prefix=<dir>; <dir> is
-			 * allowed to be a symlink to an existing
-			 * directory.
-			 */
-			stat_status = stat(buf, &st);
-		else
-			/*
-			 * if there currently is a symlink, we would
-			 * want to replace it with a real directory.
-			 */
-			stat_status = lstat(buf, &st);
-
-		if (!stat_status && S_ISDIR(st.st_mode))
+		/*
+		 * For 'checkout-index --prefix=<dir>', <dir> is
+		 * allowed to be a symlink to an existing directory,
+		 * and we set 'state->base_dir_len' below, such that
+		 * we test the path components of the prefix with the
+		 * stat() function instead of the lstat() function.
+		 */
+		if (has_dirs_only_path(len, buf, state->base_dir_len))
 			continue; /* ok, it is already a directory. */
 
 		/*
-		 * We know stat_status == 0 means something exists
-		 * there and this mkdir would fail, but that is an
-		 * error codepath; we do not care, as we unlink and
-		 * mkdir again in such a case.
+		 * If this mkdir() would fail, it could be that there
+		 * is already a symlink or something else exists
+		 * there, therefore we then try to unlink it and try
+		 * one more time to create the directory.
 		 */
 		if (mkdir(buf, 0777)) {
 			if (errno == EEXIST && state->force &&
diff --git a/symlinks.c b/symlinks.c
index c69556acf8..918e24ad9e 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -1,10 +1,11 @@
 #include "cache.h"
 
 static struct cache_def {
-	char path[PATH_MAX];
+	char path[PATH_MAX + 1];
 	int len;
 	int flags;
 	int track_flags;
+	int prefix_len_stat_func;
 } cache;
 
 /*
@@ -31,12 +32,13 @@ static inline int longest_match_lstat_cache(int len, const char *name)
 	return match_len;
 }
 
-static inline void reset_lstat_cache(int track_flags)
+static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func)
 {
 	cache.path[0] = '\0';
 	cache.len = 0;
 	cache.flags = 0;
 	cache.track_flags = track_flags;
+	cache.prefix_len_stat_func = prefix_len_stat_func;
 }
 
 #define FL_DIR      (1 << 0)
@@ -44,28 +46,35 @@ static inline void reset_lstat_cache(int track_flags)
 #define FL_SYMLINK  (1 << 2)
 #define FL_LSTATERR (1 << 3)
 #define FL_ERR      (1 << 4)
+#define FL_FULLPATH (1 << 5)
 
 /*
  * Check if name 'name' of length 'len' has a symlink leading
  * component, or if the directory exists and is real, or not.
  *
  * To speed up the check, some information is allowed to be cached.
- * This can be indicated by the 'track_flags' argument.
+ * This can be indicated by the 'track_flags' argument, which also can
+ * be used to indicate that we should check the full path.
+ *
+ * The 'prefix_len_stat_func' parameter can be used to set the length
+ * of the prefix, where the cache should use the stat() function
+ * instead of the lstat() function to test each path component.
  */
 static int lstat_cache(int len, const char *name,
-		       int track_flags)
+		       int track_flags, int prefix_len_stat_func)
 {
 	int match_len, last_slash, last_slash_dir;
-	int match_flags, ret_flags, save_flags, max_len;
+	int match_flags, ret_flags, save_flags, max_len, ret;
 	struct stat st;
 
-	if (cache.track_flags != track_flags) {
+	if (cache.track_flags != track_flags ||
+	    cache.prefix_len_stat_func != prefix_len_stat_func) {
 		/*
-		 * As a safeguard we clear the cache if the value of
-		 * track_flags does not match with the last supplied
-		 * value.
+		 * As a safeguard we clear the cache if the values of
+		 * track_flags and/or prefix_len_stat_func does not
+		 * match with the last supplied values.
 		 */
-		reset_lstat_cache(track_flags);
+		reset_lstat_cache(track_flags, prefix_len_stat_func);
 		match_len = last_slash = 0;
 	} else {
 		/*
@@ -101,12 +110,17 @@ static int lstat_cache(int len, const char *name,
 			cache.path[match_len] = name[match_len];
 			match_len++;
 		} while (match_len < max_len && name[match_len] != '/');
-		if (match_len >= max_len)
+		if (match_len >= max_len && !(track_flags & FL_FULLPATH))
 			break;
 		last_slash = match_len;
 		cache.path[last_slash] = '\0';
 
-		if (lstat(cache.path, &st)) {
+		if (last_slash <= prefix_len_stat_func)
+			ret = stat(cache.path, &st);
+		else
+			ret = lstat(cache.path, &st);
+
+		if (ret) {
 			ret_flags = FL_LSTATERR;
 			if (errno == ENOENT)
 				ret_flags |= FL_NOENT;
@@ -127,12 +141,12 @@ static int lstat_cache(int len, const char *name,
 	 * for the moment!
 	 */
 	save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
-	if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
+	if (save_flags && last_slash > 0 && last_slash <= PATH_MAX) {
 		cache.path[last_slash] = '\0';
 		cache.len = last_slash;
 		cache.flags = save_flags;
 	} else if (track_flags & FL_DIR &&
-		   last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
+		   last_slash_dir > 0 && last_slash_dir <= PATH_MAX) {
 		/*
 		 * We have a separate test for the directory case,
 		 * since it could be that we have found a symlink or a
@@ -148,18 +162,20 @@ static int lstat_cache(int len, const char *name,
 		cache.len = last_slash_dir;
 		cache.flags = FL_DIR;
 	} else {
-		reset_lstat_cache(track_flags);
+		reset_lstat_cache(track_flags, prefix_len_stat_func);
 	}
 	return ret_flags;
 }
 
+#define USE_ONLY_LSTAT  0
+
 /*
  * Return non-zero if path 'name' has a leading symlink component
  */
 int has_symlink_leading_path(int len, const char *name)
 {
 	return lstat_cache(len, name,
-			   FL_SYMLINK|FL_DIR) &
+			   FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) &
 		FL_SYMLINK;
 }
 
@@ -170,6 +186,20 @@ int has_symlink_leading_path(int len, const char *name)
 int has_symlink_or_noent_leading_path(int len, const char *name)
 {
 	return lstat_cache(len, name,
-			   FL_SYMLINK|FL_NOENT|FL_DIR) &
+			   FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) &
 		(FL_SYMLINK|FL_NOENT);
 }
+
+/*
+ * Return non-zero if all path components of 'name' exists as a
+ * directory.  If prefix_len > 0, we will test with the stat()
+ * function instead of the lstat() function for a prefix length of
+ * 'prefix_len', thus we then allow for symlinks in the prefix part as
+ * long as those points to real existing directories.
+ */
+int has_dirs_only_path(int len, const char *name, int prefix_len)
+{
+	return lstat_cache(len, name,
+			   FL_DIR|FL_FULLPATH, prefix_len) &
+		FL_DIR;
+}

From aeabab5c712d5acae4a2836272d641acbb87b893 Mon Sep 17 00:00:00 2001
From: Kjetil Barvik <barvik@broadpark.no>
Date: Sun, 18 Jan 2009 16:14:53 +0100
Subject: [PATCH 18/40] lstat_cache(): introduce invalidate_lstat_cache()
 function

In some cases it could maybe be necessary to say to the cache that
"Hey, I deleted/changed the type of this pathname and if you currently
have it inside your cache, you should deleted it".

This patch introduce a function which support this.

Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h    |  1 +
 symlinks.c | 44 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/cache.h b/cache.h
index 7c8c8e4842..f4452a89a7 100644
--- a/cache.h
+++ b/cache.h
@@ -722,6 +722,7 @@ extern int checkout_entry(struct cache_entry *ce, const struct checkout *state,
 extern int has_symlink_leading_path(int len, const char *name);
 extern int has_symlink_or_noent_leading_path(int len, const char *name);
 extern int has_dirs_only_path(int len, const char *name, int prefix_len);
+extern void invalidate_lstat_cache(int len, const char *name);
 
 extern struct alternate_object_database {
 	struct alternate_object_database *next;
diff --git a/symlinks.c b/symlinks.c
index 918e24ad9e..31ddbc9e96 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -12,23 +12,30 @@ static struct cache_def {
  * Returns the length (on a path component basis) of the longest
  * common prefix match of 'name' and the cached path string.
  */
-static inline int longest_match_lstat_cache(int len, const char *name)
+static inline int longest_match_lstat_cache(int len, const char *name,
+					    int *previous_slash)
 {
-	int max_len, match_len = 0, i = 0;
+	int max_len, match_len = 0, match_len_prev = 0, i = 0;
 
 	max_len = len < cache.len ? len : cache.len;
 	while (i < max_len && name[i] == cache.path[i]) {
-		if (name[i] == '/')
+		if (name[i] == '/') {
+			match_len_prev = match_len;
 			match_len = i;
+		}
 		i++;
 	}
 	/* Is the cached path string a substring of 'name'? */
-	if (i == cache.len && cache.len < len && name[cache.len] == '/')
+	if (i == cache.len && cache.len < len && name[cache.len] == '/') {
+		match_len_prev = match_len;
 		match_len = cache.len;
 	/* Is 'name' a substring of the cached path string? */
-	else if ((i == len && len < cache.len && cache.path[len] == '/') ||
-		 (i == len && len == cache.len))
+	} else if ((i == len && len < cache.len && cache.path[len] == '/') ||
+		   (i == len && len == cache.len)) {
+		match_len_prev = match_len;
 		match_len = len;
+	}
+	*previous_slash = match_len_prev;
 	return match_len;
 }
 
@@ -63,7 +70,7 @@ static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func)
 static int lstat_cache(int len, const char *name,
 		       int track_flags, int prefix_len_stat_func)
 {
-	int match_len, last_slash, last_slash_dir;
+	int match_len, last_slash, last_slash_dir, previous_slash;
 	int match_flags, ret_flags, save_flags, max_len, ret;
 	struct stat st;
 
@@ -81,7 +88,8 @@ static int lstat_cache(int len, const char *name,
 		 * Check to see if we have a match from the cache for
 		 * the 2 "excluding" path types.
 		 */
-		match_len = last_slash = longest_match_lstat_cache(len, name);
+		match_len = last_slash =
+			longest_match_lstat_cache(len, name, &previous_slash);
 		match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
 		if (match_flags && match_len == cache.len)
 			return match_flags;
@@ -167,6 +175,26 @@ static int lstat_cache(int len, const char *name,
 	return ret_flags;
 }
 
+/*
+ * Invalidate the given 'name' from the cache, if 'name' matches
+ * completely with the cache.
+ */
+void invalidate_lstat_cache(int len, const char *name)
+{
+	int match_len, previous_slash;
+
+	match_len = longest_match_lstat_cache(len, name, &previous_slash);
+	if (len == match_len) {
+		if ((cache.track_flags & FL_DIR) && previous_slash > 0) {
+			cache.path[previous_slash] = '\0';
+			cache.len = previous_slash;
+			cache.flags = FL_DIR;
+		} else
+			reset_lstat_cache(cache.track_flags,
+					  cache.prefix_len_stat_func);
+	}
+}
+
 #define USE_ONLY_LSTAT  0
 
 /*

From bda6eb0da9b4e4e763b531c83cab9fd9f85934ff Mon Sep 17 00:00:00 2001
From: Kjetil Barvik <barvik@broadpark.no>
Date: Sun, 18 Jan 2009 16:14:54 +0100
Subject: [PATCH 19/40] lstat_cache(): introduce clear_lstat_cache() function

If you want to completely clear the contents of the lstat_cache(), then
call this new function.

Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h    | 1 +
 symlinks.c | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/cache.h b/cache.h
index f4452a89a7..468daf6a43 100644
--- a/cache.h
+++ b/cache.h
@@ -723,6 +723,7 @@ extern int has_symlink_leading_path(int len, const char *name);
 extern int has_symlink_or_noent_leading_path(int len, const char *name);
 extern int has_dirs_only_path(int len, const char *name, int prefix_len);
 extern void invalidate_lstat_cache(int len, const char *name);
+extern void clear_lstat_cache(void);
 
 extern struct alternate_object_database {
 	struct alternate_object_database *next;
diff --git a/symlinks.c b/symlinks.c
index 31ddbc9e96..f262b7c44b 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -195,6 +195,14 @@ void invalidate_lstat_cache(int len, const char *name)
 	}
 }
 
+/*
+ * Completely clear the contents of the cache
+ */
+void clear_lstat_cache(void)
+{
+	reset_lstat_cache(0, 0);
+}
+
 #define USE_ONLY_LSTAT  0
 
 /*

From 98a4d87b87e9846eafd21ba232cc2b7ba3f718fc Mon Sep 17 00:00:00 2001
From: Boyd Stephen Smith Jr <bss@iguanasuicide.net>
Date: Tue, 20 Jan 2009 21:46:57 -0600
Subject: [PATCH 20/40] color-words: Support diff.wordregex config option

When diff is invoked with --color-words (w/o =regex), use the regular
expression the user has configured as diff.wordregex.

diff drivers configured via attributes take precedence over the
diff.wordregex-words setting.  If the user wants to change them, they have
their own configuration variables.

Signed-off-by: Boyd Stephen Smith Jr <bss@iguanasuicide.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config.txt       |  6 +++++
 Documentation/diff-options.txt |  7 +++---
 diff.c                         |  5 ++++
 t/t4034-diff-words.sh          | 45 ++++++++++++++++++++++++++++++++--
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 7408bb2d34..0ca983ac3b 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -639,6 +639,12 @@ diff.suppress-blank-empty::
 	A boolean to inhibit the standard behavior of printing a space
 	before each empty output line. Defaults to false.
 
+diff.wordregex::
+	A POSIX Extended Regular Expression used to determine what is a "word"
+	when performing word-by-word difference calculations.  Character
+	sequences that match the regular expression are "words", all other
+	characters are *ignorable* whitespace.
+
 fetch.unpackLimit::
 	If the number of objects fetched over the git native
 	transfer is below this
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 1edb82e8e1..164e2c5348 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -103,9 +103,10 @@ expression to make sure that it matches all non-whitespace characters.
 A match that contains a newline is silently truncated(!) at the
 newline.
 +
-The regex can also be set via a diff driver, see
-linkgit:gitattributes[1]; giving it explicitly overrides any diff
-driver setting.
+The regex can also be set via a diff driver or configuration option, see
+linkgit:gitattributes[1] or linkgit:git-config[1].  Giving it explicitly
+overrides any diff driver or configuration setting.  Diff drivers
+override configuration settings.
 
 --no-renames::
 	Turn off rename detection, even when the configuration
diff --git a/diff.c b/diff.c
index 9fcde963db..ed8b83c68f 100644
--- a/diff.c
+++ b/diff.c
@@ -23,6 +23,7 @@ static int diff_detect_rename_default;
 static int diff_rename_limit_default = 200;
 static int diff_suppress_blank_empty;
 int diff_use_color_default = -1;
+static const char *diff_word_regex_cfg;
 static const char *external_diff_cmd_cfg;
 int diff_auto_refresh_index = 1;
 static int diff_mnemonic_prefix;
@@ -92,6 +93,8 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 	}
 	if (!strcmp(var, "diff.external"))
 		return git_config_string(&external_diff_cmd_cfg, var, value);
+	if (!strcmp(var, "diff.wordregex"))
+		return git_config_string(&diff_word_regex_cfg, var, value);
 
 	return git_diff_basic_config(var, value, cb);
 }
@@ -1550,6 +1553,8 @@ static void builtin_diff(const char *name_a,
 				o->word_regex = userdiff_word_regex(one);
 			if (!o->word_regex)
 				o->word_regex = userdiff_word_regex(two);
+			if (!o->word_regex)
+				o->word_regex = diff_word_regex_cfg;
 			if (o->word_regex) {
 				ecbdata.diff_words->word_regex = (regex_t *)
 					xmalloc(sizeof(regex_t));
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index 744221bef9..6bcc153084 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -77,6 +77,7 @@ a = b + c<RESET>
 
 <GREEN>aeff = aeff * ( aaa<RESET> )
 EOF
+cp expect expect.letter-runs-are-words
 
 test_expect_success 'word diff with a regular expression' '
 
@@ -92,7 +93,7 @@ post diff=testdriver
 EOF
 '
 
-test_expect_success 'option overrides default' '
+test_expect_success 'option overrides .gitattributes' '
 
 	word_diff --color-words="[a-z]+"
 
@@ -112,13 +113,53 @@ a = b + c<RESET>
 
 <GREEN>aeff = aeff * ( aaa )<RESET>
 EOF
+cp expect expect.non-whitespace-is-word
 
-test_expect_success 'use default supplied by driver' '
+test_expect_success 'use regex supplied by driver' '
 
 	word_diff --color-words
 
 '
 
+test_expect_success 'set diff.wordregex option' '
+	git config diff.wordregex "[[:alnum:]]+"
+'
+
+cp expect.letter-runs-are-words expect
+
+test_expect_success 'command-line overrides config' '
+	word_diff --color-words="[a-z]+"
+'
+
+cp expect.non-whitespace-is-word expect
+
+test_expect_success '.gitattributes override config' '
+	word_diff --color-words
+'
+
+test_expect_success 'remove diff driver regex' '
+	git config --unset diff.testdriver.wordregex
+'
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+h(4),<GREEN>hh[44<RESET>]
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa<RESET> )
+EOF
+
+test_expect_success 'use configured regex' '
+	word_diff --color-words
+'
+
 echo 'aaa (aaa)' > pre
 echo 'aaa (aaa) aaa' > post
 

From ae3b970ac3e21324a95fea75213c2569180d74c6 Mon Sep 17 00:00:00 2001
From: Boyd Stephen Smith Jr <bss@iguanasuicide.net>
Date: Tue, 20 Jan 2009 22:59:54 -0600
Subject: [PATCH 21/40] Change the spelling of "wordregex".

Use "wordRegex" for configuration variable names.  Use "word_regex" for C
language tokens.

Signed-off-by: Boyd Stephen Smith Jr. <bss@iguanasuicide.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config.txt        | 2 +-
 Documentation/gitattributes.txt | 4 ++--
 t/t4034-diff-words.sh           | 8 ++++----
 userdiff.c                      | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 0ca983ac3b..332213e65d 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -639,7 +639,7 @@ diff.suppress-blank-empty::
 	A boolean to inhibit the standard behavior of printing a space
 	before each empty output line. Defaults to false.
 
-diff.wordregex::
+diff.wordRegex::
 	A POSIX Extended Regular Expression used to determine what is a "word"
 	when performing word-by-word difference calculations.  Character
 	sequences that match the regular expression are "words", all other
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index ba3ba12730..227934f59a 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -341,14 +341,14 @@ Customizing word diff
 
 You can customize the rules that `git diff --color-words` uses to
 split words in a line, by specifying an appropriate regular expression
-in the "diff.*.wordregex" configuration variable.  For example, in TeX
+in the "diff.*.wordRegex" configuration variable.  For example, in TeX
 a backslash followed by a sequence of letters forms a command, but
 several such commands can be run together without intervening
 whitespace.  To separate them, use a regular expression such as
 
 ------------------------
 [diff "tex"]
-	wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+"
+	wordRegex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+"
 ------------------------
 
 A built-in pattern is provided for all languages listed in the
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index 6bcc153084..4508effcaa 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -86,7 +86,7 @@ test_expect_success 'word diff with a regular expression' '
 '
 
 test_expect_success 'set a diff driver' '
-	git config diff.testdriver.wordregex "[^[:space:]]" &&
+	git config diff.testdriver.wordRegex "[^[:space:]]" &&
 	cat <<EOF > .gitattributes
 pre diff=testdriver
 post diff=testdriver
@@ -121,8 +121,8 @@ test_expect_success 'use regex supplied by driver' '
 
 '
 
-test_expect_success 'set diff.wordregex option' '
-	git config diff.wordregex "[[:alnum:]]+"
+test_expect_success 'set diff.wordRegex option' '
+	git config diff.wordRegex "[[:alnum:]]+"
 '
 
 cp expect.letter-runs-are-words expect
@@ -138,7 +138,7 @@ test_expect_success '.gitattributes override config' '
 '
 
 test_expect_success 'remove diff driver regex' '
-	git config --unset diff.testdriver.wordregex
+	git config --unset diff.testdriver.wordRegex
 '
 
 cat > expect <<\EOF
diff --git a/userdiff.c b/userdiff.c
index 2b55509485..d556da9751 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -6,8 +6,8 @@ static struct userdiff_driver *drivers;
 static int ndrivers;
 static int drivers_alloc;
 
-#define PATTERNS(name, pattern, wordregex)			\
-	{ name, NULL, -1, { pattern, REG_EXTENDED }, wordregex }
+#define PATTERNS(name, pattern, word_regex)			\
+	{ name, NULL, -1, { pattern, REG_EXTENDED }, word_regex }
 static struct userdiff_driver builtin_drivers[] = {
 PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
 	 "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"),

From b938f62a208d24f3d6bed260ceb809cd6b3924d7 Mon Sep 17 00:00:00 2001
From: Boyd Stephen Smith Jr <bss@iguanasuicide.net>
Date: Thu, 22 Jan 2009 12:26:25 -0600
Subject: [PATCH 22/40] Fix Documentation for git-describe

The documentation for git-describe says the default abbreviation is 8
hexadecimal digits while cache.c clearly shows DEFAULT_ABBREV set to 7.
This patch corrects the documentation.

Signed-off-by: Boyd Stephen Smith Jr <bss@iguanasuicide.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-describe.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt
index c4dbc2ae34..59a6fd17ab 100644
--- a/Documentation/git-describe.txt
+++ b/Documentation/git-describe.txt
@@ -38,7 +38,7 @@ OPTIONS
 	Automatically implies --tags.
 
 --abbrev=<n>::
-	Instead of using the default 8 hexadecimal digits as the
+	Instead of using the default 7 hexadecimal digits as the
 	abbreviated object name, use <n> digits.
 
 --candidates=<n>::

From 86ac751859033741a120e9e4a91133d075d9d898 Mon Sep 17 00:00:00 2001
From: Sverre Rabbelier <srabbelier@gmail.com>
Date: Fri, 23 Jan 2009 01:07:32 +0100
Subject: [PATCH 23/40] Allow cloning an empty repository

Cloning an empty repository manually (that is, doing 'git init' and
then doing all configuration by hand) can be a lot of work. Save the
user this work by allowing the cloning of empty repositories.

Signed-off-by: Sverre Rabbelier <srabbelier@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin-clone.c        | 17 +++++++++++++----
 t/t5701-clone-local.sh | 16 ++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/builtin-clone.c b/builtin-clone.c
index f7e5a7b0a0..1e9c9aa844 100644
--- a/builtin-clone.c
+++ b/builtin-clone.c
@@ -522,14 +522,23 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 					     option_upload_pack);
 
 		refs = transport_get_remote_refs(transport);
-		transport_fetch_refs(transport, refs);
+		if(refs)
+			transport_fetch_refs(transport, refs);
 	}
 
-	clear_extra_refs();
+	if (refs) {
+		clear_extra_refs();
 
-	mapped_refs = write_remote_refs(refs, &refspec, reflog_msg.buf);
+		mapped_refs = write_remote_refs(refs, &refspec, reflog_msg.buf);
 
-	head_points_at = locate_head(refs, mapped_refs, &remote_head);
+		head_points_at = locate_head(refs, mapped_refs, &remote_head);
+	}
+	else {
+		warning("You appear to have cloned an empty repository.");
+		head_points_at = NULL;
+		remote_head = NULL;
+		option_no_checkout = 1;
+	}
 
 	if (head_points_at) {
 		/* Local default branch link */
diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh
index 8dfaaa456e..fbd9bfa573 100755
--- a/t/t5701-clone-local.sh
+++ b/t/t5701-clone-local.sh
@@ -116,4 +116,20 @@ test_expect_success 'bundle clone with nonexistent HEAD' '
 	test ! -e .git/refs/heads/master
 '
 
+test_expect_success 'clone empty repository' '
+	cd "$D" &&
+	mkdir empty &&
+	(cd empty && git init) &&
+	git clone empty empty-clone &&
+	test_tick &&
+	(cd empty-clone
+	 echo "content" >> foo &&
+	 git add foo &&
+	 git commit -m "Initial commit" &&
+	 git push origin master &&
+	 expected=$(git rev-parse master) &&
+	 actual=$(git --git-dir=../empty/.git rev-parse master) &&
+	 test $actual = $expected)
+'
+
 test_done

From d930508903909ad5e700dd25e3858a1b2d69ed12 Mon Sep 17 00:00:00 2001
From: Arjen Laarhoven <arjen@yaph.org>
Date: Thu, 22 Jan 2009 17:37:24 +0100
Subject: [PATCH 24/40] t/t4202-log.sh: Add testcases

Add testcases for 'git log --diff-filter=[CM]' (copies and renames).
Also add a testcase for 'git log --follow'.

Signed-off-by: Arjen Laarhoven <arjen@yaph.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4202-log.sh | 61 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 51 insertions(+), 10 deletions(-)

diff --git a/t/t4202-log.sh b/t/t4202-log.sh
index 0ab925c4e4..7b976ee36d 100755
--- a/t/t4202-log.sh
+++ b/t/t4202-log.sh
@@ -16,27 +16,31 @@ test_expect_success setup '
 	test_tick &&
 	git commit -m second &&
 
+	git mv one ichi &&
+	test_tick &&
+	git commit -m third &&
+
+	cp ichi ein &&
+	git add ein &&
+	test_tick &&
+	git commit -m fourth &&
+
 	mkdir a &&
 	echo ni >a/two &&
 	git add a/two &&
 	test_tick &&
-	git commit -m third &&
+	git commit -m fifth  &&
 
-	echo san >a/three &&
-	git add a/three &&
+	git rm a/two &&
 	test_tick &&
-	git commit -m fourth &&
-
-	git rm a/three &&
-	test_tick &&
-	git commit -m fifth
+	git commit -m sixth
 
 '
 
 test_expect_success 'diff-filter=A' '
 
 	actual=$(git log --pretty="format:%s" --diff-filter=A HEAD) &&
-	expect=$(echo fourth ; echo third ; echo initial) &&
+	expect=$(echo fifth ; echo fourth ; echo third ; echo initial) &&
 	test "$actual" = "$expect" || {
 		echo Oops
 		echo "Actual: $actual"
@@ -60,7 +64,43 @@ test_expect_success 'diff-filter=M' '
 test_expect_success 'diff-filter=D' '
 
 	actual=$(git log --pretty="format:%s" --diff-filter=D HEAD) &&
-	expect=$(echo fifth) &&
+	expect=$(echo sixth ; echo third) &&
+	test "$actual" = "$expect" || {
+		echo Oops
+		echo "Actual: $actual"
+		false
+	}
+
+'
+
+test_expect_success 'diff-filter=R' '
+
+	actual=$(git log -M --pretty="format:%s" --diff-filter=R HEAD) &&
+	expect=$(echo third) &&
+	test "$actual" = "$expect" || {
+		echo Oops
+		echo "Actual: $actual"
+		false
+	}
+
+'
+
+test_expect_success 'diff-filter=C' '
+
+	actual=$(git log -C -C --pretty="format:%s" --diff-filter=C HEAD) &&
+	expect=$(echo fourth) &&
+	test "$actual" = "$expect" || {
+		echo Oops
+		echo "Actual: $actual"
+		false
+	}
+
+'
+
+test_expect_success 'git log --follow' '
+
+	actual=$(git log --follow --pretty="format:%s" ichi) &&
+	expect=$(echo third ; echo second ; echo initial) &&
 	test "$actual" = "$expect" || {
 		echo Oops
 		echo "Actual: $actual"
@@ -72,6 +112,7 @@ test_expect_success 'diff-filter=D' '
 test_expect_success 'setup case sensitivity tests' '
 	echo case >one &&
 	test_tick &&
+	git add one
 	git commit -a -m Second
 '
 

From b80da424a13107c239ed40573fae3d692d19b6cd Mon Sep 17 00:00:00 2001
From: "martin f. krafft" <madduck@madduck.net>
Date: Fri, 23 Jan 2009 11:31:21 +1100
Subject: [PATCH 25/40] git-am: implement --reject option passed to git-apply

With --reject, git-am simply passes the --reject option to git-apply and thus
allows people to work with reject files if they so prefer.

Signed-off-by: martin f. krafft <madduck@madduck.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-am.txt |  2 ++
 git-am.sh                |  3 +++
 t/t4252-am-options.sh    | 11 ++++++++++-
 t/t4252/am-test-6-1      | 21 +++++++++++++++++++++
 4 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 t/t4252/am-test-6-1

diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt
index 5cbbe76937..efd311b1ce 100644
--- a/Documentation/git-am.txt
+++ b/Documentation/git-am.txt
@@ -12,6 +12,7 @@ SYNOPSIS
 'git am' [--signoff] [--keep] [--utf8 | --no-utf8]
 	 [--3way] [--interactive]
 	 [--whitespace=<option>] [-C<n>] [-p<n>] [--directory=<dir>]
+	 [--reject]
 	 [<mbox> | <Maildir>...]
 'git am' (--skip | --resolved | --abort)
 
@@ -63,6 +64,7 @@ default.   You could use `--no-utf8` to override this.
 -C<n>::
 -p<n>::
 --directory=<dir>::
+--reject::
 	These flags are passed to the 'git-apply' (see linkgit:git-apply[1])
 	program that applies
 	the patch.
diff --git a/git-am.sh b/git-am.sh
index e20dd88392..b1c05c9db3 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -19,6 +19,7 @@ whitespace=     pass it through git-apply
 directory=      pass it through git-apply
 C=              pass it through git-apply
 p=              pass it through git-apply
+reject          pass it through git-apply
 resolvemsg=     override error message when patch failure occurs
 r,resolved      to be used after a patch failure
 skip            skip the current patch
@@ -168,6 +169,8 @@ do
 		git_apply_opt="$git_apply_opt $(sq "$1=$2")"; shift ;;
 	-C|-p)
 		git_apply_opt="$git_apply_opt $(sq "$1$2")"; shift ;;
+	--reject)
+		git_apply_opt="$git_apply_opt $1" ;;
 	--)
 		shift; break ;;
 	*)
diff --git a/t/t4252-am-options.sh b/t/t4252-am-options.sh
index 5fdd188582..f603c1b133 100755
--- a/t/t4252-am-options.sh
+++ b/t/t4252-am-options.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-test_description='git am not losing options'
+test_description='git am with options and not losing them'
 . ./test-lib.sh
 
 tm="$TEST_DIRECTORY/t4252"
@@ -66,4 +66,13 @@ test_expect_success 'apply to a funny path' '
 	test -f "$with_sq/file-5"
 '
 
+test_expect_success 'am --reject' '
+	rm -rf .git/rebase-apply &&
+	git reset --hard initial &&
+	test_must_fail git am --reject "$tm"/am-test-6-1 &&
+	grep "@@ -1,3 +1,3 @@" file-2.rej &&
+	test_must_fail git diff-files --exit-code --quiet file-2 &&
+	grep "[-]-reject" .git/rebase-apply/apply-opt
+'
+
 test_done
diff --git a/t/t4252/am-test-6-1 b/t/t4252/am-test-6-1
new file mode 100644
index 0000000000..a8859e9b8f
--- /dev/null
+++ b/t/t4252/am-test-6-1
@@ -0,0 +1,21 @@
+From: A U Thor <au.thor@example.com>
+Date: Thu Dec 4 16:00:00 2008 -0800
+Subject: Huh
+
+Should fail and leave rejects
+
+diff --git i/file-2 w/file-2
+index 06e567b..b6f3a16 100644
+--- i/file-2
++++ w/file-2
+@@ -1,3 +1,3 @@
+-0
++One
+ 2
+ 3
+@@ -4,4 +4,4 @@
+ 4
+ 5
+-6
++Six
+ 7

From b1c7d4aafeda8e6b2bbeac29a3181cbe970b7874 Mon Sep 17 00:00:00 2001
From: Ray Chuan <rctay89@gmail.com>
Date: Sat, 24 Jan 2009 10:00:22 +0800
Subject: [PATCH 26/40] http-push: refactor lock-related headers creation for
 curl requests

DAV-related headers (more specifically, headers related to the lock token,
namely, If, Lock-Token, and Timeout) for curl requests are created and
allocated individually, eg a "if_header" variable for the "If: " header, a
"timeout_header" variable for the "Timeout: " header.

This patch provides a new function ("get_dav_token_headers") that creates
these header, saving methods from allocating memory, and from issuing a
"curl_slist_append()" call.  The temporary string storage given to
curl_slist_append() is freed much earlier than the previous code with this
patch, but this change is safe, because curl_slist_append() keeps a copy
of the given string.

In part, this patch also addresses the fact that commit 753bc91 (Remove
the requirement opaquelocktoken uri scheme) did not update memory
allocations for DAV-related headers.

Signed-off-by: Tay Ray Chuan <rctay89@gmail.com>
Acked-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 http-push.c | 68 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/http-push.c b/http-push.c
index cb5bf95a73..eca4a8e5e9 100644
--- a/http-push.c
+++ b/http-push.c
@@ -177,6 +177,37 @@ struct remote_ls_ctx
 	struct remote_ls_ctx *parent;
 };
 
+/* get_dav_token_headers options */
+enum dav_header_flag {
+	DAV_HEADER_IF = (1u << 0),
+	DAV_HEADER_LOCK = (1u << 1),
+	DAV_HEADER_TIMEOUT = (1u << 2)
+};
+
+static struct curl_slist *get_dav_token_headers(struct remote_lock *lock, enum dav_header_flag options) {
+	struct strbuf buf = STRBUF_INIT;
+	struct curl_slist *dav_headers = NULL;
+
+	if(options & DAV_HEADER_IF) {
+		strbuf_addf(&buf, "If: (<%s>)", lock->token);
+		dav_headers = curl_slist_append(dav_headers, buf.buf);
+		strbuf_reset(&buf);
+	}
+	if(options & DAV_HEADER_LOCK) {
+		strbuf_addf(&buf, "Lock-Token: <%s>", lock->token);
+		dav_headers = curl_slist_append(dav_headers, buf.buf);
+		strbuf_reset(&buf);
+	}
+	if(options & DAV_HEADER_TIMEOUT) {
+		strbuf_addf(&buf, "Timeout: Second-%ld", lock->timeout);
+		dav_headers = curl_slist_append(dav_headers, buf.buf);
+		strbuf_reset(&buf);
+	}
+	strbuf_release(&buf);
+
+	return dav_headers;
+}
+
 static void finish_request(struct transfer_request *request);
 static void release_request(struct transfer_request *request);
 
@@ -588,18 +619,12 @@ static int refresh_lock(struct remote_lock *lock)
 {
 	struct active_request_slot *slot;
 	struct slot_results results;
-	char *if_header;
-	char timeout_header[25];
-	struct curl_slist *dav_headers = NULL;
+	struct curl_slist *dav_headers;
 	int rc = 0;
 
 	lock->refreshing = 1;
 
-	if_header = xmalloc(strlen(lock->token) + 25);
-	sprintf(if_header, "If: (<%s>)", lock->token);
-	sprintf(timeout_header, "Timeout: Second-%ld", lock->timeout);
-	dav_headers = curl_slist_append(dav_headers, if_header);
-	dav_headers = curl_slist_append(dav_headers, timeout_header);
+	dav_headers = get_dav_token_headers(lock, DAV_HEADER_IF | DAV_HEADER_TIMEOUT);
 
 	slot = get_active_slot();
 	slot->results = &results;
@@ -622,7 +647,6 @@ static int refresh_lock(struct remote_lock *lock)
 
 	lock->refreshing = 0;
 	curl_slist_free_all(dav_headers);
-	free(if_header);
 
 	return rc;
 }
@@ -1303,14 +1327,10 @@ static int unlock_remote(struct remote_lock *lock)
 	struct active_request_slot *slot;
 	struct slot_results results;
 	struct remote_lock *prev = remote->locks;
-	char *lock_token_header;
-	struct curl_slist *dav_headers = NULL;
+	struct curl_slist *dav_headers;
 	int rc = 0;
 
-	lock_token_header = xmalloc(strlen(lock->token) + 31);
-	sprintf(lock_token_header, "Lock-Token: <%s>",
-		lock->token);
-	dav_headers = curl_slist_append(dav_headers, lock_token_header);
+	dav_headers = get_dav_token_headers(lock, DAV_HEADER_LOCK);
 
 	slot = get_active_slot();
 	slot->results = &results;
@@ -1331,7 +1351,6 @@ static int unlock_remote(struct remote_lock *lock)
 	}
 
 	curl_slist_free_all(dav_headers);
-	free(lock_token_header);
 
 	if (remote->locks == lock) {
 		remote->locks = lock->next;
@@ -1731,13 +1750,10 @@ static int update_remote(unsigned char *sha1, struct remote_lock *lock)
 {
 	struct active_request_slot *slot;
 	struct slot_results results;
-	char *if_header;
 	struct buffer out_buffer = { STRBUF_INIT, 0 };
-	struct curl_slist *dav_headers = NULL;
+	struct curl_slist *dav_headers;
 
-	if_header = xmalloc(strlen(lock->token) + 25);
-	sprintf(if_header, "If: (<%s>)", lock->token);
-	dav_headers = curl_slist_append(dav_headers, if_header);
+	dav_headers = get_dav_token_headers(lock, DAV_HEADER_IF);
 
 	strbuf_addf(&out_buffer.buf, "%s\n", sha1_to_hex(sha1));
 
@@ -1756,7 +1772,6 @@ static int update_remote(unsigned char *sha1, struct remote_lock *lock)
 	if (start_active_slot(slot)) {
 		run_active_slot(slot);
 		strbuf_release(&out_buffer.buf);
-		free(if_header);
 		if (results.curl_result != CURLE_OK) {
 			fprintf(stderr,
 				"PUT error: curl result=%d, HTTP code=%ld\n",
@@ -1766,7 +1781,6 @@ static int update_remote(unsigned char *sha1, struct remote_lock *lock)
 		}
 	} else {
 		strbuf_release(&out_buffer.buf);
-		free(if_header);
 		fprintf(stderr, "Unable to start PUT request\n");
 		return 0;
 	}
@@ -1948,15 +1962,12 @@ static void update_remote_info_refs(struct remote_lock *lock)
 	struct buffer buffer = { STRBUF_INIT, 0 };
 	struct active_request_slot *slot;
 	struct slot_results results;
-	char *if_header;
-	struct curl_slist *dav_headers = NULL;
+	struct curl_slist *dav_headers;
 
 	remote_ls("refs/", (PROCESS_FILES | RECURSIVE),
 		  add_remote_info_ref, &buffer.buf);
 	if (!aborted) {
-		if_header = xmalloc(strlen(lock->token) + 25);
-		sprintf(if_header, "If: (<%s>)", lock->token);
-		dav_headers = curl_slist_append(dav_headers, if_header);
+		dav_headers = get_dav_token_headers(lock, DAV_HEADER_IF);
 
 		slot = get_active_slot();
 		slot->results = &results;
@@ -1978,7 +1989,6 @@ static void update_remote_info_refs(struct remote_lock *lock)
 					results.curl_result, results.http_code);
 			}
 		}
-		free(if_header);
 	}
 	strbuf_release(&buffer.buf);
 }

From d456c9fd1e96c21ca32a224ed56ee34252605e34 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Sun, 25 Jan 2009 00:04:15 -0800
Subject: [PATCH 27/40] http-push.c: style fixes

b1c7d4a (http-push: refactor lock-related headers creation for curl
requests, 2009-01-24) had many style violations that slipped through.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 http-push.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/http-push.c b/http-push.c
index eca4a8e5e9..59037df502 100644
--- a/http-push.c
+++ b/http-push.c
@@ -184,21 +184,22 @@ enum dav_header_flag {
 	DAV_HEADER_TIMEOUT = (1u << 2)
 };
 
-static struct curl_slist *get_dav_token_headers(struct remote_lock *lock, enum dav_header_flag options) {
+static struct curl_slist *get_dav_token_headers(struct remote_lock *lock, enum dav_header_flag options)
+{
 	struct strbuf buf = STRBUF_INIT;
 	struct curl_slist *dav_headers = NULL;
 
-	if(options & DAV_HEADER_IF) {
+	if (options & DAV_HEADER_IF) {
 		strbuf_addf(&buf, "If: (<%s>)", lock->token);
 		dav_headers = curl_slist_append(dav_headers, buf.buf);
 		strbuf_reset(&buf);
 	}
-	if(options & DAV_HEADER_LOCK) {
+	if (options & DAV_HEADER_LOCK) {
 		strbuf_addf(&buf, "Lock-Token: <%s>", lock->token);
 		dav_headers = curl_slist_append(dav_headers, buf.buf);
 		strbuf_reset(&buf);
 	}
-	if(options & DAV_HEADER_TIMEOUT) {
+	if (options & DAV_HEADER_TIMEOUT) {
 		strbuf_addf(&buf, "Timeout: Second-%ld", lock->timeout);
 		dav_headers = curl_slist_append(dav_headers, buf.buf);
 		strbuf_reset(&buf);

From d6716c026666319824096401cd448b3694732d26 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Sat, 24 Jan 2009 17:17:55 +0100
Subject: [PATCH 28/40] Ignore test-ctype

Signed-off-by: Benjamin Kramer <benny.kra@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index d9adce585a..e8f91ce8cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -144,6 +144,7 @@ git-core-*/?*
 gitk-wish
 gitweb/gitweb.cgi
 test-chmtime
+test-ctype
 test-date
 test-delta
 test-dump-cache-tree

From 5c415311f743ccb11a50f350ff1c385778f049d6 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Sun, 25 Jan 2009 12:41:09 -0800
Subject: [PATCH 29/40] GIT 1.6.1.1

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/RelNotes-1.6.1.1.txt | 24 +++++++++++++++++-------
 GIT-VERSION-GEN                    |  2 +-
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/Documentation/RelNotes-1.6.1.1.txt b/Documentation/RelNotes-1.6.1.1.txt
index 5cd1ca9cc6..88454c1973 100644
--- a/Documentation/RelNotes-1.6.1.1.txt
+++ b/Documentation/RelNotes-1.6.1.1.txt
@@ -4,9 +4,14 @@ GIT v1.6.1.1 Release Notes
 Fixes since v1.6.1
 ------------------
 
+* "git add frotz/nitfol" when "frotz" is a submodule should have errored
+  out, but it didn't.
+
 * "git apply" took file modes from the patch text and updated the mode
   bits of the target tree even when the patch was not about mode changes.
 
+* "git bisect view" on Cygwin did not launch gitk
+
 * "git checkout $tree" did not trigger an error.
 
 * "git commit" tried to remove COMMIT_EDITMSG from the work tree by mistake.
@@ -14,6 +19,12 @@ Fixes since v1.6.1
 * "git describe --all" complained when a commit is described with a tag,
   which was nonsense.
 
+* "git diff --no-index --" did not trigger no-index (aka "use git-diff as
+  a replacement of diff on untracked files") behaviour.
+
+* "git format-patch -1 HEAD" on a root commit failed to produce patch
+  text.
+
 * "git fsck branch" did not work as advertised; instead it behaved the same
   way as "git fsck".
 
@@ -36,14 +47,13 @@ Fixes since v1.6.1
 
 * "git mv -k" with more than one errorneous paths misbehaved.
 
+* "git read-tree -m -u" hence branch switching incorrectly lost a
+  subdirectory in rare cases.
+
 * "git rebase -i" issued an unnecessary error message upon a user error of
   marking the first commit to be "squash"ed.
 
-Other documentation updates.
-
----
-exec >/var/tmp/1
-O=v1.6.1-60-g78f111e
-echo O=$(git describe maint)
-git shortlog --no-merges $O..maint
+* "git shortlog" did not format a commit message with multi-line
+  subject correctly.
 
+Many documentation updates.
diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN
index 6c7465c758..fe496e5fca 100755
--- a/GIT-VERSION-GEN
+++ b/GIT-VERSION-GEN
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=GIT-VERSION-FILE
-DEF_VER=v1.6.0.2.GIT
+DEF_VER=v1.6.1.1.GIT
 
 LF='
 '

From bf8a40b89eef66bc16902f7ea1a73352b8ba3d18 Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@yhbt.net>
Date: Sun, 25 Jan 2009 15:35:52 -0800
Subject: [PATCH 30/40] git-svn: fix memory leak when checking for empty
 symlinks

By enforcing SVN::Pool usage when calling get_file once again.

This regression was introduced with the reintroduction of
SVN::Ra::get_file() usage in
dbc6c74d0858d77e61e092a48d467e725211f8e9

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
 git-svn.perl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/git-svn.perl b/git-svn.perl
index d4cb538b93..5d39b391d1 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -4021,7 +4021,8 @@ my ($ra_invalid, $can_do_switch, %ignored_err, $RA);
 BEGIN {
 	# enforce temporary pool usage for some simple functions
 	no strict 'refs';
-	for my $f (qw/rev_proplist get_latest_revnum get_uuid get_repos_root/) {
+	for my $f (qw/rev_proplist get_latest_revnum get_uuid get_repos_root
+	              get_file/) {
 		my $SUPER = "SUPER::$f";
 		*$f = sub {
 			my $self = shift;

From edc662f929e1a2be3e350bf362cc9aed95f5250a Mon Sep 17 00:00:00 2001
From: "Vitaly \\\"_Vi\\\" Shukela" <public_vi@tut.by>
Date: Mon, 26 Jan 2009 00:21:40 +0200
Subject: [PATCH 31/40] git-svn: add --ignore-paths option for fetching

This will be useful when somebody want to checkout something partially from
repository with some non-standart layout or exclude some files from it.
Example: repository has structure /module-{a,b,c}/{trunk,branches,tags}/...
Modules are interdependent, and you want it to be single repostory (to commit
to all modules simultaneously and view complete history), but do not want
branches and tags be checked out into working copy.
Other use case is excluding some large blobs.

The quirk for now is that user must specify this option every fetch/rebase;
in other case he may get extra files or "file not found" errors. It may be
will be resolved by adding regular expression to .git/config into
[svn-remote ...] to make it persistent.

Signed-off-by: Vitaly "_Vi" Shukela <public_vi@tut.by>
Acked-by: Eric Wong <normalperson@yhbt.net>

[ew: replaced 4-space indent with tabs]
[ew: prefixed $ignore_regex with an underscore to be consistent
     with other globals in git-svn]
[ew: rearranged functions to minimize diff and removed prototype
     usage to be consistent with the rest of git-svn (and other
     Perl code in git (and they're ugly to me)]
---
 git-svn.perl | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index 5d39b391d1..79888a05c4 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -70,7 +70,8 @@ my ($_stdin, $_help, $_edit,
 $Git::SVN::_follow_parent = 1;
 my %remote_opts = ( 'username=s' => \$Git::SVN::Prompt::_username,
                     'config-dir=s' => \$Git::SVN::Ra::config_dir,
-                    'no-auth-cache' => \$Git::SVN::Prompt::_no_auth_cache );
+                    'no-auth-cache' => \$Git::SVN::Prompt::_no_auth_cache,
+                    'ignore-paths=s' => \$SVN::Git::Fetcher::_ignore_regex );
 my %fc_opts = ( 'follow-parent|follow!' => \$Git::SVN::_follow_parent,
 		'authors-file|A=s' => \$_authors,
 		'repack:i' => \$Git::SVN::_repack,
@@ -3245,6 +3246,7 @@ use warnings;
 use Carp qw/croak/;
 use File::Temp qw/tempfile/;
 use IO::File qw//;
+use vars qw/$_ignore_regex/;
 
 # file baton members: path, mode_a, mode_b, pool, fh, blob, base
 sub new {
@@ -3297,6 +3299,15 @@ sub in_dot_git {
 	$_[0] =~ m{(?:^|/)\.git(?:/|$)};
 }
 
+# return value: 0 -- don't ignore, 1 -- ignore
+sub is_path_ignored {
+	my ($path) = @_;
+	return 1 if in_dot_git($path);
+	return 0 unless defined($_ignore_regex);
+	return 1 if $path =~ m!$_ignore_regex!o;
+	return 0;
+}
+
 sub set_path_strip {
 	my ($self, $path) = @_;
 	$self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
@@ -3322,7 +3333,7 @@ sub git_path {
 
 sub delete_entry {
 	my ($self, $path, $rev, $pb) = @_;
-	return undef if in_dot_git($path);
+	return undef if is_path_ignored($path);
 
 	my $gpath = $self->git_path($path);
 	return undef if ($gpath eq '');
@@ -3352,7 +3363,7 @@ sub open_file {
 	my ($self, $path, $pb, $rev) = @_;
 	my ($mode, $blob);
 
-	goto out if in_dot_git($path);
+	goto out if is_path_ignored($path);
 
 	my $gpath = $self->git_path($path);
 	($mode, $blob) = (command('ls-tree', $self->{c}, '--', $gpath)
@@ -3372,7 +3383,7 @@ sub add_file {
 	my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
 	my $mode;
 
-	if (!in_dot_git($path)) {
+	if (!is_path_ignored($path)) {
 		my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
 		delete $self->{empty}->{$dir};
 		$mode = '100644';
@@ -3383,7 +3394,7 @@ sub add_file {
 
 sub add_directory {
 	my ($self, $path, $cp_path, $cp_rev) = @_;
-	goto out if in_dot_git($path);
+	goto out if is_path_ignored($path);
 	my $gpath = $self->git_path($path);
 	if ($gpath eq '') {
 		my ($ls, $ctx) = command_output_pipe(qw/ls-tree
@@ -3407,7 +3418,7 @@ out:
 
 sub change_dir_prop {
 	my ($self, $db, $prop, $value) = @_;
-	return undef if in_dot_git($db->{path});
+	return undef if is_path_ignored($db->{path});
 	$self->{dir_prop}->{$db->{path}} ||= {};
 	$self->{dir_prop}->{$db->{path}}->{$prop} = $value;
 	undef;
@@ -3415,7 +3426,7 @@ sub change_dir_prop {
 
 sub absent_directory {
 	my ($self, $path, $pb) = @_;
-	return undef if in_dot_git($pb->{path});
+	return undef if is_path_ignored($path);
 	$self->{absent_dir}->{$pb->{path}} ||= [];
 	push @{$self->{absent_dir}->{$pb->{path}}}, $path;
 	undef;
@@ -3423,7 +3434,7 @@ sub absent_directory {
 
 sub absent_file {
 	my ($self, $path, $pb) = @_;
-	return undef if in_dot_git($pb->{path});
+	return undef if is_path_ignored($path);
 	$self->{absent_file}->{$pb->{path}} ||= [];
 	push @{$self->{absent_file}->{$pb->{path}}}, $path;
 	undef;
@@ -3431,7 +3442,7 @@ sub absent_file {
 
 sub change_file_prop {
 	my ($self, $fb, $prop, $value) = @_;
-	return undef if in_dot_git($fb->{path});
+	return undef if is_path_ignored($fb->{path});
 	if ($prop eq 'svn:executable') {
 		if ($fb->{mode_b} != 120000) {
 			$fb->{mode_b} = defined $value ? 100755 : 100644;
@@ -3447,7 +3458,7 @@ sub change_file_prop {
 
 sub apply_textdelta {
 	my ($self, $fb, $exp) = @_;
-	return undef if (in_dot_git($fb->{path}));
+	return undef if is_path_ignored($fb->{path});
 	my $fh = $::_repository->temp_acquire('svn_delta');
 	# $fh gets auto-closed() by SVN::TxDelta::apply(),
 	# (but $base does not,) so dup() it for reading in close_file
@@ -3494,7 +3505,7 @@ sub apply_textdelta {
 
 sub close_file {
 	my ($self, $fb, $exp) = @_;
-	return undef if (in_dot_git($fb->{path}));
+	return undef if is_path_ignored($fb->{path});
 
 	my $hash;
 	my $path = $self->git_path($fb->{path});

From 6076b843a0a15d344f367ac46645eb864085477c Mon Sep 17 00:00:00 2001
From: "Vitaly \\\"_Vi\\\" Shukela" <public_vi@tut.by>
Date: Mon, 26 Jan 2009 00:21:41 +0200
Subject: [PATCH 32/40] git-svn: documented --ignore-paths

Documented --ignore-paths option of git-svn to inform users about
the feature and provide some examples.

Signed-off-by: Vitaly "_Vi" Shukela <public_vi@tut.by>
Acked-by: Eric Wong <normalperson@yhbt.net>

[ew: trailing whitespace removed]
---
 Documentation/git-svn.txt | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt
index 63d2f5e962..7b654f7928 100644
--- a/Documentation/git-svn.txt
+++ b/Documentation/git-svn.txt
@@ -103,6 +103,19 @@ repository to be able to interoperate with someone else's local Git
 repository, either don't use this option or you should both use it in
 the same local timezone.
 
+--ignore-paths=<regex>;;
+	This allows one to specify Perl regular expression that will
+	cause skipping of all matching paths from checkout from SVN.
+	Examples:
+
+	--ignore-paths="^doc" - skip "doc*" directory for every fetch.
+
+	--ignore-paths="^[^/]+/(?:branches|tags)" - skip "branches"
+	    and "tags" of first level directories.
+
+	Regular expression is not persistent, you should specify
+	it every time when fetching.
+
 'clone'::
 	Runs 'init' and 'fetch'.  It will automatically create a
 	directory based on the basename of the URL passed to it;

From 242522d9cc9b04c4db3baf7e97135d21401c1827 Mon Sep 17 00:00:00 2001
From: "Vitaly \\\"_Vi\\\" Shukela" <public_vi@tut.by>
Date: Mon, 26 Jan 2009 00:21:42 +0200
Subject: [PATCH 33/40] git-svn: Add test for --ignore-paths parameter

Added a test for this option, similar to (and based on) t9133 about
ignorance of .git directories

Signed-off-by: Vitaly "_Vi" Shukela <public_vi@tut.by>
Acked-by: Eric Wong <normalperson@yhbt.net>

[ew: replaced 'echo -e' with printf so it works on POSIX shells]
[ew: added Vitaly to copyright even though it's based on my test]
---
 t/t9134-git-svn-ignore-paths.sh | 98 +++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100755 t/t9134-git-svn-ignore-paths.sh

diff --git a/t/t9134-git-svn-ignore-paths.sh b/t/t9134-git-svn-ignore-paths.sh
new file mode 100755
index 0000000000..c4b5b8bcf7
--- /dev/null
+++ b/t/t9134-git-svn-ignore-paths.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+#
+# Copyright (c) 2009 Vitaly Shukela
+# Copyright (c) 2009 Eric Wong
+#
+
+test_description='git svn property tests'
+. ./lib-git-svn.sh
+
+test_expect_success 'setup test repository' '
+	svn co "$svnrepo" s &&
+	(
+		cd s &&
+		mkdir qqq www &&
+		echo test_qqq > qqq/test_qqq.txt &&
+		echo test_www > www/test_www.txt &&
+		svn add qqq &&
+		svn add www &&
+		svn commit -m "create some files" &&
+		svn up &&
+		echo hi >> www/test_www.txt &&
+		svn commit -m "modify www/test_www.txt" &&
+		svn up
+	)
+'
+
+test_expect_success 'clone an SVN repository with ignored www directory' '
+	git svn clone --ignore-paths="^www" "$svnrepo" g &&
+	echo test_qqq > expect &&
+	for i in g/*/*.txt; do cat $i >> expect2; done &&
+	test_cmp expect expect2
+'
+
+test_expect_success 'SVN-side change outside of www' '
+	(
+		cd s &&
+		echo b >> qqq/test_qqq.txt &&
+		svn commit -m "SVN-side change outside of www" &&
+		svn up &&
+		svn log -v | fgrep "SVN-side change outside of www"
+	)
+'
+
+test_expect_success 'update git svn-cloned repo' '
+	(
+		cd g &&
+		git svn rebase --ignore-paths="^www" &&
+		printf "test_qqq\nb\n" > expect &&
+		for i in */*.txt; do cat $i >> expect2; done &&
+		test_cmp expect2 expect &&
+		rm expect expect2
+	)
+'
+
+test_expect_success 'SVN-side change inside of ignored www' '
+	(
+		cd s &&
+		echo zaq >> www/test_www.txt
+		svn commit -m "SVN-side change inside of www/test_www.txt" &&
+		svn up &&
+		svn log -v | fgrep "SVN-side change inside of www/test_www.txt"
+	)
+'
+
+test_expect_success 'update git svn-cloned repo' '
+	(
+		cd g &&
+		git svn rebase --ignore-paths="^www" &&
+		printf "test_qqq\nb\n" > expect &&
+		for i in */*.txt; do cat $i >> expect2; done &&
+		test_cmp expect2 expect &&
+		rm expect expect2
+	)
+'
+
+test_expect_success 'SVN-side change in and out of ignored www' '
+	(
+		cd s &&
+		echo cvf >> www/test_www.txt
+		echo ygg >> qqq/test_qqq.txt
+		svn commit -m "SVN-side change in and out of ignored www" &&
+		svn up &&
+		svn log -v | fgrep "SVN-side change in and out of ignored www"
+	)
+'
+
+test_expect_success 'update git svn-cloned repo again' '
+	(
+		cd g &&
+		git svn rebase --ignore-paths="^www" &&
+		printf "test_qqq\nb\nygg\n" > expect &&
+		for i in */*.txt; do cat $i >> expect2; done &&
+		test_cmp expect2 expect &&
+		rm expect expect2
+	)
+'
+
+test_done

From ec74042dc7d81238d2c6d898331ff3b7e12a5d75 Mon Sep 17 00:00:00 2001
From: Teemu Likonen <tlikonen@iki.fi>
Date: Sun, 25 Jan 2009 14:20:22 +0200
Subject: [PATCH 34/40] diff-options.txt: Fix asciidoc markup issue

Must be "--patience::", not "--patience:".

Signed-off-by: Teemu Likonen <tlikonen@iki.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 1f8ce979bb..a8ed23e7f8 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -36,7 +36,7 @@ endif::git-format-patch[]
 --patch-with-raw::
 	Synonym for "-p --raw".
 
---patience:
+--patience::
 	Generate a diff using the "patience diff" algorithm.
 
 --stat[=width[,name-width]]::

From c30e5673f9614caebf020f934f435db179bd53d4 Mon Sep 17 00:00:00 2001
From: Miklos Vajna <vmiklos@frugalware.org>
Date: Fri, 23 Jan 2009 19:02:29 +0100
Subject: [PATCH 35/40] gittutorial: remove misleading note

In the tutorial Alice initializes the repository, and Bob clones it. So
Bob can just do a 'git pull', but Alice will need 'git pull <url>
<branch>'.

The note suggested that the branch parameter is not necessary, which is
no longer true these days.

Signed-off-by: Miklos Vajna <vmiklos@frugalware.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/gittutorial.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Documentation/gittutorial.txt b/Documentation/gittutorial.txt
index 458fafdb2c..c5d5596d89 100644
--- a/Documentation/gittutorial.txt
+++ b/Documentation/gittutorial.txt
@@ -308,9 +308,7 @@ alice$ git pull /home/bob/myrepo master
 
 This merges the changes from Bob's "master" branch into Alice's
 current branch.  If Alice has made her own changes in the meantime,
-then she may need to manually fix any conflicts.  (Note that the
-"master" argument in the above command is actually unnecessary, as it
-is the default.)
+then she may need to manually fix any conflicts.
 
 The "pull" command thus performs two operations: it fetches changes
 from a remote branch, then merges them into the current branch.

From 277d7e91ae6afbe1b7aa7496320d43fd3f59f7ec Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Mon, 26 Jan 2009 00:32:13 +0100
Subject: [PATCH 36/40] rebase -i --root: fix check for number of arguments

If we are not rebasing with --root, then $# can only be either 1 (base)
or 2 (base and the name of the branch to be rebased).

If we are rebasing with --root, then it is Ok if $# is 0 (rebase the
current branch down to everything) or 1 (rebase the named branch down to
everything).

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Acked-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-rebase--interactive.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh
index 21ac20c305..002929eb09 100755
--- a/git-rebase--interactive.sh
+++ b/git-rebase--interactive.sh
@@ -571,7 +571,8 @@ first and then run 'git rebase --continue' again."
 		;;
 	--)
 		shift
-		test ! -z "$REBASE_ROOT" -o $# -eq 1 -o $# -eq 2 || usage
+		test -z "$REBASE_ROOT" -a $# -ge 1 -a $# -le 2 ||
+		test ! -z "$REBASE_ROOT" -a $# -le 1 || usage
 		test -d "$DOTEST" &&
 			die "Interactive rebase already started"
 

From dfb047b9e4f7f66c5322ef642f21fd92b0a975e3 Mon Sep 17 00:00:00 2001
From: Nanako Shiraishi <nanako3@lavabit.com>
Date: Mon, 26 Jan 2009 17:32:22 +0900
Subject: [PATCH 37/40] Mention "local convention" rule in the CodingGuidelines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The document suggests to imitate the existing code, but didn't
say which existing code it should imitate. This clarifies.

Signed-off-by: しらいしななこ <nanako3@lavabit.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/CodingGuidelines | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Documentation/CodingGuidelines b/Documentation/CodingGuidelines
index f628c1f3b7..0d7fa9cca9 100644
--- a/Documentation/CodingGuidelines
+++ b/Documentation/CodingGuidelines
@@ -21,8 +21,13 @@ code.  For git in general, three rough rules are:
 
 As for more concrete guidelines, just imitate the existing code
 (this is a good guideline, no matter which project you are
-contributing to).  But if you must have a list of rules,
-here they are.
+contributing to). It is always preferable to match the _local_
+convention. New code added to git suite is expected to match
+the overall style of existing code. Modifications to existing
+code is expected to match the style the surrounding code already
+uses (even if it doesn't match the overall style of existing code).
+
+But if you must have a list of rules, here they are.
 
 For shell scripts specifically (not exhaustive):
 

From f7d9d04e3be697989c960881f9b0d50f1ea6a6be Mon Sep 17 00:00:00 2001
From: Ted Pavlic <ted@tedpavlic.com>
Date: Mon, 26 Jan 2009 14:03:59 -0500
Subject: [PATCH 38/40] make: Remove -pthread on Darwin (it is included by
 cstdlib).

As discussed in

http://lists.apple.com/archives/Unix-porting/2005/Mar/msg00019.html

the Mac OS X C standard library is always thread safe and always
includes the pthread library. So explicitly using -pthread causes an
'unrecognized option' compiler warning.

This patch clears PTHREAD_LIBS if Darwin is detected.

Signed-off-by: Ted Pavlic <ted@tedpavlic.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index b4d9cb4949..9d451cf3e5 100644
--- a/Makefile
+++ b/Makefile
@@ -817,6 +817,7 @@ ifeq ($(uname_S),Darwin)
 			BASIC_LDFLAGS += -L/opt/local/lib
 		endif
 	endif
+	PTHREAD_LIBS =
 endif
 
 ifndef CC_LD_DYNPATH

From a6c7a27691bd8b059d39c9bc71b19fb8e614fa54 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Mon, 26 Jan 2009 10:05:22 +0100
Subject: [PATCH 39/40] rebase -i: correctly remember --root flag across
 --continue

d911d14 (rebase -i: learn to rebase root commit, 2009-01-02) tried to
remember the --root flag across a merge conflict in a broken way.
Introduce a flag file $DOTEST/rebase-root to fix and clarify.

While at it, also make sure $UPSTREAM is always initialized to guard
against existing values in the environment.

[tr: added tests]

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-rebase--interactive.sh | 10 +++-
 t/t3412-rebase-root.sh     | 94 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh
index 002929eb09..1438650ae8 100755
--- a/git-rebase--interactive.sh
+++ b/git-rebase--interactive.sh
@@ -456,7 +456,7 @@ get_saved_options () {
 	test -d "$REWRITTEN" && PRESERVE_MERGES=t
 	test -f "$DOTEST"/strategy && STRATEGY="$(cat "$DOTEST"/strategy)"
 	test -f "$DOTEST"/verbose && VERBOSE=t
-	test ! -s "$DOTEST"/upstream && REBASE_ROOT=t
+	test -f "$DOTEST"/rebase-root && REBASE_ROOT=t
 }
 
 while test $# != 0
@@ -586,6 +586,7 @@ first and then run 'git rebase --continue' again."
 			test -z "$ONTO" && ONTO=$UPSTREAM
 			shift
 		else
+			UPSTREAM=
 			UPSTREAM_ARG=--root
 			test -z "$ONTO" &&
 				die "You must specify --onto when using --root"
@@ -612,7 +613,12 @@ first and then run 'git rebase --continue' again."
 			echo "detached HEAD" > "$DOTEST"/head-name
 
 		echo $HEAD > "$DOTEST"/head
-		echo $UPSTREAM > "$DOTEST"/upstream
+		case "$REBASE_ROOT" in
+		'')
+			rm -f "$DOTEST"/rebase-root ;;
+		*)
+			: >"$DOTEST"/rebase-root ;;
+		esac
 		echo $ONTO > "$DOTEST"/onto
 		test -z "$STRATEGY" || echo "$STRATEGY" > "$DOTEST"/strategy
 		test t = "$VERBOSE" && : > "$DOTEST"/verbose
diff --git a/t/t3412-rebase-root.sh b/t/t3412-rebase-root.sh
index 6359580262..ef6f7a54ae 100755
--- a/t/t3412-rebase-root.sh
+++ b/t/t3412-rebase-root.sh
@@ -184,4 +184,98 @@ test_expect_success 'pre-rebase hook stops rebase -i' '
 	test 0 = $(git rev-list other...stops2 | wc -l)
 '
 
+test_expect_success 'remove pre-rebase hook' '
+	rm -f .git/hooks/pre-rebase
+'
+
+test_expect_success 'set up a conflict' '
+	git checkout master &&
+	echo conflict > B &&
+	git add B &&
+	git commit -m conflict
+'
+
+test_expect_success 'rebase --root with conflict (first part)' '
+	git checkout -b conflict1 other &&
+	test_must_fail git rebase --root --onto master &&
+	git ls-files -u | grep "B$"
+'
+
+test_expect_success 'fix the conflict' '
+	echo 3 > B &&
+	git add B
+'
+
+cat > expect-conflict <<EOF
+6
+5
+4
+3
+conflict
+2
+1
+EOF
+
+test_expect_success 'rebase --root with conflict (second part)' '
+	git rebase --continue &&
+	git log --pretty=tformat:"%s" > conflict1 &&
+	test_cmp expect-conflict conflict1
+'
+
+test_expect_success 'rebase -i --root with conflict (first part)' '
+	git checkout -b conflict2 other &&
+	GIT_EDITOR=: test_must_fail git rebase -i --root --onto master &&
+	git ls-files -u | grep "B$"
+'
+
+test_expect_success 'fix the conflict' '
+	echo 3 > B &&
+	git add B
+'
+
+test_expect_success 'rebase -i --root with conflict (second part)' '
+	git rebase --continue &&
+	git log --pretty=tformat:"%s" > conflict2 &&
+	test_cmp expect-conflict conflict2
+'
+
+cat >expect-conflict-p <<\EOF
+commit conflict3 conflict3~1 conflict3^2
+Merge branch 'third' into other
+commit conflict3^2 conflict3~4
+6
+commit conflict3~1 conflict3~2 conflict3~1^2
+Merge branch 'side' into other
+commit conflict3~1^2 conflict3~3
+5
+commit conflict3~2 conflict3~3
+4
+commit conflict3~3 conflict3~4
+3
+commit conflict3~4 conflict3~5
+conflict
+commit conflict3~5 conflict3~6
+2
+commit conflict3~6
+1
+EOF
+
+test_expect_success 'rebase -i -p --root with conflict (first part)' '
+	git checkout -b conflict3 other &&
+	GIT_EDITOR=: test_must_fail git rebase -i -p --root --onto master &&
+	git ls-files -u | grep "B$"
+'
+
+test_expect_success 'fix the conflict' '
+	echo 3 > B &&
+	git add B
+'
+
+test_expect_success 'rebase -i -p --root with conflict (second part)' '
+	git rebase --continue &&
+	git rev-list --topo-order --parents --pretty="tformat:%s" HEAD |
+	git name-rev --stdin --name-only --refs=refs/heads/conflict3 >out &&
+	test_cmp expect-conflict-p out
+'
+
 test_done

From 2d6061537f3f27112f80a6e1047b5c566084ef7d Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Mon, 26 Jan 2009 21:13:00 -0800
Subject: [PATCH 40/40] tests: Avoid single-shot environment export for shell
 function invocation

Some shells have issues with a single-shot environment variable export
when invoking a shell function.  This fixes the ones I found that invoke
test_must_fail that way.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t3409-rebase-hook.sh |  6 +++++-
 t/t3412-rebase-root.sh | 24 ++++++++++++++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/t/t3409-rebase-hook.sh b/t/t3409-rebase-hook.sh
index 1f1b850677..098b75507b 100755
--- a/t/t3409-rebase-hook.sh
+++ b/t/t3409-rebase-hook.sh
@@ -118,7 +118,11 @@ test_expect_success 'pre-rebase hook stops rebase (1)' '
 test_expect_success 'pre-rebase hook stops rebase (2)' '
 	git checkout test &&
 	git reset --hard side &&
-	EDITOR=true test_must_fail git rebase -i master &&
+	(
+		EDITOR=:
+		export EDITOR
+		test_must_fail git rebase -i master
+	) &&
 	test "z$(git symbolic-ref HEAD)" = zrefs/heads/test &&
 	test 0 = $(git rev-list HEAD...side | wc -l)
 '
diff --git a/t/t3412-rebase-root.sh b/t/t3412-rebase-root.sh
index ef6f7a54ae..3d8ff674c0 100755
--- a/t/t3412-rebase-root.sh
+++ b/t/t3412-rebase-root.sh
@@ -172,14 +172,22 @@ EOF
 
 test_expect_success 'pre-rebase hook stops rebase' '
 	git checkout -b stops1 other &&
-	GIT_EDITOR=: test_must_fail git rebase --root --onto master &&
+	(
+		GIT_EDITOR=:
+		export GIT_EDITOR
+		test_must_fail git rebase --root --onto master
+	) &&
 	test "z$(git symbolic-ref HEAD)" = zrefs/heads/stops1
 	test 0 = $(git rev-list other...stops1 | wc -l)
 '
 
 test_expect_success 'pre-rebase hook stops rebase -i' '
 	git checkout -b stops2 other &&
-	GIT_EDITOR=: test_must_fail git rebase --root --onto master &&
+	(
+		GIT_EDITOR=:
+		export GIT_EDITOR
+		test_must_fail git rebase --root --onto master
+	) &&
 	test "z$(git symbolic-ref HEAD)" = zrefs/heads/stops2
 	test 0 = $(git rev-list other...stops2 | wc -l)
 '
@@ -224,7 +232,11 @@ test_expect_success 'rebase --root with conflict (second part)' '
 
 test_expect_success 'rebase -i --root with conflict (first part)' '
 	git checkout -b conflict2 other &&
-	GIT_EDITOR=: test_must_fail git rebase -i --root --onto master &&
+	(
+		GIT_EDITOR=:
+		export GIT_EDITOR
+		test_must_fail git rebase -i --root --onto master
+	) &&
 	git ls-files -u | grep "B$"
 '
 
@@ -262,7 +274,11 @@ EOF
 
 test_expect_success 'rebase -i -p --root with conflict (first part)' '
 	git checkout -b conflict3 other &&
-	GIT_EDITOR=: test_must_fail git rebase -i -p --root --onto master &&
+	(
+		GIT_EDITOR=:
+		export GIT_EDITOR
+		test_must_fail git rebase -i -p --root --onto master
+	) &&
 	git ls-files -u | grep "B$"
 '