From 92b7de93fb7801570ddc3195f03f30b9c201a3bd Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 7 Jan 2009 18:04:09 +0100 Subject: [PATCH 01/40] Implement the patience diff algorithm The patience diff algorithm produces slightly more intuitive output than the classic Myers algorithm, as it does not try to minimize the number of +/- lines first, but tries to preserve the lines that are unique. To this end, it first determines lines that are unique in both files, then the maximal sequence which preserves the order (relative to both files) is extracted. Starting from this initial set of common lines, the rest of the lines is handled recursively, with Myers' algorithm as a fallback when the patience algorithm fails (due to no common unique lines). This patch includes memory leak fixes by Pierre Habouzit. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- xdiff/xdiff.h | 1 + xdiff/xdiffi.c | 3 + xdiff/xdiffi.h | 2 + xdiff/xpatience.c | 381 ++++++++++++++++++++++++++++++++++++++++++++++ xdiff/xprepare.c | 3 +- 5 files changed, 389 insertions(+), 1 deletion(-) create mode 100644 xdiff/xpatience.c diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 84fff583e2..d238c8263e 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -32,6 +32,7 @@ extern "C" { #define XDF_IGNORE_WHITESPACE (1 << 2) #define XDF_IGNORE_WHITESPACE_CHANGE (1 << 3) #define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 4) +#define XDF_PATIENCE_DIFF (1 << 5) #define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL) #define XDL_PATCH_NORMAL '-' diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 9d0324a38c..3e97462bdd 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -329,6 +329,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdalgoenv_t xenv; diffdata_t dd1, dd2; + if (xpp->flags & XDF_PATIENCE_DIFF) + return xdl_do_patience_diff(mf1, mf2, xpp, xe); + if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) { return -1; diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h index 3e099dc445..ad033a8e6a 100644 --- a/xdiff/xdiffi.h +++ b/xdiff/xdiffi.h @@ -55,5 +55,7 @@ int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); void xdl_free_script(xdchange_t *xscr); int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, xdemitconf_t const *xecfg); +int xdl_do_patience_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *env); #endif /* #if !defined(XDIFFI_H) */ diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c new file mode 100644 index 0000000000..e42c16a807 --- /dev/null +++ b/xdiff/xpatience.c @@ -0,0 +1,381 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003-2009 Davide Libenzi, Johannes E. Schindelin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi + * + */ +#include "xinclude.h" +#include "xtypes.h" +#include "xdiff.h" + +/* + * The basic idea of patience diff is to find lines that are unique in + * both files. These are intuitively the ones that we want to see as + * common lines. + * + * The maximal ordered sequence of such line pairs (where ordered means + * that the order in the sequence agrees with the order of the lines in + * both files) naturally defines an initial set of common lines. + * + * Now, the algorithm tries to extend the set of common lines by growing + * the line ranges where the files have identical lines. + * + * Between those common lines, the patience diff algorithm is applied + * recursively, until no unique line pairs can be found; these line ranges + * are handled by the well-known Myers algorithm. + */ + +#define NON_UNIQUE ULONG_MAX + +/* + * This is a hash mapping from line hash to line numbers in the first and + * second file. + */ +struct hashmap { + int nr, alloc; + struct entry { + unsigned long hash; + /* + * 0 = unused entry, 1 = first line, 2 = second, etc. + * line2 is NON_UNIQUE if the line is not unique + * in either the first or the second file. + */ + unsigned long line1, line2; + /* + * "next" & "previous" are used for the longest common + * sequence; + * initially, "next" reflects only the order in file1. + */ + struct entry *next, *previous; + } *entries, *first, *last; + /* were common records found? */ + unsigned long has_matches; + mmfile_t *file1, *file2; + xdfenv_t *env; + xpparam_t const *xpp; +}; + +/* The argument "pass" is 1 for the first file, 2 for the second. */ +static void insert_record(int line, struct hashmap *map, int pass) +{ + xrecord_t **records = pass == 1 ? + map->env->xdf1.recs : map->env->xdf2.recs; + xrecord_t *record = records[line - 1], *other; + /* + * After xdl_prepare_env() (or more precisely, due to + * xdl_classify_record()), the "ha" member of the records (AKA lines) + * is _not_ the hash anymore, but a linearized version of it. In + * other words, the "ha" member is guaranteed to start with 0 and + * the second record's ha can only be 0 or 1, etc. + * + * So we multiply ha by 2 in the hope that the hashing was + * "unique enough". + */ + int index = (int)((record->ha << 1) % map->alloc); + + while (map->entries[index].line1) { + other = map->env->xdf1.recs[map->entries[index].line1 - 1]; + if (map->entries[index].hash != record->ha || + !xdl_recmatch(record->ptr, record->size, + other->ptr, other->size, + map->xpp->flags)) { + if (++index >= map->alloc) + index = 0; + continue; + } + if (pass == 2) + map->has_matches = 1; + if (pass == 1 || map->entries[index].line2) + map->entries[index].line2 = NON_UNIQUE; + else + map->entries[index].line2 = line; + return; + } + if (pass == 2) + return; + map->entries[index].line1 = line; + map->entries[index].hash = record->ha; + if (!map->first) + map->first = map->entries + index; + if (map->last) { + map->last->next = map->entries + index; + map->entries[index].previous = map->last; + } + map->last = map->entries + index; + map->nr++; +} + +/* + * This function has to be called for each recursion into the inter-hunk + * parts, as previously non-unique lines can become unique when being + * restricted to a smaller part of the files. + * + * It is assumed that env has been prepared using xdl_prepare(). + */ +static int fill_hashmap(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env, + struct hashmap *result, + int line1, int count1, int line2, int count2) +{ + result->file1 = file1; + result->file2 = file2; + result->xpp = xpp; + result->env = env; + + /* We know exactly how large we want the hash map */ + result->alloc = count1 * 2; + result->entries = (struct entry *) + xdl_malloc(result->alloc * sizeof(struct entry)); + if (!result->entries) + return -1; + memset(result->entries, 0, result->alloc * sizeof(struct entry)); + + /* First, fill with entries from the first file */ + while (count1--) + insert_record(line1++, result, 1); + + /* Then search for matches in the second file */ + while (count2--) + insert_record(line2++, result, 2); + + return 0; +} + +/* + * Find the longest sequence with a smaller last element (meaning a smaller + * line2, as we construct the sequence with entries ordered by line1). + */ +static int binary_search(struct entry **sequence, int longest, + struct entry *entry) +{ + int left = -1, right = longest; + + while (left + 1 < right) { + int middle = (left + right) / 2; + /* by construction, no two entries can be equal */ + if (sequence[middle]->line2 > entry->line2) + right = middle; + else + left = middle; + } + /* return the index in "sequence", _not_ the sequence length */ + return left; +} + +/* + * The idea is to start with the list of common unique lines sorted by + * the order in file1. For each of these pairs, the longest (partial) + * sequence whose last element's line2 is smaller is determined. + * + * For efficiency, the sequences are kept in a list containing exactly one + * item per sequence length: the sequence with the smallest last + * element (in terms of line2). + */ +static struct entry *find_longest_common_sequence(struct hashmap *map) +{ + struct entry **sequence = xdl_malloc(map->nr * sizeof(struct entry *)); + int longest = 0, i; + struct entry *entry; + + for (entry = map->first; entry; entry = entry->next) { + if (!entry->line2 || entry->line2 == NON_UNIQUE) + continue; + i = binary_search(sequence, longest, entry); + entry->previous = i < 0 ? NULL : sequence[i]; + sequence[++i] = entry; + if (i == longest) + longest++; + } + + /* No common unique lines were found */ + if (!longest) { + xdl_free(sequence); + return NULL; + } + + /* Iterate starting at the last element, adjusting the "next" members */ + entry = sequence[longest - 1]; + entry->next = NULL; + while (entry->previous) { + entry->previous->next = entry; + entry = entry->previous; + } + xdl_free(sequence); + return entry; +} + +static int match(struct hashmap *map, int line1, int line2) +{ + xrecord_t *record1 = map->env->xdf1.recs[line1 - 1]; + xrecord_t *record2 = map->env->xdf2.recs[line2 - 1]; + return xdl_recmatch(record1->ptr, record1->size, + record2->ptr, record2->size, map->xpp->flags); +} + +static int patience_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2); + +static int walk_common_sequence(struct hashmap *map, struct entry *first, + int line1, int count1, int line2, int count2) +{ + int end1 = line1 + count1, end2 = line2 + count2; + int next1, next2; + + for (;;) { + /* Try to grow the line ranges of common lines */ + if (first) { + next1 = first->line1; + next2 = first->line2; + while (next1 > line1 && next2 > line2 && + match(map, next1 - 1, next2 - 1)) { + next1--; + next2--; + } + } else { + next1 = end1; + next2 = end2; + } + while (line1 < next1 && line2 < next2 && + match(map, line1, line2)) { + line1++; + line2++; + } + + /* Recurse */ + if (next1 > line1 || next2 > line2) { + struct hashmap submap; + + memset(&submap, 0, sizeof(submap)); + if (patience_diff(map->file1, map->file2, + map->xpp, map->env, + line1, next1 - line1, + line2, next2 - line2)) + return -1; + } + + if (!first) + return 0; + + while (first->next && + first->next->line1 == first->line1 + 1 && + first->next->line2 == first->line2 + 1) + first = first->next; + + line1 = first->line1 + 1; + line2 = first->line2 + 1; + + first = first->next; + } +} + +static int fall_back_to_classic_diff(struct hashmap *map, + int line1, int count1, int line2, int count2) +{ + /* + * This probably does not work outside Git, since + * we have a very simple mmfile structure. + * + * Note: ideally, we would reuse the prepared environment, but + * the libxdiff interface does not (yet) allow for diffing only + * ranges of lines instead of the whole files. + */ + mmfile_t subfile1, subfile2; + xpparam_t xpp; + xdfenv_t env; + + subfile1.ptr = (char *)map->env->xdf1.recs[line1 - 1]->ptr; + subfile1.size = map->env->xdf1.recs[line1 + count1 - 2]->ptr + + map->env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; + subfile2.ptr = (char *)map->env->xdf2.recs[line2 - 1]->ptr; + subfile2.size = map->env->xdf2.recs[line2 + count2 - 2]->ptr + + map->env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; + xpp.flags = map->xpp->flags & ~XDF_PATIENCE_DIFF; + if (xdl_do_diff(&subfile1, &subfile2, &xpp, &env) < 0) + return -1; + + memcpy(map->env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1); + memcpy(map->env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2); + + xdl_free_env(&env); + + return 0; +} + +/* + * Recursively find the longest common sequence of unique lines, + * and if none was found, ask xdl_do_diff() to do the job. + * + * This function assumes that env was prepared with xdl_prepare_env(). + */ +static int patience_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2) +{ + struct hashmap map; + struct entry *first; + int result = 0; + + /* trivial case: one side is empty */ + if (!count1) { + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + return 0; + } else if (!count2) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + return 0; + } + + memset(&map, 0, sizeof(map)); + if (fill_hashmap(file1, file2, xpp, env, &map, + line1, count1, line2, count2)) + return -1; + + /* are there any matching lines at all? */ + if (!map.has_matches) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + xdl_free(map.entries); + return 0; + } + + first = find_longest_common_sequence(&map); + if (first) + result = walk_common_sequence(&map, first, + line1, count1, line2, count2); + else + result = fall_back_to_classic_diff(&map, + line1, count1, line2, count2); + + xdl_free(map.entries); + return result; +} + +int xdl_do_patience_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env) +{ + if (xdl_prepare_env(file1, file2, xpp, env) < 0) + return -1; + + /* environment is cleaned up in xdl_diff() */ + return patience_diff(file1, file2, xpp, env, + 1, env->xdf1.nrec, 1, env->xdf2.nrec); +} diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index a43aa72cd0..1689085235 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -290,7 +290,8 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdl_free_classifier(&cf); - if (xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) { + if (!(xpp->flags & XDF_PATIENCE_DIFF) && + xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) { xdl_free_ctx(&xe->xdf2); xdl_free_ctx(&xe->xdf1); From 34292bddb861f3cb52a524fdce67234430a744fe Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 1 Jan 2009 17:39:17 +0100 Subject: [PATCH 02/40] Introduce the diff option '--patience' This commit teaches Git to produce diff output using the patience diff algorithm with the diff option '--patience'. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 3 + Makefile | 2 +- diff.c | 2 + t/t4033-diff-patience.sh | 168 +++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 1 deletion(-) create mode 100755 t/t4033-diff-patience.sh diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index c62b45cdba..808bf87277 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -40,6 +40,9 @@ endif::git-format-patch[] --patch-with-raw:: Synonym for "-p --raw". +--patience: + Generate a diff using the "patience diff" algorithm. + --stat[=width[,name-width]]:: Generate a diffstat. You can override the default output width for 80-column terminal by "--stat=width". diff --git a/Makefile b/Makefile index aabf0130b9..33e6fa403e 100644 --- a/Makefile +++ b/Makefile @@ -1287,7 +1287,7 @@ $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ - xdiff/xmerge.o + xdiff/xmerge.o xdiff/xpatience.o $(XDIFF_OBJS): xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h diff --git a/diff.c b/diff.c index 0484601f42..4aeab7746c 100644 --- a/diff.c +++ b/diff.c @@ -2471,6 +2471,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) options->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE; else if (!strcmp(arg, "--ignore-space-at-eol")) options->xdl_opts |= XDF_IGNORE_WHITESPACE_AT_EOL; + else if (!strcmp(arg, "--patience")) + options->xdl_opts |= XDF_PATIENCE_DIFF; /* flags options */ else if (!strcmp(arg, "--binary")) { diff --git a/t/t4033-diff-patience.sh b/t/t4033-diff-patience.sh new file mode 100755 index 0000000000..1eb14989df --- /dev/null +++ b/t/t4033-diff-patience.sh @@ -0,0 +1,168 @@ +#!/bin/sh + +test_description='patience diff algorithm' + +. ./test-lib.sh + +cat >file1 <<\EOF +#include + +// Frobs foo heartily +int frobnitz(int foo) +{ + int i; + for(i = 0; i < 10; i++) + { + printf("Your answer is: "); + printf("%d\n", foo); + } +} + +int fact(int n) +{ + if(n > 1) + { + return fact(n-1) * n; + } + return 1; +} + +int main(int argc, char **argv) +{ + frobnitz(fact(10)); +} +EOF + +cat >file2 <<\EOF +#include + +int fib(int n) +{ + if(n > 2) + { + return fib(n-1) + fib(n-2); + } + return 1; +} + +// Frobs foo heartily +int frobnitz(int foo) +{ + int i; + for(i = 0; i < 10; i++) + { + printf("%d\n", foo); + } +} + +int main(int argc, char **argv) +{ + frobnitz(fib(10)); +} +EOF + +cat >expect <<\EOF +diff --git a/file1 b/file2 +index 6faa5a3..e3af329 100644 +--- a/file1 ++++ b/file2 +@@ -1,26 +1,25 @@ + #include + ++int fib(int n) ++{ ++ if(n > 2) ++ { ++ return fib(n-1) + fib(n-2); ++ } ++ return 1; ++} ++ + // Frobs foo heartily + int frobnitz(int foo) + { + int i; + for(i = 0; i < 10; i++) + { +- printf("Your answer is: "); + printf("%d\n", foo); + } + } + +-int fact(int n) +-{ +- if(n > 1) +- { +- return fact(n-1) * n; +- } +- return 1; +-} +- + int main(int argc, char **argv) + { +- frobnitz(fact(10)); ++ frobnitz(fib(10)); + } +EOF + +test_expect_success 'patience diff' ' + + test_must_fail git diff --no-index --patience file1 file2 > output && + test_cmp expect output + +' + +test_expect_success 'patience diff output is valid' ' + + mv file2 expect && + git apply < output && + test_cmp expect file2 + +' + +cat >uniq1 <<\EOF +1 +2 +3 +4 +5 +6 +EOF + +cat >uniq2 <<\EOF +a +b +c +d +e +f +EOF + +cat >expect <<\EOF +diff --git a/uniq1 b/uniq2 +index b414108..0fdf397 100644 +--- a/uniq1 ++++ b/uniq2 +@@ -1,6 +1,6 @@ +-1 +-2 +-3 +-4 +-5 +-6 ++a ++b ++c ++d ++e ++f +EOF + +test_expect_success 'completely different files' ' + + test_must_fail git diff --no-index --patience uniq1 uniq2 > output && + test_cmp expect output + +' + +test_done From cc545709253fe8440db2648cb5c771e5b126bdb5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 1 Jan 2009 17:39:37 +0100 Subject: [PATCH 03/40] bash completions: Add the --patience option Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index e00454983e..b98d765deb 100755 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -776,6 +776,7 @@ _git_diff () --no-ext-diff --no-prefix --src-prefix= --dst-prefix= --base --ours --theirs + --patience " return ;; @@ -967,6 +968,7 @@ _git_log () --color-words --walk-reflogs --parents --children --full-history --merge + --patience " return ;; From 1c7c1d179e51f163c014353f33b406f5bae13922 Mon Sep 17 00:00:00 2001 From: Clemens Buchacher Date: Wed, 14 Jan 2009 15:54:34 +0100 Subject: [PATCH 04/40] clean up pathspec matching If pathspec already matched exactly, it cannot match any more. Originally, we had to continue anyways, because we did not differentiate between exact, recursive and globbing matches. Signed-off-by: Clemens Buchacher Signed-off-by: Junio C Hamano --- dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 0131983dfb..8b0e0fbaf7 100644 --- a/dir.c +++ b/dir.c @@ -118,7 +118,7 @@ int match_pathspec(const char **pathspec, const char *name, int namelen, int pre for (retval = 0; (match = *pathspec++) != NULL; seen++) { int how; - if (retval && *seen == MATCHED_EXACTLY) + if (*seen == MATCHED_EXACTLY) continue; match += prefix; how = match_one(match, name, namelen); From 0b50922abffb82c473182b03eb5bb47a978cceac Mon Sep 17 00:00:00 2001 From: Clemens Buchacher Date: Wed, 14 Jan 2009 15:54:35 +0100 Subject: [PATCH 05/40] remove pathspec_match, use match_pathspec instead Both versions have the same functionality. This removes any redundancy. This also adds makes two extensions to match_pathspec: - If pathspec is NULL, return 1. This reflects the behavior of git commands, for which no paths usually means "match all paths". - If seen is NULL, do not use it. Signed-off-by: Clemens Buchacher Signed-off-by: Junio C Hamano --- builtin-checkout.c | 6 +++--- builtin-commit.c | 2 +- builtin-ls-files.c | 40 ++-------------------------------------- cache.h | 1 - dir.c | 19 +++++++++++-------- 5 files changed, 17 insertions(+), 51 deletions(-) diff --git a/builtin-checkout.c b/builtin-checkout.c index b5dd9c07b4..84a28257b6 100644 --- a/builtin-checkout.c +++ b/builtin-checkout.c @@ -240,7 +240,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec, for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; - pathspec_match(pathspec, ps_matched, ce->name, 0); + match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, ps_matched); } if (report_path_error(ps_matched, pathspec, 0)) @@ -249,7 +249,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec, /* Any unmerged paths? */ for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; - if (pathspec_match(pathspec, NULL, ce->name, 0)) { + if (match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, NULL)) { if (!ce_stage(ce)) continue; if (opts->force) { @@ -274,7 +274,7 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec, state.refresh_cache = 1; for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; - if (pathspec_match(pathspec, NULL, ce->name, 0)) { + if (match_pathspec(pathspec, ce->name, ce_namelen(ce), 0, NULL)) { if (!ce_stage(ce)) { errs |= checkout_entry(ce, &state, NULL); continue; diff --git a/builtin-commit.c b/builtin-commit.c index e88b78f811..3d1867ac17 100644 --- a/builtin-commit.c +++ b/builtin-commit.c @@ -166,7 +166,7 @@ static int list_paths(struct string_list *list, const char *with_tree, struct cache_entry *ce = active_cache[i]; if (ce->ce_flags & CE_UPDATE) continue; - if (!pathspec_match(pattern, m, ce->name, 0)) + if (!match_pathspec(pattern, ce->name, ce_namelen(ce), 0, m)) continue; string_list_insert(ce->name, list); } diff --git a/builtin-ls-files.c b/builtin-ls-files.c index f72eb85475..3434031295 100644 --- a/builtin-ls-files.c +++ b/builtin-ls-files.c @@ -36,42 +36,6 @@ static const char *tag_other = ""; static const char *tag_killed = ""; static const char *tag_modified = ""; - -/* - * Match a pathspec against a filename. The first "skiplen" characters - * are the common prefix - */ -int pathspec_match(const char **spec, char *ps_matched, - const char *filename, int skiplen) -{ - const char *m; - - while ((m = *spec++) != NULL) { - int matchlen = strlen(m + skiplen); - - if (!matchlen) - goto matched; - if (!strncmp(m + skiplen, filename + skiplen, matchlen)) { - if (m[skiplen + matchlen - 1] == '/') - goto matched; - switch (filename[skiplen + matchlen]) { - case '/': case '\0': - goto matched; - } - } - if (!fnmatch(m + skiplen, filename + skiplen, 0)) - goto matched; - if (ps_matched) - ps_matched++; - continue; - matched: - if (ps_matched) - *ps_matched = 1; - return 1; - } - return 0; -} - static void show_dir_entry(const char *tag, struct dir_entry *ent) { int len = prefix_len; @@ -80,7 +44,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent) if (len >= ent->len) die("git ls-files: internal error - directory entry not superset of prefix"); - if (pathspec && !pathspec_match(pathspec, ps_matched, ent->name, len)) + if (!match_pathspec(pathspec, ent->name, ent->len, len, ps_matched)) return; fputs(tag, stdout); @@ -156,7 +120,7 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) if (len >= ce_namelen(ce)) die("git ls-files: internal error - cache entry not superset of prefix"); - if (pathspec && !pathspec_match(pathspec, ps_matched, ce->name, len)) + if (!match_pathspec(pathspec, ce->name, ce_namelen(ce), len, ps_matched)) return; if (tag && *tag && show_valid_bit && diff --git a/cache.h b/cache.h index 231c06d772..c60259d492 100644 --- a/cache.h +++ b/cache.h @@ -936,7 +936,6 @@ extern int ws_fix_copy(char *, const char *, int, unsigned, int *); extern int ws_blank_line(const char *line, int len, unsigned ws_rule); /* ls-files */ -int pathspec_match(const char **spec, char *matched, const char *filename, int skiplen); int report_path_error(const char *ps_matched, const char **pathspec, int prefix_offset); void overlay_tree_on_cache(const char *tree_name, const char *prefix); diff --git a/dir.c b/dir.c index 8b0e0fbaf7..c50ecc8d69 100644 --- a/dir.c +++ b/dir.c @@ -108,25 +108,28 @@ static int match_one(const char *match, const char *name, int namelen) * and a mark is left in seen[] array for pathspec element that * actually matched anything. */ -int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen) +int match_pathspec(const char **pathspec, const char *name, int namelen, + int prefix, char *seen) { - int retval; - const char *match; + int i, retval = 0; + + if (!pathspec) + return 1; name += prefix; namelen -= prefix; - for (retval = 0; (match = *pathspec++) != NULL; seen++) { + for (i = 0; pathspec[i] != NULL; i++) { int how; - if (*seen == MATCHED_EXACTLY) + const char *match = pathspec[i] + prefix; + if (seen && seen[i] == MATCHED_EXACTLY) continue; - match += prefix; how = match_one(match, name, namelen); if (how) { if (retval < how) retval = how; - if (*seen < how) - *seen = how; + if (seen && seen[i] < how) + seen[i] = how; } } return retval; From 07b57e90f7c852c4fe212ab1d91058f27469a74b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 17 Jan 2009 17:29:42 +0100 Subject: [PATCH 06/40] Add color_fwrite_lines(), a function coloring each line individually We have to set the color before every line and reset it before every newline. Add a function color_fwrite_lines() which does that for us. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- color.c | 28 ++++++++++++++++++++++++++++ color.h | 1 + 2 files changed, 29 insertions(+) diff --git a/color.c b/color.c index fc0b72ad59..d4ae83f9b7 100644 --- a/color.c +++ b/color.c @@ -191,3 +191,31 @@ int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) va_end(args); return r; } + +/* + * This function splits the buffer by newlines and colors the lines individually. + * + * Returns 0 on success. + */ +int color_fwrite_lines(FILE *fp, const char *color, + size_t count, const char *buf) +{ + if (!*color) + return fwrite(buf, count, 1, fp) != 1; + while (count) { + char *p = memchr(buf, '\n', count); + if (p != buf && (fputs(color, fp) < 0 || + fwrite(buf, p ? p - buf : count, 1, fp) != 1 || + fputs(COLOR_RESET, fp) < 0)) + return -1; + if (!p) + return 0; + if (fputc('\n', fp) < 0) + return -1; + count -= p + 1 - buf; + buf = p + 1; + } + return 0; +} + + diff --git a/color.h b/color.h index 6cf5c88aaf..cd5c985eb7 100644 --- a/color.h +++ b/color.h @@ -19,5 +19,6 @@ int git_config_colorbool(const char *var, const char *value, int stdout_is_tty); void color_parse(const char *var, const char *value, char *dst); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); +int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); #endif /* COLOR_H */ From 23c1575f747393f9847874fd1ed72a44557459d1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 17 Jan 2009 17:29:43 +0100 Subject: [PATCH 07/40] color-words: refactor word splitting and use ALLOC_GROW() Word splitting is now performed by the function diff_words_fill(), avoiding having the same code twice. In the same spirit, avoid duplicating the code of ALLOC_GROW(). Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- diff.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/diff.c b/diff.c index d23548292a..c111eef13e 100644 --- a/diff.c +++ b/diff.c @@ -326,10 +326,7 @@ struct diff_words_buffer { static void diff_words_append(char *line, unsigned long len, struct diff_words_buffer *buffer) { - if (buffer->text.size + len > buffer->alloc) { - buffer->alloc = (buffer->text.size + len) * 3 / 2; - buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc); - } + ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc); line++; len--; memcpy(buffer->text.ptr + buffer->text.size, line, len); @@ -398,6 +395,22 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) } } +/* + * This function splits the words in buffer->text, and stores the list with + * newline separator into out. + */ +static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out) +{ + int i; + out->size = buffer->text.size; + out->ptr = xmalloc(out->size); + memcpy(out->ptr, buffer->text.ptr, out->size); + for (i = 0; i < out->size; i++) + if (isspace(out->ptr[i])) + out->ptr[i] = '\n'; + buffer->current = 0; +} + /* this executes the word diff on the accumulated buffers */ static void diff_words_show(struct diff_words_data *diff_words) { @@ -405,26 +418,11 @@ static void diff_words_show(struct diff_words_data *diff_words) xdemitconf_t xecfg; xdemitcb_t ecb; mmfile_t minus, plus; - int i; memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); - minus.size = diff_words->minus.text.size; - minus.ptr = xmalloc(minus.size); - memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size); - for (i = 0; i < minus.size; i++) - if (isspace(minus.ptr[i])) - minus.ptr[i] = '\n'; - diff_words->minus.current = 0; - - plus.size = diff_words->plus.text.size; - plus.ptr = xmalloc(plus.size); - memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size); - for (i = 0; i < plus.size; i++) - if (isspace(plus.ptr[i])) - plus.ptr[i] = '\n'; - diff_words->plus.current = 0; - + diff_words_fill(&diff_words->minus, &minus); + diff_words_fill(&diff_words->plus, &plus); xpp.flags = XDF_NEED_MINIMAL; xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc; xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words, From 2e5d2003b28820f88296e47a79eb440ca0295000 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 17 Jan 2009 17:29:44 +0100 Subject: [PATCH 08/40] color-words: change algorithm to allow for 0-character word boundaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up until now, the color-words code assumed that word boundaries are identical to white space characters. Therefore, it could get away with a very simple scheme: it copied the hunks, substituted newlines for each white space character, called libxdiff with the processed text, and then identified the text to output by the offsets (which agreed since the original text had the same length). This code was ugly, for a number of reasons: - it was impossible to introduce 0-character word boundaries, - we had to print everything word by word, and - the code needed extra special handling of newlines in the removed part. Fix all of these issues by processing the text such that - we build word lists, separated by newlines, - we remember the original offsets for every word, and - after calling libxdiff on the wordlists, we parse the hunk headers, and find the corresponding offsets, and then - we print the removed/added parts in one go. The pre and post samples in the test were provided by Santi Béjar. Note that there is some strange special handling of hunk headers where one line range is 0 due to POSIX: in this case, the start is one too low. In other words a hunk header '@@ -1,0 +2 @@' actually means that the line must be added after the _second_ line of the pre text, _not_ the first. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- diff.c | 161 ++++++++++++++++++++++++------------------ t/t4034-diff-words.sh | 66 +++++++++++++++++ 2 files changed, 159 insertions(+), 68 deletions(-) create mode 100755 t/t4034-diff-words.sh diff --git a/diff.c b/diff.c index c111eef13e..37c886a815 100644 --- a/diff.c +++ b/diff.c @@ -319,8 +319,10 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) struct diff_words_buffer { mmfile_t text; long alloc; - long current; /* output pointer */ - int suppressed_newline; + struct diff_words_orig { + const char *begin, *end; + } *orig; + int orig_nr, orig_alloc; }; static void diff_words_append(char *line, unsigned long len, @@ -335,80 +337,89 @@ static void diff_words_append(char *line, unsigned long len, struct diff_words_data { struct diff_words_buffer minus, plus; + const char *current_plus; FILE *file; }; -static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color, - int suppress_newline) -{ - const char *ptr; - int eol = 0; - - if (len == 0) - return; - - ptr = buffer->text.ptr + buffer->current; - buffer->current += len; - - if (ptr[len - 1] == '\n') { - eol = 1; - len--; - } - - fputs(diff_get_color(1, color), file); - fwrite(ptr, len, 1, file); - fputs(diff_get_color(1, DIFF_RESET), file); - - if (eol) { - if (suppress_newline) - buffer->suppressed_newline = 1; - else - putc('\n', file); - } -} - static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) { struct diff_words_data *diff_words = priv; + int minus_first, minus_len, plus_first, plus_len; + const char *minus_begin, *minus_end, *plus_begin, *plus_end; - if (diff_words->minus.suppressed_newline) { - if (line[0] != '+') - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; - } + if (line[0] != '@' || parse_hunk_header(line, len, + &minus_first, &minus_len, &plus_first, &plus_len)) + return; - len--; - switch (line[0]) { - case '-': - print_word(diff_words->file, - &diff_words->minus, len, DIFF_FILE_OLD, 1); - break; - case '+': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_FILE_NEW, 0); - break; - case ' ': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_PLAIN, 0); - diff_words->minus.current += len; - break; - } + /* POSIX requires that first be decremented by one if len == 0... */ + if (minus_len) { + minus_begin = diff_words->minus.orig[minus_first].begin; + minus_end = + diff_words->minus.orig[minus_first + minus_len - 1].end; + } else + minus_begin = minus_end = + diff_words->minus.orig[minus_first].end; + + if (plus_len) { + plus_begin = diff_words->plus.orig[plus_first].begin; + plus_end = diff_words->plus.orig[plus_first + plus_len - 1].end; + } else + plus_begin = plus_end = diff_words->plus.orig[plus_first].end; + + if (diff_words->current_plus != plus_begin) + fwrite(diff_words->current_plus, + plus_begin - diff_words->current_plus, 1, + diff_words->file); + if (minus_begin != minus_end) + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_OLD), + minus_end - minus_begin, minus_begin); + if (plus_begin != plus_end) + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_NEW), + plus_end - plus_begin, plus_begin); + + diff_words->current_plus = plus_end; } /* - * This function splits the words in buffer->text, and stores the list with - * newline separator into out. + * This function splits the words in buffer->text, stores the list with + * newline separator into out, and saves the offsets of the original words + * in buffer->orig. */ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out) { - int i; - out->size = buffer->text.size; - out->ptr = xmalloc(out->size); - memcpy(out->ptr, buffer->text.ptr, out->size); - for (i = 0; i < out->size; i++) - if (isspace(out->ptr[i])) - out->ptr[i] = '\n'; - buffer->current = 0; + int i, j; + + out->size = 0; + out->ptr = xmalloc(buffer->text.size); + + /* fake an empty "0th" word */ + ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc); + buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr; + buffer->orig_nr = 1; + + for (i = 0; i < buffer->text.size; i++) { + if (isspace(buffer->text.ptr[i])) + continue; + for (j = i + 1; j < buffer->text.size && + !isspace(buffer->text.ptr[j]); j++) + ; /* find the end of the word */ + + /* store original boundaries */ + ALLOC_GROW(buffer->orig, buffer->orig_nr + 1, + buffer->orig_alloc); + buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i; + buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j; + buffer->orig_nr++; + + /* store one word */ + memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i); + out->ptr[out->size + j - i] = '\n'; + out->size += j - i + 1; + + i = j - 1; + } } /* this executes the word diff on the accumulated buffers */ @@ -419,22 +430,34 @@ static void diff_words_show(struct diff_words_data *diff_words) xdemitcb_t ecb; mmfile_t minus, plus; + /* special case: only removal */ + if (!diff_words->plus.text.size) { + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_OLD), + diff_words->minus.text.size, diff_words->minus.text.ptr); + diff_words->minus.text.size = 0; + return; + } + + diff_words->current_plus = diff_words->plus.text.ptr; + memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); diff_words_fill(&diff_words->minus, &minus); diff_words_fill(&diff_words->plus, &plus); xpp.flags = XDF_NEED_MINIMAL; - xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc; + xecfg.ctxlen = 0; xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words, &xpp, &xecfg, &ecb); free(minus.ptr); free(plus.ptr); + if (diff_words->current_plus != diff_words->plus.text.ptr + + diff_words->plus.text.size) + fwrite(diff_words->current_plus, + diff_words->plus.text.ptr + diff_words->plus.text.size + - diff_words->current_plus, 1, + diff_words->file); diff_words->minus.text.size = diff_words->plus.text.size = 0; - - if (diff_words->minus.suppressed_newline) { - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; - } } typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len); @@ -458,7 +481,9 @@ static void free_diff_words_data(struct emit_callback *ecbdata) diff_words_show(ecbdata->diff_words); free (ecbdata->diff_words->minus.text.ptr); + free (ecbdata->diff_words->minus.orig); free (ecbdata->diff_words->plus.text.ptr); + free (ecbdata->diff_words->plus.orig); free(ecbdata->diff_words); ecbdata->diff_words = NULL; } diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh new file mode 100755 index 0000000000..b22195f8bb --- /dev/null +++ b/t/t4034-diff-words.sh @@ -0,0 +1,66 @@ +#!/bin/sh + +test_description='word diff colors' + +. ./test-lib.sh + +test_expect_success setup ' + + git config diff.color.old red + git config diff.color.new green + +' + +decrypt_color () { + sed \ + -e 's/.\[1m//g' \ + -e 's/.\[31m//g' \ + -e 's/.\[32m//g' \ + -e 's/.\[36m//g' \ + -e 's/.\[m//g' +} + +word_diff () { + test_must_fail git diff --no-index "$@" pre post > output && + decrypt_color < output > output.decrypted && + test_cmp expect output.decrypted +} + +cat > pre <<\EOF +h(4) + +a = b + c +EOF + +cat > post <<\EOF +h(4),hh[44] + +a = b + c + +aa = a + +aeff = aeff * ( aaa ) +EOF + +cat > expect <<\EOF +diff --git a/pre b/post +index 330b04f..5ed8eff 100644 +--- a/pre ++++ b/post +@@ -1,3 +1,7 @@ +h(4)h(4),hh[44] + +a = b + c + +aa = a + +aeff = aeff * ( aaa ) +EOF + +test_expect_success 'word diff with runs of whitespace' ' + + word_diff --color-words + +' + +test_done From 2b6a5417d750d086d1da906e46de2b3ad8df6753 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 17 Jan 2009 17:29:45 +0100 Subject: [PATCH 09/40] color-words: take an optional regular expression describing words In some applications, words are not delimited by white space. To allow for that, you can specify a regular expression describing what makes a word with git diff --color-words='[A-Za-z0-9]+' Note that words cannot contain newline characters. As suggested by Thomas Rast, the words are the exact matches of the regular expression. Note that a regular expression beginning with a '^' will match only a word at the beginning of the hunk, not a word at the beginning of a line, and is probably not what you want. This commit contains a quoting fix by Thomas Rast. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 6 +++- diff.c | 64 +++++++++++++++++++++++++++++----- diff.h | 1 + t/t4034-diff-words.sh | 57 ++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 10 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 43793d7500..2c1fa4b102 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -91,8 +91,12 @@ endif::git-format-patch[] Turn off colored diff, even when the configuration file gives the default to color output. ---color-words:: +--color-words[=regex]:: Show colored word diff, i.e. color words which have changed. ++ +Optionally, you can pass a regular expression that tells Git what the +words are that you are looking for; The default is to interpret any +stretch of non-whitespace as a word. --no-renames:: Turn off rename detection, even when the configuration diff --git a/diff.c b/diff.c index 37c886a815..9fb3d0df31 100644 --- a/diff.c +++ b/diff.c @@ -333,12 +333,14 @@ static void diff_words_append(char *line, unsigned long len, len--; memcpy(buffer->text.ptr + buffer->text.size, line, len); buffer->text.size += len; + buffer->text.ptr[buffer->text.size] = '\0'; } struct diff_words_data { struct diff_words_buffer minus, plus; const char *current_plus; FILE *file; + regex_t *word_regex; }; static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) @@ -382,17 +384,49 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) diff_words->current_plus = plus_end; } +/* This function starts looking at *begin, and returns 0 iff a word was found. */ +static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, + int *begin, int *end) +{ + if (word_regex && *begin < buffer->size) { + regmatch_t match[1]; + if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { + char *p = memchr(buffer->ptr + *begin + match[0].rm_so, + '\n', match[0].rm_eo - match[0].rm_so); + *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; + *begin += match[0].rm_so; + return *begin >= *end; + } + return -1; + } + + /* find the next word */ + while (*begin < buffer->size && isspace(buffer->ptr[*begin])) + (*begin)++; + if (*begin >= buffer->size) + return -1; + + /* find the end of the word */ + *end = *begin + 1; + while (*end < buffer->size && !isspace(buffer->ptr[*end])) + (*end)++; + + return 0; +} + /* * This function splits the words in buffer->text, stores the list with * newline separator into out, and saves the offsets of the original words * in buffer->orig. */ -static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out) +static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out, + regex_t *word_regex) { int i, j; + long alloc = 0; out->size = 0; - out->ptr = xmalloc(buffer->text.size); + out->ptr = NULL; /* fake an empty "0th" word */ ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc); @@ -400,11 +434,8 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out) buffer->orig_nr = 1; for (i = 0; i < buffer->text.size; i++) { - if (isspace(buffer->text.ptr[i])) - continue; - for (j = i + 1; j < buffer->text.size && - !isspace(buffer->text.ptr[j]); j++) - ; /* find the end of the word */ + if (find_word_boundaries(&buffer->text, word_regex, &i, &j)) + return; /* store original boundaries */ ALLOC_GROW(buffer->orig, buffer->orig_nr + 1, @@ -414,6 +445,7 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out) buffer->orig_nr++; /* store one word */ + ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc); memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i); out->ptr[out->size + j - i] = '\n'; out->size += j - i + 1; @@ -443,9 +475,10 @@ static void diff_words_show(struct diff_words_data *diff_words) memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); - diff_words_fill(&diff_words->minus, &minus); - diff_words_fill(&diff_words->plus, &plus); + diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex); + diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex); xpp.flags = XDF_NEED_MINIMAL; + /* as only the hunk header will be parsed, we need a 0-context */ xecfg.ctxlen = 0; xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words, &xpp, &xecfg, &ecb); @@ -484,6 +517,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata) free (ecbdata->diff_words->minus.orig); free (ecbdata->diff_words->plus.text.ptr); free (ecbdata->diff_words->plus.orig); + free(ecbdata->diff_words->word_regex); free(ecbdata->diff_words); ecbdata->diff_words = NULL; } @@ -1506,6 +1540,14 @@ static void builtin_diff(const char *name_a, ecbdata.diff_words = xcalloc(1, sizeof(struct diff_words_data)); ecbdata.diff_words->file = o->file; + if (o->word_regex) { + ecbdata.diff_words->word_regex = (regex_t *) + xmalloc(sizeof(regex_t)); + if (regcomp(ecbdata.diff_words->word_regex, + o->word_regex, REG_EXTENDED)) + die ("Invalid regular expression: %s", + o->word_regex); + } } xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata, &xpp, &xecfg, &ecb); @@ -2517,6 +2559,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) DIFF_OPT_CLR(options, COLOR_DIFF); else if (!strcmp(arg, "--color-words")) options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS; + else if (!prefixcmp(arg, "--color-words=")) { + options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS; + options->word_regex = arg + 14; + } else if (!strcmp(arg, "--exit-code")) DIFF_OPT_SET(options, EXIT_WITH_STATUS); else if (!strcmp(arg, "--quiet")) diff --git a/diff.h b/diff.h index 4d5a32781d..23cd90c2e6 100644 --- a/diff.h +++ b/diff.h @@ -98,6 +98,7 @@ struct diff_options { int stat_width; int stat_name_width; + const char *word_regex; /* this is set by diffcore for DIFF_FORMAT_PATCH */ int found_changes; diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index b22195f8bb..4873486301 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -63,4 +63,61 @@ test_expect_success 'word diff with runs of whitespace' ' ' +cat > expect <<\EOF +diff --git a/pre b/post +index 330b04f..5ed8eff 100644 +--- a/pre ++++ b/post +@@ -1,3 +1,7 @@ +h(4),hh[44] + +a = b + c + +aa = a + +aeff = aeff * ( aaa ) +EOF + +test_expect_success 'word diff with a regular expression' ' + + word_diff --color-words="[a-z]+" + +' + +echo 'aaa (aaa)' > pre +echo 'aaa (aaa) aaa' > post + +cat > expect <<\EOF +diff --git a/pre b/post +index c29453b..be22f37 100644 +--- a/pre ++++ b/post +@@ -1 +1 @@ +aaa (aaa) aaa +EOF + +test_expect_success 'test parsing words for newline' ' + + word_diff --color-words="a+" + +' + +echo '(:' > pre +echo '(' > post + +cat > expect <<\EOF +diff --git a/pre b/post +index 289cb9d..2d06f37 100644 +--- a/pre ++++ b/post +@@ -1 +1 @@ +(: +EOF + +test_expect_success 'test when words are only removed at the end' ' + + word_diff --color-words=. + +' + test_done From bf82940dbf12f066ba42a2a03a5bb626ba22c067 Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Sat, 17 Jan 2009 17:29:46 +0100 Subject: [PATCH 10/40] color-words: enable REG_NEWLINE to help user We silently truncate a match at the newline, which may lead to unexpected behaviour, e.g., when matching "<[^>]*>" against since then "" doesn't!) even though the regex said only angle-bracket-delimited things can be words. To alleviate the problem slightly, use REG_NEWLINE so that negated classes can't match a newline. Of course newlines can still be matched explicitly. Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- diff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 9fb3d0df31..00c661f82e 100644 --- a/diff.c +++ b/diff.c @@ -1544,7 +1544,8 @@ static void builtin_diff(const char *name_a, ecbdata.diff_words->word_regex = (regex_t *) xmalloc(sizeof(regex_t)); if (regcomp(ecbdata.diff_words->word_regex, - o->word_regex, REG_EXTENDED)) + o->word_regex, + REG_EXTENDED | REG_NEWLINE)) die ("Invalid regular expression: %s", o->word_regex); } From c4b252c3d894673968b144d8e10b79ef22c17b0a Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Sat, 17 Jan 2009 17:29:47 +0100 Subject: [PATCH 11/40] color-words: expand docs with precise semantics Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 2c1fa4b102..8689a92d8d 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -91,12 +91,17 @@ endif::git-format-patch[] Turn off colored diff, even when the configuration file gives the default to color output. ---color-words[=regex]:: - Show colored word diff, i.e. color words which have changed. +--color-words[=]:: + Show colored word diff, i.e., color words which have changed. + By default, words are separated by whitespace. + -Optionally, you can pass a regular expression that tells Git what the -words are that you are looking for; The default is to interpret any -stretch of non-whitespace as a word. +When a is specified, every non-overlapping match of the + is considered a word. Anything between these matches is +considered whitespace and ignored(!) for the purposes of finding +differences. You may want to append `|[^[:space:]]` to your regular +expression to make sure that it matches all non-whitespace characters. +A match that contains a newline is silently truncated(!) at the +newline. --no-renames:: Turn off rename detection, even when the configuration From 80c49c3de2d5a3aa12b0980a65f1163c8aef0c16 Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Sat, 17 Jan 2009 17:29:48 +0100 Subject: [PATCH 12/40] color-words: make regex configurable via attributes Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 4 ++ Documentation/gitattributes.txt | 21 +++++++++ diff.c | 10 +++++ t/t4034-diff-words.sh | 36 +++++++++++++++ userdiff.c | 78 ++++++++++++++++++++++++++------- userdiff.h | 1 + 6 files changed, 135 insertions(+), 15 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 8689a92d8d..1edb82e8e1 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -102,6 +102,10 @@ differences. You may want to append `|[^[:space:]]` to your regular expression to make sure that it matches all non-whitespace characters. A match that contains a newline is silently truncated(!) at the newline. ++ +The regex can also be set via a diff driver, see +linkgit:gitattributes[1]; giving it explicitly overrides any diff +driver setting. --no-renames:: Turn off rename detection, even when the configuration diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 8af22eccac..ba3ba12730 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -317,6 +317,8 @@ patterns are available: - `bibtex` suitable for files with BibTeX coded references. +- `cpp` suitable for source code in the C and C++ languages. + - `html` suitable for HTML/XHTML documents. - `java` suitable for source code in the Java language. @@ -334,6 +336,25 @@ patterns are available: - `tex` suitable for source code for LaTeX documents. +Customizing word diff +^^^^^^^^^^^^^^^^^^^^^ + +You can customize the rules that `git diff --color-words` uses to +split words in a line, by specifying an appropriate regular expression +in the "diff.*.wordregex" configuration variable. For example, in TeX +a backslash followed by a sequence of letters forms a command, but +several such commands can be run together without intervening +whitespace. To separate them, use a regular expression such as + +------------------------ +[diff "tex"] + wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+" +------------------------ + +A built-in pattern is provided for all languages listed in the +previous section. + + Performing text diffs of binary files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/diff.c b/diff.c index 00c661f82e..9fcde963db 100644 --- a/diff.c +++ b/diff.c @@ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } +static const char *userdiff_word_regex(struct diff_filespec *one) +{ + diff_filespec_load_driver(one); + return one->driver->word_regex; +} + void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) { if (!options->a_prefix) @@ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a, ecbdata.diff_words = xcalloc(1, sizeof(struct diff_words_data)); ecbdata.diff_words->file = o->file; + if (!o->word_regex) + o->word_regex = userdiff_word_regex(one); + if (!o->word_regex) + o->word_regex = userdiff_word_regex(two); if (o->word_regex) { ecbdata.diff_words->word_regex = (regex_t *) xmalloc(sizeof(regex_t)); diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index 4873486301..744221bef9 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' ' ' +test_expect_success 'set a diff driver' ' + git config diff.testdriver.wordregex "[^[:space:]]" && + cat < .gitattributes +pre diff=testdriver +post diff=testdriver +EOF +' + +test_expect_success 'option overrides default' ' + + word_diff --color-words="[a-z]+" + +' + +cat > expect <<\EOF +diff --git a/pre b/post +index 330b04f..5ed8eff 100644 +--- a/pre ++++ b/post +@@ -1,3 +1,7 @@ +h(4),hh[44] + +a = b + c + +aa = a + +aeff = aeff * ( aaa ) +EOF + +test_expect_success 'use default supplied by driver' ' + + word_diff --color-words + +' + echo 'aaa (aaa)' > pre echo 'aaa (aaa) aaa' > post @@ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' ' word_diff --color-words="a+" + ' echo '(:' > pre diff --git a/userdiff.c b/userdiff.c index 3681062ebf..2b55509485 100644 --- a/userdiff.c +++ b/userdiff.c @@ -6,14 +6,20 @@ static struct userdiff_driver *drivers; static int ndrivers; static int drivers_alloc; -#define FUNCNAME(name, pattern) \ - { name, NULL, -1, { pattern, REG_EXTENDED } } +#define PATTERNS(name, pattern, wordregex) \ + { name, NULL, -1, { pattern, REG_EXTENDED }, wordregex } static struct userdiff_driver builtin_drivers[] = { -FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), -FUNCNAME("java", +PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", + "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("java", "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"), -FUNCNAME("objc", + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=" + "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("objc", /* Negate C statements that can look like functions */ "!^[ \t]*(do|for|if|else|return|switch|while)\n" /* Objective-C methods */ @@ -21,20 +27,60 @@ FUNCNAME("objc", /* C functions */ "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n" /* Objective-C class/protocol definitions */ - "^(@(implementation|interface|protocol)[ \t].*)$"), -FUNCNAME("pascal", + "^(@(implementation|interface|protocol)[ \t].*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("pascal", "^((procedure|function|constructor|destructor|interface|" "implementation|initialization|finalization)[ \t]*.*)$" "\n" - "^(.*=[ \t]*(class|record).*)$"), -FUNCNAME("php", "^[\t ]*((function|class).*)"), -FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), -FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), -FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), -FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), + "^(.*=[ \t]*(class|record).*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" + "|<>|<=|>=|:=|\\.\\." + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("php", "^[\t ]*((function|class).*)", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" + "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?" + "|[^[:space:]|[\x80-\xff]+"), + /* -- */ +PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", + /* -- */ + "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." + "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~" + "|[^[:space:]|[\x80-\xff]+"), +PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + "[={}\"]|[^={}\" \t]+"), +PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", + "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"), +PATTERNS("cpp", + /* Jump targets or access declarations */ + "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n" + /* C/++ functions/methods at top level */ + "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n" + /* compound type at top level */ + "^((struct|class|enum)[^;]*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), { "default", NULL, -1, { NULL, 0 } }, }; -#undef FUNCNAME +#undef PATTERNS static struct userdiff_driver driver_true = { "diff=true", @@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v) return parse_string(&drv->external, k, v); if ((drv = parse_driver(k, v, "textconv"))) return parse_string(&drv->textconv, k, v); + if ((drv = parse_driver(k, v, "wordregex"))) + return parse_string(&drv->word_regex, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index ba2945770b..c3151594f5 100644 --- a/userdiff.h +++ b/userdiff.h @@ -11,6 +11,7 @@ struct userdiff_driver { const char *external; int binary; struct userdiff_funcname funcname; + const char *word_regex; const char *textconv; }; From f0298cf1c6a7b5cc8b79d84a03b0ce07df2d9e6b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Jan 2009 13:52:53 +0100 Subject: [PATCH 13/40] revision walker: include a detached HEAD in --all When HEAD is detached, --all should list it, too, logically, as a detached HEAD is by definition a temporary, unnamed branch. It is especially necessary to list it when garbage collecting, as the detached HEAD would be trashed. Noticed by Thomas Rast. Note that this affects creating bundles with --all; I contend that it is a good change to add the HEAD, so that cloning from such a bundle will give you a current branch. However, I had to fix t5701 as it assumed that --all does not imply HEAD. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- revision.c | 1 + t/t5701-clone-local.sh | 4 ++-- t/t6014-rev-list-all.sh | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100755 t/t6014-rev-list-all.sh diff --git a/revision.c b/revision.c index 45fd7a3660..f6ccb973a6 100644 --- a/revision.c +++ b/revision.c @@ -1223,6 +1223,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch if (!strcmp(arg, "--all")) { handle_refs(revs, flags, for_each_ref); + handle_refs(revs, flags, head_ref); continue; } if (!strcmp(arg, "--branches")) { diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh index 8dfaaa456e..14413f851f 100755 --- a/t/t5701-clone-local.sh +++ b/t/t5701-clone-local.sh @@ -11,8 +11,8 @@ test_expect_success 'preparing origin repository' ' git clone --bare . x && test "$(GIT_CONFIG=a.git/config git config --bool core.bare)" = true && test "$(GIT_CONFIG=x/config git config --bool core.bare)" = true - git bundle create b1.bundle --all HEAD && - git bundle create b2.bundle --all && + git bundle create b1.bundle master HEAD && + git bundle create b2.bundle master && mkdir dir && cp b1.bundle dir/b3 cp b1.bundle b4 diff --git a/t/t6014-rev-list-all.sh b/t/t6014-rev-list-all.sh new file mode 100755 index 0000000000..991ab4a65b --- /dev/null +++ b/t/t6014-rev-list-all.sh @@ -0,0 +1,38 @@ +#!/bin/sh + +test_description='--all includes detached HEADs' + +. ./test-lib.sh + + +commit () { + test_tick && + echo $1 > foo && + git add foo && + git commit -m "$1" +} + +test_expect_success 'setup' ' + + commit one && + commit two && + git checkout HEAD^ && + commit detached + +' + +test_expect_success 'rev-list --all lists detached HEAD' ' + + test 3 = $(git rev-list --all | wc -l) + +' + +test_expect_success 'repack does not lose detached HEAD' ' + + git gc && + git prune --expire=now && + git show HEAD + +' + +test_done From b2a6d1c6868b6d5e7d2b4fa9129341220a1e848a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 17 Jan 2009 22:27:08 -0800 Subject: [PATCH 14/40] bundle: allow the same ref to be given more than once "git bundle create x master master" used to create a bundle that lists the same branch (master) twice. Cloning from such a bundle resulted in a needless warning "warning: Duplicated ref: refs/remotes/origin/master". Signed-off-by: Junio C Hamano --- bundle.c | 2 ++ object.c | 19 +++++++++++++++++++ object.h | 1 + t/t5701-clone-local.sh | 2 +- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/bundle.c b/bundle.c index daecd8e1ca..b20f2101f2 100644 --- a/bundle.c +++ b/bundle.c @@ -240,6 +240,8 @@ int create_bundle(struct bundle_header *header, const char *path, return error("unrecognized argument: %s'", argv[i]); } + object_array_remove_duplicates(&revs.pending); + for (i = 0; i < revs.pending.nr; i++) { struct object_array_entry *e = revs.pending.objects + i; unsigned char sha1[20]; diff --git a/object.c b/object.c index 50b6528001..7e6a92c88e 100644 --- a/object.c +++ b/object.c @@ -268,3 +268,22 @@ void add_object_array_with_mode(struct object *obj, const char *name, struct obj objects[nr].mode = mode; array->nr = ++nr; } + +void object_array_remove_duplicates(struct object_array *array) +{ + int ref, src, dst; + struct object_array_entry *objects = array->objects; + + for (ref = 0; ref < array->nr - 1; ref++) { + for (src = ref + 1, dst = src; + src < array->nr; + src++) { + if (!strcmp(objects[ref].name, objects[src].name)) + continue; + if (src != dst) + objects[dst] = objects[src]; + dst++; + } + array->nr = dst; + } +} diff --git a/object.h b/object.h index 036bd66fe9..3193916638 100644 --- a/object.h +++ b/object.h @@ -71,5 +71,6 @@ int object_list_contains(struct object_list *list, struct object *obj); /* Object array handling .. */ void add_object_array(struct object *obj, const char *name, struct object_array *array); void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode); +void object_array_remove_duplicates(struct object_array *); #endif /* OBJECT_H */ diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh index 14413f851f..fe0fda282c 100755 --- a/t/t5701-clone-local.sh +++ b/t/t5701-clone-local.sh @@ -11,7 +11,7 @@ test_expect_success 'preparing origin repository' ' git clone --bare . x && test "$(GIT_CONFIG=a.git/config git config --bool core.bare)" = true && test "$(GIT_CONFIG=x/config git config --bool core.bare)" = true - git bundle create b1.bundle master HEAD && + git bundle create b1.bundle --all && git bundle create b2.bundle master && mkdir dir && cp b1.bundle dir/b3 From 92604b466344b2157efc42ef3521dac22d7906a2 Mon Sep 17 00:00:00 2001 From: Kjetil Barvik Date: Sun, 18 Jan 2009 16:14:50 +0100 Subject: [PATCH 15/40] lstat_cache(): more cache effective symlink/directory detection Make the cache functionality more effective. Previously when A/B/C/D was in the cache and A/B/C/E/file.c was called for, there was no match at all from the cache. Now we use the fact that the paths "A", "A/B" and "A/B/C" are already tested, and we only need to do an lstat() call on "A/B/C/E". We only cache/store the last path regardless of its type. Since the cache functionality is always used with alphabetically sorted names (at least it seems so for me), there is no need to store both the last symlink-leading path and the last real-directory path. Note that if the cache is not called with (mostly) alphabetically sorted names, neither the old, nor this new one, would be very effective. Previously, when symlink A/B/C/S was cached/stored in the symlink- leading path, and A/B/C/file.c was called for, it was not easy to use the fact that we already knew that the paths "A", "A/B" and "A/B/C" are real directories. Avoid copying the first path components of the name 2 zillion times when we test new path components. Since we always cache/store the last path, we can copy each component as we test those directly into the cache. Previously we ended up doing a memcpy() for the full path/name right before each lstat() call, and when updating the cache for each time we have tested a new path component. We also use less memory, that is, PATH_MAX bytes less memory on the stack and PATH_MAX bytes less memory on the heap. Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable comments to this patch! Signed-off-by: Kjetil Barvik Signed-off-by: Junio C Hamano --- symlinks.c | 167 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 126 insertions(+), 41 deletions(-) diff --git a/symlinks.c b/symlinks.c index 5a5e781a15..49fb4d8bc2 100644 --- a/symlinks.c +++ b/symlinks.c @@ -1,64 +1,149 @@ #include "cache.h" -struct pathname { +static struct cache_def { + char path[PATH_MAX]; int len; - char path[PATH_MAX]; -}; + int flags; +} cache; -/* Return matching pathname prefix length, or zero if not matching */ -static inline int match_pathname(int len, const char *name, struct pathname *match) +/* + * Returns the length (on a path component basis) of the longest + * common prefix match of 'name' and the cached path string. + */ +static inline int longest_match_lstat_cache(int len, const char *name) { - int match_len = match->len; - return (len > match_len && - name[match_len] == '/' && - !memcmp(name, match->path, match_len)) ? match_len : 0; -} + int max_len, match_len = 0, i = 0; -static inline void set_pathname(int len, const char *name, struct pathname *match) -{ - if (len < PATH_MAX) { - match->len = len; - memcpy(match->path, name, len); - match->path[len] = 0; + max_len = len < cache.len ? len : cache.len; + while (i < max_len && name[i] == cache.path[i]) { + if (name[i] == '/') + match_len = i; + i++; } + /* Is the cached path string a substring of 'name'? */ + if (i == cache.len && cache.len < len && name[cache.len] == '/') + match_len = cache.len; + /* Is 'name' a substring of the cached path string? */ + else if ((i == len && len < cache.len && cache.path[len] == '/') || + (i == len && len == cache.len)) + match_len = len; + return match_len; } -int has_symlink_leading_path(int len, const char *name) +static inline void reset_lstat_cache(void) { - static struct pathname link, nonlink; - char path[PATH_MAX]; + cache.path[0] = '\0'; + cache.len = 0; + cache.flags = 0; +} + +#define FL_DIR (1 << 0) +#define FL_SYMLINK (1 << 1) +#define FL_LSTATERR (1 << 2) +#define FL_ERR (1 << 3) + +/* + * Check if name 'name' of length 'len' has a symlink leading + * component, or if the directory exists and is real. + * + * To speed up the check, some information is allowed to be cached. + * This can be indicated by the 'track_flags' argument. + */ +static int lstat_cache(int len, const char *name, + int track_flags) +{ + int match_len, last_slash, last_slash_dir; + int match_flags, ret_flags, save_flags, max_len; struct stat st; - char *sp; - int known_dir; /* - * See if the last known symlink cache matches. + * Check to see if we have a match from the cache for the + * symlink path type. */ - if (match_pathname(len, name, &link)) - return 1; + match_len = last_slash = longest_match_lstat_cache(len, name); + match_flags = cache.flags & track_flags & FL_SYMLINK; + if (match_flags && match_len == cache.len) + return match_flags; + /* + * If we now have match_len > 0, we would know that the + * matched part will always be a directory. + * + * Also, if we are tracking directories and 'name' is a + * substring of the cache on a path component basis, we can + * return immediately. + */ + match_flags = track_flags & FL_DIR; + if (match_flags && len == match_len) + return match_flags; /* - * Get rid of the last known directory part + * Okay, no match from the cache so far, so now we have to + * check the rest of the path components. */ - known_dir = match_pathname(len, name, &nonlink); + ret_flags = FL_DIR; + last_slash_dir = last_slash; + max_len = len < PATH_MAX ? len : PATH_MAX; + while (match_len < max_len) { + do { + cache.path[match_len] = name[match_len]; + match_len++; + } while (match_len < max_len && name[match_len] != '/'); + if (match_len >= max_len) + break; + last_slash = match_len; + cache.path[last_slash] = '\0'; - while ((sp = strchr(name + known_dir + 1, '/')) != NULL) { - int thislen = sp - name ; - memcpy(path, name, thislen); - path[thislen] = 0; - - if (lstat(path, &st)) - return 0; - if (S_ISDIR(st.st_mode)) { - set_pathname(thislen, path, &nonlink); - known_dir = thislen; + if (lstat(cache.path, &st)) { + ret_flags = FL_LSTATERR; + } else if (S_ISDIR(st.st_mode)) { + last_slash_dir = last_slash; continue; - } - if (S_ISLNK(st.st_mode)) { - set_pathname(thislen, path, &link); - return 1; + } else if (S_ISLNK(st.st_mode)) { + ret_flags = FL_SYMLINK; + } else { + ret_flags = FL_ERR; } break; } - return 0; + + /* + * At the end update the cache. Note that max 2 different + * path types, FL_SYMLINK and FL_DIR, can be cached for the + * moment! + */ + save_flags = ret_flags & track_flags & FL_SYMLINK; + if (save_flags && last_slash > 0 && last_slash < PATH_MAX) { + cache.path[last_slash] = '\0'; + cache.len = last_slash; + cache.flags = save_flags; + } else if (track_flags & FL_DIR && + last_slash_dir > 0 && last_slash_dir < PATH_MAX) { + /* + * We have a separate test for the directory case, + * since it could be that we have found a symlink and + * the track_flags says that we cannot cache this + * fact, so the cache would then have been left empty + * in this case. + * + * But if we are allowed to track real directories, we + * can still cache the path components before the last + * one (the found symlink component). + */ + cache.path[last_slash_dir] = '\0'; + cache.len = last_slash_dir; + cache.flags = FL_DIR; + } else { + reset_lstat_cache(); + } + return ret_flags; +} + +/* + * Return non-zero if path 'name' has a leading symlink component + */ +int has_symlink_leading_path(int len, const char *name) +{ + return lstat_cache(len, name, + FL_SYMLINK|FL_DIR) & + FL_SYMLINK; } From 09c9306658f793f2cad1ed7856b55a7c4e758703 Mon Sep 17 00:00:00 2001 From: Kjetil Barvik Date: Sun, 18 Jan 2009 16:14:51 +0100 Subject: [PATCH 16/40] lstat_cache(): introduce has_symlink_or_noent_leading_path() function In some cases, especially inside the unpack-trees.c file, and inside the verify_absent() function, we can avoid some unnecessary calls to lstat(), if the lstat_cache() function can also be told to keep track of non-existing directories. So we update the lstat_cache() function to handle this new fact, introduce a new wrapper function, and the result is that we save lots of lstat() calls for a removed directory which previously contained lots of files, when we call this new wrapper of lstat_cache() instead of the old one. We do similar changes inside the unlink_entry() function, since if we can already say that the leading directory component of a pathname does not exist, it is not necessary to try to remove a pathname below it! Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable comments to this patch! Signed-off-by: Kjetil Barvik Signed-off-by: Junio C Hamano --- cache.h | 1 + symlinks.c | 94 ++++++++++++++++++++++++++++++++------------------ unpack-trees.c | 4 +-- 3 files changed, 63 insertions(+), 36 deletions(-) diff --git a/cache.h b/cache.h index 231c06d772..11181aa007 100644 --- a/cache.h +++ b/cache.h @@ -720,6 +720,7 @@ struct checkout { extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath); extern int has_symlink_leading_path(int len, const char *name); +extern int has_symlink_or_noent_leading_path(int len, const char *name); extern struct alternate_object_database { struct alternate_object_database *next; diff --git a/symlinks.c b/symlinks.c index 49fb4d8bc2..c69556acf8 100644 --- a/symlinks.c +++ b/symlinks.c @@ -4,6 +4,7 @@ static struct cache_def { char path[PATH_MAX]; int len; int flags; + int track_flags; } cache; /* @@ -30,21 +31,23 @@ static inline int longest_match_lstat_cache(int len, const char *name) return match_len; } -static inline void reset_lstat_cache(void) +static inline void reset_lstat_cache(int track_flags) { cache.path[0] = '\0'; cache.len = 0; cache.flags = 0; + cache.track_flags = track_flags; } #define FL_DIR (1 << 0) -#define FL_SYMLINK (1 << 1) -#define FL_LSTATERR (1 << 2) -#define FL_ERR (1 << 3) +#define FL_NOENT (1 << 1) +#define FL_SYMLINK (1 << 2) +#define FL_LSTATERR (1 << 3) +#define FL_ERR (1 << 4) /* * Check if name 'name' of length 'len' has a symlink leading - * component, or if the directory exists and is real. + * component, or if the directory exists and is real, or not. * * To speed up the check, some information is allowed to be cached. * This can be indicated by the 'track_flags' argument. @@ -56,25 +59,35 @@ static int lstat_cache(int len, const char *name, int match_flags, ret_flags, save_flags, max_len; struct stat st; - /* - * Check to see if we have a match from the cache for the - * symlink path type. - */ - match_len = last_slash = longest_match_lstat_cache(len, name); - match_flags = cache.flags & track_flags & FL_SYMLINK; - if (match_flags && match_len == cache.len) - return match_flags; - /* - * If we now have match_len > 0, we would know that the - * matched part will always be a directory. - * - * Also, if we are tracking directories and 'name' is a - * substring of the cache on a path component basis, we can - * return immediately. - */ - match_flags = track_flags & FL_DIR; - if (match_flags && len == match_len) - return match_flags; + if (cache.track_flags != track_flags) { + /* + * As a safeguard we clear the cache if the value of + * track_flags does not match with the last supplied + * value. + */ + reset_lstat_cache(track_flags); + match_len = last_slash = 0; + } else { + /* + * Check to see if we have a match from the cache for + * the 2 "excluding" path types. + */ + match_len = last_slash = longest_match_lstat_cache(len, name); + match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK); + if (match_flags && match_len == cache.len) + return match_flags; + /* + * If we now have match_len > 0, we would know that + * the matched part will always be a directory. + * + * Also, if we are tracking directories and 'name' is + * a substring of the cache on a path component basis, + * we can return immediately. + */ + match_flags = track_flags & FL_DIR; + if (match_flags && len == match_len) + return match_flags; + } /* * Okay, no match from the cache so far, so now we have to @@ -95,6 +108,8 @@ static int lstat_cache(int len, const char *name, if (lstat(cache.path, &st)) { ret_flags = FL_LSTATERR; + if (errno == ENOENT) + ret_flags |= FL_NOENT; } else if (S_ISDIR(st.st_mode)) { last_slash_dir = last_slash; continue; @@ -107,11 +122,11 @@ static int lstat_cache(int len, const char *name, } /* - * At the end update the cache. Note that max 2 different - * path types, FL_SYMLINK and FL_DIR, can be cached for the - * moment! + * At the end update the cache. Note that max 3 different + * path types, FL_NOENT, FL_SYMLINK and FL_DIR, can be cached + * for the moment! */ - save_flags = ret_flags & track_flags & FL_SYMLINK; + save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK); if (save_flags && last_slash > 0 && last_slash < PATH_MAX) { cache.path[last_slash] = '\0'; cache.len = last_slash; @@ -120,20 +135,20 @@ static int lstat_cache(int len, const char *name, last_slash_dir > 0 && last_slash_dir < PATH_MAX) { /* * We have a separate test for the directory case, - * since it could be that we have found a symlink and - * the track_flags says that we cannot cache this - * fact, so the cache would then have been left empty - * in this case. + * since it could be that we have found a symlink or a + * non-existing directory and the track_flags says + * that we cannot cache this fact, so the cache would + * then have been left empty in this case. * * But if we are allowed to track real directories, we * can still cache the path components before the last - * one (the found symlink component). + * one (the found symlink or non-existing component). */ cache.path[last_slash_dir] = '\0'; cache.len = last_slash_dir; cache.flags = FL_DIR; } else { - reset_lstat_cache(); + reset_lstat_cache(track_flags); } return ret_flags; } @@ -147,3 +162,14 @@ int has_symlink_leading_path(int len, const char *name) FL_SYMLINK|FL_DIR) & FL_SYMLINK; } + +/* + * Return non-zero if path 'name' has a leading symlink component or + * if some leading path component does not exists. + */ +int has_symlink_or_noent_leading_path(int len, const char *name) +{ + return lstat_cache(len, name, + FL_SYMLINK|FL_NOENT|FL_DIR) & + (FL_SYMLINK|FL_NOENT); +} diff --git a/unpack-trees.c b/unpack-trees.c index 54f301da67..a3fd383afb 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -61,7 +61,7 @@ static void unlink_entry(struct cache_entry *ce) char *cp, *prev; char *name = ce->name; - if (has_symlink_leading_path(ce_namelen(ce), ce->name)) + if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name)) return; if (unlink(name)) return; @@ -584,7 +584,7 @@ static int verify_absent(struct cache_entry *ce, const char *action, if (o->index_only || o->reset || !o->update) return 0; - if (has_symlink_leading_path(ce_namelen(ce), ce->name)) + if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name)) return 0; if (!lstat(ce->name, &st)) { From bad4a54fa6fe8a9bbdfb5c3e413268eefde69b75 Mon Sep 17 00:00:00 2001 From: Kjetil Barvik Date: Sun, 18 Jan 2009 16:14:52 +0100 Subject: [PATCH 17/40] lstat_cache(): introduce has_dirs_only_path() function The create_directories() function in entry.c currently calls stat() or lstat() for each path component of the pathname 'path' each and every time. For the 'git checkout' command, this function is called on each file for which we must do an update (ce->ce_flags & CE_UPDATE), so we get lots and lots of calls. To fix this, we make a new wrapper to the lstat_cache() function, and call the wrapper function instead of the calls to the stat() or the lstat() functions. Since the paths given to the create_directories() function, is sorted alphabetically, the new wrapper would be very cache effective in this situation. To support it we must update the lstat_cache() function to be able to say that "please test the complete length of 'name'", and also to give it the length of a prefix, where the cache should use the stat() function instead of the lstat() function to test each path component. Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable comments to this patch! Signed-off-by: Kjetil Barvik Signed-off-by: Junio C Hamano --- cache.h | 1 + entry.c | 34 ++++++++++------------------- symlinks.c | 64 +++++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 60 insertions(+), 39 deletions(-) diff --git a/cache.h b/cache.h index 11181aa007..7c8c8e4842 100644 --- a/cache.h +++ b/cache.h @@ -721,6 +721,7 @@ struct checkout { extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath); extern int has_symlink_leading_path(int len, const char *name); extern int has_symlink_or_noent_leading_path(int len, const char *name); +extern int has_dirs_only_path(int len, const char *name, int prefix_len); extern struct alternate_object_database { struct alternate_object_database *next; diff --git a/entry.c b/entry.c index aa2ee46a84..01a683ee5d 100644 --- a/entry.c +++ b/entry.c @@ -8,35 +8,25 @@ static void create_directories(const char *path, const struct checkout *state) const char *slash = path; while ((slash = strchr(slash+1, '/')) != NULL) { - struct stat st; - int stat_status; - len = slash - path; memcpy(buf, path, len); buf[len] = 0; - if (len <= state->base_dir_len) - /* - * checkout-index --prefix=; is - * allowed to be a symlink to an existing - * directory. - */ - stat_status = stat(buf, &st); - else - /* - * if there currently is a symlink, we would - * want to replace it with a real directory. - */ - stat_status = lstat(buf, &st); - - if (!stat_status && S_ISDIR(st.st_mode)) + /* + * For 'checkout-index --prefix=', is + * allowed to be a symlink to an existing directory, + * and we set 'state->base_dir_len' below, such that + * we test the path components of the prefix with the + * stat() function instead of the lstat() function. + */ + if (has_dirs_only_path(len, buf, state->base_dir_len)) continue; /* ok, it is already a directory. */ /* - * We know stat_status == 0 means something exists - * there and this mkdir would fail, but that is an - * error codepath; we do not care, as we unlink and - * mkdir again in such a case. + * If this mkdir() would fail, it could be that there + * is already a symlink or something else exists + * there, therefore we then try to unlink it and try + * one more time to create the directory. */ if (mkdir(buf, 0777)) { if (errno == EEXIST && state->force && diff --git a/symlinks.c b/symlinks.c index c69556acf8..918e24ad9e 100644 --- a/symlinks.c +++ b/symlinks.c @@ -1,10 +1,11 @@ #include "cache.h" static struct cache_def { - char path[PATH_MAX]; + char path[PATH_MAX + 1]; int len; int flags; int track_flags; + int prefix_len_stat_func; } cache; /* @@ -31,12 +32,13 @@ static inline int longest_match_lstat_cache(int len, const char *name) return match_len; } -static inline void reset_lstat_cache(int track_flags) +static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func) { cache.path[0] = '\0'; cache.len = 0; cache.flags = 0; cache.track_flags = track_flags; + cache.prefix_len_stat_func = prefix_len_stat_func; } #define FL_DIR (1 << 0) @@ -44,28 +46,35 @@ static inline void reset_lstat_cache(int track_flags) #define FL_SYMLINK (1 << 2) #define FL_LSTATERR (1 << 3) #define FL_ERR (1 << 4) +#define FL_FULLPATH (1 << 5) /* * Check if name 'name' of length 'len' has a symlink leading * component, or if the directory exists and is real, or not. * * To speed up the check, some information is allowed to be cached. - * This can be indicated by the 'track_flags' argument. + * This can be indicated by the 'track_flags' argument, which also can + * be used to indicate that we should check the full path. + * + * The 'prefix_len_stat_func' parameter can be used to set the length + * of the prefix, where the cache should use the stat() function + * instead of the lstat() function to test each path component. */ static int lstat_cache(int len, const char *name, - int track_flags) + int track_flags, int prefix_len_stat_func) { int match_len, last_slash, last_slash_dir; - int match_flags, ret_flags, save_flags, max_len; + int match_flags, ret_flags, save_flags, max_len, ret; struct stat st; - if (cache.track_flags != track_flags) { + if (cache.track_flags != track_flags || + cache.prefix_len_stat_func != prefix_len_stat_func) { /* - * As a safeguard we clear the cache if the value of - * track_flags does not match with the last supplied - * value. + * As a safeguard we clear the cache if the values of + * track_flags and/or prefix_len_stat_func does not + * match with the last supplied values. */ - reset_lstat_cache(track_flags); + reset_lstat_cache(track_flags, prefix_len_stat_func); match_len = last_slash = 0; } else { /* @@ -101,12 +110,17 @@ static int lstat_cache(int len, const char *name, cache.path[match_len] = name[match_len]; match_len++; } while (match_len < max_len && name[match_len] != '/'); - if (match_len >= max_len) + if (match_len >= max_len && !(track_flags & FL_FULLPATH)) break; last_slash = match_len; cache.path[last_slash] = '\0'; - if (lstat(cache.path, &st)) { + if (last_slash <= prefix_len_stat_func) + ret = stat(cache.path, &st); + else + ret = lstat(cache.path, &st); + + if (ret) { ret_flags = FL_LSTATERR; if (errno == ENOENT) ret_flags |= FL_NOENT; @@ -127,12 +141,12 @@ static int lstat_cache(int len, const char *name, * for the moment! */ save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK); - if (save_flags && last_slash > 0 && last_slash < PATH_MAX) { + if (save_flags && last_slash > 0 && last_slash <= PATH_MAX) { cache.path[last_slash] = '\0'; cache.len = last_slash; cache.flags = save_flags; } else if (track_flags & FL_DIR && - last_slash_dir > 0 && last_slash_dir < PATH_MAX) { + last_slash_dir > 0 && last_slash_dir <= PATH_MAX) { /* * We have a separate test for the directory case, * since it could be that we have found a symlink or a @@ -148,18 +162,20 @@ static int lstat_cache(int len, const char *name, cache.len = last_slash_dir; cache.flags = FL_DIR; } else { - reset_lstat_cache(track_flags); + reset_lstat_cache(track_flags, prefix_len_stat_func); } return ret_flags; } +#define USE_ONLY_LSTAT 0 + /* * Return non-zero if path 'name' has a leading symlink component */ int has_symlink_leading_path(int len, const char *name) { return lstat_cache(len, name, - FL_SYMLINK|FL_DIR) & + FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) & FL_SYMLINK; } @@ -170,6 +186,20 @@ int has_symlink_leading_path(int len, const char *name) int has_symlink_or_noent_leading_path(int len, const char *name) { return lstat_cache(len, name, - FL_SYMLINK|FL_NOENT|FL_DIR) & + FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) & (FL_SYMLINK|FL_NOENT); } + +/* + * Return non-zero if all path components of 'name' exists as a + * directory. If prefix_len > 0, we will test with the stat() + * function instead of the lstat() function for a prefix length of + * 'prefix_len', thus we then allow for symlinks in the prefix part as + * long as those points to real existing directories. + */ +int has_dirs_only_path(int len, const char *name, int prefix_len) +{ + return lstat_cache(len, name, + FL_DIR|FL_FULLPATH, prefix_len) & + FL_DIR; +} From aeabab5c712d5acae4a2836272d641acbb87b893 Mon Sep 17 00:00:00 2001 From: Kjetil Barvik Date: Sun, 18 Jan 2009 16:14:53 +0100 Subject: [PATCH 18/40] lstat_cache(): introduce invalidate_lstat_cache() function In some cases it could maybe be necessary to say to the cache that "Hey, I deleted/changed the type of this pathname and if you currently have it inside your cache, you should deleted it". This patch introduce a function which support this. Signed-off-by: Kjetil Barvik Signed-off-by: Junio C Hamano --- cache.h | 1 + symlinks.c | 44 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/cache.h b/cache.h index 7c8c8e4842..f4452a89a7 100644 --- a/cache.h +++ b/cache.h @@ -722,6 +722,7 @@ extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, extern int has_symlink_leading_path(int len, const char *name); extern int has_symlink_or_noent_leading_path(int len, const char *name); extern int has_dirs_only_path(int len, const char *name, int prefix_len); +extern void invalidate_lstat_cache(int len, const char *name); extern struct alternate_object_database { struct alternate_object_database *next; diff --git a/symlinks.c b/symlinks.c index 918e24ad9e..31ddbc9e96 100644 --- a/symlinks.c +++ b/symlinks.c @@ -12,23 +12,30 @@ static struct cache_def { * Returns the length (on a path component basis) of the longest * common prefix match of 'name' and the cached path string. */ -static inline int longest_match_lstat_cache(int len, const char *name) +static inline int longest_match_lstat_cache(int len, const char *name, + int *previous_slash) { - int max_len, match_len = 0, i = 0; + int max_len, match_len = 0, match_len_prev = 0, i = 0; max_len = len < cache.len ? len : cache.len; while (i < max_len && name[i] == cache.path[i]) { - if (name[i] == '/') + if (name[i] == '/') { + match_len_prev = match_len; match_len = i; + } i++; } /* Is the cached path string a substring of 'name'? */ - if (i == cache.len && cache.len < len && name[cache.len] == '/') + if (i == cache.len && cache.len < len && name[cache.len] == '/') { + match_len_prev = match_len; match_len = cache.len; /* Is 'name' a substring of the cached path string? */ - else if ((i == len && len < cache.len && cache.path[len] == '/') || - (i == len && len == cache.len)) + } else if ((i == len && len < cache.len && cache.path[len] == '/') || + (i == len && len == cache.len)) { + match_len_prev = match_len; match_len = len; + } + *previous_slash = match_len_prev; return match_len; } @@ -63,7 +70,7 @@ static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func) static int lstat_cache(int len, const char *name, int track_flags, int prefix_len_stat_func) { - int match_len, last_slash, last_slash_dir; + int match_len, last_slash, last_slash_dir, previous_slash; int match_flags, ret_flags, save_flags, max_len, ret; struct stat st; @@ -81,7 +88,8 @@ static int lstat_cache(int len, const char *name, * Check to see if we have a match from the cache for * the 2 "excluding" path types. */ - match_len = last_slash = longest_match_lstat_cache(len, name); + match_len = last_slash = + longest_match_lstat_cache(len, name, &previous_slash); match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK); if (match_flags && match_len == cache.len) return match_flags; @@ -167,6 +175,26 @@ static int lstat_cache(int len, const char *name, return ret_flags; } +/* + * Invalidate the given 'name' from the cache, if 'name' matches + * completely with the cache. + */ +void invalidate_lstat_cache(int len, const char *name) +{ + int match_len, previous_slash; + + match_len = longest_match_lstat_cache(len, name, &previous_slash); + if (len == match_len) { + if ((cache.track_flags & FL_DIR) && previous_slash > 0) { + cache.path[previous_slash] = '\0'; + cache.len = previous_slash; + cache.flags = FL_DIR; + } else + reset_lstat_cache(cache.track_flags, + cache.prefix_len_stat_func); + } +} + #define USE_ONLY_LSTAT 0 /* From bda6eb0da9b4e4e763b531c83cab9fd9f85934ff Mon Sep 17 00:00:00 2001 From: Kjetil Barvik Date: Sun, 18 Jan 2009 16:14:54 +0100 Subject: [PATCH 19/40] lstat_cache(): introduce clear_lstat_cache() function If you want to completely clear the contents of the lstat_cache(), then call this new function. Signed-off-by: Kjetil Barvik Signed-off-by: Junio C Hamano --- cache.h | 1 + symlinks.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/cache.h b/cache.h index f4452a89a7..468daf6a43 100644 --- a/cache.h +++ b/cache.h @@ -723,6 +723,7 @@ extern int has_symlink_leading_path(int len, const char *name); extern int has_symlink_or_noent_leading_path(int len, const char *name); extern int has_dirs_only_path(int len, const char *name, int prefix_len); extern void invalidate_lstat_cache(int len, const char *name); +extern void clear_lstat_cache(void); extern struct alternate_object_database { struct alternate_object_database *next; diff --git a/symlinks.c b/symlinks.c index 31ddbc9e96..f262b7c44b 100644 --- a/symlinks.c +++ b/symlinks.c @@ -195,6 +195,14 @@ void invalidate_lstat_cache(int len, const char *name) } } +/* + * Completely clear the contents of the cache + */ +void clear_lstat_cache(void) +{ + reset_lstat_cache(0, 0); +} + #define USE_ONLY_LSTAT 0 /* From 98a4d87b87e9846eafd21ba232cc2b7ba3f718fc Mon Sep 17 00:00:00 2001 From: Boyd Stephen Smith Jr Date: Tue, 20 Jan 2009 21:46:57 -0600 Subject: [PATCH 20/40] color-words: Support diff.wordregex config option When diff is invoked with --color-words (w/o =regex), use the regular expression the user has configured as diff.wordregex. diff drivers configured via attributes take precedence over the diff.wordregex-words setting. If the user wants to change them, they have their own configuration variables. Signed-off-by: Boyd Stephen Smith Jr Signed-off-by: Junio C Hamano --- Documentation/config.txt | 6 +++++ Documentation/diff-options.txt | 7 +++--- diff.c | 5 ++++ t/t4034-diff-words.sh | 45 ++++++++++++++++++++++++++++++++-- 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 7408bb2d34..0ca983ac3b 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -639,6 +639,12 @@ diff.suppress-blank-empty:: A boolean to inhibit the standard behavior of printing a space before each empty output line. Defaults to false. +diff.wordregex:: + A POSIX Extended Regular Expression used to determine what is a "word" + when performing word-by-word difference calculations. Character + sequences that match the regular expression are "words", all other + characters are *ignorable* whitespace. + fetch.unpackLimit:: If the number of objects fetched over the git native transfer is below this diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 1edb82e8e1..164e2c5348 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -103,9 +103,10 @@ expression to make sure that it matches all non-whitespace characters. A match that contains a newline is silently truncated(!) at the newline. + -The regex can also be set via a diff driver, see -linkgit:gitattributes[1]; giving it explicitly overrides any diff -driver setting. +The regex can also be set via a diff driver or configuration option, see +linkgit:gitattributes[1] or linkgit:git-config[1]. Giving it explicitly +overrides any diff driver or configuration setting. Diff drivers +override configuration settings. --no-renames:: Turn off rename detection, even when the configuration diff --git a/diff.c b/diff.c index 9fcde963db..ed8b83c68f 100644 --- a/diff.c +++ b/diff.c @@ -23,6 +23,7 @@ static int diff_detect_rename_default; static int diff_rename_limit_default = 200; static int diff_suppress_blank_empty; int diff_use_color_default = -1; +static const char *diff_word_regex_cfg; static const char *external_diff_cmd_cfg; int diff_auto_refresh_index = 1; static int diff_mnemonic_prefix; @@ -92,6 +93,8 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "diff.external")) return git_config_string(&external_diff_cmd_cfg, var, value); + if (!strcmp(var, "diff.wordregex")) + return git_config_string(&diff_word_regex_cfg, var, value); return git_diff_basic_config(var, value, cb); } @@ -1550,6 +1553,8 @@ static void builtin_diff(const char *name_a, o->word_regex = userdiff_word_regex(one); if (!o->word_regex) o->word_regex = userdiff_word_regex(two); + if (!o->word_regex) + o->word_regex = diff_word_regex_cfg; if (o->word_regex) { ecbdata.diff_words->word_regex = (regex_t *) xmalloc(sizeof(regex_t)); diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index 744221bef9..6bcc153084 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -77,6 +77,7 @@ a = b + c aeff = aeff * ( aaa ) EOF +cp expect expect.letter-runs-are-words test_expect_success 'word diff with a regular expression' ' @@ -92,7 +93,7 @@ post diff=testdriver EOF ' -test_expect_success 'option overrides default' ' +test_expect_success 'option overrides .gitattributes' ' word_diff --color-words="[a-z]+" @@ -112,13 +113,53 @@ a = b + c aeff = aeff * ( aaa ) EOF +cp expect expect.non-whitespace-is-word -test_expect_success 'use default supplied by driver' ' +test_expect_success 'use regex supplied by driver' ' word_diff --color-words ' +test_expect_success 'set diff.wordregex option' ' + git config diff.wordregex "[[:alnum:]]+" +' + +cp expect.letter-runs-are-words expect + +test_expect_success 'command-line overrides config' ' + word_diff --color-words="[a-z]+" +' + +cp expect.non-whitespace-is-word expect + +test_expect_success '.gitattributes override config' ' + word_diff --color-words +' + +test_expect_success 'remove diff driver regex' ' + git config --unset diff.testdriver.wordregex +' + +cat > expect <<\EOF +diff --git a/pre b/post +index 330b04f..5ed8eff 100644 +--- a/pre ++++ b/post +@@ -1,3 +1,7 @@ +h(4),hh[44] + +a = b + c + +aa = a + +aeff = aeff * ( aaa ) +EOF + +test_expect_success 'use configured regex' ' + word_diff --color-words +' + echo 'aaa (aaa)' > pre echo 'aaa (aaa) aaa' > post From ae3b970ac3e21324a95fea75213c2569180d74c6 Mon Sep 17 00:00:00 2001 From: Boyd Stephen Smith Jr Date: Tue, 20 Jan 2009 22:59:54 -0600 Subject: [PATCH 21/40] Change the spelling of "wordregex". Use "wordRegex" for configuration variable names. Use "word_regex" for C language tokens. Signed-off-by: Boyd Stephen Smith Jr. Signed-off-by: Junio C Hamano --- Documentation/config.txt | 2 +- Documentation/gitattributes.txt | 4 ++-- t/t4034-diff-words.sh | 8 ++++---- userdiff.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 0ca983ac3b..332213e65d 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -639,7 +639,7 @@ diff.suppress-blank-empty:: A boolean to inhibit the standard behavior of printing a space before each empty output line. Defaults to false. -diff.wordregex:: +diff.wordRegex:: A POSIX Extended Regular Expression used to determine what is a "word" when performing word-by-word difference calculations. Character sequences that match the regular expression are "words", all other diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index ba3ba12730..227934f59a 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -341,14 +341,14 @@ Customizing word diff You can customize the rules that `git diff --color-words` uses to split words in a line, by specifying an appropriate regular expression -in the "diff.*.wordregex" configuration variable. For example, in TeX +in the "diff.*.wordRegex" configuration variable. For example, in TeX a backslash followed by a sequence of letters forms a command, but several such commands can be run together without intervening whitespace. To separate them, use a regular expression such as ------------------------ [diff "tex"] - wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+" + wordRegex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+" ------------------------ A built-in pattern is provided for all languages listed in the diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index 6bcc153084..4508effcaa 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -86,7 +86,7 @@ test_expect_success 'word diff with a regular expression' ' ' test_expect_success 'set a diff driver' ' - git config diff.testdriver.wordregex "[^[:space:]]" && + git config diff.testdriver.wordRegex "[^[:space:]]" && cat < .gitattributes pre diff=testdriver post diff=testdriver @@ -121,8 +121,8 @@ test_expect_success 'use regex supplied by driver' ' ' -test_expect_success 'set diff.wordregex option' ' - git config diff.wordregex "[[:alnum:]]+" +test_expect_success 'set diff.wordRegex option' ' + git config diff.wordRegex "[[:alnum:]]+" ' cp expect.letter-runs-are-words expect @@ -138,7 +138,7 @@ test_expect_success '.gitattributes override config' ' ' test_expect_success 'remove diff driver regex' ' - git config --unset diff.testdriver.wordregex + git config --unset diff.testdriver.wordRegex ' cat > expect <<\EOF diff --git a/userdiff.c b/userdiff.c index 2b55509485..d556da9751 100644 --- a/userdiff.c +++ b/userdiff.c @@ -6,8 +6,8 @@ static struct userdiff_driver *drivers; static int ndrivers; static int drivers_alloc; -#define PATTERNS(name, pattern, wordregex) \ - { name, NULL, -1, { pattern, REG_EXTENDED }, wordregex } +#define PATTERNS(name, pattern, word_regex) \ + { name, NULL, -1, { pattern, REG_EXTENDED }, word_regex } static struct userdiff_driver builtin_drivers[] = { PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"), From b938f62a208d24f3d6bed260ceb809cd6b3924d7 Mon Sep 17 00:00:00 2001 From: Boyd Stephen Smith Jr Date: Thu, 22 Jan 2009 12:26:25 -0600 Subject: [PATCH 22/40] Fix Documentation for git-describe The documentation for git-describe says the default abbreviation is 8 hexadecimal digits while cache.c clearly shows DEFAULT_ABBREV set to 7. This patch corrects the documentation. Signed-off-by: Boyd Stephen Smith Jr Signed-off-by: Junio C Hamano --- Documentation/git-describe.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt index c4dbc2ae34..59a6fd17ab 100644 --- a/Documentation/git-describe.txt +++ b/Documentation/git-describe.txt @@ -38,7 +38,7 @@ OPTIONS Automatically implies --tags. --abbrev=:: - Instead of using the default 8 hexadecimal digits as the + Instead of using the default 7 hexadecimal digits as the abbreviated object name, use digits. --candidates=:: From 86ac751859033741a120e9e4a91133d075d9d898 Mon Sep 17 00:00:00 2001 From: Sverre Rabbelier Date: Fri, 23 Jan 2009 01:07:32 +0100 Subject: [PATCH 23/40] Allow cloning an empty repository Cloning an empty repository manually (that is, doing 'git init' and then doing all configuration by hand) can be a lot of work. Save the user this work by allowing the cloning of empty repositories. Signed-off-by: Sverre Rabbelier Signed-off-by: Junio C Hamano --- builtin-clone.c | 17 +++++++++++++---- t/t5701-clone-local.sh | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/builtin-clone.c b/builtin-clone.c index f7e5a7b0a0..1e9c9aa844 100644 --- a/builtin-clone.c +++ b/builtin-clone.c @@ -522,14 +522,23 @@ int cmd_clone(int argc, const char **argv, const char *prefix) option_upload_pack); refs = transport_get_remote_refs(transport); - transport_fetch_refs(transport, refs); + if(refs) + transport_fetch_refs(transport, refs); } - clear_extra_refs(); + if (refs) { + clear_extra_refs(); - mapped_refs = write_remote_refs(refs, &refspec, reflog_msg.buf); + mapped_refs = write_remote_refs(refs, &refspec, reflog_msg.buf); - head_points_at = locate_head(refs, mapped_refs, &remote_head); + head_points_at = locate_head(refs, mapped_refs, &remote_head); + } + else { + warning("You appear to have cloned an empty repository."); + head_points_at = NULL; + remote_head = NULL; + option_no_checkout = 1; + } if (head_points_at) { /* Local default branch link */ diff --git a/t/t5701-clone-local.sh b/t/t5701-clone-local.sh index 8dfaaa456e..fbd9bfa573 100755 --- a/t/t5701-clone-local.sh +++ b/t/t5701-clone-local.sh @@ -116,4 +116,20 @@ test_expect_success 'bundle clone with nonexistent HEAD' ' test ! -e .git/refs/heads/master ' +test_expect_success 'clone empty repository' ' + cd "$D" && + mkdir empty && + (cd empty && git init) && + git clone empty empty-clone && + test_tick && + (cd empty-clone + echo "content" >> foo && + git add foo && + git commit -m "Initial commit" && + git push origin master && + expected=$(git rev-parse master) && + actual=$(git --git-dir=../empty/.git rev-parse master) && + test $actual = $expected) +' + test_done From d930508903909ad5e700dd25e3858a1b2d69ed12 Mon Sep 17 00:00:00 2001 From: Arjen Laarhoven Date: Thu, 22 Jan 2009 17:37:24 +0100 Subject: [PATCH 24/40] t/t4202-log.sh: Add testcases Add testcases for 'git log --diff-filter=[CM]' (copies and renames). Also add a testcase for 'git log --follow'. Signed-off-by: Arjen Laarhoven Signed-off-by: Junio C Hamano --- t/t4202-log.sh | 61 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/t/t4202-log.sh b/t/t4202-log.sh index 0ab925c4e4..7b976ee36d 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -16,27 +16,31 @@ test_expect_success setup ' test_tick && git commit -m second && + git mv one ichi && + test_tick && + git commit -m third && + + cp ichi ein && + git add ein && + test_tick && + git commit -m fourth && + mkdir a && echo ni >a/two && git add a/two && test_tick && - git commit -m third && + git commit -m fifth && - echo san >a/three && - git add a/three && + git rm a/two && test_tick && - git commit -m fourth && - - git rm a/three && - test_tick && - git commit -m fifth + git commit -m sixth ' test_expect_success 'diff-filter=A' ' actual=$(git log --pretty="format:%s" --diff-filter=A HEAD) && - expect=$(echo fourth ; echo third ; echo initial) && + expect=$(echo fifth ; echo fourth ; echo third ; echo initial) && test "$actual" = "$expect" || { echo Oops echo "Actual: $actual" @@ -60,7 +64,43 @@ test_expect_success 'diff-filter=M' ' test_expect_success 'diff-filter=D' ' actual=$(git log --pretty="format:%s" --diff-filter=D HEAD) && - expect=$(echo fifth) && + expect=$(echo sixth ; echo third) && + test "$actual" = "$expect" || { + echo Oops + echo "Actual: $actual" + false + } + +' + +test_expect_success 'diff-filter=R' ' + + actual=$(git log -M --pretty="format:%s" --diff-filter=R HEAD) && + expect=$(echo third) && + test "$actual" = "$expect" || { + echo Oops + echo "Actual: $actual" + false + } + +' + +test_expect_success 'diff-filter=C' ' + + actual=$(git log -C -C --pretty="format:%s" --diff-filter=C HEAD) && + expect=$(echo fourth) && + test "$actual" = "$expect" || { + echo Oops + echo "Actual: $actual" + false + } + +' + +test_expect_success 'git log --follow' ' + + actual=$(git log --follow --pretty="format:%s" ichi) && + expect=$(echo third ; echo second ; echo initial) && test "$actual" = "$expect" || { echo Oops echo "Actual: $actual" @@ -72,6 +112,7 @@ test_expect_success 'diff-filter=D' ' test_expect_success 'setup case sensitivity tests' ' echo case >one && test_tick && + git add one git commit -a -m Second ' From b80da424a13107c239ed40573fae3d692d19b6cd Mon Sep 17 00:00:00 2001 From: "martin f. krafft" Date: Fri, 23 Jan 2009 11:31:21 +1100 Subject: [PATCH 25/40] git-am: implement --reject option passed to git-apply With --reject, git-am simply passes the --reject option to git-apply and thus allows people to work with reject files if they so prefer. Signed-off-by: martin f. krafft Signed-off-by: Junio C Hamano --- Documentation/git-am.txt | 2 ++ git-am.sh | 3 +++ t/t4252-am-options.sh | 11 ++++++++++- t/t4252/am-test-6-1 | 21 +++++++++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 t/t4252/am-test-6-1 diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt index 5cbbe76937..efd311b1ce 100644 --- a/Documentation/git-am.txt +++ b/Documentation/git-am.txt @@ -12,6 +12,7 @@ SYNOPSIS 'git am' [--signoff] [--keep] [--utf8 | --no-utf8] [--3way] [--interactive] [--whitespace=