From 881793c4f71c84e70af256c5721475c7c088b3f7 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 17 Nov 2025 08:04:31 +0000 Subject: [PATCH 1/2] xdiff: add 'minimal' to XDF_DIFF_ALGORITHM_MASK The XDF_DIFF_ALGORITHM_MASK bit mask only includes bits for the patience and histogram diffs, not for the minimal one. This means that when reseting the diff algorithm to the default one, one needs to separately clear the bit for the minimal diff. There are places in the code that fail to do that: merge-ort.c and builtin/merge-file.c. Add the XDF_NEED_MINIMAL bit to the bit mask, and remove the separate clearing of this bit in the places where it hasn't been forgotten. Signed-off-by: Antonin Delpeuch Signed-off-by: Junio C Hamano --- diff.c | 1 - merge-ort.c | 1 - xdiff/xdiff.h | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/diff.c b/diff.c index a74e701806..0eec4a2fe8 100644 --- a/diff.c +++ b/diff.c @@ -3527,7 +3527,6 @@ static int set_diff_algorithm(struct diff_options *opts, return -1; /* clear out previous settings */ - DIFF_XDL_CLR(opts, NEED_MINIMAL); opts->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; opts->xdl_opts |= value; diff --git a/merge-ort.c b/merge-ort.c index 29858074f9..23e2b64c79 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -5496,7 +5496,6 @@ int parse_merge_opt(struct merge_options *opt, const char *s) if (value < 0) return -1; /* clear out previous settings */ - DIFF_XDL_CLR(opt, NEED_MINIMAL); opt->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; opt->xdl_opts |= value; } diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 2cecde5afe..dc370712e9 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -43,7 +43,7 @@ extern "C" { #define XDF_PATIENCE_DIFF (1 << 14) #define XDF_HISTOGRAM_DIFF (1 << 15) -#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF) +#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF | XDF_NEED_MINIMAL) #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK) #define XDF_INDENT_HEURISTIC (1 << 23) From ffffb987fcd3b3d6b88aceed87000ef4a5b6114e Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 17 Nov 2025 08:04:32 +0000 Subject: [PATCH 2/2] blame: make diff algorithm configurable The diff algorithm used in 'git-blame(1)' is set to 'myers', without the possibility to change it aside from the `--minimal` option. There has been long-standing interest in changing the default diff algorithm to "histogram", and Git 3.0 was floated as a possible occasion for taking some steps towards that: https://lore.kernel.org/git/xmqqed873vgn.fsf@gitster.g/ As a preparation for this move, it is worth making sure that the diff algorithm is configurable where useful. Make it configurable in the `git-blame(1)` command by introducing the `--diff-algorithm` option and make honor the `diff.algorithm` config variable. Keep Myers diff as the default. Signed-off-by: Antonin Delpeuch Signed-off-by: Junio C Hamano --- Documentation/diff-algorithm-option.adoc | 20 +++ Documentation/diff-options.adoc | 21 +-- Documentation/git-blame.adoc | 2 + builtin/blame.c | 52 +++++- t/meson.build | 1 + t/t8015-blame-diff-algorithm.sh | 203 +++++++++++++++++++++++ 6 files changed, 278 insertions(+), 21 deletions(-) create mode 100644 Documentation/diff-algorithm-option.adoc create mode 100755 t/t8015-blame-diff-algorithm.sh diff --git a/Documentation/diff-algorithm-option.adoc b/Documentation/diff-algorithm-option.adoc new file mode 100644 index 0000000000..8e3a0b63d7 --- /dev/null +++ b/Documentation/diff-algorithm-option.adoc @@ -0,0 +1,20 @@ +`--diff-algorithm=(patience|minimal|histogram|myers)`:: + Choose a diff algorithm. The variants are as follows: ++ +-- + `default`;; + `myers`;; + The basic greedy diff algorithm. Currently, this is the default. + `minimal`;; + Spend extra time to make sure the smallest possible diff is + produced. + `patience`;; + Use "patience diff" algorithm when generating patches. + `histogram`;; + This algorithm extends the patience algorithm to "support + low-occurrence common elements". +-- ++ +For instance, if you configured the `diff.algorithm` variable to a +non-default value and want to use the default one, then you +have to use `--diff-algorithm=default` option. diff --git a/Documentation/diff-options.adoc b/Documentation/diff-options.adoc index ae31520f7f..9cdad6f72a 100644 --- a/Documentation/diff-options.adoc +++ b/Documentation/diff-options.adoc @@ -197,26 +197,7 @@ and starts with __, this algorithm attempts to prevent it from appearing as a deletion or addition in the output. It uses the "patience diff" algorithm internally. -`--diff-algorithm=(patience|minimal|histogram|myers)`:: - Choose a diff algorithm. The variants are as follows: -+ --- - `default`;; - `myers`;; - The basic greedy diff algorithm. Currently, this is the default. - `minimal`;; - Spend extra time to make sure the smallest possible diff is - produced. - `patience`;; - Use "patience diff" algorithm when generating patches. - `histogram`;; - This algorithm extends the patience algorithm to "support - low-occurrence common elements". --- -+ -For instance, if you configured the `diff.algorithm` variable to a -non-default value and want to use the default one, then you -have to use `--diff-algorithm=default` option. +include::diff-algorithm-option.adoc[] `--stat[=[,[,]]]`:: Generate a diffstat. By default, as much space as necessary diff --git a/Documentation/git-blame.adoc b/Documentation/git-blame.adoc index e438d28625..adcbb6f5dc 100644 --- a/Documentation/git-blame.adoc +++ b/Documentation/git-blame.adoc @@ -85,6 +85,8 @@ include::blame-options.adoc[] Ignore whitespace when comparing the parent's version and the child's to find where the lines came from. +include::diff-algorithm-option.adoc[] + --abbrev=:: Instead of using the default 7+1 hexadecimal digits as the abbreviated object name, use +1 digits, where is at diff --git a/builtin/blame.c b/builtin/blame.c index 2703820258..27b513d27f 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -779,6 +779,19 @@ static int git_blame_config(const char *var, const char *value, } } + if (!strcmp(var, "diff.algorithm")) { + long diff_algorithm; + if (!value) + return config_error_nonbool(var); + diff_algorithm = parse_algorithm_value(value); + if (diff_algorithm < 0) + return error(_("unknown value for config '%s': %s"), + var, value); + xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; + xdl_opts |= diff_algorithm; + return 0; + } + if (git_diff_heuristic_config(var, value, cb) < 0) return -1; if (userdiff_config(var, value) < 0) @@ -824,6 +837,38 @@ static int blame_move_callback(const struct option *option, const char *arg, int return 0; } +static int blame_diff_algorithm_minimal(const struct option *option, + const char *arg, int unset) +{ + int *opt = option->value; + + BUG_ON_OPT_ARG(arg); + + *opt &= ~XDF_DIFF_ALGORITHM_MASK; + if (!unset) + *opt |= XDF_NEED_MINIMAL; + + return 0; +} + +static int blame_diff_algorithm_callback(const struct option *option, + const char *arg, int unset) +{ + int *opt = option->value; + long value = parse_algorithm_value(arg); + + BUG_ON_OPT_NEG(unset); + + if (value < 0) + return error(_("option diff-algorithm accepts \"myers\", " + "\"minimal\", \"patience\" and \"histogram\"")); + + *opt &= ~XDF_DIFF_ALGORITHM_MASK; + *opt |= value; + + return 0; +} + static int is_a_rev(const char *name) { struct object_id oid; @@ -915,11 +960,16 @@ int cmd_blame(int argc, OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR), OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL), OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE), + OPT_CALLBACK_F(0, "diff-algorithm", &xdl_opts, N_(""), + N_("choose a diff algorithm"), + PARSE_OPT_NONEG, blame_diff_algorithm_callback), OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore when blaming")), OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from ")), OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE), OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR), - OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL), + OPT_CALLBACK_F(0, "minimal", &xdl_opts, NULL, + N_("spend extra cycles to find a better match"), + PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, blame_diff_algorithm_minimal), OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from instead of calling git-rev-list")), OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use 's contents as the final image")), OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback), diff --git a/t/meson.build b/t/meson.build index 401b24e50e..9f2fe7af8b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -955,6 +955,7 @@ integration_tests = [ 't8012-blame-colors.sh', 't8013-blame-ignore-revs.sh', 't8014-blame-ignore-fuzzy.sh', + 't8015-blame-diff-algorithm.sh', 't8020-last-modified.sh', 't9001-send-email.sh', 't9002-column.sh', diff --git a/t/t8015-blame-diff-algorithm.sh b/t/t8015-blame-diff-algorithm.sh new file mode 100755 index 0000000000..cd709536c6 --- /dev/null +++ b/t/t8015-blame-diff-algorithm.sh @@ -0,0 +1,203 @@ +#!/bin/sh + +test_description='git blame with specific diff algorithm' + +. ./test-lib.sh + +test_expect_success setup ' + cat >file.c <<-\EOF && + int f(int x, int y) + { + if (x == 0) + { + return y; + } + return x; + } + + int g(size_t u) + { + while (u < 30) + { + u++; + } + return u; + } + EOF + test_write_lines x x x x >file.txt && + git add file.c file.txt && + GIT_AUTHOR_NAME=Commit_1 git commit -m Commit_1 && + + cat >file.c <<-\EOF && + int g(size_t u) + { + while (u < 30) + { + u++; + } + return u; + } + + int h(int x, int y, int z) + { + if (z == 0) + { + return x; + } + return y; + } + EOF + test_write_lines x x x A B C D x E F G >file.txt && + git add file.c file.txt && + GIT_AUTHOR_NAME=Commit_2 git commit -m Commit_2 +' + +test_expect_success 'blame uses Myers diff algorithm by default' ' + cat >expected <<-\EOF && + Commit_2 int g(size_t u) + Commit_1 { + Commit_2 while (u < 30) + Commit_1 { + Commit_2 u++; + Commit_1 } + Commit_2 return u; + Commit_1 } + Commit_1 + Commit_2 int h(int x, int y, int z) + Commit_1 { + Commit_2 if (z == 0) + Commit_1 { + Commit_2 return x; + Commit_1 } + Commit_2 return y; + Commit_1 } + EOF + + git blame file.c >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >without_varying_parts && + sed -e "s/ *$//g" without_varying_parts >actual && + test_cmp expected actual +' + +test_expect_success 'blame honors --diff-algorithm option' ' + cat >expected <<-\EOF && + Commit_1 int g(size_t u) + Commit_1 { + Commit_1 while (u < 30) + Commit_1 { + Commit_1 u++; + Commit_1 } + Commit_1 return u; + Commit_1 } + Commit_2 + Commit_2 int h(int x, int y, int z) + Commit_2 { + Commit_2 if (z == 0) + Commit_2 { + Commit_2 return x; + Commit_2 } + Commit_2 return y; + Commit_2 } + EOF + + git blame file.c --diff-algorithm histogram >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >without_varying_parts && + sed -e "s/ *$//g" without_varying_parts >actual && + test_cmp expected actual +' + +test_expect_success 'blame honors diff.algorithm config variable' ' + cat >expected <<-\EOF && + Commit_1 int g(size_t u) + Commit_1 { + Commit_1 while (u < 30) + Commit_1 { + Commit_1 u++; + Commit_1 } + Commit_1 return u; + Commit_1 } + Commit_2 + Commit_2 int h(int x, int y, int z) + Commit_2 { + Commit_2 if (z == 0) + Commit_2 { + Commit_2 return x; + Commit_2 } + Commit_2 return y; + Commit_2 } + EOF + + git -c diff.algorithm=histogram blame file.c >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \ + -e "s/ *$//g" output >actual && + test_cmp expected actual +' + +test_expect_success 'blame gives priority to --diff-algorithm over diff.algorithm' ' + cat >expected <<-\EOF && + Commit_1 int g(size_t u) + Commit_1 { + Commit_1 while (u < 30) + Commit_1 { + Commit_1 u++; + Commit_1 } + Commit_1 return u; + Commit_1 } + Commit_2 + Commit_2 int h(int x, int y, int z) + Commit_2 { + Commit_2 if (z == 0) + Commit_2 { + Commit_2 return x; + Commit_2 } + Commit_2 return y; + Commit_2 } + EOF + + git -c diff.algorithm=myers blame file.c --diff-algorithm histogram >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \ + -e "s/ *$//g" output >actual && + test_cmp expected actual +' + +test_expect_success 'blame honors --minimal option' ' + cat >expected <<-\EOF && + Commit_1 x + Commit_1 x + Commit_1 x + Commit_2 A + Commit_2 B + Commit_2 C + Commit_2 D + Commit_1 x + Commit_2 E + Commit_2 F + Commit_2 G + EOF + + git blame file.txt --minimal >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual && + test_cmp expected actual +' + +test_expect_success 'blame respects the order of diff options' ' + cat >expected <<-\EOF && + Commit_1 x + Commit_1 x + Commit_1 x + Commit_2 A + Commit_2 B + Commit_2 C + Commit_2 D + Commit_2 x + Commit_2 E + Commit_2 F + Commit_2 G + EOF + + git blame file.txt --minimal --diff-algorithm myers >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual && + test_cmp expected actual +' + +test_done