diff --git a/builtin-pickaxe.c b/builtin-pickaxe.c index 74c7c9a33b..e5a50b9278 100644 --- a/builtin-pickaxe.c +++ b/builtin-pickaxe.c @@ -34,13 +34,21 @@ static int longest_file; static int longest_author; static int max_orig_digits; static int max_digits; - -#define DEBUG 0 +static int max_score_digits; #define PICKAXE_BLAME_MOVE 01 #define PICKAXE_BLAME_COPY 02 #define PICKAXE_BLAME_COPY_HARDER 04 +/* + * blame for a blame_entry with score lower than these thresholds + * is not passed to the parent using move/copy logic. + */ +static unsigned blame_move_score; +static unsigned blame_copy_score; +#define BLAME_DEFAULT_MOVE_SCORE 20 +#define BLAME_DEFAULT_COPY_SCORE 40 + /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */ #define METAINFO_SHOWN (1u<<12) #define MORE_THAN_ONE_PATH (1u<<13) @@ -78,6 +86,11 @@ struct blame_entry { * suspect's file; internally all line numbers are 0 based. */ int s_lno; + + /* how significant this entry is -- cached to avoid + * scanning the lines over and over + */ + unsigned score; }; struct scoreboard { @@ -215,9 +228,6 @@ static void process_u_diff(void *state_, char *line, unsigned long len) struct chunk *chunk; int off1, off2, len1, len2, num; - if (DEBUG) - fprintf(stderr, "%.*s", (int) len, line); - num = state->ret->num; if (len < 4 || line[0] != '@' || line[1] != '@') { if (state->hunk_in_pre_context && line[0] == ' ') @@ -295,10 +305,6 @@ static struct patch *get_patch(struct origin *parent, struct origin *origin) char *blob_p, *blob_o; struct patch *patch; - if (DEBUG) fprintf(stderr, "get patch %.8s %.8s\n", - sha1_to_hex(parent->commit->object.sha1), - sha1_to_hex(origin->commit->object.sha1)); - blob_p = read_sha1_file(parent->blob_sha1, type, (unsigned long *) &file_p.size); blob_o = read_sha1_file(origin->blob_sha1, type, @@ -352,6 +358,7 @@ static void dup_entry(struct blame_entry *dst, struct blame_entry *src) memcpy(dst, src, sizeof(*src)); dst->prev = p; dst->next = n; + dst->score = 0; } static const char *nth_line(struct scoreboard *sb, int lno) @@ -448,7 +455,7 @@ static void split_blame(struct scoreboard *sb, add_blame_entry(sb, new_entry); } - if (DEBUG) { + if (1) { /* sanity */ struct blame_entry *ent; int lno = 0, corrupt = 0; @@ -530,12 +537,6 @@ static int pass_blame_to_parent(struct scoreboard *sb, for (i = 0; i < patch->num; i++) { struct chunk *chunk = &patch->chunks[i]; - if (DEBUG) - fprintf(stderr, - "plno = %d, tlno = %d, " - "same as parent up to %d, resync %d and %d\n", - plno, tlno, - chunk->same, chunk->p_next, chunk->t_next); blame_chunk(sb, tlno, plno, chunk->same, target, parent); plno = chunk->p_next; tlno = chunk->t_next; @@ -547,14 +548,37 @@ static int pass_blame_to_parent(struct scoreboard *sb, return 0; } -static void copy_split_if_better(struct blame_entry best_so_far[3], +static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e) +{ + unsigned score; + const char *cp, *ep; + + if (e->score) + return e->score; + + score = 0; + cp = nth_line(sb, e->lno); + ep = nth_line(sb, e->lno + e->num_lines); + while (cp < ep) { + unsigned ch = *((unsigned char *)cp); + if (isalnum(ch)) + score++; + cp++; + } + e->score = score; + return score; +} + +static void copy_split_if_better(struct scoreboard *sb, + struct blame_entry best_so_far[3], struct blame_entry this[3]) { if (!this[1].suspect) return; - if (best_so_far[1].suspect && - (this[1].num_lines < best_so_far[1].num_lines)) - return; + if (best_so_far[1].suspect) { + if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1])) + return; + } memcpy(best_so_far, this, sizeof(struct blame_entry [3])); } @@ -596,7 +620,7 @@ static void find_copy_in_blob(struct scoreboard *sb, tlno + ent->s_lno, plno, chunk->same + ent->s_lno, parent); - copy_split_if_better(split, this); + copy_split_if_better(sb, split, this); } plno = chunk->p_next; tlno = chunk->t_next; @@ -630,7 +654,8 @@ static int find_move_in_parent(struct scoreboard *sb, if (ent->suspect != target || ent->guilty) continue; find_copy_in_blob(sb, ent, parent, split, &file_p); - if (split[1].suspect) + if (split[1].suspect && + blame_move_score < ent_score(sb, &split[1])) split_blame(sb, split, ent); } free(blob_p); @@ -699,9 +724,10 @@ static int find_copy_in_parent(struct scoreboard *sb, continue; } find_copy_in_blob(sb, ent, norigin, this, &file_p); - copy_split_if_better(split, this); + copy_split_if_better(sb, split, this); } - if (split[1].suspect) + if (split[1].suspect && + blame_copy_score < ent_score(sb, &split[1])) split_blame(sb, split, ent); } diff_flush(&diff_opts); @@ -944,6 +970,7 @@ static void get_commit_info(struct commit *commit, #define OUTPUT_PORCELAIN 010 #define OUTPUT_SHOW_NAME 020 #define OUTPUT_SHOW_NUMBER 040 +#define OUTPUT_SHOW_SCORE 0100 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent) { @@ -1016,6 +1043,8 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) show_raw_time), ent->lno + 1 + cnt); else { + if (opt & OUTPUT_SHOW_SCORE) + printf(" %*d", max_score_digits, ent->score); if (opt & OUTPUT_SHOW_NAME) printf(" %-*.*s", longest_file, longest_file, suspect->path); @@ -1060,8 +1089,9 @@ static void output(struct scoreboard *sb, int option) for (ent = sb->ent; ent; ent = ent->next) { if (option & OUTPUT_PORCELAIN) emit_porcelain(sb, ent); - else + else { emit_other(sb, ent, option); + } } } @@ -1085,6 +1115,9 @@ static int prepare_lines(struct scoreboard *sb) bol = 1; } } + sb->lineno = xrealloc(sb->lineno, + sizeof(int* ) * (num + incomplete + 1)); + sb->lineno[num + incomplete] = buf - sb->final_buf; sb->num_lines = num + incomplete; return sb->num_lines; } @@ -1118,6 +1151,7 @@ static void find_alignment(struct scoreboard *sb, int *option) { int longest_src_lines = 0; int longest_dst_lines = 0; + unsigned largest_score = 0; struct blame_entry *e; for (e = sb->ent; e; e = e->next) { @@ -1143,9 +1177,12 @@ static void find_alignment(struct scoreboard *sb, int *option) num = e->lno + e->num_lines; if (longest_dst_lines < num) longest_dst_lines = num; + if (largest_score < ent_score(sb, e)) + largest_score = ent_score(sb, e); } max_orig_digits = lineno_width(longest_src_lines); max_digits = lineno_width(longest_dst_lines); + max_score_digits = lineno_width(largest_score); } static int has_path_in_work_tree(const char *path) @@ -1154,6 +1191,15 @@ static int has_path_in_work_tree(const char *path) return !lstat(path, &st); } +static unsigned parse_score(const char *arg) +{ + char *end; + unsigned long score = strtoul(arg, &end, 10); + if (*end) + return 0; + return score; +} + int cmd_pickaxe(int argc, const char **argv, const char *prefix) { struct rev_info revs; @@ -1183,12 +1229,15 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) output_option |= OUTPUT_LONG_OBJECT_NAME; else if (!strcmp("-S", arg) && ++i < argc) revs_file = argv[i]; - else if (!strcmp("-M", arg)) + else if (!strncmp("-M", arg, 2)) { opt |= PICKAXE_BLAME_MOVE; - else if (!strcmp("-C", arg)) { + blame_move_score = parse_score(arg+2); + } + else if (!strncmp("-C", arg, 2)) { if (opt & PICKAXE_BLAME_COPY) opt |= PICKAXE_BLAME_COPY_HARDER; opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE; + blame_copy_score = parse_score(arg+2); } else if (!strcmp("-L", arg) && ++i < argc) { char *term; @@ -1206,6 +1255,8 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) tmp = top; top = bottom; bottom = tmp; } } + else if (!strcmp("--score-debug", arg)) + output_option |= OUTPUT_SHOW_SCORE; else if (!strcmp("-f", arg) || !strcmp("--show-name", arg)) output_option |= OUTPUT_SHOW_NAME; @@ -1224,6 +1275,11 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) argv[unk++] = arg; } + if (!blame_move_score) + blame_move_score = BLAME_DEFAULT_MOVE_SCORE; + if (!blame_copy_score) + blame_copy_score = BLAME_DEFAULT_COPY_SCORE; + /* We have collected options unknown to us in argv[1..unk] * which are to be passed to revision machinery if we are * going to do the "bottom" procesing. diff --git a/git.c b/git.c index 63bdbb0f52..308252a61f 100644 --- a/git.c +++ b/git.c @@ -247,7 +247,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "mv", cmd_mv, RUN_SETUP }, { "name-rev", cmd_name_rev, RUN_SETUP }, { "pack-objects", cmd_pack_objects, RUN_SETUP }, - { "pickaxe", cmd_pickaxe, RUN_SETUP }, + { "pickaxe", cmd_pickaxe, RUN_SETUP | USE_PAGER }, { "prune", cmd_prune, RUN_SETUP }, { "prune-packed", cmd_prune_packed, RUN_SETUP }, { "push", cmd_push, RUN_SETUP },