Merge branch 'jc/pickaxe' into next

* jc/pickaxe:
  git-pickaxe: introduce heuristics to avoid "trivial" chunks
  git-pickaxe: improve "best match" heuristics
  git-pickaxe: fix nth_line()
  git-pickaxe: paginate output by default.
This commit is contained in:
Junio C Hamano
2006-10-20 18:53:04 -07:00
2 changed files with 84 additions and 28 deletions

View File

@@ -34,13 +34,21 @@ static int longest_file;
static int longest_author;
static int max_orig_digits;
static int max_digits;
#define DEBUG 0
static int max_score_digits;
#define PICKAXE_BLAME_MOVE 01
#define PICKAXE_BLAME_COPY 02
#define PICKAXE_BLAME_COPY_HARDER 04
/*
* blame for a blame_entry with score lower than these thresholds
* is not passed to the parent using move/copy logic.
*/
static unsigned blame_move_score;
static unsigned blame_copy_score;
#define BLAME_DEFAULT_MOVE_SCORE 20
#define BLAME_DEFAULT_COPY_SCORE 40
/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
#define METAINFO_SHOWN (1u<<12)
#define MORE_THAN_ONE_PATH (1u<<13)
@@ -78,6 +86,11 @@ struct blame_entry {
* suspect's file; internally all line numbers are 0 based.
*/
int s_lno;
/* how significant this entry is -- cached to avoid
* scanning the lines over and over
*/
unsigned score;
};
struct scoreboard {
@@ -215,9 +228,6 @@ static void process_u_diff(void *state_, char *line, unsigned long len)
struct chunk *chunk;
int off1, off2, len1, len2, num;
if (DEBUG)
fprintf(stderr, "%.*s", (int) len, line);
num = state->ret->num;
if (len < 4 || line[0] != '@' || line[1] != '@') {
if (state->hunk_in_pre_context && line[0] == ' ')
@@ -295,10 +305,6 @@ static struct patch *get_patch(struct origin *parent, struct origin *origin)
char *blob_p, *blob_o;
struct patch *patch;
if (DEBUG) fprintf(stderr, "get patch %.8s %.8s\n",
sha1_to_hex(parent->commit->object.sha1),
sha1_to_hex(origin->commit->object.sha1));
blob_p = read_sha1_file(parent->blob_sha1, type,
(unsigned long *) &file_p.size);
blob_o = read_sha1_file(origin->blob_sha1, type,
@@ -352,6 +358,7 @@ static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
memcpy(dst, src, sizeof(*src));
dst->prev = p;
dst->next = n;
dst->score = 0;
}
static const char *nth_line(struct scoreboard *sb, int lno)
@@ -448,7 +455,7 @@ static void split_blame(struct scoreboard *sb,
add_blame_entry(sb, new_entry);
}
if (DEBUG) {
if (1) { /* sanity */
struct blame_entry *ent;
int lno = 0, corrupt = 0;
@@ -530,12 +537,6 @@ static int pass_blame_to_parent(struct scoreboard *sb,
for (i = 0; i < patch->num; i++) {
struct chunk *chunk = &patch->chunks[i];
if (DEBUG)
fprintf(stderr,
"plno = %d, tlno = %d, "
"same as parent up to %d, resync %d and %d\n",
plno, tlno,
chunk->same, chunk->p_next, chunk->t_next);
blame_chunk(sb, tlno, plno, chunk->same, target, parent);
plno = chunk->p_next;
tlno = chunk->t_next;
@@ -547,14 +548,37 @@ static int pass_blame_to_parent(struct scoreboard *sb,
return 0;
}
static void copy_split_if_better(struct blame_entry best_so_far[3],
/*
 * Return the significance score of blame entry "e": the number of
 * alphanumeric bytes found between nth_line(sb, e->lno) and
 * nth_line(sb, e->lno + e->num_lines).
 *
 * The result is stored in e->score.  A non-zero e->score is treated
 * as an already computed value and returned without rescanning; an
 * entry whose score is 0 is scanned again on every call.
 */
static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
{
	const char *pos, *limit;
	unsigned count = 0;

	/* Reuse the cached value when there is one. */
	if (e->score)
		return e->score;

	pos = nth_line(sb, e->lno);
	limit = nth_line(sb, e->lno + e->num_lines);
	for (; pos < limit; pos++) {
		/* Go through unsigned char so high-bit bytes stay valid for isalnum(). */
		if (isalnum(*((unsigned char *)pos)))
			count++;
	}

	e->score = count;
	return count;
}
static void copy_split_if_better(struct scoreboard *sb,
struct blame_entry best_so_far[3],
struct blame_entry this[3])
{
if (!this[1].suspect)
return;
if (best_so_far[1].suspect &&
(this[1].num_lines < best_so_far[1].num_lines))
return;
if (best_so_far[1].suspect) {
if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
return;
}
memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
}
@@ -596,7 +620,7 @@ static void find_copy_in_blob(struct scoreboard *sb,
tlno + ent->s_lno, plno,
chunk->same + ent->s_lno,
parent);
copy_split_if_better(split, this);
copy_split_if_better(sb, split, this);
}
plno = chunk->p_next;
tlno = chunk->t_next;
@@ -630,7 +654,8 @@ static int find_move_in_parent(struct scoreboard *sb,
if (ent->suspect != target || ent->guilty)
continue;
find_copy_in_blob(sb, ent, parent, split, &file_p);
if (split[1].suspect)
if (split[1].suspect &&
blame_move_score < ent_score(sb, &split[1]))
split_blame(sb, split, ent);
}
free(blob_p);
@@ -699,9 +724,10 @@ static int find_copy_in_parent(struct scoreboard *sb,
continue;
}
find_copy_in_blob(sb, ent, norigin, this, &file_p);
copy_split_if_better(split, this);
copy_split_if_better(sb, split, this);
}
if (split[1].suspect)
if (split[1].suspect &&
blame_copy_score < ent_score(sb, &split[1]))
split_blame(sb, split, ent);
}
diff_flush(&diff_opts);
@@ -944,6 +970,7 @@ static void get_commit_info(struct commit *commit,
#define OUTPUT_PORCELAIN 010
#define OUTPUT_SHOW_NAME 020
#define OUTPUT_SHOW_NUMBER 040
#define OUTPUT_SHOW_SCORE 0100
static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
{
@@ -1016,6 +1043,8 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
show_raw_time),
ent->lno + 1 + cnt);
else {
if (opt & OUTPUT_SHOW_SCORE)
printf(" %*d", max_score_digits, ent->score);
if (opt & OUTPUT_SHOW_NAME)
printf(" %-*.*s", longest_file, longest_file,
suspect->path);
@@ -1060,8 +1089,9 @@ static void output(struct scoreboard *sb, int option)
for (ent = sb->ent; ent; ent = ent->next) {
if (option & OUTPUT_PORCELAIN)
emit_porcelain(sb, ent);
else
else {
emit_other(sb, ent, option);
}
}
}
@@ -1085,6 +1115,9 @@ static int prepare_lines(struct scoreboard *sb)
bol = 1;
}
}
sb->lineno = xrealloc(sb->lineno,
sizeof(int* ) * (num + incomplete + 1));
sb->lineno[num + incomplete] = buf - sb->final_buf;
sb->num_lines = num + incomplete;
return sb->num_lines;
}
@@ -1118,6 +1151,7 @@ static void find_alignment(struct scoreboard *sb, int *option)
{
int longest_src_lines = 0;
int longest_dst_lines = 0;
unsigned largest_score = 0;
struct blame_entry *e;
for (e = sb->ent; e; e = e->next) {
@@ -1143,9 +1177,12 @@ static void find_alignment(struct scoreboard *sb, int *option)
num = e->lno + e->num_lines;
if (longest_dst_lines < num)
longest_dst_lines = num;
if (largest_score < ent_score(sb, e))
largest_score = ent_score(sb, e);
}
max_orig_digits = lineno_width(longest_src_lines);
max_digits = lineno_width(longest_dst_lines);
max_score_digits = lineno_width(largest_score);
}
static int has_path_in_work_tree(const char *path)
@@ -1154,6 +1191,15 @@ static int has_path_in_work_tree(const char *path)
return !lstat(path, &st);
}
/*
 * Parse the optional numeric suffix of a -M<score> / -C<score> option.
 *
 * "arg" points just past the two-character option name.  Returns the
 * decimal value it spells, or 0 when the suffix is empty or is not a
 * plain run of decimal digits; cmd_pickaxe() treats 0 as "not given"
 * and substitutes the built-in default threshold.
 *
 * A value too large for "unsigned" saturates to the largest unsigned
 * value instead of wrapping around to a small (or zero) score.
 */
static unsigned parse_score(const char *arg)
{
	char *end;
	unsigned long score;

	/*
	 * strtoul() would happily skip whitespace and accept a sign;
	 * "-M-1" must not turn into an enormous threshold, so insist
	 * on a leading digit.  This also covers the empty suffix.
	 */
	if (*arg < '0' || '9' < *arg)
		return 0;

	score = strtoul(arg, &end, 10);
	if (*end)
		return 0;	/* trailing garbage */

	/*
	 * strtoul() yields ULONG_MAX on overflow, and unsigned long may
	 * be wider than unsigned; clamp both cases to the same value.
	 */
	if (score > (unsigned)-1)
		return (unsigned)-1;
	return (unsigned)score;
}
int cmd_pickaxe(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
@@ -1183,12 +1229,15 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix)
output_option |= OUTPUT_LONG_OBJECT_NAME;
else if (!strcmp("-S", arg) && ++i < argc)
revs_file = argv[i];
else if (!strcmp("-M", arg))
else if (!strncmp("-M", arg, 2)) {
opt |= PICKAXE_BLAME_MOVE;
else if (!strcmp("-C", arg)) {
blame_move_score = parse_score(arg+2);
}
else if (!strncmp("-C", arg, 2)) {
if (opt & PICKAXE_BLAME_COPY)
opt |= PICKAXE_BLAME_COPY_HARDER;
opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
blame_copy_score = parse_score(arg+2);
}
else if (!strcmp("-L", arg) && ++i < argc) {
char *term;
@@ -1206,6 +1255,8 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix)
tmp = top; top = bottom; bottom = tmp;
}
}
else if (!strcmp("--score-debug", arg))
output_option |= OUTPUT_SHOW_SCORE;
else if (!strcmp("-f", arg) ||
!strcmp("--show-name", arg))
output_option |= OUTPUT_SHOW_NAME;
@@ -1224,6 +1275,11 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix)
argv[unk++] = arg;
}
if (!blame_move_score)
blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
if (!blame_copy_score)
blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
/* We have collected options unknown to us in argv[1..unk]
* which are to be passed to revision machinery if we are
* going to do the "bottom" processing.

2
git.c
View File

@@ -247,7 +247,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp)
{ "mv", cmd_mv, RUN_SETUP },
{ "name-rev", cmd_name_rev, RUN_SETUP },
{ "pack-objects", cmd_pack_objects, RUN_SETUP },
{ "pickaxe", cmd_pickaxe, RUN_SETUP },
{ "pickaxe", cmd_pickaxe, RUN_SETUP | USE_PAGER },
{ "prune", cmd_prune, RUN_SETUP },
{ "prune-packed", cmd_prune_packed, RUN_SETUP },
{ "push", cmd_push, RUN_SETUP },