From 7768e27e1d3f3d5e253e795433033b5de1d1c157 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 19 Oct 2006 10:33:01 +0200 Subject: [PATCH 1/5] Don't use $author_name undefined when $from contains no /\serr new-file OK. Log says: Date: Thu, 19 Oct 2006 10:26:24 +0200 Sendmail: /usr/sbin/sendmail From: j Subject: Cc: To: k Result: OK $ cat err Use of uninitialized value in pattern match (m//) at /p/bin/git-send-email line 416. Use of uninitialized value in concatenation (.) or string at /p/bin/git-send-email line 420. Use of uninitialized value in concatenation (.) or string at /p/bin/git-send-email line 468. There's a patch for the $author_name part below. The example above shows that $subject may also be used uninitialized. That should be easy to fix, too. Signed-off-by: Jim Meyering Signed-off-by: Junio C Hamano --- git-send-email.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-send-email.perl b/git-send-email.perl index b17d261987..1c6d2cc787 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -412,7 +412,7 @@ sub send_message } my ($author_name) = ($from =~ /^(.*?)\s+ Date: Thu, 19 Oct 2006 19:26:08 -0700 Subject: [PATCH 2/5] git-apply: prepare for upcoming GNU diff -u format change. The latest GNU diff from CVS emits an empty line to express an empty context line, instead of more traditional "single white space followed by a newline". Do not get broken by it. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-apply.c | 9 ++++++ t/t4118-apply-empty-context.sh | 55 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100755 t/t4118-apply-empty-context.sh diff --git a/builtin-apply.c b/builtin-apply.c index cbe597771b..11a5277a69 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -934,6 +934,7 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s switch (*line) { default: return -1; + case '\n': /* newer GNU diff, an empty context line */ case ' ': oldlines--; newlines--; @@ -1623,6 +1624,14 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i first = '-'; } switch (first) { + case '\n': + /* Newer GNU diff, empty context line */ + if (plen < 0) + /* ... followed by '\No newline'; nothing */ + break; + old[oldsize++] = '\n'; + new[newsize++] = '\n'; + break; case ' ': case '-': memcpy(old + oldsize, patch + 1, plen); diff --git a/t/t4118-apply-empty-context.sh b/t/t4118-apply-empty-context.sh new file mode 100755 index 0000000000..7309422fe5 --- /dev/null +++ b/t/t4118-apply-empty-context.sh @@ -0,0 +1,55 @@ +#!/bin/sh +# +# Copyright (c) 2006 Junio C Hamano +# + +test_description='git-apply with new style GNU diff with empty context + +' + +. 
./test-lib.sh + +test_expect_success setup ' + { + echo; echo; + echo A; echo B; echo C; + echo; + } >file1 && + cat file1 >file1.orig && + { + cat file1 && + echo Q | tr -d "\\012" + } >file2 && + cat file2 >file2.orig + git add file1 file2 && + sed -e "/^B/d" file1 && + sed -e "/^B/d" file2 && + cat file1 >file1.mods && + cat file2 >file2.mods && + git diff | + sed -e "s/^ \$//" >diff.output +' + +test_expect_success 'apply --numstat' ' + + git apply --numstat diff.output >actual && + { + echo "0 1 file1" && + echo "0 1 file2" + } >expect && + diff -u expect actual + +' + +test_expect_success 'apply --apply' ' + + cat file1.orig >file1 && + cat file2.orig >file2 && + git update-index file1 file2 && + git apply --index diff.output && + diff -u file1.mods file1 && + diff -u file2.mods file2 +' + +test_done + From cee7f245dcaef6dade28464f59420095a9949aac Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 19 Oct 2006 16:00:04 -0700 Subject: [PATCH 3/5] git-pickaxe: blame rewritten. Currently it does what git-blame does, but only faster. More importantly, its internal structure is designed to support content movement (aka cut-and-paste) more easily by allowing more than one paths to be taken from the same commit. 
Signed-off-by: Junio C Hamano --- Documentation/git-pickaxe.txt | 104 +++ Documentation/git.txt | 3 + Makefile | 1 + builtin-pickaxe.c | 1194 +++++++++++++++++++++++++++++++++ builtin.h | 1 + git.c | 1 + t/annotate-tests.sh | 1 + t/t8003-pickaxe.sh | 9 + 8 files changed, 1314 insertions(+) create mode 100644 Documentation/git-pickaxe.txt create mode 100644 builtin-pickaxe.c create mode 100755 t/t8003-pickaxe.sh diff --git a/Documentation/git-pickaxe.txt b/Documentation/git-pickaxe.txt new file mode 100644 index 0000000000..7685bd0e3c --- /dev/null +++ b/Documentation/git-pickaxe.txt @@ -0,0 +1,104 @@ +git-pickaxe(1) +============== + +NAME +---- +git-pickaxe - Show what revision and author last modified each line of a file + +SYNOPSIS +-------- +'git-pickaxe' [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [<rev>] [--] <file> + +DESCRIPTION +----------- + +Annotates each line in the given file with information from the revision which +last modified the line. Optionally, start annotating from the given revision. + +Also it can limit the range of lines annotated. + +This report doesn't tell you anything about lines which have been deleted or +replaced; you need to use a tool such as gitlink:git-diff[1] or the "pickaxe" +interface briefly mentioned in the following paragraph. + +Apart from supporting file annotation, git also supports searching the +development history for when a code snippet occurred in a change. This makes it +possible to track when a code snippet was added to a file, moved or copied +between files, and eventually deleted or replaced. It works by searching for +a text string in the diff. 
A small example: + +----------------------------------------------------------------------------- +$ git log --pretty=oneline -S'blame_usage' +5040f17eba15504bad66b14a645bddd9b015ebb7 blame -S +ea4c7f9bf69e781dd0cd88d2bccb2bf5cc15c9a7 git-blame: Make the output +----------------------------------------------------------------------------- + +OPTIONS +------- +-c, --compatibility:: + Use the same output mode as gitlink:git-annotate[1] (Default: off). + +-L n,m:: + Annotate only the specified line range (lines count from 1). + +-l, --long:: + Show long rev (Default: off). + +-t, --time:: + Show raw timestamp (Default: off). + +-S, --rev-file <revs-file>:: + Use revs from revs-file instead of calling gitlink:git-rev-list[1]. + +-f, --show-name:: + Show filename in the original commit. By default + filename is shown if there is any line that came from a + file with different name, due to rename detection. + +-n, --show-number:: + Show line number in the original commit (Default: off). + +-p, --porcelain:: + Show in a format designed for machine consumption. + +-h, --help:: + Show help message. + + +THE PORCELAIN FORMAT +-------------------- + +In this format, each line is output after a header; the +header at the minimum has the first line which has: + +- 40-byte SHA-1 of the commit the line is attributed to; +- the line number of the line in the original file; +- the line number of the line in the final file; +- on a line that starts a group of lines from a different + commit than the previous one, the number of lines in this + group. On subsequent lines this field is absent. + +This header line is followed by the following information +at least once for each commit: + +- author name ("author"), email ("author-mail"), time + ("author-time"), and timezone ("author-tz"); similarly + for committer. +- filename in the commit the line is attributed to. +- the first line of the commit log message ("summary"). 
+ +The contents of the actual line is output after the above +header, prefixed by a TAB. This is to allow adding more +header elements later. + +SEE ALSO +-------- +gitlink:git-blame[1] + +AUTHOR +------ +Written by Junio C Hamano + +GIT +--- +Part of the gitlink:git[7] suite diff --git a/Documentation/git.txt b/Documentation/git.txt index 3af6fc63e2..7074e32458 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -430,6 +430,9 @@ gitlink:git-annotate[1]:: gitlink:git-blame[1]:: Blame file lines on commits. +gitlink:git-pickaxe[1]:: + Find out where each line in a file came from. + gitlink:git-check-ref-format[1]:: Make sure ref name is well formed. diff --git a/Makefile b/Makefile index 66c8b4b127..461fef636d 100644 --- a/Makefile +++ b/Makefile @@ -288,6 +288,7 @@ BUILTIN_OBJS = \ builtin-mv.o \ builtin-name-rev.o \ builtin-pack-objects.o \ + builtin-pickaxe.o \ builtin-prune.o \ builtin-prune-packed.o \ builtin-push.o \ diff --git a/builtin-pickaxe.c b/builtin-pickaxe.c new file mode 100644 index 0000000000..cb69fcc16b --- /dev/null +++ b/builtin-pickaxe.c @@ -0,0 +1,1194 @@ +/* + * Pickaxe + * + * Copyright (c) 2006, Junio C Hamano + */ + +#include "cache.h" +#include "builtin.h" +#include "blob.h" +#include "commit.h" +#include "tag.h" +#include "tree-walk.h" +#include "diff.h" +#include "diffcore.h" +#include "revision.h" +#include "xdiff-interface.h" + +#include +#include + +static char pickaxe_usage[] = +"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S ] [commit] [--] file\n" +" -c, --compatibility Use the same output mode as git-annotate (Default: off)\n" +" -l, --long Show long commit SHA1 (Default: off)\n" +" -t, --time Show raw timestamp (Default: off)\n" +" -f, --show-name Show original filename (Default: auto)\n" +" -n, --show-number Show original linenumber (Default: off)\n" +" -p, --porcelain Show in a format designed for machine consumption\n" +" -L n,m Process only line range n,m, counting from 1\n" +" -S revs-file Use 
revisions from revs-file instead of calling git-rev-list\n"; + +static int longest_file; +static int longest_author; +static int max_orig_digits; +static int max_digits; + +#define DEBUG 0 + +/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */ +#define METAINFO_SHOWN (1u<<12) +#define MORE_THAN_ONE_PATH (1u<<13) + +/* + * One blob in a commit + */ +struct origin { + struct commit *commit; + unsigned char blob_sha1[20]; + char path[FLEX_ARRAY]; +}; + +struct blame_entry { + struct blame_entry *prev; + struct blame_entry *next; + + /* the first line of this group in the final image; + * internally all line numbers are 0 based. + */ + int lno; + + /* how many lines this group has */ + int num_lines; + + /* the commit that introduced this group into the final image */ + struct origin *suspect; + + /* true if the suspect is truly guilty; false while we have not + * checked if the group came from one of its parents. + */ + char guilty; + + /* the line number of the first line of this group in the + * suspect's file; internally all line numbers are 0 based. + */ + int s_lno; +}; + +struct scoreboard { + /* the final commit (i.e. 
where we started digging from) */ + struct commit *final; + + const char *path; + + /* the contents in the final; pointed into by buf pointers of + * blame_entries + */ + const char *final_buf; + unsigned long final_buf_size; + + /* linked list of blames */ + struct blame_entry *ent; + + int num_lines; + int *lineno; +}; + +static void coalesce(struct scoreboard *sb) +{ + struct blame_entry *ent, *next; + + for (ent = sb->ent; ent && (next = ent->next); ent = next) { + if (ent->suspect == next->suspect && + ent->guilty == next->guilty && + ent->s_lno + ent->num_lines == next->s_lno) { + ent->num_lines += next->num_lines; + ent->next = next->next; + if (ent->next) + ent->next->prev = ent; + free(next); + next = ent; /* again */ + } + } +} + +static void free_origin(struct origin *o) +{ + free(o); +} + +static struct origin *find_origin(struct scoreboard *sb, + struct commit *commit, + const char *path) +{ + struct blame_entry *ent; + struct origin *o; + unsigned mode; + char type[10]; + + for (ent = sb->ent; ent; ent = ent->next) { + if (ent->suspect->commit == commit && + !strcmp(ent->suspect->path, path)) + return ent->suspect; + } + + o = xcalloc(1, sizeof(*o) + strlen(path) + 1); + o->commit = commit; + strcpy(o->path, path); + if (get_tree_entry(commit->object.sha1, path, o->blob_sha1, &mode)) + goto err_out; + if (sha1_object_info(o->blob_sha1, type, NULL) || + strcmp(type, blob_type)) + goto err_out; + return o; + err_out: + free_origin(o); + return NULL; +} + +static struct origin *find_rename(struct scoreboard *sb, + struct commit *parent, + struct origin *origin) +{ + struct origin *porigin = NULL; + struct diff_options diff_opts; + int i; + const char *paths[1]; + + diff_setup(&diff_opts); + diff_opts.recursive = 1; + diff_opts.detect_rename = DIFF_DETECT_RENAME; + diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; + paths[0] = NULL; + diff_tree_setup_paths(paths, &diff_opts); + if (diff_setup_done(&diff_opts) < 0) + die("diff-setup"); + 
diff_tree_sha1(origin->commit->tree->object.sha1, + parent->tree->object.sha1, + "", &diff_opts); + diffcore_std(&diff_opts); + + for (i = 0; i < diff_queued_diff.nr; i++) { + struct diff_filepair *p = diff_queued_diff.queue[i]; + if (p->status == 'R' && !strcmp(p->one->path, origin->path)) { + porigin = find_origin(sb, parent, p->two->path); + break; + } + } + diff_flush(&diff_opts); + return porigin; +} + +struct chunk { + /* line number in postimage; up to but not including this + * line is the same as preimage + */ + int same; + + /* preimage line number after this chunk */ + int p_next; + + /* postimage line number after this chunk */ + int t_next; +}; + +struct patch { + struct chunk *chunks; + int num; +}; + +struct blame_diff_state { + struct xdiff_emit_state xm; + struct patch *ret; + unsigned hunk_post_context; + unsigned hunk_in_pre_context : 1; +}; + +static void process_u_diff(void *state_, char *line, unsigned long len) +{ + struct blame_diff_state *state = state_; + struct chunk *chunk; + int off1, off2, len1, len2, num; + + if (DEBUG) + fprintf(stderr, "%.*s", (int) len, line); + + num = state->ret->num; + if (len < 4 || line[0] != '@' || line[1] != '@') { + if (state->hunk_in_pre_context && line[0] == ' ') + state->ret->chunks[num - 1].same++; + else { + state->hunk_in_pre_context = 0; + if (line[0] == ' ') + state->hunk_post_context++; + else + state->hunk_post_context = 0; + } + return; + } + + if (num && state->hunk_post_context) { + chunk = &state->ret->chunks[num - 1]; + chunk->p_next -= state->hunk_post_context; + chunk->t_next -= state->hunk_post_context; + } + state->ret->num = ++num; + state->ret->chunks = xrealloc(state->ret->chunks, + sizeof(struct chunk) * num); + chunk = &state->ret->chunks[num - 1]; + if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) { + state->ret->num--; + return; + } + + /* Line numbers in patch output are one based. */ + off1--; + off2--; + + chunk->same = len2 ? 
off2 : (off2 + 1); + + chunk->p_next = off1 + (len1 ? len1 : 1); + chunk->t_next = chunk->same + len2; + state->hunk_in_pre_context = 1; + state->hunk_post_context = 0; +} + +static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o, + int context) +{ + struct blame_diff_state state; + xpparam_t xpp; + xdemitconf_t xecfg; + xdemitcb_t ecb; + + xpp.flags = XDF_NEED_MINIMAL; + xecfg.ctxlen = context; + xecfg.flags = 0; + ecb.outf = xdiff_outf; + ecb.priv = &state; + memset(&state, 0, sizeof(state)); + state.xm.consume = process_u_diff; + state.ret = xmalloc(sizeof(struct patch)); + state.ret->chunks = NULL; + state.ret->num = 0; + + xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb); + + if (state.ret->num) { + struct chunk *chunk; + chunk = &state.ret->chunks[state.ret->num - 1]; + chunk->p_next -= state.hunk_post_context; + chunk->t_next -= state.hunk_post_context; + } + return state.ret; +} + +static struct patch *get_patch(struct origin *parent, struct origin *origin) +{ + mmfile_t file_p, file_o; + char type[10]; + char *blob_p, *blob_o; + struct patch *patch; + + if (DEBUG) fprintf(stderr, "get patch %.8s %.8s\n", + sha1_to_hex(parent->commit->object.sha1), + sha1_to_hex(origin->commit->object.sha1)); + + blob_p = read_sha1_file(parent->blob_sha1, type, + (unsigned long *) &file_p.size); + blob_o = read_sha1_file(origin->blob_sha1, type, + (unsigned long *) &file_o.size); + file_p.ptr = blob_p; + file_o.ptr = blob_o; + if (!file_p.ptr || !file_o.ptr) { + free(blob_p); + free(blob_o); + return NULL; + } + + patch = compare_buffer(&file_p, &file_o, 0); + free(blob_p); + free(blob_o); + return patch; +} + +static void free_patch(struct patch *p) +{ + free(p->chunks); + free(p); +} + +static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) +{ + struct blame_entry *ent, *prev = NULL; + + for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next) + prev = ent; + + /* prev, if not NULL, is the last one that is below e */ + e->prev = prev; 
+ if (prev) { + e->next = prev->next; + prev->next = e; + } + else { + e->next = sb->ent; + sb->ent = e; + } + if (e->next) + e->next->prev = e; +} + +static void dup_entry(struct blame_entry *dst, struct blame_entry *src) +{ + struct blame_entry *p, *n; + p = dst->prev; + n = dst->next; + memcpy(dst, src, sizeof(*src)); + dst->prev = p; + dst->next = n; +} + +static const char *nth_line(struct scoreboard *sb, int lno) +{ + return sb->final_buf + sb->lineno[lno]; +} + +static void split_overlap(struct blame_entry split[3], + struct blame_entry *e, + int tlno, int plno, int same, + struct origin *parent) +{ + /* it is known that lines between tlno to same came from + * parent, and e has an overlap with that range. it also is + * known that parent's line plno corresponds to e's line tlno. + * + * <---- e -----> + * <------> + * <------------> + * <------------> + * <------------------> + * + * Potentially we need to split e into three parts; before + * this chunk, the chunk to be blamed for parent, and after + * that portion. 
+ */ + int chunk_end_lno; + memset(split, 0, sizeof(struct blame_entry [3])); + + if (e->s_lno < tlno) { + /* there is a pre-chunk part not blamed on parent */ + split[0].suspect = e->suspect; + split[0].lno = e->lno; + split[0].s_lno = e->s_lno; + split[0].num_lines = tlno - e->s_lno; + split[1].lno = e->lno + tlno - e->s_lno; + split[1].s_lno = plno; + } + else { + split[1].lno = e->lno; + split[1].s_lno = plno + (e->s_lno - tlno); + } + + if (same < e->s_lno + e->num_lines) { + /* there is a post-chunk part not blamed on parent */ + split[2].suspect = e->suspect; + split[2].lno = e->lno + (same - e->s_lno); + split[2].s_lno = e->s_lno + (same - e->s_lno); + split[2].num_lines = e->s_lno + e->num_lines - same; + chunk_end_lno = split[2].lno; + } + else + chunk_end_lno = e->lno + e->num_lines; + split[1].num_lines = chunk_end_lno - split[1].lno; + + if (split[1].num_lines < 1) + return; + split[1].suspect = parent; +} + +static void split_blame(struct scoreboard *sb, + struct blame_entry split[3], + struct blame_entry *e) +{ + struct blame_entry *new_entry; + + if (split[0].suspect && split[2].suspect) { + /* we need to split e into two and add another for parent */ + dup_entry(e, &split[0]); + + new_entry = xmalloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + + new_entry = xmalloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + } + else if (!split[0].suspect && !split[2].suspect) + /* parent covers the entire area */ + dup_entry(e, &split[1]); + else if (split[0].suspect) { + dup_entry(e, &split[0]); + + new_entry = xmalloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + } + else { + dup_entry(e, &split[1]); + + new_entry = xmalloc(sizeof(*new_entry)); + memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); + add_blame_entry(sb, new_entry); + } 
+ + if (DEBUG) { + struct blame_entry *ent; + int lno = 0, corrupt = 0; + + for (ent = sb->ent; ent; ent = ent->next) { + if (lno != ent->lno) + corrupt = 1; + if (ent->s_lno < 0) + corrupt = 1; + lno += ent->num_lines; + } + if (corrupt) { + lno = 0; + for (ent = sb->ent; ent; ent = ent->next) { + printf("L %8d l %8d n %8d\n", + lno, ent->lno, ent->num_lines); + lno = ent->lno + ent->num_lines; + } + die("oops"); + } + } +} + +static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, + int tlno, int plno, int same, + struct origin *parent) +{ + struct blame_entry split[3]; + + split_overlap(split, e, tlno, plno, same, parent); + if (!split[1].suspect) + return; + split_blame(sb, split, e); +} + +static int find_last_in_target(struct scoreboard *sb, struct origin *target) +{ + struct blame_entry *e; + int last_in_target = -1; + + for (e = sb->ent; e; e = e->next) { + if (e->guilty || e->suspect != target) + continue; + if (last_in_target < e->s_lno + e->num_lines) + last_in_target = e->s_lno + e->num_lines; + } + return last_in_target; +} + +static void blame_chunk(struct scoreboard *sb, + int tlno, int plno, int same, + struct origin *target, struct origin *parent) +{ + struct blame_entry *e, *n; + + for (e = sb->ent; e; e = n) { + n = e->next; + if (e->guilty || e->suspect != target) + continue; + if (same <= e->s_lno) + continue; + if (tlno < e->s_lno + e->num_lines) + blame_overlap(sb, e, tlno, plno, same, parent); + } +} + +static int pass_blame_to_parent(struct scoreboard *sb, + struct origin *target, + struct origin *parent) +{ + int i, last_in_target, plno, tlno; + struct patch *patch; + + last_in_target = find_last_in_target(sb, target); + if (last_in_target < 0) + return 1; /* nothing remains for this target */ + + patch = get_patch(parent, target); + plno = tlno = 0; + for (i = 0; i < patch->num; i++) { + struct chunk *chunk = &patch->chunks[i]; + + if (DEBUG) + fprintf(stderr, + "plno = %d, tlno = %d, " + "same as parent up to %d, resync 
%d and %d\n", + plno, tlno, + chunk->same, chunk->p_next, chunk->t_next); + blame_chunk(sb, tlno, plno, chunk->same, target, parent); + plno = chunk->p_next; + tlno = chunk->t_next; + } + /* rest (i.e. anything above tlno) are the same as parent */ + blame_chunk(sb, tlno, plno, last_in_target, target, parent); + + free_patch(patch); + return 0; +} + +#define MAXPARENT 16 + +static void pass_blame(struct scoreboard *sb, struct origin *origin) +{ + int i; + struct commit *commit = origin->commit; + struct commit_list *parent; + struct origin *parent_origin[MAXPARENT], *porigin; + + memset(parent_origin, 0, sizeof(parent_origin)); + for (i = 0, parent = commit->parents; + i < MAXPARENT && parent; + parent = parent->next, i++) { + struct commit *p = parent->item; + + if (parse_commit(p)) + continue; + porigin = find_origin(sb, parent->item, origin->path); + if (!porigin) + porigin = find_rename(sb, parent->item, origin); + if (!porigin) + continue; + if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) { + struct blame_entry *e; + for (e = sb->ent; e; e = e->next) + if (e->suspect == origin) + e->suspect = porigin; + /* now everything blamed for origin is blamed for + * porigin, we do not need to keep it anymore. + * Do not free porigin (or the ones we got from + * earlier round); they may still be used elsewhere. 
+ */ + free_origin(origin); + return; + } + parent_origin[i] = porigin; + } + + for (i = 0, parent = commit->parents; + i < MAXPARENT && parent; + parent = parent->next, i++) { + struct origin *porigin = parent_origin[i]; + if (!porigin) + continue; + if (pass_blame_to_parent(sb, origin, porigin)) + return; + } +} + +static void assign_blame(struct scoreboard *sb, struct rev_info *revs) +{ + while (1) { + struct blame_entry *ent; + struct commit *commit; + struct origin *suspect = NULL; + + /* find one suspect to break down */ + for (ent = sb->ent; !suspect && ent; ent = ent->next) + if (!ent->guilty) + suspect = ent->suspect; + if (!suspect) + return; /* all done */ + + commit = suspect->commit; + parse_commit(commit); + if (!(commit->object.flags & UNINTERESTING) && + !(revs->max_age != -1 && commit->date < revs->max_age)) + pass_blame(sb, suspect); + + /* Take responsibility for the remaining entries */ + for (ent = sb->ent; ent; ent = ent->next) + if (ent->suspect == suspect) + ent->guilty = 1; + } +} + +static const char *format_time(unsigned long time, const char *tz_str, + int show_raw_time) +{ + static char time_buf[128]; + time_t t = time; + int minutes, tz; + struct tm *tm; + + if (show_raw_time) { + sprintf(time_buf, "%lu %s", time, tz_str); + return time_buf; + } + + tz = atoi(tz_str); + minutes = tz < 0 ? -tz : tz; + minutes = (minutes / 100)*60 + (minutes % 100); + minutes = tz < 0 ? 
-minutes : minutes; + t = time + minutes * 60; + tm = gmtime(&t); + + strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm); + strcat(time_buf, tz_str); + return time_buf; +} + +struct commit_info +{ + char *author; + char *author_mail; + unsigned long author_time; + char *author_tz; + + /* filled only when asked for details */ + char *committer; + char *committer_mail; + unsigned long committer_time; + char *committer_tz; + + char *summary; +}; + +static void get_ac_line(const char *inbuf, const char *what, + int bufsz, char *person, char **mail, + unsigned long *time, char **tz) +{ + int len; + char *tmp, *endp; + + tmp = strstr(inbuf, what); + if (!tmp) + goto error_out; + tmp += strlen(what); + endp = strchr(tmp, '\n'); + if (!endp) + len = strlen(tmp); + else + len = endp - tmp; + if (bufsz <= len) { + error_out: + /* Ugh */ + person = *mail = *tz = "(unknown)"; + *time = 0; + return; + } + memcpy(person, tmp, len); + + tmp = person; + tmp += len; + *tmp = 0; + while (*tmp != ' ') + tmp--; + *tz = tmp+1; + + *tmp = 0; + while (*tmp != ' ') + tmp--; + *time = strtoul(tmp, NULL, 10); + + *tmp = 0; + while (*tmp != ' ') + tmp--; + *mail = tmp + 1; + *tmp = 0; +} + +static void get_commit_info(struct commit *commit, + struct commit_info *ret, + int detailed) +{ + int len; + char *tmp, *endp; + static char author_buf[1024]; + static char committer_buf[1024]; + static char summary_buf[1024]; + + ret->author = author_buf; + get_ac_line(commit->buffer, "\nauthor ", + sizeof(author_buf), author_buf, &ret->author_mail, + &ret->author_time, &ret->author_tz); + + if (!detailed) + return; + + ret->committer = committer_buf; + get_ac_line(commit->buffer, "\ncommitter ", + sizeof(committer_buf), committer_buf, &ret->committer_mail, + &ret->committer_time, &ret->committer_tz); + + ret->summary = summary_buf; + tmp = strstr(commit->buffer, "\n\n"); + if (!tmp) { + error_out: + sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1)); + return; + } + tmp += 2; + 
endp = strchr(tmp, '\n'); + if (!endp) + goto error_out; + len = endp - tmp; + if (len >= sizeof(summary_buf)) + goto error_out; + memcpy(summary_buf, tmp, len); + summary_buf[len] = 0; +} + +#define OUTPUT_ANNOTATE_COMPAT 001 +#define OUTPUT_LONG_OBJECT_NAME 002 +#define OUTPUT_RAW_TIMESTAMP 004 +#define OUTPUT_PORCELAIN 010 +#define OUTPUT_SHOW_NAME 020 +#define OUTPUT_SHOW_NUMBER 040 + +static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent) +{ + int cnt; + const char *cp; + struct origin *suspect = ent->suspect; + char hex[41]; + + strcpy(hex, sha1_to_hex(suspect->commit->object.sha1)); + printf("%s%c%d %d %d\n", + hex, + ent->guilty ? ' ' : '*', // purely for debugging + ent->s_lno + 1, + ent->lno + 1, + ent->num_lines); + if (!(suspect->commit->object.flags & METAINFO_SHOWN)) { + struct commit_info ci; + suspect->commit->object.flags |= METAINFO_SHOWN; + get_commit_info(suspect->commit, &ci, 1); + printf("author %s\n", ci.author); + printf("author-mail %s\n", ci.author_mail); + printf("author-time %lu\n", ci.author_time); + printf("author-tz %s\n", ci.author_tz); + printf("committer %s\n", ci.committer); + printf("committer-mail %s\n", ci.committer_mail); + printf("committer-time %lu\n", ci.committer_time); + printf("committer-tz %s\n", ci.committer_tz); + printf("filename %s\n", suspect->path); + printf("summary %s\n", ci.summary); + } + else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH) + printf("filename %s\n", suspect->path); + + cp = nth_line(sb, ent->lno); + for (cnt = 0; cnt < ent->num_lines; cnt++) { + char ch; + if (cnt) + printf("%s %d %d\n", hex, + ent->s_lno + 1 + cnt, + ent->lno + 1 + cnt); + putchar('\t'); + do { + ch = *cp++; + putchar(ch); + } while (ch != '\n' && + cp < sb->final_buf + sb->final_buf_size); + } +} + +static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) +{ + int cnt; + const char *cp; + struct origin *suspect = ent->suspect; + struct commit_info ci; + char hex[41]; + int 
show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP); + + get_commit_info(suspect->commit, &ci, 1); + strcpy(hex, sha1_to_hex(suspect->commit->object.sha1)); + + cp = nth_line(sb, ent->lno); + for (cnt = 0; cnt < ent->num_lines; cnt++) { + char ch; + + printf("%.*s", (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8, hex); + if (opt & OUTPUT_ANNOTATE_COMPAT) + printf("\t(%10s\t%10s\t%d)", ci.author, + format_time(ci.author_time, ci.author_tz, + show_raw_time), + ent->lno + 1 + cnt); + else { + if (opt & OUTPUT_SHOW_NAME) + printf(" %-*.*s", longest_file, longest_file, + suspect->path); + if (opt & OUTPUT_SHOW_NUMBER) + printf(" %*d", max_orig_digits, + ent->s_lno + 1 + cnt); + printf(" (%-*.*s %10s %*d) ", + longest_author, longest_author, ci.author, + format_time(ci.author_time, ci.author_tz, + show_raw_time), + max_digits, ent->lno + 1 + cnt); + } + do { + ch = *cp++; + putchar(ch); + } while (ch != '\n' && + cp < sb->final_buf + sb->final_buf_size); + } +} + +static void output(struct scoreboard *sb, int option) +{ + struct blame_entry *ent; + + if (option & OUTPUT_PORCELAIN) { + for (ent = sb->ent; ent; ent = ent->next) { + struct blame_entry *oth; + struct origin *suspect = ent->suspect; + struct commit *commit = suspect->commit; + if (commit->object.flags & MORE_THAN_ONE_PATH) + continue; + for (oth = ent->next; oth; oth = oth->next) { + if ((oth->suspect->commit != commit) || + !strcmp(oth->suspect->path, suspect->path)) + continue; + commit->object.flags |= MORE_THAN_ONE_PATH; + break; + } + } + } + + for (ent = sb->ent; ent; ent = ent->next) { + if (option & OUTPUT_PORCELAIN) + emit_porcelain(sb, ent); + else + emit_other(sb, ent, option); + } +} + +static int prepare_lines(struct scoreboard *sb) +{ + const char *buf = sb->final_buf; + unsigned long len = sb->final_buf_size; + int num = 0, incomplete = 0, bol = 1; + + if (len && buf[len-1] != '\n') + incomplete++; /* incomplete line at the end */ + while (len--) { + if (bol) { + sb->lineno = xrealloc(sb->lineno, + 
sizeof(int* ) * (num + 1)); + sb->lineno[num] = buf - sb->final_buf; + bol = 0; + } + if (*buf++ == '\n') { + num++; + bol = 1; + } + } + sb->num_lines = num + incomplete; + return sb->num_lines; +} + +static int read_ancestry(const char *graft_file) +{ + FILE *fp = fopen(graft_file, "r"); + char buf[1024]; + if (!fp) + return -1; + while (fgets(buf, sizeof(buf), fp)) { + /* The format is just "Commit Parent1 Parent2 ...\n" */ + int len = strlen(buf); + struct commit_graft *graft = read_graft_line(buf, len); + register_commit_graft(graft, 0); + } + fclose(fp); + return 0; +} + +static int lineno_width(int lines) +{ + int i, width; + + for (width = 1, i = 10; i <= lines + 1; width++) + i *= 10; + return width; +} + +static void find_alignment(struct scoreboard *sb, int *option) +{ + int longest_src_lines = 0; + int longest_dst_lines = 0; + struct blame_entry *e; + + for (e = sb->ent; e; e = e->next) { + struct origin *suspect = e->suspect; + struct commit_info ci; + int num; + + if (!(suspect->commit->object.flags & METAINFO_SHOWN)) { + suspect->commit->object.flags |= METAINFO_SHOWN; + get_commit_info(suspect->commit, &ci, 1); + if (strcmp(suspect->path, sb->path)) + *option |= OUTPUT_SHOW_NAME; + num = strlen(suspect->path); + if (longest_file < num) + longest_file = num; + num = strlen(ci.author); + if (longest_author < num) + longest_author = num; + } + num = e->s_lno + e->num_lines; + if (longest_src_lines < num) + longest_src_lines = num; + num = e->lno + e->num_lines; + if (longest_dst_lines < num) + longest_dst_lines = num; + } + max_orig_digits = lineno_width(longest_src_lines); + max_digits = lineno_width(longest_dst_lines); +} + +static int has_path_in_work_tree(const char *path) +{ + struct stat st; + return !lstat(path, &st); +} + +int cmd_pickaxe(int argc, const char **argv, const char *prefix) +{ + struct rev_info revs; + const char *path; + struct scoreboard sb; + struct origin *o; + struct blame_entry *ent; + int i, seen_dashdash, unk; + long 
bottom, top, lno; + int output_option = 0; + const char *revs_file = NULL; + const char *final_commit_name = NULL; + char type[10]; + + bottom = top = 0; + seen_dashdash = 0; + for (unk = i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (*arg != '-') + break; + else if (!strcmp("-c", arg)) + output_option |= OUTPUT_ANNOTATE_COMPAT; + else if (!strcmp("-t", arg)) + output_option |= OUTPUT_RAW_TIMESTAMP; + else if (!strcmp("-l", arg)) + output_option |= OUTPUT_LONG_OBJECT_NAME; + else if (!strcmp("-S", arg) && ++i < argc) + revs_file = argv[i]; + else if (!strcmp("-L", arg) && ++i < argc) { + char *term; + arg = argv[i]; + if (bottom || top) + die("More than one '-L n,m' option given"); + bottom = strtol(arg, &term, 10); + if (*term == ',') { + top = strtol(term + 1, &term, 10); + if (*term) + usage(pickaxe_usage); + } + if (bottom && top && top < bottom) { + unsigned long tmp; + tmp = top; top = bottom; bottom = tmp; + } + } + else if (!strcmp("-f", arg) || + !strcmp("--show-name", arg)) + output_option |= OUTPUT_SHOW_NAME; + else if (!strcmp("-n", arg) || + !strcmp("--show-number", arg)) + output_option |= OUTPUT_SHOW_NUMBER; + else if (!strcmp("-p", arg) || + !strcmp("--porcelain", arg)) + output_option |= OUTPUT_PORCELAIN; + else if (!strcmp("--", arg)) { + seen_dashdash = 1; + i++; + break; + } + else + argv[unk++] = arg; + } + + /* We have collected options unknown to us in argv[1..unk] + * which are to be passed to revision machinery if we are + * going to do the "bottom" processing. + * + * The remaining are: + * + * (1) if seen_dashdash, it's either + * "-options -- <path>" or + * "-options -- <path> <rev>". + * but the latter is allowed only if there is no + * options that we passed to revision machinery. + * + * (2) otherwise, we may have "--" somewhere later and + * might be looking at the first one of multiple 'rev' + * parameters (e.g. " master ^next ^maint -- path"). 
+ * See if there is a dashdash first, and give the + * arguments before that to revision machinery. + * After that there must be one 'path'. + * + * (3) otherwise, it's one of the three: + * "-options <path> <rev>" + * "-options <rev> <path>" + * "-options <path>" + * but again the first one is allowed only if + * there is no options that we passed to revision + * machinery. + */ + + if (seen_dashdash) { + /* (1) */ + if (argc <= i) + usage(pickaxe_usage); + path = argv[i]; + if (i + 1 == argc - 1) { + if (unk != 1) + usage(pickaxe_usage); + argv[unk++] = argv[i + 1]; + } + else if (i + 1 != argc) + /* garbage at end */ + usage(pickaxe_usage); + } + else { + int j; + for (j = i; !seen_dashdash && j < argc; j++) + if (!strcmp(argv[j], "--")) + seen_dashdash = j; + if (seen_dashdash) { + if (seen_dashdash + 1 != argc - 1) + usage(pickaxe_usage); + path = argv[seen_dashdash + 1]; + for (j = i; j < seen_dashdash; j++) + argv[unk++] = argv[j]; + } + else { + /* (3) */ + path = argv[i]; + if (i + 1 == argc - 1) { + final_commit_name = argv[i + 1]; + + /* if (unk == 1) we could be getting + * old-style + */ + if (unk == 1 && !has_path_in_work_tree(path)) { + path = argv[i + 1]; + final_commit_name = argv[i]; + } + } + else if (i != argc - 1) + usage(pickaxe_usage); /* garbage at end */ + + if (!has_path_in_work_tree(path)) + die("cannot stat path %s: %s", + path, strerror(errno)); + } + } + + if (final_commit_name) + argv[unk++] = final_commit_name; + + /* Now we got rev and path. We do not want the path pruning + * but we may want "bottom" processing. + */ + argv[unk] = NULL; + + init_revisions(&revs, NULL); + setup_revisions(unk, argv, &revs, "HEAD"); + memset(&sb, 0, sizeof(sb)); + + /* There must be one and only one positive commit in the + * revs->pending array. 
+ */ + for (i = 0; i < revs.pending.nr; i++) { + struct object *obj = revs.pending.objects[i].item; + if (obj->flags & UNINTERESTING) + continue; + while (obj->type == OBJ_TAG) + obj = deref_tag(obj, NULL, 0); + if (obj->type != OBJ_COMMIT) + die("Non commit %s?", + revs.pending.objects[i].name); + if (sb.final) + die("More than one commit to dig from %s and %s?", + revs.pending.objects[i].name, + final_commit_name); + sb.final = (struct commit *) obj; + final_commit_name = revs.pending.objects[i].name; + } + + if (!sb.final) { + /* "--not A B -- path" without anything positive */ + unsigned char head_sha1[20]; + + final_commit_name = "HEAD"; + if (get_sha1(final_commit_name, head_sha1)) + die("No such ref: HEAD"); + sb.final = lookup_commit_reference(head_sha1); + add_pending_object(&revs, &(sb.final->object), "HEAD"); + } + + /* If we have bottom, this will mark the ancestors of the + * bottom commits we would reach while traversing as + * uninteresting. + */ + prepare_revision_walk(&revs); + + o = find_origin(&sb, sb.final, path); + if (!o) + die("no such path %s in %s", path, final_commit_name); + + sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size); + lno = prepare_lines(&sb); + + if (bottom < 1) + bottom = 1; + if (top < 1) + top = lno; + bottom--; + if (lno < top) + die("file %s has only %lu lines", path, lno); + + ent = xcalloc(1, sizeof(*ent)); + ent->lno = bottom; + ent->num_lines = top - bottom; + ent->suspect = o; + ent->s_lno = bottom; + + sb.ent = ent; + sb.path = path; + + if (revs_file && read_ancestry(revs_file)) + die("reading graft file %s failed: %s", + revs_file, strerror(errno)); + + assign_blame(&sb, &revs); + + coalesce(&sb); + + if (!(output_option & OUTPUT_PORCELAIN)) + find_alignment(&sb, &output_option); + + output(&sb, output_option); + free((void *)sb.final_buf); + for (ent = sb.ent; ent; ) { + struct blame_entry *e = ent->next; + free(ent); + ent = e; + } + return 0; +} diff --git a/builtin.h b/builtin.h index 
f9fa9ff1d2..7451ce64eb 100644 --- a/builtin.h +++ b/builtin.h @@ -39,6 +39,7 @@ extern int cmd_mailsplit(int argc, const char **argv, const char *prefix); extern int cmd_mv(int argc, const char **argv, const char *prefix); extern int cmd_name_rev(int argc, const char **argv, const char *prefix); extern int cmd_pack_objects(int argc, const char **argv, const char *prefix); +extern int cmd_pickaxe(int argc, const char **argv, const char *prefix); extern int cmd_prune(int argc, const char **argv, const char *prefix); extern int cmd_prune_packed(int argc, const char **argv, const char *prefix); extern int cmd_push(int argc, const char **argv, const char *prefix); diff --git a/git.c b/git.c index e089b53571..6164380667 100644 --- a/git.c +++ b/git.c @@ -245,6 +245,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "mv", cmd_mv, RUN_SETUP }, { "name-rev", cmd_name_rev, RUN_SETUP }, { "pack-objects", cmd_pack_objects, RUN_SETUP }, + { "pickaxe", cmd_pickaxe, RUN_SETUP }, { "prune", cmd_prune, RUN_SETUP }, { "prune-packed", cmd_prune_packed, RUN_SETUP }, { "push", cmd_push, RUN_SETUP }, diff --git a/t/annotate-tests.sh b/t/annotate-tests.sh index 8baf2fef69..b5ceba4acf 100644 --- a/t/annotate-tests.sh +++ b/t/annotate-tests.sh @@ -4,6 +4,7 @@ check_count () { head= case "$1" in -h) head="$2"; shift; shift ;; esac + echo "$PROG file $head" >&4 $PROG file $head >.result || return 1 cat .result | perl -e ' my %expect = (@ARGV); diff --git a/t/t8003-pickaxe.sh b/t/t8003-pickaxe.sh new file mode 100755 index 0000000000..d09d1c982c --- /dev/null +++ b/t/t8003-pickaxe.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +test_description='git-pickaxe' +. ./test-lib.sh + +PROG='git pickaxe -c' +. ../annotate-tests.sh + +test_done From d24bba8008d8e537cb48e9760f7621cbe7ae9e38 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 19 Oct 2006 18:49:30 -0700 Subject: [PATCH 4/5] git-pickaxe -M: blame line movements within a file. 
This makes pickaxe more intelligent than the classic blame. A typical example is a change that moves one static C function from lower part of the file to upper part of the same file, because you added a new caller in the middle. The versions in the parent and the child would look like this:

    parent                      child

    A                           static foo() {
    B                             ...
    C                           }
    D                           A
    E                           B
    F                           C
    G                           D
    static foo() {              ... call foo();
      ...                       E
    }                           F
    H                           G
                                H

With the classic blame algorithm, we can blame lines A B C D E F G and H to the parent. The child is guilty of introducing the line "... call foo();", and the blame is placed on the child. However, the classic blame algorithm fails to notice that the implementation of foo() at the top of the file is not new, and moved from the lower part of the parent. This commit introduces detection of such line movements, and correctly blames the lines that were simply moved in the file to the parent. Signed-off-by: Junio C Hamano --- Documentation/git-pickaxe.txt | 14 +++- builtin-pickaxe.c | 123 ++++++++++++++++++++++++++++++++-- 2 files changed, 130 insertions(+), 7 deletions(-) diff --git a/Documentation/git-pickaxe.txt b/Documentation/git-pickaxe.txt index 7685bd0e3c..ebae20ff33 100644 --- a/Documentation/git-pickaxe.txt +++ b/Documentation/git-pickaxe.txt @@ -7,7 +7,9 @@ git-pickaxe - Show what revision and author last modified each line of a file SYNOPSIS -------- -'git-pickaxe' [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [<rev>] [--] <file> +[verse] +'git-pickaxe' [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] + [-M] [--since=<date>] [<rev>] [--] <file> DESCRIPTION ----------- @@ -61,6 +63,16 @@ OPTIONS -p, --porcelain:: Show in a format designed for machine consumption. +-M:: + Detect moving lines in the file as well. When a commit + moves a block of lines in a file (e.g. the original file + has A and then B, and the commit changes it to B and + then A), traditional 'blame' algorithm typically blames + the lines that were moved up (i.e. B) to the parent and + assigns blame to the lines that were moved down (i.e. 
A) + to the child commit. With this option, both groups of + lines are blamed on the parent. + -h, --help:: Show help message. diff --git a/builtin-pickaxe.c b/builtin-pickaxe.c index cb69fcc16b..e6ce6551d0 100644 --- a/builtin-pickaxe.c +++ b/builtin-pickaxe.c @@ -19,7 +19,7 @@ #include static char pickaxe_usage[] = -"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S ] [commit] [--] file\n" +"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S ] [-M] [commit] [--] file\n" " -c, --compatibility Use the same output mode as git-annotate (Default: off)\n" " -l, --long Show long commit SHA1 (Default: off)\n" " -t, --time Show raw timestamp (Default: off)\n" @@ -27,6 +27,7 @@ static char pickaxe_usage[] = " -n, --show-number Show original linenumber (Default: off)\n" " -p, --porcelain Show in a format designed for machine consumption\n" " -L n,m Process only line range n,m, counting from 1\n" +" -M Find line movements within the file\n" " -S revs-file Use revisions from revs-file instead of calling git-rev-list\n"; static int longest_file; @@ -36,6 +37,8 @@ static int max_digits; #define DEBUG 0 +#define PICKAXE_BLAME_MOVE 01 + /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */ #define METAINFO_SHOWN (1u<<12) #define MORE_THAN_ONE_PATH (1u<<13) @@ -542,9 +545,99 @@ static int pass_blame_to_parent(struct scoreboard *sb, return 0; } +static void copy_split_if_better(struct blame_entry best_so_far[3], + struct blame_entry this[3]) +{ + if (!this[1].suspect) + return; + if (best_so_far[1].suspect && + (this[1].num_lines < best_so_far[1].num_lines)) + return; + memcpy(best_so_far, this, sizeof(struct blame_entry [3])); +} + +static void find_copy_in_blob(struct scoreboard *sb, + struct blame_entry *ent, + struct origin *parent, + struct blame_entry split[3], + mmfile_t *file_p) +{ + const char *cp; + int cnt; + mmfile_t file_o; + struct patch *patch; + int i, plno, tlno; + + cp = nth_line(sb, ent->lno); + file_o.ptr = (char*) cp; + cnt = 
ent->num_lines; + + while (cnt && cp < sb->final_buf + sb->final_buf_size) { + if (*cp++ == '\n') + cnt--; + } + file_o.size = cp - file_o.ptr; + + patch = compare_buffer(file_p, &file_o, 1); + + memset(split, 0, sizeof(struct blame_entry [3])); + plno = tlno = 0; + for (i = 0; i < patch->num; i++) { + struct chunk *chunk = &patch->chunks[i]; + + /* tlno to chunk->same are the same as ent */ + if (ent->num_lines <= tlno) + break; + if (tlno < chunk->same) { + struct blame_entry this[3]; + split_overlap(this, ent, + tlno + ent->s_lno, plno, + chunk->same + ent->s_lno, + parent); + copy_split_if_better(split, this); + } + plno = chunk->p_next; + tlno = chunk->t_next; + } + free_patch(patch); +} + +static int find_move_in_parent(struct scoreboard *sb, + struct origin *target, + struct origin *parent) +{ + int last_in_target; + struct blame_entry *ent, split[3]; + mmfile_t file_p; + char type[10]; + char *blob_p; + + last_in_target = find_last_in_target(sb, target); + if (last_in_target < 0) + return 1; /* nothing remains for this target */ + + blob_p = read_sha1_file(parent->blob_sha1, type, + (unsigned long *) &file_p.size); + file_p.ptr = blob_p; + if (!file_p.ptr) { + free(blob_p); + return 0; + } + + for (ent = sb->ent; ent; ent = ent->next) { + if (ent->suspect != target || ent->guilty) + continue; + find_copy_in_blob(sb, ent, parent, split, &file_p); + if (split[1].suspect) + split_blame(sb, split, ent); + } + free(blob_p); + return 0; +} + #define MAXPARENT 16 -static void pass_blame(struct scoreboard *sb, struct origin *origin) +static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) { int i; struct commit *commit = origin->commit; @@ -589,9 +682,24 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin) if (pass_blame_to_parent(sb, origin, porigin)) return; } + + /* + * Optionally run "miff" to find moves in parents' files here. 
+ */ + if (opt & PICKAXE_BLAME_MOVE) + for (i = 0, parent = commit->parents; + i < MAXPARENT && parent; + parent = parent->next, i++) { + struct origin *porigin = parent_origin[i]; + if (!porigin) + continue; + if (find_move_in_parent(sb, origin, porigin)) + return; + } + } -static void assign_blame(struct scoreboard *sb, struct rev_info *revs) +static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt) { while (1) { struct blame_entry *ent; @@ -609,7 +717,7 @@ static void assign_blame(struct scoreboard *sb, struct rev_info *revs) parse_commit(commit); if (!(commit->object.flags & UNINTERESTING) && !(revs->max_age != -1 && commit->date < revs->max_age)) - pass_blame(sb, suspect); + pass_blame(sb, suspect, opt); /* Take responsibility for the remaining entries */ for (ent = sb->ent; ent; ent = ent->next) @@ -967,13 +1075,14 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) struct scoreboard sb; struct origin *o; struct blame_entry *ent; - int i, seen_dashdash, unk; + int i, seen_dashdash, unk, opt; long bottom, top, lno; int output_option = 0; const char *revs_file = NULL; const char *final_commit_name = NULL; char type[10]; + opt = 0; bottom = top = 0; seen_dashdash = 0; for (unk = i = 1; i < argc; i++) { @@ -988,6 +1097,8 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) output_option |= OUTPUT_LONG_OBJECT_NAME; else if (!strcmp("-S", arg) && ++i < argc) revs_file = argv[i]; + else if (!strcmp("-M", arg)) + opt |= PICKAXE_BLAME_MOVE; else if (!strcmp("-L", arg) && ++i < argc) { char *term; arg = argv[i]; @@ -1176,7 +1287,7 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) die("reading graft file %s failed: %s", revs_file, strerror(errno)); - assign_blame(&sb, &revs); + assign_blame(&sb, &revs, opt); coalesce(&sb); From 18abd745a05197f498219f5ba88ce238a3d51580 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 19 Oct 2006 18:50:17 -0700 Subject: [PATCH 5/5] git-pickaxe -C: blame 
cut-and-pasted lines. This completes the initial round of git-pickaxe. In addition to the detection of line movements we already have, this finds new lines that were created by moving or cutting-and-pasting lines from different files in the parent. With this, git pickaxe -f -n -C v1.4.0 -- revision.c finds that a major part of that file actually came from rev-list.c when Linus split the latter at commit ae563642 and blames them to earlier commits that touch rev-list.c. Signed-off-by: Junio C Hamano --- Documentation/git-pickaxe.txt | 10 +++- builtin-pickaxe.c | 95 ++++++++++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 3 deletions(-) diff --git a/Documentation/git-pickaxe.txt b/Documentation/git-pickaxe.txt index ebae20ff33..6d22fd9e99 100644 --- a/Documentation/git-pickaxe.txt +++ b/Documentation/git-pickaxe.txt @@ -9,7 +9,7 @@ SYNOPSIS -------- [verse] 'git-pickaxe' [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] - [-M] [--since=<date>] [<rev>] [--] <file> + [-M] [-C] [-C] [--since=<date>] [<rev>] [--] <file> DESCRIPTION ----------- @@ -73,6 +73,14 @@ OPTIONS to the child commit. With this option, both groups of lines are blamed on the parent. +-C:: + In addition to `-M`, detect lines copied from other + files that were modified in the same commit. This is + useful when you reorganize your program and move code + around across files. When this option is given twice, + the command looks for copies from all other files in the + parent for the commit that creates the file in addition. + -h, --help:: Show help message. 
diff --git a/builtin-pickaxe.c b/builtin-pickaxe.c index e6ce6551d0..74c7c9a33b 100644 --- a/builtin-pickaxe.c +++ b/builtin-pickaxe.c @@ -19,7 +19,7 @@ #include static char pickaxe_usage[] = -"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S ] [-M] [commit] [--] file\n" +"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S ] [-M] [-C] [-C] [commit] [--] file\n" " -c, --compatibility Use the same output mode as git-annotate (Default: off)\n" " -l, --long Show long commit SHA1 (Default: off)\n" " -t, --time Show raw timestamp (Default: off)\n" @@ -27,7 +27,7 @@ static char pickaxe_usage[] = " -n, --show-number Show original linenumber (Default: off)\n" " -p, --porcelain Show in a format designed for machine consumption\n" " -L n,m Process only line range n,m, counting from 1\n" -" -M Find line movements within the file\n" +" -M, -C Find line movements within and across files\n" " -S revs-file Use revisions from revs-file instead of calling git-rev-list\n"; static int longest_file; @@ -38,6 +38,8 @@ static int max_digits; #define DEBUG 0 #define PICKAXE_BLAME_MOVE 01 +#define PICKAXE_BLAME_COPY 02 +#define PICKAXE_BLAME_COPY_HARDER 04 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */ #define METAINFO_SHOWN (1u<<12) @@ -635,6 +637,78 @@ static int find_move_in_parent(struct scoreboard *sb, return 0; } +static int find_copy_in_parent(struct scoreboard *sb, + struct origin *target, + struct commit *parent, + struct origin *porigin, + int opt) +{ + struct diff_options diff_opts; + const char *paths[1]; + struct blame_entry *ent; + int i; + + if (find_last_in_target(sb, target) < 0) + return 1; /* nothing remains for this target */ + + diff_setup(&diff_opts); + diff_opts.recursive = 1; + diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; + + /* Try "find copies harder" on new path */ + if ((opt & PICKAXE_BLAME_COPY_HARDER) && + (!porigin || strcmp(target->path, porigin->path))) { + diff_opts.detect_rename = DIFF_DETECT_COPY; + 
diff_opts.find_copies_harder = 1; + } + paths[0] = NULL; + diff_tree_setup_paths(paths, &diff_opts); + if (diff_setup_done(&diff_opts) < 0) + die("diff-setup"); + diff_tree_sha1(parent->tree->object.sha1, + target->commit->tree->object.sha1, + "", &diff_opts); + diffcore_std(&diff_opts); + + for (ent = sb->ent; ent; ent = ent->next) { + struct blame_entry split[3]; + if (ent->suspect != target || ent->guilty) + continue; + + memset(split, 0, sizeof(split)); + for (i = 0; i < diff_queued_diff.nr; i++) { + struct diff_filepair *p = diff_queued_diff.queue[i]; + struct origin *norigin; + mmfile_t file_p; + char type[10]; + char *blob; + struct blame_entry this[3]; + + if (!DIFF_FILE_VALID(p->one)) + continue; /* does not exist in parent */ + if (porigin && !strcmp(p->one->path, porigin->path)) + /* find_move already dealt with this path */ + continue; + norigin = find_origin(sb, parent, p->one->path); + + blob = read_sha1_file(norigin->blob_sha1, type, + (unsigned long *) &file_p.size); + file_p.ptr = blob; + if (!file_p.ptr) { + free(blob); + continue; + } + find_copy_in_blob(sb, ent, norigin, this, &file_p); + copy_split_if_better(split, this); + } + if (split[1].suspect) + split_blame(sb, split, ent); + } + diff_flush(&diff_opts); + + return 0; +} + #define MAXPARENT 16 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) @@ -697,6 +771,18 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) return; } + /* + * Optionally run "ciff" to find copies from parents' files here. 
+ */ + if (opt & PICKAXE_BLAME_COPY) + for (i = 0, parent = commit->parents; + i < MAXPARENT && parent; + parent = parent->next, i++) { + struct origin *porigin = parent_origin[i]; + if (find_copy_in_parent(sb, origin, parent->item, + porigin, opt)) + return; + } } static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt) @@ -1099,6 +1185,11 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix) revs_file = argv[i]; else if (!strcmp("-M", arg)) opt |= PICKAXE_BLAME_MOVE; + else if (!strcmp("-C", arg)) { + if (opt & PICKAXE_BLAME_COPY) + opt |= PICKAXE_BLAME_COPY_HARDER; + opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE; + } else if (!strcmp("-L", arg) && ++i < argc) { char *term; arg = argv[i];