From 2cf0223ba48ae51af03de486e119fd7a36e1b0ee Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 25 Dec 2006 23:25:44 -0500 Subject: [PATCH 1/7] add .mailmap for git-shortlog output with the git repository The git repository itself was messed up in a couple cases. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- .mailmap | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .mailmap diff --git a/.mailmap b/.mailmap new file mode 100644 index 0000000000..2c658f42f5 --- /dev/null +++ b/.mailmap @@ -0,0 +1,37 @@ +# +# This list is used by git-shortlog to fix a few botched name translations +# in the git archive, either because the author's full name was messed up +# and/or not always written the same way, making contributions from the +# same person appearing not to be so. +# + +Aneesh Kumar K.V +Chris Shoemaker +Daniel Barkalow +David Kågedal +Fredrik Kuivinen +H. Peter Anvin +H. Peter Anvin +H. Peter Anvin +Horst H. von Brand +Joachim Berdal Haga +Jon Loeliger +Jon Seymour +Karl Hasselström +Kent Engstrom +Lars Doelle +Lars Doelle +Lukas Sandström +Martin Langhoff +Nguyễn Thái Ngọc Duy +Ramsay Allan Jones +René Scharfe +Robert Fitzsimons +Santi Béjar +Sean Estabrooks +Shawn O. Pearce +Tony Luck +Ville Skyttä +YOSHIFUJI Hideaki +anonymous +anonymous From abc8ab19ae72e811335c847166228362035b22c0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 26 Dec 2006 00:11:50 -0800 Subject: [PATCH 2/7] show-branch --reflog: add documentation. 
Signed-off-by: Junio C Hamano --- Documentation/git-show-branch.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/git-show-branch.txt b/Documentation/git-show-branch.txt index dafacd4308..912e15bcba 100644 --- a/Documentation/git-show-branch.txt +++ b/Documentation/git-show-branch.txt @@ -11,6 +11,7 @@ SYNOPSIS 'git-show-branch' [--all] [--remotes] [--topo-order] [--current] [--more= | --list | --independent | --merge-base] [--no-name | --sha1-name] [--topics] [ | ]... +'git-show-branch' --reflog[=] DESCRIPTION ----------- @@ -96,6 +97,10 @@ OPTIONS will show the revisions given by "git rev-list {caret}master topic1 topic2" +--reflog[=] :: + Shows most recent ref-log entries for the given ref. + + Note that --more, --list, --independent and --merge-base options are mutually exclusive. From 2f89543eaf98400d72498078d762c10e240c0657 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 26 Dec 2006 00:15:26 -0800 Subject: [PATCH 3/7] Document --numstat in git-apply and git-diff Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 4 +++- Documentation/git-apply.txt | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index f12082e134..da1cc60e97 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -19,7 +19,9 @@ --numstat:: Similar to \--stat, but shows number of added and deleted lines in decimal notation and pathname without - abbreviation, to make it more machine friendly. + abbreviation, to make it more machine friendly. For + binary files, outputs two `-` instead of saying + `0 0`. 
--shortstat:: Output only the last line of the --stat format containing total diff --git a/Documentation/git-apply.txt b/Documentation/git-apply.txt index 2cc32d1c5e..33b93db508 100644 --- a/Documentation/git-apply.txt +++ b/Documentation/git-apply.txt @@ -33,8 +33,9 @@ OPTIONS --numstat:: Similar to \--stat, but shows number of added and deleted lines in decimal notation and pathname without - abbreviation, to make it more machine friendly. Turns - off "apply". + abbreviation, to make it more machine friendly. For + binary files, outputs two `-` instead of saying + `0 0`. Turns off "apply". --summary:: Instead of applying the patch, output a condensed From 6934dec89538e054823aadcce08af040bc8dcf79 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 26 Dec 2006 00:21:01 -0800 Subject: [PATCH 4/7] Document git-reset -- ... --- Documentation/git-reset.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 4a4ceb6201..4f424782eb 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -7,7 +7,9 @@ git-reset - Reset current HEAD to the specified state SYNOPSIS -------- -'git-reset' [--mixed | --soft | --hard] [] +[verse] +'git-reset' [--mixed | --soft | --hard] [] +'git-reset' [--mixed] [--] ... DESCRIPTION ----------- @@ -21,6 +23,10 @@ the undo in the history. If you want to undo a commit other than the latest on a branch, gitlink:git-revert[1] is your friend. +The second form with 'paths' is used to revert selected paths in +the index from a given commit, without moving HEAD. + + OPTIONS ------- --mixed:: @@ -37,9 +43,9 @@ OPTIONS --hard:: Matches the working tree and index to that of the tree being switched to. Any changes to tracked files in the working tree - since are lost. + since are lost. -:: +:: Commit to make the current HEAD. 
Examples From b45974a655e0e41441e5db64c091000171435096 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 23 Dec 2006 23:36:55 -0800 Subject: [PATCH 5/7] Move encoding conversion routine out of mailinfo to utf8.c This moves the body of convert_to_utf8() routine used in mailinfo to the utf8.c i18n library. Signed-off-by: Junio C Hamano --- builtin-mailinfo.c | 37 +++++++------------------------ utf8.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ utf8.h | 6 ++++++ 3 files changed, 68 insertions(+), 29 deletions(-) diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index e6472293d4..507b93f6a7 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -4,6 +4,7 @@ */ #include "cache.h" #include "builtin.h" +#include "utf8.h" static FILE *cmitmsg, *patchfile, *fin, *fout; @@ -510,40 +511,18 @@ static int decode_b_segment(char *in, char *ot, char *ep) static void convert_to_utf8(char *line, char *charset) { -#ifndef NO_ICONV - char *in, *out; - size_t insize, outsize, nrc; - char outbuf[4096]; /* cheat */ static char latin_one[] = "latin1"; char *input_charset = *charset ? 
charset : latin_one; - iconv_t conv = iconv_open(metainfo_charset, input_charset); + char *out = reencode_string(line, metainfo_charset, input_charset); - if (conv == (iconv_t) -1) { - static int warned_latin1_once = 0; - if (input_charset != latin_one) { - fprintf(stderr, "cannot convert from %s to %s\n", - input_charset, metainfo_charset); - *charset = 0; - } - else if (!warned_latin1_once) { - warned_latin1_once = 1; - fprintf(stderr, "tried to convert from %s to %s, " - "but your iconv does not work with it.\n", - input_charset, metainfo_charset); - } + if (!out) { + fprintf(stderr, "cannot convert from %s to %s\n", + input_charset, metainfo_charset); + *charset = 0; return; } - in = line; - insize = strlen(in); - out = outbuf; - outsize = sizeof(outbuf); - nrc = iconv(conv, &in, &insize, &out, &outsize); - iconv_close(conv); - if (nrc == (size_t) -1) - return; - *out = 0; - strcpy(line, outbuf); -#endif + strcpy(line, out); + free(out); } static int decode_header_bq(char *it) diff --git a/utf8.c b/utf8.c index 8fa62571aa..1eedd8b61a 100644 --- a/utf8.c +++ b/utf8.c @@ -276,3 +276,57 @@ void print_wrapped_text(const char *text, int indent, int indent2, int width) } } } + +/* + * Given a buffer and its encoding, return it re-encoded + * with iconv. If the conversion fails, returns NULL. 
+ */ +#ifndef NO_ICONV +char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) +{ + iconv_t conv; + size_t insz, outsz, outalloc; + char *out, *outpos, *cp; + + if (!in_encoding) + return NULL; + conv = iconv_open(out_encoding, in_encoding); + if (conv == (iconv_t) -1) + return NULL; + insz = strlen(in); + outsz = insz; + outalloc = outsz + 1; /* for terminating NUL */ + out = xmalloc(outalloc); + outpos = out; + cp = (char *)in; + + while (1) { + size_t cnt = iconv(conv, &cp, &insz, &outpos, &outsz); + + if (cnt == -1) { + size_t sofar; + if (errno != E2BIG) { + free(out); + iconv_close(conv); + return NULL; + } + /* insz has remaining number of bytes. + * since we started outsz the same as insz, + * it is likely that insz is not enough for + * converting the rest. + */ + sofar = outpos - out; + outalloc = sofar + insz * 2 + 32; + out = xrealloc(out, outalloc); + outpos = out + sofar; + outsz = outalloc - sofar - 1; + } + else { + *outpos = '\0'; + break; + } + } + iconv_close(conv); + return out; +} +#endif diff --git a/utf8.h b/utf8.h index a0d7f591ad..cae2a8e665 100644 --- a/utf8.h +++ b/utf8.h @@ -5,4 +5,10 @@ int utf8_width(const char **start); int is_utf8(const char *text); void print_wrapped_text(const char *text, int indent, int indent2, int len); +#ifndef NO_ICONV +char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding); +#else +#define reencode_string(a,b,c) NULL +#endif + #endif From 4b2bced55948422198dad92dcbf4b5b98913f1e7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 23 Dec 2006 23:53:02 -0800 Subject: [PATCH 6/7] i18n.logToUTF8: convert commit log message to UTF-8 When i18n.commitencoding is set to a non UTF-8 encoding, commit-tree records the encoding in an extra header after author/committer headers in the commit object. An earlier version used trailer but Johannes points out that there is little risk breaking existing Porcelains with a new header. 
Signed-off-by: Junio C Hamano --- builtin-commit-tree.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/builtin-commit-tree.c b/builtin-commit-tree.c index f641787988..33c29f7495 100644 --- a/builtin-commit-tree.c +++ b/builtin-commit-tree.c @@ -92,6 +92,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) char comment[1000]; char *buffer; unsigned int size; + int encoding_is_utf8; setup_ident(); git_config(git_default_config); @@ -117,6 +118,8 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) parents++; } + encoding_is_utf8 = !strcmp(git_commit_encoding, "utf-8"); + init_buffer(&buffer, &size); add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1)); @@ -130,7 +133,11 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) /* Person/date information */ add_buffer(&buffer, &size, "author %s\n", git_author_info(1)); - add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info(1)); + add_buffer(&buffer, &size, "committer %s\n", git_committer_info(1)); + if (!encoding_is_utf8) + add_buffer(&buffer, &size, + "encoding %s\n", git_commit_encoding); + add_buffer(&buffer, &size, "\n"); /* And add the comment */ while (fgets(comment, sizeof(comment), stdin) != NULL) @@ -138,7 +145,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) /* And check the encoding */ buffer[size] = '\0'; - if (!strcmp(git_commit_encoding, "utf-8") && !is_utf8(buffer)) + if (encoding_is_utf8 && !is_utf8(buffer)) fprintf(stderr, commit_utf8_warn); if (!write_sha1_file(buffer, size, commit_type, commit_sha1)) { From 52883fbd767f8a79a6f98a08907d0a9f6ba1ece1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 25 Dec 2006 11:48:35 -0800 Subject: [PATCH 7/7] Teach log family --encoding Updated commit objects record the encoding used in their encoding header. 
This updates the log family to reencode it into the encoding specified in i18n.commitencoding (or the default, which is "utf-8") upon output. To force a specific encoding that is different, log family takes command line flag --encoding=<encoding>; giving --encoding=none entirely disables the reencoding and lets you view log messages in their original encoding. Signed-off-by: Junio C Hamano --- builtin-log.c | 19 ++++++++++++++-- commit.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++--- revision.h | 1 + 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/builtin-log.c b/builtin-log.c index 8df3c1394a..b7e47cb5fe 100644 --- a/builtin-log.c +++ b/builtin-log.c @@ -20,6 +20,8 @@ void add_head(struct rev_info *revs); static void cmd_log_init(int argc, const char **argv, const char *prefix, struct rev_info *rev) { + int i; + rev->abbrev = DEFAULT_ABBREV; rev->commit_format = CMIT_FMT_DEFAULT; rev->verbose_header = 1; @@ -27,8 +29,21 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix, argc = setup_revisions(argc, argv, rev, "HEAD"); if (rev->diffopt.pickaxe || rev->diffopt.filter) rev->always_show_header = 0; - if (argc > 1) - die("unrecognized argument: %s", argv[1]); + for (i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (!strncmp(arg, "--encoding=", 11)) { + arg += 11; + if (MAX_ENCODING_LENGTH <= strlen(arg)) + die(" Value of output encoding '%s' too long", + arg); + if (strcmp(arg, "none")) + strcpy(git_commit_encoding, arg); + else + git_commit_encoding[0] = 0; + } + else + die("unrecognized argument: %s", arg); + } } static int cmd_log_walk(struct rev_info *rev) diff --git a/commit.c b/commit.c index 289ef65eb1..df4bc0775a 100644 --- a/commit.c +++ b/commit.c @@ -1,6 +1,7 @@ #include "cache.h" #include "tag.h" #include "commit.h" +#include "utf8.h" int save_commit_buffer = 1; @@ -563,10 +564,53 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com return offset; } -unsigned long
pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit, - unsigned long len, char *buf, unsigned long space, +static char *get_header(const struct commit *commit, const char *key) +{ + int key_len = strlen(key); + const char *line = commit->buffer; + + for (;;) { + const char *eol = strchr(line, '\n'), *next; + + if (line == eol) + return NULL; + if (!eol) { + eol = line + strlen(line); + next = NULL; + } else + next = eol + 1; + if (!strncmp(line, key, key_len) && line[key_len] == ' ') { + int len = eol - line - key_len; + char *ret = xmalloc(len); + memcpy(ret, line + key_len + 1, len - 1); + ret[len - 1] = '\0'; + return ret; + } + line = next; + } +} + +static char *logmsg_reencode(const struct commit *commit) +{ + char *encoding = get_header(commit, "encoding"); + char *out; + + if (!encoding || !strcmp(encoding, git_commit_encoding)) + return NULL; + out = reencode_string(commit->buffer, git_commit_encoding, encoding); + free(encoding); + if (!out) + return NULL; + return out; +} + +unsigned long pretty_print_commit(enum cmit_fmt fmt, + const struct commit *commit, + unsigned long len, + char *buf, unsigned long space, int abbrev, const char *subject, - const char *after_subject, int relative_date) + const char *after_subject, + int relative_date) { int hdr = 1, body = 0; unsigned long offset = 0; @@ -574,6 +618,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit int parents_shown = 0; const char *msg = commit->buffer; int plain_non_ascii = 0; + char *reencoded = NULL; + + if (*git_commit_encoding) { + reencoded = logmsg_reencode(commit); + if (reencoded) { + msg = reencoded; + len = strlen(msg); + } + } if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL) indent = 0; @@ -721,6 +774,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit if (fmt == CMIT_FMT_EMAIL && !body) buf[offset++] = '\n'; buf[offset] = '\0'; + + free(reencoded); return offset; } diff --git a/revision.h 
b/revision.h index ec991e5c57..8f7907d7ab 100644 --- a/revision.h +++ b/revision.h @@ -72,6 +72,7 @@ struct rev_info { const char *ref_message_id; const char *add_signoff; const char *extra_headers; + const char *log_reencode; /* Filter by commit log message */ struct grep_opt *grep_filter;