Merge branch 'jc/utf8' into next

* jc/utf8:
  Teach log family --encoding
  i18n.logToUTF8: convert commit log message to UTF-8
  Move encoding conversion routine out of mailinfo to utf8.c
  Document git-reset <commit> -- <paths>...
  Document --numstat in git-apply and git-diff
  show-branch --reflog: add documentation.
  add .mailmap for git-shortlog output with the git repository

Conflicts:

	commit.c
This commit is contained in:
Junio C Hamano
2006-12-26 00:54:06 -08:00
12 changed files with 210 additions and 42 deletions

37
.mailmap Normal file
View File

@@ -0,0 +1,37 @@
#
# This list is used by git-shortlog to fix a few botched name translations
# in the git archive, either because the author's full name was messed up
# and/or not always written the same way, making contributions from the
# same person appear not to be so.
#
Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Chris Shoemaker <c.shoemaker@cox.net>
Daniel Barkalow <barkalow@iabervon.org>
David Kågedal <davidk@lysator.liu.se>
Fredrik Kuivinen <freku045@student.liu.se>
H. Peter Anvin <hpa@bonde.sc.orionmulti.com>
H. Peter Anvin <hpa@tazenda.sc.orionmulti.com>
H. Peter Anvin <hpa@trantor.hos.anvin.org>
Horst H. von Brand <vonbrand@inf.utfsm.cl>
Joachim Berdal Haga <cjhaga@fys.uio.no>
Jon Loeliger <jdl@freescale.com>
Jon Seymour <jon@blackcubes.dyndns.org>
Karl Hasselström <kha@treskal.com>
Kent Engstrom <kent@lysator.liu.se>
Lars Doelle <lars.doelle@on-line.de>
Lars Doelle <lars.doelle@on-line ! de>
Lukas Sandström <lukass@etek.chalmers.se>
Martin Langhoff <martin@catalyst.net.nz>
Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Ramsay Allan Jones <ramsay@ramsay1.demon.co.uk>
René Scharfe <rene.scharfe@lsrfire.ath.cx>
Robert Fitzsimons <robfitz@273k.net>
Santi Béjar <sbejar@gmail.com>
Sean Estabrooks <seanlkml@sympatico.ca>
Shawn O. Pearce <spearce@spearce.org>
Tony Luck <tony.luck@intel.com>
Ville Skyttä <scop@xemacs.org>
YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
anonymous <linux@horizon.com>
anonymous <linux@horizon.net>

View File

@@ -19,7 +19,9 @@
--numstat::
Similar to \--stat, but shows number of added and
deleted lines in decimal notation and pathname without
abbreviation, to make it more machine friendly.
abbreviation, to make it more machine friendly. For
binary files, outputs two `-` instead of saying
`0 0`.
--shortstat::
Output only the last line of the --stat format containing total

View File

@@ -33,8 +33,9 @@ OPTIONS
--numstat::
Similar to \--stat, but shows number of added and
deleted lines in decimal notation and pathname without
abbreviation, to make it more machine friendly. Turns
off "apply".
abbreviation, to make it more machine friendly. For
binary files, outputs two `-` instead of saying
`0 0`. Turns off "apply".
--summary::
Instead of applying the patch, output a condensed

View File

@@ -7,7 +7,9 @@ git-reset - Reset current HEAD to the specified state
SYNOPSIS
--------
'git-reset' [--mixed | --soft | --hard] [<commit-ish>]
[verse]
'git-reset' [--mixed | --soft | --hard] [<commit>]
'git-reset' [--mixed] <commit> [--] <paths>...
DESCRIPTION
-----------
@@ -21,6 +23,10 @@ the undo in the history.
If you want to undo a commit other than the latest on a branch,
gitlink:git-revert[1] is your friend.
The second form with 'paths' is used to revert selected paths in
the index from a given commit, without moving HEAD.
OPTIONS
-------
--mixed::
@@ -37,9 +43,9 @@ OPTIONS
--hard::
Matches the working tree and index to that of the tree being
switched to. Any changes to tracked files in the working tree
since <commit-ish> are lost.
since <commit> are lost.
<commit-ish>::
<commit>::
Commit to make the current HEAD.
Examples

View File

@@ -11,6 +11,7 @@ SYNOPSIS
'git-show-branch' [--all] [--remotes] [--topo-order] [--current]
[--more=<n> | --list | --independent | --merge-base]
[--no-name | --sha1-name] [--topics] [<rev> | <glob>]...
'git-show-branch' --reflog[=<n>] <ref>
DESCRIPTION
-----------
@@ -96,6 +97,10 @@ OPTIONS
will show the revisions given by "git rev-list {caret}master
topic1 topic2"
--reflog[=<n>] <ref>::
Shows <n> most recent ref-log entries for the given ref.
Note that --more, --list, --independent and --merge-base options
are mutually exclusive.

View File

@@ -92,6 +92,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
char comment[1000];
char *buffer;
unsigned int size;
int encoding_is_utf8;
setup_ident();
git_config(git_default_config);
@@ -117,6 +118,8 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
parents++;
}
encoding_is_utf8 = !strcmp(git_commit_encoding, "utf-8");
init_buffer(&buffer, &size);
add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
@@ -130,7 +133,11 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
/* Person/date information */
add_buffer(&buffer, &size, "author %s\n", git_author_info(1));
add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info(1));
add_buffer(&buffer, &size, "committer %s\n", git_committer_info(1));
if (!encoding_is_utf8)
add_buffer(&buffer, &size,
"encoding %s\n", git_commit_encoding);
add_buffer(&buffer, &size, "\n");
/* And add the comment */
while (fgets(comment, sizeof(comment), stdin) != NULL)
@@ -138,7 +145,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
/* And check the encoding */
buffer[size] = '\0';
if (!strcmp(git_commit_encoding, "utf-8") && !is_utf8(buffer))
if (encoding_is_utf8 && !is_utf8(buffer))
fprintf(stderr, commit_utf8_warn);
if (!write_sha1_file(buffer, size, commit_type, commit_sha1)) {

View File

@@ -20,6 +20,8 @@ void add_head(struct rev_info *revs);
static void cmd_log_init(int argc, const char **argv, const char *prefix,
struct rev_info *rev)
{
int i;
rev->abbrev = DEFAULT_ABBREV;
rev->commit_format = CMIT_FMT_DEFAULT;
rev->verbose_header = 1;
@@ -27,8 +29,21 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
argc = setup_revisions(argc, argv, rev, "HEAD");
if (rev->diffopt.pickaxe || rev->diffopt.filter)
rev->always_show_header = 0;
if (argc > 1)
die("unrecognized argument: %s", argv[1]);
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strncmp(arg, "--encoding=", 11)) {
arg += 11;
if (MAX_ENCODING_LENGTH <= strlen(arg))
die(" Value of output encoding '%s' too long",
arg);
if (strcmp(arg, "none"))
strcpy(git_commit_encoding, arg);
else
git_commit_encoding[0] = 0;
}
else
die("unrecognized argument: %s", arg);
}
}
static int cmd_log_walk(struct rev_info *rev)

View File

@@ -4,6 +4,7 @@
*/
#include "cache.h"
#include "builtin.h"
#include "utf8.h"
static FILE *cmitmsg, *patchfile, *fin, *fout;
@@ -510,40 +511,18 @@ static int decode_b_segment(char *in, char *ot, char *ep)
static void convert_to_utf8(char *line, char *charset)
{
#ifndef NO_ICONV
char *in, *out;
size_t insize, outsize, nrc;
char outbuf[4096]; /* cheat */
static char latin_one[] = "latin1";
char *input_charset = *charset ? charset : latin_one;
iconv_t conv = iconv_open(metainfo_charset, input_charset);
char *out = reencode_string(line, metainfo_charset, input_charset);
if (conv == (iconv_t) -1) {
static int warned_latin1_once = 0;
if (input_charset != latin_one) {
fprintf(stderr, "cannot convert from %s to %s\n",
input_charset, metainfo_charset);
*charset = 0;
}
else if (!warned_latin1_once) {
warned_latin1_once = 1;
fprintf(stderr, "tried to convert from %s to %s, "
"but your iconv does not work with it.\n",
input_charset, metainfo_charset);
}
if (!out) {
fprintf(stderr, "cannot convert from %s to %s\n",
input_charset, metainfo_charset);
*charset = 0;
return;
}
in = line;
insize = strlen(in);
out = outbuf;
outsize = sizeof(outbuf);
nrc = iconv(conv, &in, &insize, &out, &outsize);
iconv_close(conv);
if (nrc == (size_t) -1)
return;
*out = 0;
strcpy(line, outbuf);
#endif
strcpy(line, out);
free(out);
}
static int decode_header_bq(char *it)

View File

@@ -2,6 +2,7 @@
#include "tag.h"
#include "commit.h"
#include "pkt-line.h"
#include "utf8.h"
int save_commit_buffer = 1;
@@ -597,10 +598,53 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com
return offset;
}
unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
unsigned long len, char *buf, unsigned long space,
/*
 * Look up a header line in the raw commit buffer.
 *
 * Walks commit->buffer one line at a time and stops at the first empty
 * line (which ends the header section) or at the end of the buffer.
 * A line matches when it starts with `key` immediately followed by a
 * single space.
 *
 * Returns a freshly xmalloc()ed, NUL-terminated copy of the text that
 * follows "<key> " up to the end of that line; the caller owns it and
 * must free() it.  Returns NULL when the commit has no buffer or no
 * such header is present.
 */
static char *get_header(const struct commit *commit, const char *key)
{
	size_t key_len = strlen(key);
	const char *line = commit->buffer;

	/*
	 * `line` becomes NULL after an unterminated final line has been
	 * examined (or if there was no buffer to begin with); stop there
	 * instead of handing NULL to strchr().
	 */
	while (line) {
		const char *eol = strchr(line, '\n');
		const char *next;

		if (line == eol)
			return NULL;	/* empty line: end of the headers */
		if (!eol) {
			/* last line lacks a newline; it ends at the NUL */
			eol = line + strlen(line);
			next = NULL;
		} else
			next = eol + 1;

		if (!strncmp(line, key, key_len) && line[key_len] == ' ') {
			/* value is everything between "<key> " and eol */
			size_t len = eol - line - key_len - 1;
			char *ret = xmalloc(len + 1);

			memcpy(ret, line + key_len + 1, len);
			ret[len] = '\0';
			return ret;
		}
		line = next;
	}
	return NULL;
}
/*
 * Re-encode the raw buffer of `commit` into git_commit_encoding.
 *
 * The source encoding is taken from the commit's "encoding" header.
 * Returns a newly allocated string that the caller must free(), or
 * NULL when the commit carries no "encoding" header, when that header
 * already names git_commit_encoding, or when the conversion fails.
 */
static char *logmsg_reencode(const struct commit *commit)
{
	char *encoding = get_header(commit, "encoding");
	char *out = NULL;

	if (!encoding)
		return NULL;

	/* Only convert when the recorded encoding differs from the target. */
	if (strcmp(encoding, git_commit_encoding))
		out = reencode_string(commit->buffer,
				      git_commit_encoding, encoding);

	/* get_header() allocated this; release it on every path. */
	free(encoding);
	return out;
}
unsigned long pretty_print_commit(enum cmit_fmt fmt,
const struct commit *commit,
unsigned long len,
char *buf, unsigned long space,
int abbrev, const char *subject,
const char *after_subject, int relative_date)
const char *after_subject,
int relative_date)
{
int hdr = 1, body = 0;
unsigned long offset = 0;
@@ -608,6 +652,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
int parents_shown = 0;
const char *msg = commit->buffer;
int plain_non_ascii = 0;
char *reencoded = NULL;
if (*git_commit_encoding) {
reencoded = logmsg_reencode(commit);
if (reencoded) {
msg = reencoded;
len = strlen(msg);
}
}
if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
indent = 0;
@@ -755,6 +808,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
if (fmt == CMIT_FMT_EMAIL && !body)
buf[offset++] = '\n';
buf[offset] = '\0';
free(reencoded);
return offset;
}

View File

@@ -72,6 +72,7 @@ struct rev_info {
const char *ref_message_id;
const char *add_signoff;
const char *extra_headers;
const char *log_reencode;
/* Filter by commit log message */
struct grep_opt *grep_filter;

54
utf8.c
View File

@@ -276,3 +276,57 @@ void print_wrapped_text(const char *text, int indent, int indent2, int width)
}
}
}
/*
 * Given a buffer and its encoding, return it re-encoded
 * with iconv.  If the conversion fails, returns NULL.
 *
 * `in` is a NUL-terminated string in `in_encoding`; the result is a
 * newly allocated NUL-terminated string in `out_encoding` that the
 * caller must free().  NULL is returned when `in` or `in_encoding` is
 * NULL, when iconv_open() does not support the requested pair, or when
 * iconv() fails for any reason other than running out of output space.
 */
#ifndef NO_ICONV
char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
{
	iconv_t conv;
	size_t insz, outsz, outalloc;
	char *out, *outpos, *cp;
	int flushing = 0;

	if (!in || !in_encoding)
		return NULL;
	conv = iconv_open(out_encoding, in_encoding);
	if (conv == (iconv_t) -1)
		return NULL;
	insz = strlen(in);
	outsz = insz;
	outalloc = outsz + 1; /* for terminating NUL */
	out = xmalloc(outalloc);
	outpos = out;
	cp = (char *)in; /* iconv() wants a non-const input pointer */

	for (;;) {
		size_t cnt;

		/*
		 * First convert the input; once it is consumed, call
		 * iconv() with a NULL input so that stateful output
		 * encodings emit the sequence that returns them to
		 * their initial shift state.
		 */
		if (flushing)
			cnt = iconv(conv, NULL, NULL, &outpos, &outsz);
		else
			cnt = iconv(conv, &cp, &insz, &outpos, &outsz);

		if (cnt == (size_t) -1) {
			size_t sofar;
			if (errno != E2BIG) {
				free(out);
				iconv_close(conv);
				return NULL;
			}
			/* insz has remaining number of bytes.
			 * since we started outsz the same as insz,
			 * it is likely that insz is not enough for
			 * converting the rest.
			 */
			sofar = outpos - out;
			outalloc = sofar + insz * 2 + 32;
			out = xrealloc(out, outalloc);
			outpos = out + sofar;
			/* keep one byte in reserve for the terminating NUL */
			outsz = outalloc - sofar - 1;
			continue;
		}
		if (flushing)
			break;
		flushing = 1;
	}
	*outpos = '\0';
	iconv_close(conv);
	return out;
}
#endif

6
utf8.h
View File

@@ -5,4 +5,10 @@ int utf8_width(const char **start);
int is_utf8(const char *text);
void print_wrapped_text(const char *text, int indent, int indent2, int len);
/*
 * reencode_string() converts the NUL-terminated string `in` from
 * in_encoding to out_encoding and yields a newly allocated result,
 * or NULL if the conversion fails.
 *
 * Builds with NO_ICONV defined have no converter: the macro below
 * ignores its arguments and always evaluates to NULL, so callers see
 * every conversion as having failed.
 */
#ifndef NO_ICONV
char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding);
#else
#define reencode_string(a,b,c) NULL
#endif
#endif