mirror of
https://github.com/git/git.git
synced 2026-03-13 10:23:30 +01:00
Merge branch 'master' into next
* master: i18n: drop "encoding" header in the output after re-coding. commit-tree: cope with different ways "utf-8" can be spelled. Move commit reencoding parameter parsing to revision.c Documentation: minor rewording for git-log and git-show pages. Documentation: i18n commit log message notes. t3900: test log --encoding=none commit re-encoding: fix confusion between no and default conversion.
This commit is contained in:
@@ -81,6 +81,11 @@ Your parents must have hated you!::
|
||||
Your sysadmin must hate you!::
|
||||
The password(5) name field is longer than a giant static buffer.
|
||||
|
||||
Discussion
|
||||
----------
|
||||
|
||||
include::i18n.txt[]
|
||||
|
||||
See Also
|
||||
--------
|
||||
gitlink:git-write-tree[1]
|
||||
|
||||
@@ -223,6 +223,11 @@ should be recorded as a single commit. In fact, the command
|
||||
refuses to run when given pathnames (but see `-i` option).
|
||||
|
||||
|
||||
DISCUSSION
|
||||
----------
|
||||
|
||||
include::i18n.txt[]
|
||||
|
||||
ENVIRONMENT VARIABLES
|
||||
---------------------
|
||||
The command specified by either the VISUAL or EDITOR environment
|
||||
|
||||
@@ -31,7 +31,9 @@ include::pretty-formats.txt[]
|
||||
Limits the number of commits to show.
|
||||
|
||||
<since>..<until>::
|
||||
Show only commits between the named two commits.
|
||||
Show only commits between the named two commits. When
|
||||
either <since> or <until> is omitted, it defaults to
|
||||
`HEAD`, i.e. the tip of the current branch.
|
||||
|
||||
-p::
|
||||
Show the change the commit introduces in a patch form.
|
||||
@@ -63,6 +65,12 @@ git log -r --name-status release..test::
|
||||
in the "release" branch, along with the list of paths
|
||||
each commit modifies.
|
||||
|
||||
Discussion
|
||||
----------
|
||||
|
||||
include::i18n.txt[]
|
||||
|
||||
|
||||
Author
|
||||
------
|
||||
Written by Linus Torvalds <torvalds@osdl.org>
|
||||
|
||||
@@ -21,6 +21,7 @@ SYNOPSIS
|
||||
[ \--stdin ]
|
||||
[ \--topo-order ]
|
||||
[ \--parents ]
|
||||
[ \--encoding[=<encoding>] ]
|
||||
[ \--(author|committer|grep)=<pattern> ]
|
||||
[ [\--objects | \--objects-edge] [ \--unpacked ] ]
|
||||
[ \--pretty | \--header ]
|
||||
|
||||
@@ -30,8 +30,8 @@ This manual page describes only the most frequently used options.
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
<commitid>::
|
||||
ID of the commit to show.
|
||||
<object>::
|
||||
The name of the object to show.
|
||||
|
||||
include::pretty-formats.txt[]
|
||||
|
||||
@@ -40,7 +40,8 @@ EXAMPLES
|
||||
--------
|
||||
|
||||
git show v1.0.0::
|
||||
Shows the tag `v1.0.0`.
|
||||
Shows the tag `v1.0.0`, along with the object the tag
|
||||
points at.
|
||||
|
||||
git show v1.0.0^{tree}::
|
||||
Shows the tree pointed to by the tag `v1.0.0`.
|
||||
@@ -54,10 +55,16 @@ git show master:Makefile master:t/Makefile
|
||||
Concatenates the contents of said Makefiles in the head
|
||||
of the branch `master`.
|
||||
|
||||
Discussion
|
||||
----------
|
||||
|
||||
include::i18n.txt[]
|
||||
|
||||
Author
|
||||
------
|
||||
Written by Linus Torvalds <torvalds@osdl.org> and
|
||||
Junio C Hamano <junkio@cox.net>
|
||||
Junio C Hamano <junkio@cox.net>. Significantly enhanced by
|
||||
Johannes Schindelin <Johannes.Schindelin@gmx.de>.
|
||||
|
||||
|
||||
Documentation
|
||||
|
||||
57
Documentation/i18n.txt
Normal file
57
Documentation/i18n.txt
Normal file
@@ -0,0 +1,57 @@
|
||||
At the core level, git is character encoding agnostic.
|
||||
|
||||
- The pathnames recorded in the index and in the tree objects
|
||||
are treated as uninterpreted sequences of non-NUL bytes.
|
||||
What readdir(2) returns are what are recorded and compared
|
||||
with the data git keeps track of, which in turn are expected
|
||||
to be what lstat(2) and creat(2) accept. There is no such
|
||||
thing as pathname encoding translation.
|
||||
|
||||
- The contents of the blob objects are uninterpreted sequences
|
||||
of bytes. There is no encoding translation at the core
|
||||
level.
|
||||
|
||||
- The commit log messages are uninterpreted sequences of non-NUL
|
||||
bytes.
|
||||
|
||||
Although we encourage that the commit log messages are encoded
|
||||
in UTF-8, both the core and git Porcelain are designed not to
|
||||
force UTF-8 on projects. If all participants of a particular
|
||||
project find it more convenient to use legacy encodings, git
|
||||
does not forbid it. However, there are a few things to keep in
|
||||
mind.
|
||||
|
||||
. `git-commit-tree` (hence, `git-commit` which uses it) issues
|
||||
a warning if the commit log message given to it does not look
|
||||
like a valid UTF-8 string, unless you explicitly say your
|
||||
project uses a legacy encoding. The way to say this is to
|
||||
have core.commitencoding in `.git/config` file, like this:
|
||||
+
|
||||
------------
|
||||
[core]
|
||||
commitencoding = ISO-8859-1
|
||||
------------
|
||||
+
|
||||
Commit objects created with the above setting record the value
|
||||
of `core.commitencoding` in their `encoding` header. This is to
|
||||
help other people who look at them later. Lack of this header
|
||||
implies that the commit log message is encoded in UTF-8.
|
||||
|
||||
. `git-log`, `git-show` and friends look at the `encoding`
|
||||
header of a commit object, and tries to re-code the log
|
||||
message into UTF-8 unless otherwise specified. You can
|
||||
specify the desired output encoding with
|
||||
`core.logoutputencoding` in `.git/config` file, like this:
|
||||
+
|
||||
------------
|
||||
[core]
|
||||
logoutputencoding = ISO-8859-1
|
||||
------------
|
||||
+
|
||||
If you do not have this configuration variable, the value of
|
||||
`core.commitencoding` is used instead.
|
||||
|
||||
Note that we deliberately chose not to re-code the commit log
|
||||
message when a commit is made to force UTF-8 at the commit
|
||||
object level, because re-coding to UTF-8 is not necessarily a
|
||||
reversible operation.
|
||||
@@ -76,3 +76,10 @@ displayed in full, regardless of whether --abbrev or
|
||||
--no-abbrev are used, and 'parents' information show the
|
||||
true parent commits, without taking grafts nor history
|
||||
simplification into account.
|
||||
|
||||
--encoding[=<encoding>]::
|
||||
The commit objects record the encoding used for the log message
|
||||
in their encoding header; this option can be used to tell the
|
||||
command to re-code the commit log message in the encoding
|
||||
preferred by the user. For non-plumbing commands this
|
||||
defaults to UTF-8.
|
||||
|
||||
@@ -119,8 +119,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
|
||||
}
|
||||
|
||||
/* Not having i18n.commitencoding is the same as having utf-8 */
|
||||
encoding_is_utf8 = (!git_commit_encoding ||
|
||||
!strcmp(git_commit_encoding, "utf-8"));
|
||||
encoding_is_utf8 = is_encoding_utf8(git_commit_encoding);
|
||||
|
||||
init_buffer(&buffer, &size);
|
||||
add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
|
||||
|
||||
47
commit.c
47
commit.c
@@ -624,6 +624,48 @@ static char *get_header(const struct commit *commit, const char *key)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Adjust the "encoding" header of a commit buffer whose log message has
 * just been re-coded into `encoding`.
 *
 * buf      - NUL-terminated commit buffer; it may be passed to
 *            xrealloc(), so the caller must use the returned pointer
 *            from then on.
 * encoding - name of the encoding the buffer was re-coded into.
 *
 * When `encoding` names UTF-8 (as decided by is_encoding_utf8()) the
 * header line is removed; otherwise its value is replaced by `encoding`.
 * The buffer is returned untouched when no "encoding" header is found
 * in the header section.
 */
static char *replace_encoding_header(char *buf, char *encoding)
{
	static const char header_key[] = "encoding ";
	const size_t key_len = sizeof(header_key) - 1;
	char *encoding_header = strstr(buf, "\nencoding ");
	char *end_of_encoding_header;
	char *end_of_headers;
	size_t encoding_header_pos;
	size_t encoding_header_len;
	size_t tail_len;
	size_t new_len;
	size_t need_len;
	size_t buflen = strlen(buf) + 1;

	if (!encoding_header)
		return buf; /* should not happen but be defensive */

	/*
	 * The header section ends at the first blank line.  A match that
	 * lies beyond it is a line of the log message that merely starts
	 * with "encoding "; it must not be mistaken for the header.
	 */
	end_of_headers = strstr(buf, "\n\n");
	if (end_of_headers && end_of_headers < encoding_header)
		return buf;

	encoding_header++; /* skip the newline that precedes the header */
	end_of_encoding_header = strchr(encoding_header, '\n');
	if (!end_of_encoding_header)
		return buf; /* should not happen but be defensive */
	end_of_encoding_header++; /* the header line includes its newline */

	encoding_header_len = end_of_encoding_header - encoding_header;
	encoding_header_pos = encoding_header - buf;
	/* bytes following the header line, including the trailing NUL */
	tail_len = buflen - (encoding_header_pos + encoding_header_len);

	if (is_encoding_utf8(encoding)) {
		/* we have re-coded to UTF-8; drop the header */
		memmove(encoding_header, end_of_encoding_header, tail_len);
		return buf;
	}

	new_len = strlen(encoding);
	need_len = key_len + new_len + 1; /* "encoding " + value + "\n" */
	if (encoding_header_len < need_len) {
		/* the new line is longer; grow and recompute the pointers */
		buf = xrealloc(buf, buflen + (need_len - encoding_header_len));
		encoding_header = buf + encoding_header_pos;
		end_of_encoding_header = encoding_header + encoding_header_len;
	}
	/* slide the tail so that it starts right after the new header line */
	memmove(encoding_header + need_len, end_of_encoding_header, tail_len);
	memcpy(encoding_header + key_len, encoding, new_len);
	encoding_header[key_len + new_len] = '\n';
	return buf;
}
|
||||
|
||||
static char *logmsg_reencode(const struct commit *commit)
|
||||
{
|
||||
char *encoding;
|
||||
@@ -633,6 +675,8 @@ static char *logmsg_reencode(const struct commit *commit)
|
||||
: git_commit_encoding);
|
||||
|
||||
if (!output_encoding)
|
||||
output_encoding = "utf-8";
|
||||
else if (!*output_encoding)
|
||||
return NULL;
|
||||
encoding = get_header(commit, "encoding");
|
||||
if (!encoding || !strcmp(encoding, output_encoding)) {
|
||||
@@ -640,6 +684,9 @@ static char *logmsg_reencode(const struct commit *commit)
|
||||
return NULL;
|
||||
}
|
||||
out = reencode_string(commit->buffer, output_encoding, encoding);
|
||||
if (out)
|
||||
out = replace_encoding_header(out, output_encoding);
|
||||
|
||||
free(encoding);
|
||||
if (!out)
|
||||
return NULL;
|
||||
|
||||
@@ -1039,6 +1039,14 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
|
||||
all_match = 1;
|
||||
continue;
|
||||
}
|
||||
if (!strncmp(arg, "--encoding=", 11)) {
|
||||
arg += 11;
|
||||
if (strcmp(arg, "none"))
|
||||
git_log_output_encoding = strdup(arg);
|
||||
else
|
||||
git_log_output_encoding = "";
|
||||
continue;
|
||||
}
|
||||
|
||||
opts = diff_opt_parse(&revs->diffopt, argv+i, argc-i);
|
||||
if (opts > 0) {
|
||||
|
||||
@@ -8,7 +8,7 @@ test_description='commit and log output encodings'
|
||||
. ./test-lib.sh
|
||||
|
||||
compare_with () {
|
||||
git-show -s "$1" | sed -e '1,/^$/d' -e 's/^ //' -e '$d' >current &&
|
||||
git-show -s $1 | sed -e '1,/^$/d' -e 's/^ //' -e '$d' >current &&
|
||||
diff -u current "$2"
|
||||
}
|
||||
|
||||
@@ -112,4 +112,11 @@ do
|
||||
done
|
||||
done
|
||||
|
||||
for H in ISO-8859-1 EUCJP ISO-2022-JP
|
||||
do
|
||||
test_expect_success "No conversion with $H" '
|
||||
compare_with "--encoding=none '$H'" ../t3900/'$H'.txt
|
||||
'
|
||||
done
|
||||
|
||||
test_done
|
||||
|
||||
9
utf8.c
9
utf8.c
@@ -277,6 +277,15 @@ void print_wrapped_text(const char *text, int indent, int indent2, int width)
|
||||
}
|
||||
}
|
||||
|
||||
int is_encoding_utf8(const char *name)
|
||||
{
|
||||
if (!name)
|
||||
return 1;
|
||||
if (!strcasecmp(name, "utf-8") || !strcasecmp(name, "utf8"))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a buffer and its encoding, return it re-encoded
|
||||
* with iconv. If the conversion fails, returns NULL.
|
||||
|
||||
Reference in New Issue
Block a user