From d01bac0ef79f3b25861d0168f7c0376b1d1b964e Mon Sep 17 00:00:00 2001 From: Jean-Jacques Lafay Date: Thu, 24 Apr 2014 14:24:39 +0200 Subject: [PATCH 1/4] git tag --contains: avoid stack overflow In large repos, the recursion implementation of contains(commit, commit_list) may result in a stack overflow. Replace the recursion with a loop to fix it. This problem is more apparent on Windows than on Linux, where the stack is more limited by default. See also this thread on the msysGit list: https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion [jes: re-written to imitate the original recursion more closely] Thomas Braun pointed out several documentation shortcomings. Tests are run only if ulimit -s is available. This means they cannot be run on Windows. Signed-off-by: Jean-Jacques Lafay Signed-off-by: Johannes Schindelin Tested-by: Stepan Kasal Reviewed-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/tag.c | 90 +++++++++++++++++++++++++++++++++++++++++--------- t/t7004-tag.sh | 26 +++++++++++++++ 2 files changed, 101 insertions(+), 15 deletions(-) diff --git a/builtin/tag.c b/builtin/tag.c index 74d3780b77..40758d40a5 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -73,11 +73,19 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) return 0; } -static int contains_recurse(struct commit *candidate, +enum contains_result { + CONTAINS_UNKNOWN = -1, + CONTAINS_NO = 0, + CONTAINS_YES = 1, +}; + +/* + * Test whether the candidate or one of its parents is contained in the list. + * Do not recurse to find out, though, but return -1 if inconclusive. + */ +static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want) { - struct commit_list *p; - /* was it previously marked as containing a want commit? */ if (candidate->object.flags & TMP_MARK) return 1; @@ -85,26 +93,78 @@ static int contains_recurse(struct commit *candidate, if (candidate->object.flags & UNINTERESTING) return 0; /* or are we it? */ - if (in_commit_list(want, candidate)) + if (in_commit_list(want, candidate)) { + candidate->object.flags |= TMP_MARK; return 1; + } if (parse_commit(candidate) < 0) return 0; - /* Otherwise recurse and mark ourselves for future traversals. */ - for (p = candidate->parents; p; p = p->next) { - if (contains_recurse(p->item, want)) { - candidate->object.flags |= TMP_MARK; - return 1; - } - } - candidate->object.flags |= UNINTERESTING; - return 0; + return -1; } -static int contains(struct commit *candidate, const struct commit_list *want) +/* + * Mimicking the real stack, this stack lives on the heap, avoiding stack + * overflows. + * + * At each recursion step, the stack items points to the commits whose + * ancestors are to be inspected. + */ +struct stack { + int nr, alloc; + struct stack_entry { + struct commit *commit; + struct commit_list *parents; + } *stack; +}; + +static void push_to_stack(struct commit *candidate, struct stack *stack) { - return contains_recurse(candidate, want); + int index = stack->nr++; + ALLOC_GROW(stack->stack, stack->nr, stack->alloc); + stack->stack[index].commit = candidate; + stack->stack[index].parents = candidate->parents; +} + +static enum contains_result contains(struct commit *candidate, + const struct commit_list *want) +{ + struct stack stack = { 0, 0, NULL }; + int result = contains_test(candidate, want); + + if (result != CONTAINS_UNKNOWN) + return result; + + push_to_stack(candidate, &stack); + while (stack.nr) { + struct stack_entry *entry = &stack.stack[stack.nr - 1]; + struct commit *commit = entry->commit; + struct commit_list *parents = entry->parents; + + if (!parents) { + commit->object.flags |= UNINTERESTING; + stack.nr--; + } + /* + * If we just popped the stack, parents->item has been marked, + * therefore contains_test will return a meaningful 0 or 1. + */ + else switch (contains_test(parents->item, want)) { + case CONTAINS_YES: + commit->object.flags |= TMP_MARK; + stack.nr--; + break; + case CONTAINS_NO: + entry->parents = parents->next; + break; + case CONTAINS_UNKNOWN: + push_to_stack(parents->item, &stack); + break; + } + } + free(stack.stack); + return contains_test(candidate, want); } static void show_tag_lines(const unsigned char *sha1, int lines) diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index c8d6e9f88c..ea4d48be2b 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -1380,4 +1380,30 @@ test_expect_success 'multiple --points-at are OR-ed together' ' test_cmp expect actual ' +run_with_limited_stack () { + (ulimit -s 64 && "$@") +} + +test_lazy_prereq ULIMIT 'run_with_limited_stack true' + +# we require ulimit, this excludes Windows +test_expect_success ULIMIT '--contains works in a deep repo' ' + >expect && + i=1 && + while test $i -lt 4000 + do + echo "commit refs/heads/master +committer A U Thor $((1000000000 + $i * 100)) +0200 +data <actual && + test_cmp expect actual +' + test_done From 2ba8ca4d91547333cdd04778bf2b175c2a9c24ba Mon Sep 17 00:00:00 2001 From: RomanBelinsky Date: Tue, 11 Feb 2014 18:23:02 +0200 Subject: [PATCH 2/4] SVN.pm::parse_svn_date: allow timestamps with a single-digit hour Some broken subversion server gives timestamps with only one digit in the hour part, like this: 2014-01-07T5:01:02.048176Z Loosen the regexp that expected to see two-digit hour, minute and second parts to accept a single-digit hour (but not minute or second). Signed-off-by: Stepan Kasal Signed-off-by: Junio C Hamano --- perl/Git/SVN.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index a59564fb34..09cff135ef 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1321,7 +1321,7 @@ sub get_untracked { sub parse_svn_date { my $date = shift || return '+0000 1970-01-01 00:00:00'; my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T - (\d\d)\:(\d\d)\:(\d\d)\.\d*Z$/x) or + (\d\d?)\:(\d\d)\:(\d\d)\.\d*Z$/x) or croak "Unable to parse date: $date\n"; my $parsed_date; # Set next. From ac2378d326eed5a4828cbc552210f2be6272a981 Mon Sep 17 00:00:00 2001 From: Theodore Leblond Date: Wed, 16 May 2012 06:52:49 -0700 Subject: [PATCH 3/4] compat/poll: sleep 1 millisecond to avoid busy wait SwitchToThread() only gives away the rest of the current time slice to another thread in the current process. So if the thread that feeds the file decscriptor we're polling is not in the current process, we get busy-waiting. I played around with this quite a bit. After trying some more complex schemes, I found that what worked best is to just sleep 1 millisecond between iterations. Though it's a very short time, it still completely eliminates the busy wait condition, without hurting perf. There code uses SleepEx(1, TRUE) to sleep. See this page for a good discussion of why that is better than calling SwitchToThread, which is what was used previously: http://stackoverflow.com/questions/1383943/switchtothread-vs-sleep1 Note that calling SleepEx(0, TRUE) does *not* solve the busy wait. The most striking case was when testing on a UNC share with a large repo, on a single CPU machine. Without the fix, it took 4 minutes 15 seconds, and with the fix it took just 1:08! I think it's because git-upload-pack's busy wait was eating the CPU away from the git process that's doing the real work. With multi-proc, the timing is not much different, but tons of CPU time is still wasted, which can be a killer on a server that needs to do bunch of other things. I also tested the very fast local case, and didn't see any measurable difference. On a big repo with 4500 files, the upload-pack took about 2 seconds with and without the fix. [jc: this was first accepted in msysgit tree in May 2012 via a pull request and Paolo Bonzini has also accepted the same fix to Gnulib around the same time; see $gmane/247518 for a bit more detail] Signed-off-by: Stepan Kasal Acked-by: Johannes Sixt Acked-by: Erik Faye-Lund Signed-off-by: Junio C Hamano --- compat/poll/poll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/poll/poll.c b/compat/poll/poll.c index 31163f2ae7..a9b41d89f4 100644 --- a/compat/poll/poll.c +++ b/compat/poll/poll.c @@ -605,7 +605,7 @@ restart: if (!rc && timeout == INFTIM) { - SwitchToThread(); + SleepEx (1, TRUE); goto restart; } From 997304a54178f07ea9bca93cbaf6e91d7a06fd21 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Fri, 7 Jan 2011 17:20:21 +0100 Subject: [PATCH 4/4] MSVC: link dynamically to the CRT Dynamic linking is generally preferred over static linking, and MSVCRT.dll has been integral part of Windows for a long time. This also fixes linker warnings for _malloc and _free in zlib.lib, which seems to be compiled for MSVCRT.dll already. The DLL version also exports some of the CRT initialization functions, which are hidden in the static libcmt.lib (e.g. __wgetmainargs, required by subsequent Unicode patches). Signed-off-by: Karsten Blees Signed-off-by: Stepan Kasal Acked-by: Sebastian Schuberth Acked-by: Marat Radchenko Signed-off-by: Junio C Hamano --- config.mak.uname | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config.mak.uname b/config.mak.uname index efaed94d5d..eebc847b81 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -365,16 +365,16 @@ ifeq ($(uname_S),Windows) compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/dirent.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" - BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE -NODEFAULTLIB:MSVCRT.lib + BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib PTHREAD_LIBS = lib = ifndef DEBUG - BASIC_CFLAGS += -GL -Os -MT + BASIC_CFLAGS += -GL -Os -MD BASIC_LDFLAGS += -LTCG AR += -LTCG else - BASIC_CFLAGS += -Zi -MTd + BASIC_CFLAGS += -Zi -MDd endif X = .exe endif