From 776398709dee4050fc194fec45c5818ba9b01afe Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 1 Sep 2007 23:53:47 -0700 Subject: [PATCH 001/232] Keep last used delta base in the delta window This is based on Martin Koegler's idea to keep the object that was successfully used as the base of the delta when it is about to fall off the edge of the window. Instead of doing so only for the objects at the edge of the window, this makes the window a lru eviction mechanism. If an entry is used as a base, it is moved to the last of the queue to be evicted. This is a quick-and-dirty implementation, as it keeps the original implementation of the data structure used for the window. This originally was done as an array, not as an array of pointers, because it was meant to be used as a cyclic FIFO buffer and a plain array avoids an extra pointer indirection, while its FIFOness meant that we are not "moving" the entries like this patch does. The runtimes of the three versions were comparable. It seems to make the resulting chain even shorter, which can only be good. (stock "master") 15782196 bytes chain length = 1: 2972 objects chain length = 2: 2651 objects chain length = 3: 2369 objects chain length = 4: 2121 objects chain length = 5: 1877 objects ... chain length = 46: 490 objects chain length = 47: 515 objects chain length = 48: 527 objects chain length = 49: 570 objects chain length = 50: 408 objects (with your patch) 15745736 bytes (0.23% smaller) chain length = 1: 3137 objects chain length = 2: 2688 objects chain length = 3: 2322 objects chain length = 4: 2146 objects chain length = 5: 1824 objects ... chain length = 46: 503 objects chain length = 47: 509 objects chain length = 48: 536 objects chain length = 49: 588 objects chain length = 50: 357 objects (with this patch) 15612086 bytes (1.08% smaller) chain length = 1: 4831 objects chain length = 2: 3811 objects chain length = 3: 2964 objects chain length = 4: 2352 objects chain length = 5: 1944 objects ... 
chain length = 46: 327 objects chain length = 47: 353 objects chain length = 48: 304 objects chain length = 49: 298 objects chain length = 50: 135 objects [jc: this is with code simplification follow-up from Nico] Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 12509faa77..e64e3a03a0 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1460,7 +1460,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) do { struct object_entry *entry = list[--i]; struct unpacked *n = array + idx; - int j; + int j, best_base = -1; if (!entry->preferred_base) processed++; @@ -1505,6 +1505,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) j = window; while (--j > 0) { + int ret; uint32_t other_idx = idx + j; struct unpacked *m; if (other_idx >= window) @@ -1512,8 +1513,11 @@ static void find_deltas(struct object_entry **list, int window, int depth) m = array + other_idx; if (!m->entry) break; - if (try_delta(n, m, max_depth) < 0) + ret = try_delta(n, m, max_depth); + if (ret < 0) break; + else if (ret > 0) + best_base = other_idx; } /* if we made n a delta, and if n is already at max @@ -1523,6 +1527,23 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (entry->delta && depth <= n->depth) continue; + /* + * Move the best delta base up in the window, after the + * currently deltified object, to keep it longer. It will + * be the first base object to be attempted next. 
+ */ + if (entry->delta) { + struct unpacked swap = array[best_base]; + int dist = (window + idx - best_base) % window; + int dst = best_base; + while (dist--) { + int src = (dst + 1) % window; + array[dst] = array[src]; + dst = src; + } + array[dst] = swap; + } + next: idx++; if (count + 1 < window) From 1e61b7640d09015213dbcae3564fa27ac6a8c151 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 3 Sep 2007 02:40:06 -0700 Subject: [PATCH 002/232] Start 1.5.4 cycle Signed-off-by: Junio C Hamano --- Documentation/RelNotes-1.5.4.txt | 14 ++++++++++++++ GIT-VERSION-GEN | 2 +- RelNotes | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 Documentation/RelNotes-1.5.4.txt diff --git a/Documentation/RelNotes-1.5.4.txt b/Documentation/RelNotes-1.5.4.txt new file mode 100644 index 0000000000..1df66af9ce --- /dev/null +++ b/Documentation/RelNotes-1.5.4.txt @@ -0,0 +1,14 @@ +GIT v1.5.4 Release Notes +======================== + +Updates since v1.5.3 +-------------------- + + + +Fixes since v1.5.3 +------------------ + +All of the fixes in v1.5.3 maintenance series are included in +this release, unless otherwise noted. + diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 3835fb3965..3c0032cec5 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v1.5.3.1.GIT +DEF_VER=v1.5.3.GIT LF=' ' diff --git a/RelNotes b/RelNotes index ea8f800cbb..46308cee0b 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes-1.5.3.1.txt \ No newline at end of file +Documentation/RelNotes-1.5.4.txt \ No newline at end of file From 34cc60ce2b48f6037997543ddbab1ed9903df4a8 Mon Sep 17 00:00:00 2001 From: Douglas Stockwell Date: Mon, 3 Sep 2007 03:06:25 +0900 Subject: [PATCH 003/232] send-email: Add support for SSL and SMTP-AUTH Allows username and password to be given using --smtp-user and --smtp-pass. SSL use is flagged by --smtp-ssl. These are backed by corresponding defaults in the git configuration file. 
This implements Junio's 'mail identity' suggestion in a slightly more generalised manner. --identity=$identity, backed by sendemail.identity indicates that the configuration subsection [sendemail "$identity"] should take priority over the [sendemail] section for all configuration values. Signed-off-by: Douglas Stockwell Signed-off-by: Junio C Hamano --- Documentation/git-send-email.txt | 35 ++++++++++- git-send-email.perl | 101 +++++++++++++++++++++++-------- 2 files changed, 109 insertions(+), 27 deletions(-) diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt index 16bfd7be22..1ec61affab 100644 --- a/Documentation/git-send-email.txt +++ b/Documentation/git-send-email.txt @@ -75,6 +75,12 @@ The --cc option must be repeated for each user you want on the cc list. Make git-send-email less verbose. One line per email should be all that is output. +--identity:: + A configuration identity. When given, causes values in the + 'sendemail.' subsection to take precedence over + values in the 'sendemail' section. The default identity is + the value of 'sendemail.identity'. + --smtp-server:: If set, specifies the outgoing SMTP server to use (e.g. `smtp.example.com` or a raw IP address). Alternatively it can @@ -85,6 +91,17 @@ The --cc option must be repeated for each user you want on the cc list. `/usr/lib/sendmail` if such program is available, or `localhost` otherwise. +--smtp-user, --smtp-pass:: + Username and password for SMTP-AUTH. Defaults are the values of + the configuration values 'sendemail.smtpuser' and + 'sendemail.smtppass', but see also 'sendemail.identity'. + If not set, authentication is not attempted. + +--smtp-ssl:: + If set, connects to the SMTP server using SSL. + Default is the value of the 'sendemail.smtpssl' configuration value; + if that is unspecified, does not use SSL. + --subject:: Specify the initial subject of the email thread. Only necessary if --compose is also set. 
If --compose @@ -122,6 +139,13 @@ The --to option must be repeated for each user you want on the to list. CONFIGURATION ------------- +sendemail.identity:: + The default configuration identity. When specified, + 'sendemail..' will have higher precedence than + 'sendemail.'. This is useful to declare multiple SMTP + identities and to hoist sensitive authentication information + out of the repository and into the global configuation file. + sendemail.aliasesfile:: To avoid typing long email addresses, point this to one or more email aliases files. You must also supply 'sendemail.aliasfiletype'. @@ -141,7 +165,16 @@ sendemail.chainreplyto:: parameter. sendemail.smtpserver:: - Default smtp server to use. + Default SMTP server to use. + +sendemail.smtpuser:: + Default SMTP-AUTH username. + +sendemail.smtppass:: + Default SMTP-AUTH password. + +sendemail.smtpssl:: + Boolean value specifying the default to the '--smtp-ssl' parameter. Author ------ diff --git a/git-send-email.perl b/git-send-email.perl index e0b7d1245e..dd7560b180 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -73,9 +73,18 @@ Options: --signed-off-cc Automatically add email addresses that appear in Signed-off-by: or Cc: lines to the cc: list. Defaults to on. + --identity The configuration identity, a subsection to prioritise over + the default section. + --smtp-server If set, specifies the outgoing SMTP server to use. Defaults to localhost. + --smtp-user The username for SMTP-AUTH. + + --smtp-pass The password for SMTP-AUTH. + + --smtp-ssl If set, connects to the SMTP server using SSL. + --suppress-from Suppress sending emails to yourself if your address appears in a From: line. Defaults to off. 
@@ -145,7 +154,6 @@ my $compose_filename = ".msg.$$"; my (@to,@cc,@initial_cc,@bcclist,@xh, $initial_reply_to,$initial_subject,@files,$author,$sender,$compose,$time); -my $smtp_server; my $envelope_sender; # Example reply to: @@ -164,24 +172,26 @@ my ($quiet, $dry_run) = (0, 0); # Variables with corresponding config settings my ($thread, $chain_reply_to, $suppress_from, $signed_off_cc, $cc_cmd); +my ($smtp_server, $smtp_authuser, $smtp_authpass, $smtp_ssl); +my ($identity, $aliasfiletype, @alias_files); -my %config_settings = ( +my %config_bool_settings = ( "thread" => [\$thread, 1], "chainreplyto" => [\$chain_reply_to, 1], "suppressfrom" => [\$suppress_from, 0], "signedoffcc" => [\$signed_off_cc, 1], - "cccmd" => [\$cc_cmd, ""], + "smtpssl" => [\$smtp_ssl, 0], ); -foreach my $setting (keys %config_settings) { - my $config = $repo->config_bool("sendemail.$setting"); - ${$config_settings{$setting}->[0]} = (defined $config) ? $config : $config_settings{$setting}->[1]; -} - -@bcclist = $repo->config('sendemail.bcc'); -if (!@bcclist or !$bcclist[0]) { - @bcclist = (); -} +my %config_settings = ( + "smtpserver" => \$smtp_server, + "smtpuser" => \$smtp_authuser, + "smtppass" => \$smtp_authpass, + "cccmd" => \$cc_cmd, + "aliasfiletype" => \$aliasfiletype, + "bcc" => \@bcclist, + "aliasesfile" => \@alias_files, +); # Begin by accumulating all the variables (defined above), that we will end up # needing, first, from the command line: @@ -194,6 +204,10 @@ my $rc = GetOptions("sender|from=s" => \$sender, "bcc=s" => \@bcclist, "chain-reply-to!" => \$chain_reply_to, "smtp-server=s" => \$smtp_server, + "smtp-user=s" => \$smtp_authuser, + "smtp-pass=s" => \$smtp_authpass, + "smtp-ssl!" 
=> \$smtp_ssl, + "identity=s" => \$identity, "compose" => \$compose, "quiet" => \$quiet, "cc-cmd=s" => \$cc_cmd, @@ -208,6 +222,43 @@ unless ($rc) { usage(); } +# Now, let's fill any that aren't set in with defaults: + +sub read_config { + my ($prefix) = @_; + + foreach my $setting (keys %config_bool_settings) { + my $target = $config_bool_settings{$setting}->[0]; + $$target = $repo->config_bool("$prefix.$setting") unless (defined $$target); + } + + foreach my $setting (keys %config_settings) { + my $target = $config_settings{$setting}; + if (ref($target) eq "ARRAY") { + unless (@$target) { + my @values = $repo->config("$prefix.$setting"); + @$target = @values if (@values && defined $values[0]); + } + } + else { + $$target = $repo->config("$prefix.$setting") unless (defined $$target); + } + } +} + +# read configuration from [sendemail "$identity"], fall back on [sendemail] +$identity = $repo->config("sendemail.identity") unless (defined $identity); +read_config("sendemail.$identity") if (defined $identity); +read_config("sendemail"); + +# fall back on builtin bool defaults +foreach my $setting (values %config_bool_settings) { + ${$setting->[0]} = $setting->[1] unless (defined (${$setting->[0]})); +} + +my ($repoauthor) = $repo->ident_person('author'); +my ($repocommitter) = $repo->ident_person('committer'); + # Verify the user input foreach my $entry (@to) { @@ -222,14 +273,7 @@ foreach my $entry (@bcclist) { die "Comma in --bcclist entry: $entry'\n" unless $entry !~ m/,/; } -# Now, let's fill any that aren't set in with defaults: - -my ($repoauthor) = $repo->ident_person('author'); -my ($repocommitter) = $repo->ident_person('committer'); - my %aliases; -my @alias_files = $repo->config('sendemail.aliasesfile'); -my $aliasfiletype = $repo->config('sendemail.aliasfiletype'); my %parse_alias = ( # multiline formats can be supported in the future mutt => sub { my $fh = shift; while (<$fh>) { @@ -320,10 +364,7 @@ if ($thread && !defined $initial_reply_to && $prompting) 
{ $initial_reply_to =~ s/(^\s+|\s+$)//g; } -if (!$smtp_server) { - $smtp_server = $repo->config('sendemail.smtpserver'); -} -if (!$smtp_server) { +if (!defined $smtp_server) { foreach (qw( /usr/sbin/sendmail /usr/lib/sendmail )) { if (-x $_) { $smtp_server = $_; @@ -553,8 +594,16 @@ X-Mailer: git-send-email $gitversion print $sm "$header\n$message"; close $sm or die $?; } else { - require Net::SMTP; - $smtp ||= Net::SMTP->new( $smtp_server ); + if ($smtp_ssl) { + require Net::SMTP::SSL; + $smtp ||= Net::SMTP::SSL->new( $smtp_server, Port => 465 ); + } + else { + require Net::SMTP; + $smtp ||= Net::SMTP->new( $smtp_server ); + } + $smtp->auth( $smtp_authuser, $smtp_authpass ) + or die $smtp->message if (defined $smtp_authuser); $smtp->mail( $raw_from ) or die $smtp->message; $smtp->to( @recipients ) or die $smtp->message; $smtp->data or die $smtp->message; @@ -661,7 +710,7 @@ foreach my $t (@files) { } close F; - if ($cc_cmd ne "") { + if (defined $cc_cmd) { open(F, "$cc_cmd $t |") or die "(cc-cmd) Could not execute '$cc_cmd'"; while() { From 38944390220425cc3c4208dd31172397e7f18e8c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 2 Sep 2007 21:10:14 +0100 Subject: [PATCH 004/232] Teach "git remote" a mirror mode When using the "--mirror" option to "git remote add", the refs will not be stored in the refs/remotes/ namespace, but in the same location as on the remote side. This option probably only makes sense in a bare repository. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/git-remote.txt | 6 +++++- git-remote.perl | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/git-remote.txt b/Documentation/git-remote.txt index 61a6022ce8..94b9f17772 100644 --- a/Documentation/git-remote.txt +++ b/Documentation/git-remote.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git-remote' -'git-remote' add [-t ] [-m ] [-f] +'git-remote' add [-t ] [-m ] [-f] [--mirror] 'git-remote' show 'git-remote' prune 'git-remote' update [group] @@ -45,6 +45,10 @@ multiple branches without grabbing all branches. With `-m ` option, `$GIT_DIR/remotes//HEAD` is set up to point at remote's `` branch instead of whatever branch the `HEAD` at the remote repository actually points at. ++ +In mirror mode, enabled with `--mirror`, the refs will not be stored +in the 'refs/remotes/' namespace, but in 'refs/heads/'. This option +only makes sense in bare repositories. 'show':: diff --git a/git-remote.perl b/git-remote.perl index 01cf480221..f6f283ea4f 100755 --- a/git-remote.perl +++ b/git-remote.perl @@ -278,7 +278,9 @@ sub add_remote { for (@$track) { $git->command('config', '--add', "remote.$name.fetch", - "+refs/heads/$_:refs/remotes/$name/$_"); + $opts->{'mirror'} ? + "+refs/$_:refs/$_" : + "+refs/heads/$_:refs/remotes/$name/$_"); } if ($opts->{'fetch'}) { $git->command('fetch', $name); @@ -409,6 +411,10 @@ elsif ($ARGV[0] eq 'add') { shift @ARGV; next; } + if ($opt eq '--mirror') { + $opts{'mirror'} = 1; + next; + } add_usage(); } if (@ARGV != 3) { From 31f9ec129ef37e50b5cacf26a2ebcb5420fcdc5e Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Tue, 21 Aug 2007 11:53:02 +0200 Subject: [PATCH 005/232] git-p4: Always call 'p4 sync ...' before submitting to Perforce. 
Acked-by: Marius Storm-Olsen Acked-by: Thiago Macieira --- contrib/fast-import/git-p4 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index 55778c5775..3728cbf9aa 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -664,9 +664,8 @@ class P4Submit(Command): f.close(); os.chdir(self.clientPath) - response = raw_input("Do you want to sync %s with p4 sync? [y]es/[n]o " % self.clientPath) - if response == "y" or response == "yes": - system("p4 sync ...") + print "Syncronizing p4 checkout..." + system("p4 sync ...") if self.reset: self.firstTime = True From 14594f4b5747e51b051f647f6430089e6664e77d Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Wed, 22 Aug 2007 09:07:15 +0200 Subject: [PATCH 006/232] git-p4: After submission to p4 always synchronize from p4 again (into refs/remotes). Whether to rebase HEAD or not is still left as question to the end-user. Signed-off-by: Simon Hausmann --- contrib/fast-import/git-p4 | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index 3728cbf9aa..16e0a7bc81 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -704,10 +704,14 @@ class P4Submit(Command): else: print "All changes applied!" os.chdir(self.oldWorkingDirectory) - response = raw_input("Do you want to sync from Perforce now using git-p4 rebase? [y]es/[n]o ") + + sync = P4Sync() + sync.run([]) + + response = raw_input("Do you want to rebase current HEAD from Perforce now using git-p4 rebase? 
[y]es/[n]o ") if response == "y" or response == "yes": rebase = P4Rebase() - rebase.run([]) + rebase.rebase() os.remove(self.configFile) return True @@ -1439,6 +1443,9 @@ class P4Rebase(Command): sync = P4Sync() sync.run([]) + return self.rebase() + + def rebase(self): [upstream, settings] = findUpstreamBranchPoint() if len(upstream) == 0: die("Cannot find upstream branchpoint for rebase") From 4f6432d8cc6ebdcdc366cf67ab39e8125c449d80 Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Sun, 26 Aug 2007 15:56:36 +0200 Subject: [PATCH 007/232] git-p4: Cleanup; moved the code for getting a sorted list of p4 changes for a list of given depot paths into a standalone method. Signed-off-by: Simon Hausmann --- contrib/fast-import/git-p4 | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index 16e0a7bc81..e9feb7498c 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -281,6 +281,19 @@ def createOrUpdateBranchesFromOrigin(localRefPrefix = "refs/remotes/p4/", silent def originP4BranchesExist(): return gitBranchExists("origin") or gitBranchExists("origin/p4") or gitBranchExists("origin/p4/master") +def p4ChangesForPaths(depotPaths, changeRange): + assert depotPaths + output = read_pipe_lines("p4 changes " + ' '.join (["%s...%s" % (p, changeRange) + for p in depotPaths])) + + changes = [] + for line in output: + changeNum = line.split(" ")[1] + changes.append(int(changeNum)) + + changes.sort() + return changes + class Command: def __init__(self): self.usage = "usage: %prog [options]" @@ -1322,15 +1335,7 @@ class P4Sync(Command): if self.verbose: print "Getting p4 changes for %s...%s" % (', '.join(self.depotPaths), self.changeRange) - assert self.depotPaths - output = read_pipe_lines("p4 changes " + ' '.join (["%s...%s" % (p, self.changeRange) - for p in self.depotPaths])) - - for line in output: - changeNum = line.split(" ")[1] - changes.append(int(changeNum)) 
- - changes.sort() + changes = p4ChangesForPaths(self.depotPaths, self.changeRange) if len(self.maxChanges) > 0: changes = changes[:min(int(self.maxChanges), len(changes))] From e87f37ae42dad89b39620c234fc29c94529a4d07 Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Sun, 26 Aug 2007 16:00:52 +0200 Subject: [PATCH 008/232] git-p4: Cleanup; moved the code to import a list of p4 changes using fast-import into a separate member function of P4Sync. Signed-off-by: Simon Hausmann --- contrib/fast-import/git-p4 | 140 +++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 69 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index e9feb7498c..c00702c895 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -1118,6 +1118,76 @@ class P4Sync(Command): self.keepRepoPath = (d.has_key('options') and ('keepRepoPath' in d['options'])) + def importChanges(self, changes): + cnt = 1 + for change in changes: + description = p4Cmd("describe %s" % change) + self.updateOptionDict(description) + + if not self.silent: + sys.stdout.write("\rImporting revision %s (%s%%)" % (change, cnt * 100 / len(changes))) + sys.stdout.flush() + cnt = cnt + 1 + + try: + if self.detectBranches: + branches = self.splitFilesIntoBranches(description) + for branch in branches.keys(): + ## HACK --hwn + branchPrefix = self.depotPaths[0] + branch + "/" + + parent = "" + + filesForCommit = branches[branch] + + if self.verbose: + print "branch is %s" % branch + + self.updatedBranches.add(branch) + + if branch not in self.createdBranches: + self.createdBranches.add(branch) + parent = self.knownBranches[branch] + if parent == branch: + parent = "" + elif self.verbose: + print "parent determined through known branches: %s" % parent + + # main branch? 
use master + if branch == "main": + branch = "master" + else: + + ## FIXME + branch = self.projectName + branch + + if parent == "main": + parent = "master" + elif len(parent) > 0: + ## FIXME + parent = self.projectName + parent + + branch = self.refPrefix + branch + if len(parent) > 0: + parent = self.refPrefix + parent + + if self.verbose: + print "looking for initial parent for %s; current parent is %s" % (branch, parent) + + if len(parent) == 0 and branch in self.initialParents: + parent = self.initialParents[branch] + del self.initialParents[branch] + + self.commit(description, filesForCommit, branch, [branchPrefix], parent) + else: + files = self.extractFilesFromCommit(description) + self.commit(description, files, self.branch, self.depotPaths, + self.initialParent) + self.initialParent = "" + except IOError: + print self.gitError.read() + sys.exit(1) + def run(self, args): self.depotPaths = [] self.changeRange = "" @@ -1350,74 +1420,7 @@ class P4Sync(Command): self.updatedBranches = set() - cnt = 1 - for change in changes: - description = p4Cmd("describe %s" % change) - self.updateOptionDict(description) - - if not self.silent: - sys.stdout.write("\rImporting revision %s (%s%%)" % (change, cnt * 100 / len(changes))) - sys.stdout.flush() - cnt = cnt + 1 - - try: - if self.detectBranches: - branches = self.splitFilesIntoBranches(description) - for branch in branches.keys(): - ## HACK --hwn - branchPrefix = self.depotPaths[0] + branch + "/" - - parent = "" - - filesForCommit = branches[branch] - - if self.verbose: - print "branch is %s" % branch - - self.updatedBranches.add(branch) - - if branch not in self.createdBranches: - self.createdBranches.add(branch) - parent = self.knownBranches[branch] - if parent == branch: - parent = "" - elif self.verbose: - print "parent determined through known branches: %s" % parent - - # main branch? 
use master - if branch == "main": - branch = "master" - else: - - ## FIXME - branch = self.projectName + branch - - if parent == "main": - parent = "master" - elif len(parent) > 0: - ## FIXME - parent = self.projectName + parent - - branch = self.refPrefix + branch - if len(parent) > 0: - parent = self.refPrefix + parent - - if self.verbose: - print "looking for initial parent for %s; current parent is %s" % (branch, parent) - - if len(parent) == 0 and branch in self.initialParents: - parent = self.initialParents[branch] - del self.initialParents[branch] - - self.commit(description, filesForCommit, branch, [branchPrefix], parent) - else: - files = self.extractFilesFromCommit(description) - self.commit(description, files, self.branch, self.depotPaths, - self.initialParent) - self.initialParent = "" - except IOError: - print self.gitError.read() - sys.exit(1) + self.importChanges(changes) if not self.silent: print "" @@ -1427,7 +1430,6 @@ class P4Sync(Command): sys.stdout.write("%s " % b) sys.stdout.write("\n") - self.gitStream.close() if importProcess.wait() != 0: die("fast-import failed: %s" % self.gitError.read()) From 1c49fc197bd05a4c2ed602efcdbe277ef798813a Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Sun, 26 Aug 2007 16:04:34 +0200 Subject: [PATCH 009/232] git-p4: Cleanup; Turn self.revision into a function local variable (it's not used anywhere outside the function). 
Signed-off-by: Simon Hausmann --- contrib/fast-import/git-p4 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index c00702c895..d7c5becc0e 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -1285,7 +1285,7 @@ class P4Sync(Command): self.depotPaths = sorted(args) - self.revision = "" + revision = "" self.users = {} newPaths = [] @@ -1296,15 +1296,15 @@ class P4Sync(Command): if self.changeRange == "@all": self.changeRange = "" elif ',' not in self.changeRange: - self.revision = self.changeRange + revision = self.changeRange self.changeRange = "" p = p[:atIdx] elif p.find("#") != -1: hashIdx = p.index("#") - self.revision = p[hashIdx:] + revision = p[hashIdx:] p = p[:hashIdx] elif self.previousDepotPaths == []: - self.revision = "#head" + revision = "#head" p = re.sub ("\.\.\.$", "", p) if not p.endswith("/"): @@ -1345,19 +1345,19 @@ class P4Sync(Command): self.gitStream = importProcess.stdin self.gitError = importProcess.stderr - if self.revision: - print "Doing initial import of %s from revision %s into %s" % (' '.join(self.depotPaths), self.revision, self.branch) + if revision: + print "Doing initial import of %s from revision %s into %s" % (' '.join(self.depotPaths), revision, self.branch) details = { "user" : "git perforce import user", "time" : int(time.time()) } details["desc"] = ("Initial import of %s from the state at revision %s" - % (' '.join(self.depotPaths), self.revision)) - details["change"] = self.revision + % (' '.join(self.depotPaths), revision)) + details["change"] = revision newestRevision = 0 fileCnt = 0 for info in p4CmdList("files " + ' '.join(["%s...%s" - % (p, self.revision) + % (p, revision) for p in self.depotPaths])): if info['code'] == 'error': From c208a24310582d9cf337b66f41a0d7a9fe106bb4 Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Sun, 26 Aug 2007 16:07:18 +0200 Subject: [PATCH 010/232] git-p4: Cleanup; moved the 
code for the initial #head or revision import into a separate function, out of P4Sync.run. Signed-off-by: Simon Hausmann --- contrib/fast-import/git-p4 | 87 ++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index d7c5becc0e..2c67190ffc 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -1188,6 +1188,50 @@ class P4Sync(Command): print self.gitError.read() sys.exit(1) + def importHeadRevision(self, revision): + print "Doing initial import of %s from revision %s into %s" % (' '.join(self.depotPaths), revision, self.branch) + + details = { "user" : "git perforce import user", "time" : int(time.time()) } + details["desc"] = ("Initial import of %s from the state at revision %s" + % (' '.join(self.depotPaths), revision)) + details["change"] = revision + newestRevision = 0 + + fileCnt = 0 + for info in p4CmdList("files " + + ' '.join(["%s...%s" + % (p, revision) + for p in self.depotPaths])): + + if info['code'] == 'error': + sys.stderr.write("p4 returned an error: %s\n" + % info['data']) + sys.exit(1) + + + change = int(info["change"]) + if change > newestRevision: + newestRevision = change + + if info["action"] == "delete": + # don't increase the file cnt, otherwise details["depotFile123"] will have gaps! + #fileCnt = fileCnt + 1 + continue + + for prop in ["depotFile", "rev", "action", "type" ]: + details["%s%s" % (prop, fileCnt)] = info[prop] + + fileCnt = fileCnt + 1 + + details["change"] = newestRevision + self.updateOptionDict(details) + try: + self.commit(details, self.extractFilesFromCommit(details), self.branch, self.depotPaths) + except IOError: + print "IO error with git fast-import. Is your git version recent enough?" 
+ print self.gitError.read() + + def run(self, args): self.depotPaths = [] self.changeRange = "" @@ -1346,48 +1390,7 @@ class P4Sync(Command): self.gitError = importProcess.stderr if revision: - print "Doing initial import of %s from revision %s into %s" % (' '.join(self.depotPaths), revision, self.branch) - - details = { "user" : "git perforce import user", "time" : int(time.time()) } - details["desc"] = ("Initial import of %s from the state at revision %s" - % (' '.join(self.depotPaths), revision)) - details["change"] = revision - newestRevision = 0 - - fileCnt = 0 - for info in p4CmdList("files " - + ' '.join(["%s...%s" - % (p, revision) - for p in self.depotPaths])): - - if info['code'] == 'error': - sys.stderr.write("p4 returned an error: %s\n" - % info['data']) - sys.exit(1) - - - change = int(info["change"]) - if change > newestRevision: - newestRevision = change - - if info["action"] == "delete": - # don't increase the file cnt, otherwise details["depotFile123"] will have gaps! - #fileCnt = fileCnt + 1 - continue - - for prop in ["depotFile", "rev", "action", "type" ]: - details["%s%s" % (prop, fileCnt)] = info[prop] - - fileCnt = fileCnt + 1 - - details["change"] = newestRevision - self.updateOptionDict(details) - try: - self.commit(details, self.extractFilesFromCommit(details), self.branch, self.depotPaths) - except IOError: - print "IO error with git fast-import. Is your git version recent enough?" - print self.gitError.read() - + self.importHeadRevision(revision) else: changes = [] From 8134f69c21ff47283d8b3ea3cc5b306727cde256 Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Sun, 26 Aug 2007 16:44:55 +0200 Subject: [PATCH 011/232] git-p4: Cleanup; moved the (duplicated) code for turning a branch into a git ref (for example foo -> refs/remotes/p4//foo) into a separate method. 
Signed-off-by: Simon Hausmann --- contrib/fast-import/git-p4 | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index 2c67190ffc..406bec1a29 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -1118,6 +1118,15 @@ class P4Sync(Command): self.keepRepoPath = (d.has_key('options') and ('keepRepoPath' in d['options'])) + def gitRefForBranch(self, branch): + if branch == "main": + return self.refPrefix + "master" + + if len(branch) <= 0: + return branch + + return self.refPrefix + self.projectName + branch + def importChanges(self, changes): cnt = 1 for change in changes: @@ -1153,23 +1162,8 @@ class P4Sync(Command): elif self.verbose: print "parent determined through known branches: %s" % parent - # main branch? use master - if branch == "main": - branch = "master" - else: - - ## FIXME - branch = self.projectName + branch - - if parent == "main": - parent = "master" - elif len(parent) > 0: - ## FIXME - parent = self.projectName + parent - - branch = self.refPrefix + branch - if len(parent) > 0: - parent = self.refPrefix + parent + branch = self.gitRefForBranch(branch) + parent = self.gitRefForBranch(parent) if self.verbose: print "looking for initial parent for %s; current parent is %s" % (branch, parent) From 1ca3d71069620f1438d9f89165a3e69e8d47d302 Mon Sep 17 00:00:00 2001 From: Simon Hausmann Date: Sun, 26 Aug 2007 17:36:55 +0200 Subject: [PATCH 012/232] git-p4: Added support for automatically importing newly appearing perforce branches. If a change in a p4 "branch" appears that hasn't seen any previous commit and that has a known branch mapping we now try to import it properly. First we find the p4 change of the source branch that the new p4 branch is based on. Then we use git rev-list --bisect to locate the corresponding git commit to that change. 
Finally we import all changes in the new p4 branch up to the current change and resume with the regular import. Signed-off-by: Simon Hausmann --- contrib/fast-import/git-p4 | 76 +++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index 406bec1a29..adaaae6633 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -1127,6 +1127,67 @@ class P4Sync(Command): return self.refPrefix + self.projectName + branch + def gitCommitByP4Change(self, ref, change): + if self.verbose: + print "looking in ref " + ref + " for change %s using bisect..." % change + + earliestCommit = "" + latestCommit = parseRevision(ref) + + while True: + if self.verbose: + print "trying: earliest %s latest %s" % (earliestCommit, latestCommit) + next = read_pipe("git rev-list --bisect %s %s" % (latestCommit, earliestCommit)).strip() + if len(next) == 0: + if self.verbose: + print "argh" + return "" + log = extractLogMessageFromGitCommit(next) + settings = extractSettingsGitLog(log) + currentChange = int(settings['change']) + if self.verbose: + print "current change %s" % currentChange + + if currentChange == change: + if self.verbose: + print "found %s" % next + return next + + if currentChange < change: + earliestCommit = "^%s" % next + else: + latestCommit = "%s" % next + + return "" + + def importNewBranch(self, branch, maxChange): + # make fast-import flush all changes to disk and update the refs using the checkpoint + # command so that we can try to find the branch parent in the git history + self.gitStream.write("checkpoint\n\n"); + self.gitStream.flush(); + branchPrefix = self.depotPaths[0] + branch + "/" + range = "@1,%s" % maxChange + #print "prefix" + branchPrefix + changes = p4ChangesForPaths([branchPrefix], range) + if len(changes) <= 0: + return False + firstChange = changes[0] + #print "first change in branch: %s" % firstChange + sourceBranch = 
self.knownBranches[branch] + sourceDepotPath = self.depotPaths[0] + sourceBranch + sourceRef = self.gitRefForBranch(sourceBranch) + #print "source " + sourceBranch + + branchParentChange = int(p4Cmd("changes -m 1 %s...@1,%s" % (sourceDepotPath, firstChange))["change"]) + #print "branch parent: %s" % branchParentChange + gitParent = self.gitCommitByP4Change(sourceRef, branchParentChange) + if len(gitParent) > 0: + self.initialParents[self.gitRefForBranch(branch)] = gitParent + #print "parent git commit: %s" % gitParent + + self.importChanges(changes) + return True + def importChanges(self, changes): cnt = 1 for change in changes: @@ -1159,8 +1220,19 @@ class P4Sync(Command): parent = self.knownBranches[branch] if parent == branch: parent = "" - elif self.verbose: - print "parent determined through known branches: %s" % parent + else: + fullBranch = self.projectName + branch + if fullBranch not in self.p4BranchesInGit: + if not self.silent: + print("\n Importing new branch %s" % fullBranch); + if self.importNewBranch(branch, change - 1): + parent = "" + self.p4BranchesInGit.append(fullBranch) + if not self.silent: + print("\n Resuming with change %s" % change); + + if self.verbose: + print "parent determined through known branches: %s" % parent branch = self.gitRefForBranch(branch) parent = self.gitRefForBranch(parent) From fec60a261d9375d1f129313bb68036fbd2a5175c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 3 Sep 2007 17:51:43 +0100 Subject: [PATCH 013/232] verify-tag: also grok CR/LFs in the tag signature On some people's favorite platform, gpg outputs signatures with CR/LF line endings. So verify-tag has to play nice with them. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- builtin-verify-tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin-verify-tag.c b/builtin-verify-tag.c index dfcfcd0455..cc4c55d7ee 100644 --- a/builtin-verify-tag.c +++ b/builtin-verify-tag.c @@ -35,7 +35,7 @@ static int run_gpg_verify(const char *buf, unsigned long size, int verbose) /* find the length without signature */ len = 0; - while (len < size && prefixcmp(buf + len, PGP_SIGNATURE "\n")) { + while (len < size && prefixcmp(buf + len, PGP_SIGNATURE)) { eol = memchr(buf + len, '\n', size - len); len += eol ? eol - (buf + len) + 1 : size - len; } From 7b95089c0f59a25bb1c506b6962eb64412c585eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 3 Sep 2007 20:06:36 +0200 Subject: [PATCH 014/232] Export format_commit_message() Drop the parameter "msg" of format_commit_message() (as it can be inferred from the parameter "commit"), add a parameter "template" in order to avoid accessing the static variable user_format directly and export the result. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- commit.c | 9 +++++---- commit.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/commit.c b/commit.c index dc5a0643f3..99f65cee0e 100644 --- a/commit.c +++ b/commit.c @@ -787,8 +787,8 @@ static void fill_person(struct interp *table, const char *msg, int len) interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601)); } -static long format_commit_message(const struct commit *commit, - const char *msg, char **buf_p, unsigned long *space_p) +long format_commit_message(const struct commit *commit, const void *format, + char **buf_p, unsigned long *space_p) { struct interp table[] = { { "%H" }, /* commit hash */ @@ -843,6 +843,7 @@ static long format_commit_message(const struct commit *commit, char parents[1024]; int i; enum { HEADER, SUBJECT, BODY } state; + const char *msg = commit->buffer; if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table)) die("invalid interp table!"); @@ -924,7 +925,7 @@ static long format_commit_message(const struct commit *commit, char *buf = *buf_p; unsigned long space = *space_p; - space = interpolate(buf, space, user_format, + space = interpolate(buf, space, format, table, ARRAY_SIZE(table)); if (!space) break; @@ -1165,7 +1166,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, char *buf; if (fmt == CMIT_FMT_USERFORMAT) - return format_commit_message(commit, msg, buf_p, space_p); + return format_commit_message(commit, user_format, buf_p, space_p); encoding = (git_log_output_encoding ? 
git_log_output_encoding diff --git a/commit.h b/commit.h index 467872eeca..a8d76616d2 100644 --- a/commit.h +++ b/commit.h @@ -61,6 +61,7 @@ enum cmit_fmt { }; extern enum cmit_fmt get_commit_format(const char *arg); +extern long format_commit_message(const struct commit *commit, const void *template, char **buf_p, unsigned long *space_p); extern unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *, unsigned long len, char **buf_p, unsigned long *space_p, int abbrev, const char *subject, const char *after_subject, enum date_mode dmode); /** Removes the first commit from a list sorted by date, and adds all From 8460b2fcd45668d91567c36a22ea4f1b14ba133d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 3 Sep 2007 20:07:01 +0200 Subject: [PATCH 015/232] archive: specfile support (--pretty=format: in archive files) Add support for a new attribute, specfile. Files marked as being specfiles are expanded by git-archive when they are written to an archive. It has no effect on worktree files. The same placeholders as those for the option --pretty=format: of git-log et al. can be used. The attribute is useful for creating auto-updating specfiles. It is limited by the underlying function format_commit_message(), though. E.g. currently there is no placeholder for git-describe like output, and expanded specfiles can't contain NUL bytes. That can be fixed in format_commit_message() later and will then benefit users of git-log, too. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 14 +++++++++ archive-tar.c | 5 ++- archive-zip.c | 5 ++- archive.h | 3 ++ builtin-archive.c | 55 ++++++++++++++++++++++++++++++++- t/t5000-tar-tree.sh | 19 ++++++++++++ 6 files changed, 98 insertions(+), 3 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 46f9d591aa..47a621b733 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -421,6 +421,20 @@ frotz unspecified ---------------------------------------------------------------- +Creating an archive +~~~~~~~~~~~~~~~~~~~ + +`specfile` +^^^^^^^^^^ + +If the attribute `specfile` is set for a file then git will expand +several placeholders when adding this file to an archive. The +expansion depends on the availability of a commit ID, i.e. if +gitlink:git-archive[1] has been given a tree instead of a commit or a +tag then no replacement will be done. The placeholders are the same +as those for the option `--pretty=format:` of gitlink:git-log[1]. 
+ + GIT --- Part of the gitlink:git[7] suite diff --git a/archive-tar.c b/archive-tar.c index 66fe3e375b..c0d95dab0d 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -17,6 +17,7 @@ static unsigned long offset; static time_t archive_time; static int tar_umask = 002; static int verbose; +static const struct commit *commit; /* writes out the whole block, but only if it is full */ static void write_if_needed(void) @@ -285,7 +286,8 @@ static int write_tar_entry(const unsigned char *sha1, buffer = NULL; size = 0; } else { - buffer = convert_sha1_file(path.buf, sha1, mode, &type, &size); + buffer = sha1_file_to_archive(path.buf, sha1, mode, &type, + &size, commit); if (!buffer) die("cannot read %s", sha1_to_hex(sha1)); } @@ -304,6 +306,7 @@ int write_tar_archive(struct archiver_args *args) archive_time = args->time; verbose = args->verbose; + commit = args->commit; if (args->commit_sha1) write_global_extended_header(args->commit_sha1); diff --git a/archive-zip.c b/archive-zip.c index 444e1623db..f63dff3834 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -12,6 +12,7 @@ static int verbose; static int zip_date; static int zip_time; +static const struct commit *commit; static unsigned char *zip_dir; static unsigned int zip_dir_size; @@ -195,7 +196,8 @@ static int write_zip_entry(const unsigned char *sha1, if (S_ISREG(mode) && zlib_compression_level != 0) method = 8; result = 0; - buffer = convert_sha1_file(path, sha1, mode, &type, &size); + buffer = sha1_file_to_archive(path, sha1, mode, &type, &size, + commit); if (!buffer) die("cannot read %s", sha1_to_hex(sha1)); crc = crc32(crc, buffer, size); @@ -316,6 +318,7 @@ int write_zip_archive(struct archiver_args *args) zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE); zip_dir_size = ZIP_DIRECTORY_MIN_SIZE; verbose = args->verbose; + commit = args->commit; if (args->base && plen > 0 && args->base[plen - 1] == '/') { char *base = xstrdup(args->base); diff --git a/archive.h b/archive.h index 6838dc788f..5791e657e9 100644 --- 
a/archive.h +++ b/archive.h @@ -8,6 +8,7 @@ struct archiver_args { const char *base; struct tree *tree; const unsigned char *commit_sha1; + const struct commit *commit; time_t time; const char **pathspec; unsigned int verbose : 1; @@ -42,4 +43,6 @@ extern int write_tar_archive(struct archiver_args *); extern int write_zip_archive(struct archiver_args *); extern void *parse_extra_zip_args(int argc, const char **argv); +extern void *sha1_file_to_archive(const char *path, const unsigned char *sha1, unsigned int mode, enum object_type *type, unsigned long *size, const struct commit *commit); + #endif /* ARCHIVE_H */ diff --git a/builtin-archive.c b/builtin-archive.c index 187491bc17..faccce302a 100644 --- a/builtin-archive.c +++ b/builtin-archive.c @@ -10,6 +10,7 @@ #include "exec_cmd.h" #include "pkt-line.h" #include "sideband.h" +#include "attr.h" static const char archive_usage[] = \ "git-archive --format= [--prefix=/] [--verbose] [] [path...]"; @@ -80,6 +81,57 @@ static int run_remote_archiver(const char *remote, int argc, return !!rv; } +static void *convert_to_archive(const char *path, + const void *src, unsigned long *sizep, + const struct commit *commit) +{ + static struct git_attr *attr_specfile; + struct git_attr_check check[1]; + char *interpolated = NULL; + unsigned long allocated = 0; + + if (!commit) + return NULL; + + if (!attr_specfile) + attr_specfile = git_attr("specfile", 8); + + check[0].attr = attr_specfile; + if (git_checkattr(path, ARRAY_SIZE(check), check)) + return NULL; + if (!ATTR_TRUE(check[0].value)) + return NULL; + + *sizep = format_commit_message(commit, src, &interpolated, &allocated); + + return interpolated; +} + +void *sha1_file_to_archive(const char *path, const unsigned char *sha1, + unsigned int mode, enum object_type *type, + unsigned long *size, + const struct commit *commit) +{ + void *buffer, *converted; + + buffer = read_sha1_file(sha1, type, size); + if (buffer && S_ISREG(mode)) { + converted = convert_to_working_tree(path, 
buffer, size); + if (converted) { + free(buffer); + buffer = converted; + } + + converted = convert_to_archive(path, buffer, size, commit); + if (converted) { + free(buffer); + buffer = converted; + } + } + + return buffer; +} + static int init_archiver(const char *name, struct archiver *ar) { int rv = -1, i; @@ -109,7 +161,7 @@ void parse_treeish_arg(const char **argv, struct archiver_args *ar_args, const unsigned char *commit_sha1; time_t archive_time; struct tree *tree; - struct commit *commit; + const struct commit *commit; unsigned char sha1[20]; if (get_sha1(name, sha1)) @@ -142,6 +194,7 @@ void parse_treeish_arg(const char **argv, struct archiver_args *ar_args, } ar_args->tree = tree; ar_args->commit_sha1 = commit_sha1; + ar_args->commit = commit; ar_args->time = archive_time; } diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 1a4c53a031..3d5d01be78 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -28,12 +28,15 @@ commit id embedding: TAR=${TAR:-tar} UNZIP=${UNZIP:-unzip} +SPECFILEFORMAT=%H%n + test_expect_success \ 'populate workdir' \ 'mkdir a b c && echo simple textfile >a/a && mkdir a/bin && cp /bin/sh a/bin && + printf "%s" "$SPECFILEFORMAT" >a/specfile && ln -s a a/l1 && (p=long_path_to_a_file && cd a && for depth in 1 2 3 4 5; do mkdir $p && cd $p; done && @@ -104,6 +107,22 @@ test_expect_success \ 'validate file contents with prefix' \ 'diff -r a c/prefix/a' +test_expect_success \ + 'create an archive with a specfile' \ + 'echo specfile specfile >a/.gitattributes && + git archive HEAD >f.tar && + rm a/.gitattributes' + +test_expect_success \ + 'extract specfile' \ + '(mkdir f && cd f && $TAR xf -) f/a/specfile.expected && + diff f/a/specfile.expected f/a/specfile' + test_expect_success \ 'git archive --format=zip' \ 'git archive --format=zip HEAD >d.zip' From 89b4256cfbb8d878cc4cd1104ac4865ba1f2a58e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 3 Sep 2007 20:08:01 +0200 Subject: [PATCH 016/232] 
Remove unused function convert_sha1_file() convert_sha1_file() became unused by the previous patch -- remove it. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- cache.h | 1 - convert.c | 15 --------------- 2 files changed, 16 deletions(-) diff --git a/cache.h b/cache.h index 70abbd59bf..493983cbae 100644 --- a/cache.h +++ b/cache.h @@ -592,7 +592,6 @@ extern void trace_argv_printf(const char **argv, int count, const char *format, /* convert.c */ extern char *convert_to_git(const char *path, const char *src, unsigned long *sizep); extern char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep); -extern void *convert_sha1_file(const char *path, const unsigned char *sha1, unsigned int mode, enum object_type *type, unsigned long *size); /* diff.c */ extern int diff_auto_refresh_index; diff --git a/convert.c b/convert.c index 21908b1039..d77c8eb8b2 100644 --- a/convert.c +++ b/convert.c @@ -687,18 +687,3 @@ char *convert_to_working_tree(const char *path, const char *src, unsigned long * return buf; } - -void *convert_sha1_file(const char *path, const unsigned char *sha1, - unsigned int mode, enum object_type *type, - unsigned long *size) -{ - void *buffer = read_sha1_file(sha1, type, size); - if (S_ISREG(mode) && buffer) { - void *converted = convert_to_working_tree(path, buffer, size); - if (converted) { - free(buffer); - buffer = converted; - } - } - return buffer; -} From 3d9f037c60ceae1bd60ee3c861564812a89b05b1 Mon Sep 17 00:00:00 2001 From: Carlos Rica Date: Wed, 5 Sep 2007 03:38:24 +0200 Subject: [PATCH 017/232] Function for updating refs. A function intended to be called from builtins updating refs by locking them before write, especially those that came from scripts using "git update-ref". 
[jc: with minor fixups] Signed-off-by: Carlos Rica Signed-off-by: Junio C Hamano --- builtin-fetch--tool.c | 21 ++++++++------------- builtin-update-ref.c | 9 ++------- refs.c | 27 +++++++++++++++++++++++++++ refs.h | 6 ++++++ send-pack.c | 12 +++--------- 5 files changed, 46 insertions(+), 29 deletions(-) diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c index e2f8ede9ae..24c7e6f7db 100644 --- a/builtin-fetch--tool.c +++ b/builtin-fetch--tool.c @@ -31,24 +31,19 @@ static void show_new(enum object_type type, unsigned char *sha1_new) find_unique_abbrev(sha1_new, DEFAULT_ABBREV)); } -static int update_ref(const char *action, +static int update_ref_env(const char *action, const char *refname, unsigned char *sha1, unsigned char *oldval) { char msg[1024]; char *rla = getenv("GIT_REFLOG_ACTION"); - static struct ref_lock *lock; if (!rla) rla = "(reflog update)"; - snprintf(msg, sizeof(msg), "%s: %s", rla, action); - lock = lock_any_ref_for_update(refname, oldval, 0); - if (!lock) - return 1; - if (write_ref_sha1(lock, sha1, msg) < 0) - return 1; - return 0; + if (snprintf(msg, sizeof(msg), "%s: %s", rla, action) >= sizeof(msg)) + warning("reflog message too long: %.*s...", 50, msg); + return update_ref(msg, refname, sha1, oldval, 0, QUIET_ON_ERR); } static int update_local_ref(const char *name, @@ -88,7 +83,7 @@ static int update_local_ref(const char *name, fprintf(stderr, "* %s: storing %s\n", name, note); show_new(type, sha1_new); - return update_ref(msg, name, sha1_new, NULL); + return update_ref_env(msg, name, sha1_new, NULL); } if (!hashcmp(sha1_old, sha1_new)) { @@ -102,7 +97,7 @@ static int update_local_ref(const char *name, if (!strncmp(name, "refs/tags/", 10)) { fprintf(stderr, "* %s: updating with %s\n", name, note); show_new(type, sha1_new); - return update_ref("updating tag", name, sha1_new, NULL); + return update_ref_env("updating tag", name, sha1_new, NULL); } current = lookup_commit_reference(sha1_old); @@ -117,7 +112,7 @@ static int 
update_local_ref(const char *name, fprintf(stderr, "* %s: fast forward to %s\n", name, note); fprintf(stderr, " old..new: %s..%s\n", oldh, newh); - return update_ref("fast forward", name, sha1_new, sha1_old); + return update_ref_env("fast forward", name, sha1_new, sha1_old); } if (!force) { fprintf(stderr, @@ -131,7 +126,7 @@ static int update_local_ref(const char *name, "* %s: forcing update to non-fast forward %s\n", name, note); fprintf(stderr, " old...new: %s...%s\n", oldh, newh); - return update_ref("forced-update", name, sha1_new, sha1_old); + return update_ref_env("forced-update", name, sha1_new, sha1_old); } static int append_fetch_head(FILE *fp, diff --git a/builtin-update-ref.c b/builtin-update-ref.c index 8339cf19e2..fe1f74c9f3 100644 --- a/builtin-update-ref.c +++ b/builtin-update-ref.c @@ -8,7 +8,6 @@ static const char git_update_ref_usage[] = int cmd_update_ref(int argc, const char **argv, const char *prefix) { const char *refname=NULL, *value=NULL, *oldval=NULL, *msg=NULL; - struct ref_lock *lock; unsigned char sha1[20], oldsha1[20]; int i, delete, ref_flags; @@ -62,10 +61,6 @@ int cmd_update_ref(int argc, const char **argv, const char *prefix) if (oldval && *oldval && get_sha1(oldval, oldsha1)) die("%s: not a valid old SHA1", oldval); - lock = lock_any_ref_for_update(refname, oldval ? oldsha1 : NULL, ref_flags); - if (!lock) - die("%s: cannot lock the ref", refname); - if (write_ref_sha1(lock, sha1, msg) < 0) - die("%s: cannot update the ref", refname); - return 0; + return update_ref(msg, refname, sha1, oldval ? 
oldsha1 : NULL, + ref_flags, DIE_ON_ERR); } diff --git a/refs.c b/refs.c index 09a2c87fc2..7fb3350789 100644 --- a/refs.c +++ b/refs.c @@ -1455,3 +1455,30 @@ int for_each_reflog(each_ref_fn fn, void *cb_data) { return do_for_each_reflog("", fn, cb_data); } + +int update_ref(const char *action, const char *refname, + const unsigned char *sha1, const unsigned char *oldval, + int flags, enum action_on_err onerr) +{ + static struct ref_lock *lock; + lock = lock_any_ref_for_update(refname, oldval, flags); + if (!lock) { + const char *str = "Cannot lock the ref '%s'."; + switch (onerr) { + case MSG_ON_ERR: error(str, refname); break; + case DIE_ON_ERR: die(str, refname); break; + case QUIET_ON_ERR: break; + } + return 1; + } + if (write_ref_sha1(lock, sha1, action) < 0) { + const char *str = "Cannot update the ref '%s'."; + switch (onerr) { + case MSG_ON_ERR: error(str, refname); break; + case DIE_ON_ERR: die(str, refname); break; + case QUIET_ON_ERR: break; + } + return 1; + } + return 0; +} diff --git a/refs.h b/refs.h index f234eb76ba..6eb98a4caf 100644 --- a/refs.h +++ b/refs.h @@ -64,4 +64,10 @@ extern int rename_ref(const char *oldref, const char *newref, const char *logmsg /** resolve ref in nested "gitlink" repository */ extern int resolve_gitlink_ref(const char *name, const char *refname, unsigned char *result); +/** lock a ref and then write its file */ +enum action_on_err { MSG_ON_ERR, DIE_ON_ERR, QUIET_ON_ERR }; +int update_ref(const char *action, const char *refname, + const unsigned char *sha1, const unsigned char *oldval, + int flags, enum action_on_err onerr); + #endif /* REFS_H */ diff --git a/send-pack.c b/send-pack.c index 9fc8a812f4..f74e66a8ba 100644 --- a/send-pack.c +++ b/send-pack.c @@ -307,20 +307,14 @@ static int send_pack(int in, int out, struct remote *remote, int nr_refspec, cha rs.src = ref->name; rs.dst = NULL; if (!remote_find_tracking(remote, &rs)) { - struct ref_lock *lock; fprintf(stderr, " Also local %s\n", rs.dst); if 
(will_delete_ref) { if (delete_ref(rs.dst, NULL)) { error("Failed to delete"); } - } else { - lock = lock_any_ref_for_update(rs.dst, NULL, 0); - if (!lock) - error("Failed to lock"); - else - write_ref_sha1(lock, ref->new_sha1, - "update by push"); - } + } else + update_ref("update by push", rs.dst, + ref->new_sha1, NULL, 0, 0); free(rs.dst); } } From 05b4df31537a653eaa30d2c6f53e05d7a12d1bc8 Mon Sep 17 00:00:00 2001 From: Lars Hjemli Date: Wed, 5 Sep 2007 11:35:29 +0200 Subject: [PATCH 018/232] git-svn: add support for --first-parent When git-svn uses git-log to find embedded 'git-svn-id'-lines in commit messages, it can get confused when local history contains merges with other git-svn branches. But if --first-parent is supplied to git-log, working_head_info() will only see 'branch-local' commits and thus the first commit containing a 'git-svn-id' line should refer to the correct subversion branch. Signed-off-by: Lars Hjemli Acked-by: Eric Wong Signed-off-by: Junio C Hamano --- Documentation/git-svn.txt | 10 ++++++++++ git-svn.perl | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index be2e34eb8f..42d7b82a37 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -317,6 +317,16 @@ This is only used with the 'dcommit' command. Print out the series of git arguments that would show which diffs would be committed to SVN. +--first-parent:: + +This is only used with the 'dcommit', 'rebase', 'log', 'find-rev' and +'show-ignore' commands. + +These commands tries to detect the upstream subversion branch by means of +the embedded 'git-svn-id' line in commit messages. When --first-parent is +specified, git-svn only follows the first parent of each commit, effectively +ignoring commits brought into the current branch through merge-operations. 
+ -- ADVANCED OPTIONS diff --git a/git-svn.perl b/git-svn.perl index d3c8cd0b8e..d21eb7fa9e 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -59,7 +59,7 @@ my ($_stdin, $_help, $_edit, $_template, $_shared, $_version, $_fetch_all, $_no_rebase, $_merge, $_strategy, $_dry_run, $_local, - $_prefix, $_no_checkout, $_verbose); + $_prefix, $_no_checkout, $_verbose, $_first_parent); $Git::SVN::_follow_parent = 1; my %remote_opts = ( 'username=s' => \$Git::SVN::Prompt::_username, 'config-dir=s' => \$Git::SVN::Ra::config_dir, @@ -119,12 +119,15 @@ my %cmd = ( 'dry-run|n' => \$_dry_run, 'fetch-all|all' => \$_fetch_all, 'no-rebase' => \$_no_rebase, + 'first-parent' => \$_first_parent, %cmt_opts, %fc_opts } ], 'set-tree' => [ \&cmd_set_tree, "Set an SVN repository to a git tree-ish", { 'stdin|' => \$_stdin, %cmt_opts, %fc_opts, } ], 'show-ignore' => [ \&cmd_show_ignore, "Show svn:ignore listings", - { 'revision|r=i' => \$_revision } ], + { 'revision|r=i' => \$_revision, + 'first-parent' => \$_first_parent + } ], 'multi-fetch' => [ \&cmd_multi_fetch, "Deprecated alias for $0 fetch --all", { 'revision|r=s' => \$_revision, %fc_opts } ], @@ -145,15 +148,19 @@ my %cmd = ( 'authors-file|A=s' => \$_authors, 'color' => \$Git::SVN::Log::color, 'pager=s' => \$Git::SVN::Log::pager, + 'first-parent' => \$_first_parent } ], 'find-rev' => [ \&cmd_find_rev, "Translate between SVN revision numbers and tree-ish", - { } ], + { + 'first-parent' => \$_first_parent + } ], 'rebase' => [ \&cmd_rebase, "Fetch and rebase your working directory", { 'merge|m|M' => \$_merge, 'verbose|v' => \$_verbose, 'strategy|s=s' => \$_strategy, 'local|l' => \$_local, 'fetch-all|all' => \$_fetch_all, + 'first-parent' => \$_first_parent, %fc_opts } ], 'commit-diff' => [ \&cmd_commit_diff, 'Commit a diff between two trees', @@ -811,7 +818,9 @@ sub cmt_metadata { sub working_head_info { my ($head, $refs) = @_; - my ($fh, $ctx) = command_output_pipe('log', '--no-color', $head); + my @args = ('log', '--no-color'); + push 
@args, '--first-parent' if $_first_parent; + my ($fh, $ctx) = command_output_pipe(@args, $head); my $hash; my %max; while (<$fh>) { From 75d3985319f2eb40008e9fe6454880ecc620a0de Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 6 Sep 2007 02:13:08 -0400 Subject: [PATCH 019/232] straighten the list of objects to deltify Not all objects are subject to deltification, so avoid carrying those along, and provide the real count to progress display. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 77 +++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index e64e3a03a0..b1c64bec3e 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1313,12 +1313,6 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, if (trg_entry->type != src_entry->type) return -1; - /* We do not compute delta to *create* objects we are not - * going to pack. - */ - if (trg_entry->preferred_base) - return -1; - /* * We do not bother to try a delta that we discarded * on an earlier try, but only when reusing delta data. 
@@ -1443,43 +1437,24 @@ static void free_unpacked(struct unpacked *n) n->depth = 0; } -static void find_deltas(struct object_entry **list, int window, int depth) +static void find_deltas(struct object_entry **list, unsigned list_size, + unsigned nr_deltas, int window, int depth) { - uint32_t i = nr_objects, idx = 0, count = 0, processed = 0; + uint32_t i = list_size, idx = 0, count = 0, processed = 0; unsigned int array_size = window * sizeof(struct unpacked); struct unpacked *array; int max_depth; - if (!nr_objects) - return; array = xmalloc(array_size); memset(array, 0, array_size); if (progress) - start_progress(&progress_state, "Deltifying %u objects...", "", nr_result); + start_progress(&progress_state, "Deltifying %u objects...", "", nr_deltas); do { struct object_entry *entry = list[--i]; struct unpacked *n = array + idx; int j, best_base = -1; - if (!entry->preferred_base) - processed++; - - if (progress) - display_progress(&progress_state, processed); - - if (entry->delta) - /* This happens if we decided to reuse existing - * delta from a pack. "!no_reuse_delta &&" is implied. - */ - continue; - - if (entry->size < 50) - continue; - - if (entry->no_try_delta) - continue; - free_unpacked(n); n->entry = entry; @@ -1491,6 +1466,15 @@ static void find_deltas(struct object_entry **list, int window, int depth) count--; } + /* We do not compute delta to *create* objects we are not + * going to pack. 
+ */ + if (entry->preferred_base) + goto next; + + if (progress) + display_progress(&progress_state, ++processed); + /* * If the current object is at pack edge, take the depth the * objects that depend on the current object into account @@ -1565,18 +1549,41 @@ static void find_deltas(struct object_entry **list, int window, int depth) static void prepare_pack(int window, int depth) { struct object_entry **delta_list; - uint32_t i; + uint32_t i, n, nr_deltas; get_object_details(); - if (!window || !depth) + if (!nr_objects || !window || !depth) return; delta_list = xmalloc(nr_objects * sizeof(*delta_list)); - for (i = 0; i < nr_objects; i++) - delta_list[i] = objects + i; - qsort(delta_list, nr_objects, sizeof(*delta_list), type_size_sort); - find_deltas(delta_list, window+1, depth); + nr_deltas = n = 0; + + for (i = 0; i < nr_objects; i++) { + struct object_entry *entry = objects + i; + + if (entry->delta) + /* This happens if we decided to reuse existing + * delta from a pack. "!no_reuse_delta &&" is implied. + */ + continue; + + if (entry->size < 50) + continue; + + if (entry->no_try_delta) + continue; + + if (!entry->preferred_base) + nr_deltas++; + + delta_list[n++] = entry; + } + + if (nr_deltas) { + qsort(delta_list, n, sizeof(*delta_list), type_size_sort); + find_deltas(delta_list, n, nr_deltas, window+1, depth); + } free(delta_list); } From ef0316fcd996c1679fef37ae2a53bef403c77356 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 6 Sep 2007 02:13:09 -0400 Subject: [PATCH 020/232] localize window memory usage accounting This is to help threadification of delta searching. 
Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index b1c64bec3e..b8495bf924 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -78,7 +78,6 @@ static unsigned long delta_cache_size = 0; static unsigned long max_delta_cache_size = 0; static unsigned long cache_max_small_delta_size = 1000; -static unsigned long window_memory_usage = 0; static unsigned long window_memory_limit = 0; /* @@ -1300,7 +1299,7 @@ static int delta_cacheable(unsigned long src_size, unsigned long trg_size, * one. */ static int try_delta(struct unpacked *trg, struct unpacked *src, - unsigned max_depth) + unsigned max_depth, unsigned long *mem_usage) { struct object_entry *trg_entry = trg->entry; struct object_entry *src_entry = src->entry; @@ -1356,7 +1355,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, if (sz != trg_size) die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(trg_entry->idx.sha1), sz, trg_size); - window_memory_usage += sz; + *mem_usage += sz; } if (!src->data) { src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz); @@ -1366,7 +1365,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, if (sz != src_size) die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(src_entry->idx.sha1), sz, src_size); - window_memory_usage += sz; + *mem_usage += sz; } if (!src->index) { src->index = create_delta_index(src->data, src_size); @@ -1376,7 +1375,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, warning("suboptimal pack - out of memory"); return 0; } - window_memory_usage += sizeof_delta_index(src->index); + *mem_usage += sizeof_delta_index(src->index); } delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size); @@ -1423,18 +1422,19 @@ static unsigned int check_delta_limit(struct 
object_entry *me, unsigned int n) return m; } -static void free_unpacked(struct unpacked *n) +static unsigned long free_unpacked(struct unpacked *n) { - window_memory_usage -= sizeof_delta_index(n->index); + unsigned long freed_mem = sizeof_delta_index(n->index); free_delta_index(n->index); n->index = NULL; if (n->data) { + freed_mem += n->entry->size; free(n->data); n->data = NULL; - window_memory_usage -= n->entry->size; } n->entry = NULL; n->depth = 0; + return freed_mem; } static void find_deltas(struct object_entry **list, unsigned list_size, @@ -1443,7 +1443,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size, uint32_t i = list_size, idx = 0, count = 0, processed = 0; unsigned int array_size = window * sizeof(struct unpacked); struct unpacked *array; - int max_depth; + unsigned long mem_usage = 0; array = xmalloc(array_size); memset(array, 0, array_size); @@ -1453,16 +1453,16 @@ static void find_deltas(struct object_entry **list, unsigned list_size, do { struct object_entry *entry = list[--i]; struct unpacked *n = array + idx; - int j, best_base = -1; + int j, max_depth, best_base = -1; - free_unpacked(n); + mem_usage -= free_unpacked(n); n->entry = entry; while (window_memory_limit && - window_memory_usage > window_memory_limit && + mem_usage > window_memory_limit && count > 1) { uint32_t tail = (idx + window - count) % window; - free_unpacked(array + tail); + mem_usage -= free_unpacked(array + tail); count--; } @@ -1497,7 +1497,7 @@ static void find_deltas(struct object_entry **list, unsigned list_size, m = array + other_idx; if (!m->entry) break; - ret = try_delta(n, m, max_depth); + ret = try_delta(n, m, max_depth, &mem_usage); if (ret < 0) break; else if (ret > 0) From e334977dfad575cd8ac1a9e5f8e73fe4d018cec0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 6 Sep 2007 02:13:10 -0400 Subject: [PATCH 021/232] rearrange delta search progress reporting This is to help threadification of the delta search code, with a bonus 
consistency check. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index b8495bf924..9d565925e7 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1438,17 +1438,15 @@ static unsigned long free_unpacked(struct unpacked *n) } static void find_deltas(struct object_entry **list, unsigned list_size, - unsigned nr_deltas, int window, int depth) + int window, int depth, unsigned *processed) { - uint32_t i = list_size, idx = 0, count = 0, processed = 0; + uint32_t i = list_size, idx = 0, count = 0; unsigned int array_size = window * sizeof(struct unpacked); struct unpacked *array; unsigned long mem_usage = 0; array = xmalloc(array_size); memset(array, 0, array_size); - if (progress) - start_progress(&progress_state, "Deltifying %u objects...", "", nr_deltas); do { struct object_entry *entry = list[--i]; @@ -1472,8 +1470,9 @@ static void find_deltas(struct object_entry **list, unsigned list_size, if (entry->preferred_base) goto next; + (*processed)++; if (progress) - display_progress(&progress_state, ++processed); + display_progress(&progress_state, *processed); /* * If the current object is at pack edge, take the depth the @@ -1536,9 +1535,6 @@ static void find_deltas(struct object_entry **list, unsigned list_size, idx = 0; } while (i > 0); - if (progress) - stop_progress(&progress_state); - for (i = 0; i < window; ++i) { free_delta_index(array[i].index); free(array[i].data); @@ -1581,8 +1577,17 @@ static void prepare_pack(int window, int depth) } if (nr_deltas) { + unsigned nr_done = 0; + if (progress) + start_progress(&progress_state, + "Deltifying %u objects...", "", + nr_deltas); qsort(delta_list, n, sizeof(*delta_list), type_size_sort); - find_deltas(delta_list, n, nr_deltas, window+1, depth); + find_deltas(delta_list, n, window+1, depth, &nr_done); + if (progress) + 
stop_progress(&progress_state); + if (nr_done != nr_deltas) + die("inconsistency with delta count"); } free(delta_list); } From 8ecce684a38f7cea084abe9eef80bda04d7c77be Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 6 Sep 2007 02:13:11 -0400 Subject: [PATCH 022/232] basic threaded delta search this is still rough, hence it is disabled by default. You need to compile with "make THREADED_DELTA_SEARCH=1 ..." at the moment. Threading is done on different portions of the object list to be deltified. This is currently done by splitting the list into n parts and then a thread is spawned for each of them. A better method would consist of splitting the list into more smaller parts and have the n threads pick the next part available. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- Makefile | 8 ++++ builtin-pack-objects.c | 83 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51af531c9a..a92fb31695 100644 --- a/Makefile +++ b/Makefile @@ -122,6 +122,9 @@ all:: # If not set it defaults to the bare 'wish'. If it is set to the empty # string then NO_TCLTK will be forced (this is used by configure script). # +# Define THREADED_DELTA_SEARCH if you have pthreads and wish to exploit +# parallel delta searching when packing objects.
+# GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE @$(SHELL_PATH) ./GIT-VERSION-GEN @@ -662,6 +665,11 @@ ifdef NO_HSTRERROR COMPAT_OBJS += compat/hstrerror.o endif +ifdef THREADED_DELTA_SEARCH + BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH + EXTLIBS += -lpthread +endif + ifeq ($(TCLTK_PATH),) NO_TCLTK=NoThanks endif diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 9d565925e7..1bcee23ca1 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -15,6 +15,10 @@ #include "list-objects.h" #include "progress.h" +#ifdef THREADED_DELTA_SEARCH +#include +#endif + static const char pack_usage[] = "\ git-pack-objects [{ -q | --progress | --all-progress }] \n\ [--max-pack-size=N] [--local] [--incremental] \n\ @@ -1290,6 +1294,25 @@ static int delta_cacheable(unsigned long src_size, unsigned long trg_size, return 0; } +#ifdef THREADED_DELTA_SEARCH + +static pthread_mutex_t read_mutex = PTHREAD_MUTEX_INITIALIZER; +#define read_lock() pthread_mutex_lock(&read_mutex) +#define read_unlock() pthread_mutex_unlock(&read_mutex) + +static pthread_mutex_t progress_mutex = PTHREAD_MUTEX_INITIALIZER; +#define progress_lock() pthread_mutex_lock(&progress_mutex) +#define progress_unlock() pthread_mutex_unlock(&progress_mutex) + +#else + +#define read_lock() 0 +#define read_unlock() 0 +#define progress_lock() 0 +#define progress_unlock() 0 + +#endif + /* * We search for deltas _backwards_ in a list sorted by type and * by size, so that we see progressively smaller and smaller files. 
@@ -1348,7 +1371,9 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, /* Load data if not already done */ if (!trg->data) { + read_lock(); trg->data = read_sha1_file(trg_entry->idx.sha1, &type, &sz); + read_unlock(); if (!trg->data) die("object %s cannot be read", sha1_to_hex(trg_entry->idx.sha1)); @@ -1358,7 +1383,9 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, *mem_usage += sz; } if (!src->data) { + read_lock(); src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz); + read_unlock(); if (!src->data) die("object %s cannot be read", sha1_to_hex(src_entry->idx.sha1)); @@ -1470,9 +1497,11 @@ static void find_deltas(struct object_entry **list, unsigned list_size, if (entry->preferred_base) goto next; + progress_lock(); (*processed)++; if (progress) display_progress(&progress_state, *processed); + progress_unlock(); /* * If the current object is at pack edge, take the depth the @@ -1542,6 +1571,58 @@ static void find_deltas(struct object_entry **list, unsigned list_size, free(array); } +#ifdef THREADED_DELTA_SEARCH + +struct thread_params { + pthread_t thread; + struct object_entry **list; + unsigned list_size; + int window; + int depth; + unsigned *processed; +}; + +static void *threaded_find_deltas(void *arg) +{ + struct thread_params *p = arg; + if (p->list_size) + find_deltas(p->list, p->list_size, + p->window, p->depth, p->processed); + return NULL; +} + +#define NR_THREADS 8 + +static void ll_find_deltas(struct object_entry **list, unsigned list_size, + int window, int depth, unsigned *processed) +{ + struct thread_params p[NR_THREADS]; + int i, ret; + + for (i = 0; i < NR_THREADS; i++) { + unsigned sublist_size = list_size / (NR_THREADS - i); + p[i].list = list; + p[i].list_size = sublist_size; + p[i].window = window; + p[i].depth = depth; + p[i].processed = processed; + ret = pthread_create(&p[i].thread, NULL, + threaded_find_deltas, &p[i]); + if (ret) + die("unable to create thread: %s", strerror(ret)); + list += 
sublist_size; + list_size -= sublist_size; + } + + for (i = 0; i < NR_THREADS; i++) { + pthread_join(p[i].thread, NULL); + } +} + +#else +#define ll_find_deltas find_deltas +#endif + static void prepare_pack(int window, int depth) { struct object_entry **delta_list; @@ -1583,7 +1664,7 @@ static void prepare_pack(int window, int depth) "Deltifying %u objects...", "", nr_deltas); qsort(delta_list, n, sizeof(*delta_list), type_size_sort); - find_deltas(delta_list, n, window+1, depth, &nr_done); + ll_find_deltas(delta_list, n, window+1, depth, &nr_done); if (progress) stop_progress(&progress_state); if (nr_done != nr_deltas) From 2c3c4399477533329579ca6b84824ef0b125914f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 5 Sep 2007 13:01:37 -0700 Subject: [PATCH 023/232] Implement git gc --auto This implements a new option "git gc --auto". When gc.auto is set to a positive value, and the object database has accumulated roughly that many loose objects, this runs a lightweight version of "git gc". The primary difference from the full "git gc" is that it does not pass "-a" option to "git repack", which means we do not try to repack _everything_, but only repack incrementally. We still do "git prune-packed". The default threshold is arbitrarily set by yours truly to: - not trigger it for fully unpacked git v0.99 history; - do trigger it for fully unpacked git v1.0.0 history; - not trigger it for incremental update to git v1.0.0 starting from fully packed git v0.99 history. This patch does not add invocation of the "auto repacking". It is left to key Porcelain commands that could produce tons of loose objects to add a call to "git gc --auto" after they are done with their work.
Signed-off-by: Junio C Hamano --- builtin-gc.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/builtin-gc.c b/builtin-gc.c index 9397482610..093b3dda9f 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -20,6 +20,7 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]"; static int pack_refs = 1; static int aggressive_window = -1; +static int gc_auto_threshold = 6700; #define MAX_ADD 10 static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL}; @@ -28,6 +29,8 @@ static const char *argv_repack[MAX_ADD] = {"repack", "-a", "-d", "-l", NULL}; static const char *argv_prune[] = {"prune", NULL}; static const char *argv_rerere[] = {"rerere", "gc", NULL}; +static const char *argv_repack_auto[] = {"repack", "-d", "-l", NULL}; + static int gc_config(const char *var, const char *value) { if (!strcmp(var, "gc.packrefs")) { @@ -41,6 +44,10 @@ static int gc_config(const char *var, const char *value) aggressive_window = git_config_int(var, value); return 0; } + if (!strcmp(var, "gc.auto")) { + gc_auto_threshold = git_config_int(var, value); + return 0; + } return git_default_config(var, value); } @@ -57,10 +64,49 @@ static void append_option(const char **cmd, const char *opt, int max_length) cmd[i] = NULL; } +static int need_to_gc(void) +{ + /* + * Quickly check if a "gc" is needed, by estimating how + * many loose objects there are. Because SHA-1 is evenly + * distributed, we can check only one and get a reasonable + * estimate. 
+ */ + char path[PATH_MAX]; + const char *objdir = get_object_directory(); + DIR *dir; + struct dirent *ent; + int auto_threshold; + int num_loose = 0; + int needed = 0; + + if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) { + warning("insanely long object directory %.*s", 50, objdir); + return 0; + } + dir = opendir(path); + if (!dir) + return 0; + + auto_threshold = (gc_auto_threshold + 255) / 256; + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != 38 || + ent->d_name[38] != '\0') + continue; + if (++num_loose > auto_threshold) { + needed = 1; + break; + } + } + closedir(dir); + return needed; +} + int cmd_gc(int argc, const char **argv, const char *prefix) { int i; int prune = 0; + int auto_gc = 0; char buf[80]; git_config(gc_config); @@ -82,12 +128,28 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } continue; } - /* perhaps other parameters later... */ + if (!strcmp(arg, "--auto")) { + if (gc_auto_threshold <= 0) + return 0; + auto_gc = 1; + continue; + } break; } if (i != argc) usage(builtin_gc_usage); + if (auto_gc) { + /* + * Auto-gc should be least intrusive as possible. + */ + prune = 0; + for (i = 0; i < ARRAY_SIZE(argv_repack_auto); i++) + argv_repack[i] = argv_repack_auto[i]; + if (!need_to_gc()) + return 0; + } + if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD)) return error(FAILED_RUN, argv_pack_refs[0]); From d4bb43ee273528064192848165f93f8fc3512be1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 5 Sep 2007 14:59:59 -0700 Subject: [PATCH 024/232] Invoke "git gc --auto" from commit, merge, am and rebase. The point of auto gc is to pack new objects created in loose format, so a good rule of thumb is where we do update-ref after creating a new commit. 
Signed-off-by: Junio C Hamano --- git-am.sh | 2 ++ git-commit.sh | 1 + git-merge.sh | 1 + git-rebase--interactive.sh | 2 ++ 4 files changed, 6 insertions(+) diff --git a/git-am.sh b/git-am.sh index 6809aa07f6..4db4701c9e 100755 --- a/git-am.sh +++ b/git-am.sh @@ -466,6 +466,8 @@ do "$GIT_DIR"/hooks/post-applypatch fi + git gc --auto + go_next done diff --git a/git-commit.sh b/git-commit.sh index 1d04f1ff31..d22d35eadc 100755 --- a/git-commit.sh +++ b/git-commit.sh @@ -652,6 +652,7 @@ git rerere if test "$ret" = 0 then + git gc --auto if test -x "$GIT_DIR"/hooks/post-commit then "$GIT_DIR"/hooks/post-commit diff --git a/git-merge.sh b/git-merge.sh index 3a01db0d75..697bec24fa 100755 --- a/git-merge.sh +++ b/git-merge.sh @@ -82,6 +82,7 @@ finish () { ;; *) git update-ref -m "$rlogm" HEAD "$1" "$head" || exit 1 + git gc --auto ;; esac ;; diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh index abc2b1c3e0..8258b7adf9 100755 --- a/git-rebase--interactive.sh +++ b/git-rebase--interactive.sh @@ -307,6 +307,8 @@ do_next () { rm -rf "$DOTEST" && warn "Successfully rebased and updated $HEADNAME." + git gc --auto + exit } From b21b9f1de313acb5550c070911ae58c735cdb451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 7 Sep 2007 00:32:54 +0200 Subject: [PATCH 025/232] add memmem() memmem() is a nice GNU extension for searching a length limited string in another one. This compat version is based on the version found in glibc 2.2 (GPL 2); I only removed the optimization of checking the first char by hand, and generally tried to keep the code simple. We can add it back if memcmp shows up high in a profile, but for now I prefer to keep it (almost trivially) simple. Since I don't really know which platforms beside those with a glibc have their own memmem(), I used a heuristic: if NO_STRCASESTR is set, then NO_MEMMEM is set, too. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Makefile | 11 +++++++++++ compat/memmem.c | 29 +++++++++++++++++++++++++++++ git-compat-util.h | 6 ++++++ 3 files changed, 46 insertions(+) create mode 100644 compat/memmem.c diff --git a/Makefile b/Makefile index 51af531c9a..bae073fe37 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,8 @@ all:: # # Define NO_STRCASESTR if you don't have strcasestr. # +# Define NO_MEMMEM if you don't have memmem. +# # Define NO_STRLCPY if you don't have strlcpy. # # Define NO_STRTOUMAX if you don't have strtoumax in the C library. @@ -402,6 +404,7 @@ ifeq ($(uname_S),SunOS) NEEDS_NSL = YesPlease SHELL_PATH = /bin/bash NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease NO_HSTRERROR = YesPlease ifeq ($(uname_R),5.8) NEEDS_LIBICONV = YesPlease @@ -424,6 +427,7 @@ ifeq ($(uname_O),Cygwin) NO_D_TYPE_IN_DIRENT = YesPlease NO_D_INO_IN_DIRENT = YesPlease NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease NO_SYMLINK_HEAD = YesPlease NEEDS_LIBICONV = YesPlease NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes @@ -442,6 +446,7 @@ ifeq ($(uname_S),FreeBSD) endif ifeq ($(uname_S),OpenBSD) NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease NEEDS_LIBICONV = YesPlease BASIC_CFLAGS += -I/usr/local/include BASIC_LDFLAGS += -L/usr/local/lib @@ -456,6 +461,7 @@ ifeq ($(uname_S),NetBSD) endif ifeq ($(uname_S),AIX) NO_STRCASESTR=YesPlease + NO_MEMMEM = YesPlease NO_STRLCPY = YesPlease NEEDS_LIBICONV=YesPlease endif @@ -467,6 +473,7 @@ ifeq ($(uname_S),IRIX64) NO_IPV6=YesPlease NO_SETENV=YesPlease NO_STRCASESTR=YesPlease + NO_MEMMEM = YesPlease NO_STRLCPY = YesPlease NO_SOCKADDR_STORAGE=YesPlease SHELL_PATH=/usr/gnu/bin/bash @@ -661,6 +668,10 @@ ifdef NO_HSTRERROR COMPAT_CFLAGS += -DNO_HSTRERROR COMPAT_OBJS += compat/hstrerror.o endif +ifdef NO_MEMMEM + COMPAT_CFLAGS += -DNO_MEMMEM + COMPAT_OBJS += compat/memmem.o +endif ifeq ($(TCLTK_PATH),) NO_TCLTK=NoThanks diff --git a/compat/memmem.c b/compat/memmem.c new file mode 100644 index 
0000000000..cd0d877364 --- /dev/null +++ b/compat/memmem.c @@ -0,0 +1,29 @@ +#include "../git-compat-util.h" + +void *gitmemmem(const void *haystack, size_t haystack_len, + const void *needle, size_t needle_len) +{ + const char *begin = haystack; + const char *last_possible = begin + haystack_len - needle_len; + + /* + * The first occurrence of the empty string is deemed to occur at + * the beginning of the string. + */ + if (needle_len == 0) + return (void *)begin; + + /* + * Sanity check, otherwise the loop might search through the whole + * memory. + */ + if (haystack_len < needle_len) + return NULL; + + for (; begin <= last_possible; begin++) { + if (!memcmp(begin, needle, needle_len)) + return (void *)begin; + } + + return NULL; +} diff --git a/git-compat-util.h b/git-compat-util.h index ca0a597a28..1bfbdeb94f 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -172,6 +172,12 @@ extern uintmax_t gitstrtoumax(const char *, char **, int); extern const char *githstrerror(int herror); #endif +#ifdef NO_MEMMEM +#define memmem gitmemmem +void *gitmemmem(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); +#endif + extern void release_pack_memory(size_t, int); static inline char* xstrdup(const char *str) From df4a394f91d7d107c2a57e6c1df3638517cab54f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 7 Sep 2007 00:34:06 +0200 Subject: [PATCH 026/232] archive: specfile syntax change: "$Format:%PLCHLDR$" instead of just "%PLCHLDR" (take 2) As suggested by Johannes, --pretty=format: placeholders in specfiles need to be wrapped in $Format:...$ now. This syntax change restricts the expansion of placeholders and makes it easier to use with files that contain non-placeholder percent signs. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 5 +++- builtin-archive.c | 52 +++++++++++++++++++++++++++++---- t/t5000-tar-tree.sh | 4 +-- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 47a621b733..37b3be8b72 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -432,7 +432,10 @@ several placeholders when adding this file to an archive. The expansion depends on the availability of a commit ID, i.e. if gitlink:git-archive[1] has been given a tree instead of a commit or a tag then no replacement will be done. The placeholders are the same -as those for the option `--pretty=format:` of gitlink:git-log[1]. +as those for the option `--pretty=format:` of gitlink:git-log[1], +except that they need to be wrapped like this: `$Format:PLACEHOLDERS$` +in the file. E.g. the string `$Format:%H$` will be replaced by the +commit hash. GIT diff --git a/builtin-archive.c b/builtin-archive.c index faccce302a..65bf9cbec1 100644 --- a/builtin-archive.c +++ b/builtin-archive.c @@ -81,14 +81,58 @@ static int run_remote_archiver(const char *remote, int argc, return !!rv; } +static void *format_specfile(const struct commit *commit, const char *format, + unsigned long *sizep) +{ + unsigned long len = *sizep, result_len = 0; + const char *a = format; + char *result = NULL; + + for (;;) { + const char *b, *c; + char *fmt, *formatted = NULL; + unsigned long a_len, fmt_len, formatted_len, allocated = 0; + + b = memmem(a, len, "$Format:", 8); + if (!b || a + len < b + 9) + break; + c = memchr(b + 8, '$', len - 8); + if (!c) + break; + + a_len = b - a; + fmt_len = c - b - 8; + fmt = xmalloc(fmt_len + 1); + memcpy(fmt, b + 8, fmt_len); + fmt[fmt_len] = '\0'; + + formatted_len = format_commit_message(commit, fmt, &formatted, + &allocated); + result = xrealloc(result, result_len + a_len + formatted_len); + memcpy(result + result_len, 
a, a_len); + memcpy(result + result_len + a_len, formatted, formatted_len); + result_len += a_len + formatted_len; + len -= c + 1 - a; + a = c + 1; + } + + if (result && len) { + result = xrealloc(result, result_len + len); + memcpy(result + result_len, a, len); + result_len += len; + } + + *sizep = result_len; + + return result; +} + static void *convert_to_archive(const char *path, const void *src, unsigned long *sizep, const struct commit *commit) { static struct git_attr *attr_specfile; struct git_attr_check check[1]; - char *interpolated = NULL; - unsigned long allocated = 0; if (!commit) return NULL; @@ -102,9 +146,7 @@ static void *convert_to_archive(const char *path, if (!ATTR_TRUE(check[0].value)) return NULL; - *sizep = format_commit_message(commit, src, &interpolated, &allocated); - - return interpolated; + return format_specfile(commit, src, sizep); } void *sha1_file_to_archive(const char *path, const unsigned char *sha1, diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 3d5d01be78..6e89e07272 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -36,7 +36,7 @@ test_expect_success \ echo simple textfile >a/a && mkdir a/bin && cp /bin/sh a/bin && - printf "%s" "$SPECFILEFORMAT" >a/specfile && + printf "A\$Format:%s\$O" "$SPECFILEFORMAT" >a/specfile && ln -s a a/l1 && (p=long_path_to_a_file && cd a && for depth in 1 2 3 4 5; do mkdir $p && cd $p; done && @@ -119,7 +119,7 @@ test_expect_success \ test_expect_success \ 'validate specfile contents' \ - 'git log --max-count=1 "--pretty=format:$SPECFILEFORMAT" HEAD \ + 'git log --max-count=1 "--pretty=format:A${SPECFILEFORMAT}O" HEAD \ >f/a/specfile.expected && diff f/a/specfile.expected f/a/specfile' From 38c9c9b798a0df875968ae49d699298131dfa24d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 6 Sep 2007 18:51:11 +0200 Subject: [PATCH 027/232] archive: rename attribute specfile to export-subst As suggested by Junio and Johannes, change the name of the former 
attribute specfile to export-subst to indicate its function rather than purpose and to make clear that it is not applied to working tree files. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 6 +++--- builtin-archive.c | 14 +++++++------- t/t5000-tar-tree.sh | 18 +++++++++--------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 37b3be8b72..d0e951ee6f 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -424,10 +424,10 @@ frotz unspecified Creating an archive ~~~~~~~~~~~~~~~~~~~ -`specfile` -^^^^^^^^^^ +`export-subst` +^^^^^^^^^^^^^^ -If the attribute `specfile` is set for a file then git will expand +If the attribute `export-subst` is set for a file then git will expand several placeholders when adding this file to an archive. The expansion depends on the availability of a commit ID, i.e. if gitlink:git-archive[1] has been given a tree instead of a commit or a diff --git a/builtin-archive.c b/builtin-archive.c index 65bf9cbec1..e221f115f9 100644 --- a/builtin-archive.c +++ b/builtin-archive.c @@ -81,8 +81,8 @@ static int run_remote_archiver(const char *remote, int argc, return !!rv; } -static void *format_specfile(const struct commit *commit, const char *format, - unsigned long *sizep) +static void *format_subst(const struct commit *commit, const char *format, + unsigned long *sizep) { unsigned long len = *sizep, result_len = 0; const char *a = format; @@ -131,22 +131,22 @@ static void *convert_to_archive(const char *path, const void *src, unsigned long *sizep, const struct commit *commit) { - static struct git_attr *attr_specfile; + static struct git_attr *attr_export_subst; struct git_attr_check check[1]; if (!commit) return NULL; - if (!attr_specfile) - attr_specfile = git_attr("specfile", 8); + if (!attr_export_subst) + attr_export_subst = git_attr("export-subst", 12); - check[0].attr = 
attr_specfile; + check[0].attr = attr_export_subst; if (git_checkattr(path, ARRAY_SIZE(check), check)) return NULL; if (!ATTR_TRUE(check[0].value)) return NULL; - return format_specfile(commit, src, sizep); + return format_subst(commit, src, sizep); } void *sha1_file_to_archive(const char *path, const unsigned char *sha1, diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 6e89e07272..42e28ab758 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -28,7 +28,7 @@ commit id embedding: TAR=${TAR:-tar} UNZIP=${UNZIP:-unzip} -SPECFILEFORMAT=%H%n +SUBSTFORMAT=%H%n test_expect_success \ 'populate workdir' \ @@ -36,7 +36,7 @@ test_expect_success \ echo simple textfile >a/a && mkdir a/bin && cp /bin/sh a/bin && - printf "A\$Format:%s\$O" "$SPECFILEFORMAT" >a/specfile && + printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile && ln -s a a/l1 && (p=long_path_to_a_file && cd a && for depth in 1 2 3 4 5; do mkdir $p && cd $p; done && @@ -108,20 +108,20 @@ test_expect_success \ 'diff -r a c/prefix/a' test_expect_success \ - 'create an archive with a specfile' \ - 'echo specfile specfile >a/.gitattributes && + 'create an archive with a substfile' \ + 'echo substfile export-subst >a/.gitattributes && git archive HEAD >f.tar && rm a/.gitattributes' test_expect_success \ - 'extract specfile' \ + 'extract substfile' \ '(mkdir f && cd f && $TAR xf -) f/a/specfile.expected && - diff f/a/specfile.expected f/a/specfile' + 'validate substfile contents' \ + 'git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \ + >f/a/substfile.expected && + diff f/a/substfile.expected f/a/substfile' test_expect_success \ 'git archive --format=zip' \ From 4dbfe2e9bdfdde2d3257194573cee0d41471592b Mon Sep 17 00:00:00 2001 From: Lars Hjemli Date: Fri, 7 Sep 2007 02:00:08 +0200 Subject: [PATCH 028/232] git-svn: always use --first-parent This makes git-svn unconditionally invoke git-log with --first-parent when it is trying to discover its upstream subversion branch and collecting the 
commit ids which should be pushed to it with dcommit. The reason for always using --first-parent is to make git-svn behave in a predictable way when the ancestry chain contains merges with other git-svn branches. Since git-svn now always uses 'git-log --first-parent' there is no longer any need for the --first-parent option to git-svn, so this is removed. Signed-off-by: Lars Hjemli Acked-by: Eric Wong Signed-off-by: Junio C Hamano --- Documentation/git-svn.txt | 10 ---------- git-svn.perl | 17 +++++------------ 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index 42d7b82a37..be2e34eb8f 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -317,16 +317,6 @@ This is only used with the 'dcommit' command. Print out the series of git arguments that would show which diffs would be committed to SVN. ---first-parent:: - -This is only used with the 'dcommit', 'rebase', 'log', 'find-rev' and -'show-ignore' commands. - -These commands tries to detect the upstream subversion branch by means of -the embedded 'git-svn-id' line in commit messages. When --first-parent is -specified, git-svn only follows the first parent of each commit, effectively -ignoring commits brought into the current branch through merge-operations. 
- -- ADVANCED OPTIONS diff --git a/git-svn.perl b/git-svn.perl index d21eb7fa9e..badcd33c2c 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -59,7 +59,7 @@ my ($_stdin, $_help, $_edit, $_template, $_shared, $_version, $_fetch_all, $_no_rebase, $_merge, $_strategy, $_dry_run, $_local, - $_prefix, $_no_checkout, $_verbose, $_first_parent); + $_prefix, $_no_checkout, $_verbose); $Git::SVN::_follow_parent = 1; my %remote_opts = ( 'username=s' => \$Git::SVN::Prompt::_username, 'config-dir=s' => \$Git::SVN::Ra::config_dir, @@ -119,14 +119,12 @@ my %cmd = ( 'dry-run|n' => \$_dry_run, 'fetch-all|all' => \$_fetch_all, 'no-rebase' => \$_no_rebase, - 'first-parent' => \$_first_parent, %cmt_opts, %fc_opts } ], 'set-tree' => [ \&cmd_set_tree, "Set an SVN repository to a git tree-ish", { 'stdin|' => \$_stdin, %cmt_opts, %fc_opts, } ], 'show-ignore' => [ \&cmd_show_ignore, "Show svn:ignore listings", - { 'revision|r=i' => \$_revision, - 'first-parent' => \$_first_parent + { 'revision|r=i' => \$_revision } ], 'multi-fetch' => [ \&cmd_multi_fetch, "Deprecated alias for $0 fetch --all", @@ -147,20 +145,16 @@ my %cmd = ( 'non-recursive' => \$Git::SVN::Log::non_recursive, 'authors-file|A=s' => \$_authors, 'color' => \$Git::SVN::Log::color, - 'pager=s' => \$Git::SVN::Log::pager, - 'first-parent' => \$_first_parent + 'pager=s' => \$Git::SVN::Log::pager } ], 'find-rev' => [ \&cmd_find_rev, "Translate between SVN revision numbers and tree-ish", - { - 'first-parent' => \$_first_parent - } ], + {} ], 'rebase' => [ \&cmd_rebase, "Fetch and rebase your working directory", { 'merge|m|M' => \$_merge, 'verbose|v' => \$_verbose, 'strategy|s=s' => \$_strategy, 'local|l' => \$_local, 'fetch-all|all' => \$_fetch_all, - 'first-parent' => \$_first_parent, %fc_opts } ], 'commit-diff' => [ \&cmd_commit_diff, 'Commit a diff between two trees', @@ -818,8 +812,7 @@ sub cmt_metadata { sub working_head_info { my ($head, $refs) = @_; - my @args = ('log', '--no-color'); - push @args, '--first-parent' if 
$_first_parent; + my @args = ('log', '--no-color', '--first-parent'); my ($fh, $ctx) = command_output_pipe(@args, $head); my $hash; my %max; From b449f4cfc972929b638b90d375b8960c37790618 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 6 Sep 2007 13:20:05 +0200 Subject: [PATCH 029/232] Rework strbuf API and semantics. The gory details are explained in strbuf.h. The change of semantics this patch enforces is that the embedded buffer always has a '\0' character after its last byte, to always make it a C-string. The off-by-one changes are all related to that very change. A strbuf can be used to store byte arrays, or as an extended string library. The `buf' member can be passed to any C legacy string function, because strbuf operations always ensure there is a terminating \0 at the end of the buffer, not accounted for in the `len' field of the structure. A strbuf can be used to generate a string/buffer whose final size is not really known, and then "strbuf_detach" can be used to get the built buffer, and keep the wrapping "strbuf" structure usable for further work again. Other interesting feature: strbuf_grow(sb, size) ensures that there is enough allocated space in `sb' to put `size' new octets of data in the buffer. It helps avoid reallocating data for nothing when the problem the strbuf helps to solve has a known typical size.
Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- archive-tar.c | 2 +- fast-import.c | 15 ++++---- mktree.c | 4 +- strbuf.c | 101 +++++++++++++++++++++++++++++++++++++++++++------- strbuf.h | 86 +++++++++++++++++++++++++++++++++++++++++- 5 files changed, 180 insertions(+), 28 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 66fe3e375b..a0763c5b5a 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -166,7 +166,7 @@ static void write_entry(const unsigned char *sha1, struct strbuf *path, sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); } else { if (verbose) - fprintf(stderr, "%.*s\n", path->len, path->buf); + fprintf(stderr, "%.*s\n", (int)path->len, path->buf); if (S_ISDIR(mode) || S_ISGITLINK(mode)) { *header.typeflag = TYPEFLAG_DIR; mode = (mode | 0777) & ~tar_umask; diff --git a/fast-import.c b/fast-import.c index 078079d404..2f7baf4917 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1595,7 +1595,7 @@ static void read_next_command(void) } else { struct recent_command *rc; - command_buf.buf = NULL; + strbuf_detach(&command_buf); read_line(&command_buf, stdin, '\n'); if (command_buf.eof) return; @@ -1649,7 +1649,6 @@ static void *cmd_data (size_t *size) size_t sz = 8192, term_len = command_buf.len - 5 - 2; length = 0; buffer = xmalloc(sz); - command_buf.buf = NULL; for (;;) { read_line(&command_buf, stdin, '\n'); if (command_buf.eof) @@ -1657,11 +1656,11 @@ static void *cmd_data (size_t *size) if (term_len == command_buf.len && !strcmp(term, command_buf.buf)) break; - ALLOC_GROW(buffer, length + command_buf.len, sz); + ALLOC_GROW(buffer, length + command_buf.len + 1, sz); memcpy(buffer + length, command_buf.buf, - command_buf.len - 1); - length += command_buf.len - 1; + command_buf.len); + length += command_buf.len; buffer[length++] = '\n'; } free(term); @@ -2101,7 +2100,7 @@ static void cmd_new_commit(void) } /* file_change* */ - while (!command_buf.eof && command_buf.len > 1) { + while (!command_buf.eof && command_buf.len > 0) 
{ if (!prefixcmp(command_buf.buf, "M ")) file_change_m(b); else if (!prefixcmp(command_buf.buf, "D ")) @@ -2256,7 +2255,7 @@ static void cmd_reset_branch(void) else b = new_branch(sp); read_next_command(); - if (!cmd_from(b) && command_buf.len > 1) + if (!cmd_from(b) && command_buf.len > 0) unread_command_buf = 1; } @@ -2273,7 +2272,7 @@ static void cmd_checkpoint(void) static void cmd_progress(void) { - fwrite(command_buf.buf, 1, command_buf.len - 1, stdout); + fwrite(command_buf.buf, 1, command_buf.len, stdout); fputc('\n', stdout); fflush(stdout); skip_optional_lf(); diff --git a/mktree.c b/mktree.c index d86dde89d6..86de5eb5f6 100644 --- a/mktree.c +++ b/mktree.c @@ -92,7 +92,6 @@ int main(int ac, char **av) strbuf_init(&sb); while (1) { - int len; char *ptr, *ntr; unsigned mode; enum object_type type; @@ -101,7 +100,6 @@ int main(int ac, char **av) read_line(&sb, stdin, line_termination); if (sb.eof) break; - len = sb.len; ptr = sb.buf; /* Input is non-recursive ls-tree output format * mode SP type SP sha1 TAB name @@ -111,7 +109,7 @@ int main(int ac, char **av) die("input format error: %s", sb.buf); ptr = ntr + 1; /* type */ ntr = strchr(ptr, ' '); - if (!ntr || sb.buf + len <= ntr + 41 || + if (!ntr || sb.buf + sb.len <= ntr + 40 || ntr[41] != '\t' || get_sha1_hex(ntr + 1, sha1)) die("input format error: %s", sb.buf); diff --git a/strbuf.c b/strbuf.c index e33d06b87c..7136de14c6 100644 --- a/strbuf.c +++ b/strbuf.c @@ -2,40 +2,113 @@ #include "strbuf.h" void strbuf_init(struct strbuf *sb) { - sb->buf = NULL; - sb->eof = sb->alloc = sb->len = 0; + memset(sb, 0, sizeof(*sb)); } -static void strbuf_begin(struct strbuf *sb) { +void strbuf_release(struct strbuf *sb) { free(sb->buf); + memset(sb, 0, sizeof(*sb)); +} + +void strbuf_reset(struct strbuf *sb) { + if (sb->len) + strbuf_setlen(sb, 0); + sb->eof = 0; +} + +char *strbuf_detach(struct strbuf *sb) { + char *res = sb->buf; strbuf_init(sb); + return res; } -static void inline strbuf_add(struct strbuf *sb, int 
ch) { - if (sb->alloc <= sb->len) { - sb->alloc = sb->alloc * 3 / 2 + 16; - sb->buf = xrealloc(sb->buf, sb->alloc); +void strbuf_grow(struct strbuf *sb, size_t extra) { + if (sb->len + extra + 1 <= sb->len) + die("you want to use way too much memory"); + ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); +} + +void strbuf_add(struct strbuf *sb, const void *data, size_t len) { + strbuf_grow(sb, len); + memcpy(sb->buf + sb->len, data, len); + strbuf_setlen(sb, sb->len + len); +} + +void strbuf_addf(struct strbuf *sb, const char *fmt, ...) { + int len; + va_list ap; + + va_start(ap, fmt); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + va_end(ap); + if (len < 0) { + len = 0; } - sb->buf[sb->len++] = ch; + if (len >= strbuf_avail(sb)) { + strbuf_grow(sb, len); + va_start(ap, fmt); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + va_end(ap); + if (len >= strbuf_avail(sb)) { + die("this should not happen, your snprintf is broken"); + } + } + strbuf_setlen(sb, sb->len + len); } -static void strbuf_end(struct strbuf *sb) { - strbuf_add(sb, 0); +size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) { + size_t res; + + strbuf_grow(sb, size); + res = fread(sb->buf + sb->len, 1, size, f); + if (res > 0) { + strbuf_setlen(sb, sb->len + res); + } + return res; +} + +ssize_t strbuf_read(struct strbuf *sb, int fd) +{ + size_t oldlen = sb->len; + + for (;;) { + ssize_t cnt; + + strbuf_grow(sb, 8192); + cnt = xread(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); + if (cnt < 0) { + strbuf_setlen(sb, oldlen); + return -1; + } + if (!cnt) + break; + sb->len += cnt; + } + + sb->buf[sb->len] = '\0'; + return sb->len - oldlen; } void read_line(struct strbuf *sb, FILE *fp, int term) { int ch; - strbuf_begin(sb); if (feof(fp)) { + strbuf_release(sb); sb->eof = 1; return; } + + strbuf_reset(sb); while ((ch = fgetc(fp)) != EOF) { if (ch == term) break; - strbuf_add(sb, ch); + strbuf_grow(sb, 1); + sb->buf[sb->len++] = ch; } - if (ch == EOF 
&& sb->len == 0) + if (ch == EOF && sb->len == 0) { + strbuf_release(sb); sb->eof = 1; - strbuf_end(sb); + } + + strbuf_grow(sb, 1); + sb->buf[sb->len] = '\0'; } diff --git a/strbuf.h b/strbuf.h index 74cc012c2c..b40dc99fd0 100644 --- a/strbuf.h +++ b/strbuf.h @@ -1,13 +1,95 @@ #ifndef STRBUF_H #define STRBUF_H + +/* + * Strbuf's can be use in many ways: as a byte array, or to store arbitrary + * long, overflow safe strings. + * + * Strbufs has some invariants that are very important to keep in mind: + * + * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to + * build complex strings/buffers whose final size isn't easily known. + * + * It is legal to copy the ->buf pointer away. Though if you want to reuse + * the strbuf after that, setting ->buf to NULL isn't legal. + * `strbuf_detach' is the operation that detachs a buffer from its shell + * while keeping the shell valid wrt its invariants. + * + * 2. the ->buf member is a byte array that has at least ->len + 1 bytes + * allocated. The extra byte is used to store a '\0', allowing the ->buf + * member to be a valid C-string. Every strbuf function ensure this + * invariant is preserved. + * + * Note that it is OK to "play" with the buffer directly if you work it + * that way: + * + * strbuf_grow(sb, SOME_SIZE); + * // ... here the memory areay starting at sb->buf, and of length + * // sb_avail(sb) is all yours, and you are sure that sb_avail(sb) is at + * // least SOME_SIZE + * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); + * + * Of course, SOME_OTHER_SIZE must be smaller or equal to sb_avail(sb). + * + * Doing so is safe, though if it has to be done in many places, adding the + * missing API to the strbuf module is the way to go. + * + * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 + * even if it's true in the current implementation. Alloc is somehow a + * "private" member that should not be messed with. 
+ */ + +#include <assert.h> + struct strbuf { - int alloc; - int len; + size_t alloc; + size_t len; int eof; char *buf; }; +#define STRBUF_INIT { 0, 0, 0, NULL } + +/*----- strbuf life cycle -----*/ extern void strbuf_init(struct strbuf *); +extern void strbuf_release(struct strbuf *); +extern void strbuf_reset(struct strbuf *); +extern char *strbuf_detach(struct strbuf *); + +/*----- strbuf size related -----*/ +static inline size_t strbuf_avail(struct strbuf *sb) { + return sb->alloc ? sb->alloc - sb->len - 1 : 0; +} +static inline void strbuf_setlen(struct strbuf *sb, size_t len) { + assert (len < sb->alloc); + sb->len = len; + sb->buf[len] = '\0'; +} + +extern void strbuf_grow(struct strbuf *, size_t); + +/*----- add data in your buffer -----*/ +static inline void strbuf_addch(struct strbuf *sb, int c) { + strbuf_grow(sb, 1); + sb->buf[sb->len++] = c; + sb->buf[sb->len] = '\0'; +} + +extern void strbuf_add(struct strbuf *, const void *, size_t); +static inline void strbuf_addstr(struct strbuf *sb, const char *s) { + strbuf_add(sb, s, strlen(s)); +} +static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) { + strbuf_add(sb, sb2->buf, sb2->len); +} + +__attribute__((format(printf,2,3))) +extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); + +extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); +/* XXX: if read fails, any partial read is undone */ +extern ssize_t strbuf_read(struct strbuf *, int fd); + extern void read_line(struct strbuf *, FILE *, int); #endif /* STRBUF_H */ From 7a604f16b71e3bfd1c6e30d400f05be918e5376e Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 6 Sep 2007 13:20:06 +0200 Subject: [PATCH 030/232] Simplify strbuf uses in archive-tar.c using strbuf API This is just a cleaner way to deal with strbufs, using its API rather than reinventing it in the module (e.g. strbuf_append_string is just the plain strbuf_addstr function, and it was used to perform what strbuf_addch does anyways).
Signed-off-by: Junio C Hamano --- archive-tar.c | 65 +++++++++++++-------------------------------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index a0763c5b5a..c84d7c0652 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -78,19 +78,6 @@ static void write_trailer(void) } } -static void strbuf_append_string(struct strbuf *sb, const char *s) -{ - int slen = strlen(s); - int total = sb->len + slen; - if (total + 1 > sb->alloc) { - sb->buf = xrealloc(sb->buf, total + 1); - sb->alloc = total + 1; - } - memcpy(sb->buf + sb->len, s, slen); - sb->len = total; - sb->buf[total] = '\0'; -} - /* * pax extended header records have the format "%u %s=%s\n". %u contains * the size of the whole string (including the %u), the first %s is the @@ -100,26 +87,17 @@ static void strbuf_append_string(struct strbuf *sb, const char *s) static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword, const char *value, unsigned int valuelen) { - char *p; - int len, total, tmp; + int len, tmp; /* "%u %s=%s\n" */ len = 1 + 1 + strlen(keyword) + 1 + valuelen + 1; for (tmp = len; tmp > 9; tmp /= 10) len++; - total = sb->len + len; - if (total > sb->alloc) { - sb->buf = xrealloc(sb->buf, total); - sb->alloc = total; - } - - p = sb->buf; - p += sprintf(p, "%u %s=", len, keyword); - memcpy(p, value, valuelen); - p += valuelen; - *p = '\n'; - sb->len = total; + strbuf_grow(sb, len); + strbuf_addf(sb, "%u %s=", len, keyword); + strbuf_add(sb, value, valuelen); + strbuf_addch(sb, '\n'); } static unsigned int ustar_header_chksum(const struct ustar_header *header) @@ -153,8 +131,7 @@ static void write_entry(const unsigned char *sha1, struct strbuf *path, struct strbuf ext_header; memset(&header, 0, sizeof(header)); - ext_header.buf = NULL; - ext_header.len = ext_header.alloc = 0; + strbuf_init(&ext_header); if (!sha1) { *header.typeflag = TYPEFLAG_GLOBAL_HEADER; @@ -225,8 +202,8 @@ static void write_entry(const unsigned char *sha1, 
struct strbuf *path, if (ext_header.len > 0) { write_entry(sha1, NULL, 0, ext_header.buf, ext_header.len); - free(ext_header.buf); } + strbuf_release(&ext_header); write_blocked(&header, sizeof(header)); if (S_ISREG(mode) && buffer && size > 0) write_blocked(buffer, size); @@ -235,11 +212,11 @@ static void write_entry(const unsigned char *sha1, struct strbuf *path, static void write_global_extended_header(const unsigned char *sha1) { struct strbuf ext_header; - ext_header.buf = NULL; - ext_header.len = ext_header.alloc = 0; + + strbuf_init(&ext_header); strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40); write_entry(NULL, NULL, 0, ext_header.buf, ext_header.len); - free(ext_header.buf); + strbuf_release(&ext_header); } static int git_tar_config(const char *var, const char *value) @@ -260,28 +237,18 @@ static int write_tar_entry(const unsigned char *sha1, const char *base, int baselen, const char *filename, unsigned mode, int stage) { - static struct strbuf path; + static struct strbuf path = STRBUF_INIT; int filenamelen = strlen(filename); void *buffer; enum object_type type; unsigned long size; - if (!path.alloc) { - path.buf = xmalloc(PATH_MAX); - path.alloc = PATH_MAX; - path.len = path.eof = 0; - } - if (path.alloc < baselen + filenamelen + 1) { - free(path.buf); - path.buf = xmalloc(baselen + filenamelen + 1); - path.alloc = baselen + filenamelen + 1; - } - memcpy(path.buf, base, baselen); - memcpy(path.buf + baselen, filename, filenamelen); - path.len = baselen + filenamelen; - path.buf[path.len] = '\0'; + strbuf_grow(&path, MAX(PATH_MAX, baselen + filenamelen + 1)); + strbuf_reset(&path); + strbuf_add(&path, base, baselen); + strbuf_add(&path, filename, filenamelen); if (S_ISDIR(mode) || S_ISGITLINK(mode)) { - strbuf_append_string(&path, "/"); + strbuf_addch(&path, '/'); buffer = NULL; size = 0; } else { From 4a241d79c9020dcafb1f254774bcab200171ab46 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 6 Sep 2007 13:20:07 +0200 
Subject: [PATCH 031/232] fast-import: Use strbuf API, and simplify cmd_data() This patch features the use of strbuf_detach, and prevents the programmer from messing with allocation directly. The code is as efficient as before, just more concise and more straightforward. Signed-off-by: Junio C Hamano --- fast-import.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/fast-import.c b/fast-import.c index 2f7baf4917..74ff0fdadd 100644 --- a/fast-import.c +++ b/fast-import.c @@ -340,7 +340,7 @@ static struct tag *last_tag; /* Input stream parsing */ static whenspec_type whenspec = WHENSPEC_RAW; -static struct strbuf command_buf; +static struct strbuf command_buf = STRBUF_INIT; static int unread_command_buf; static struct recent_command cmd_hist = {&cmd_hist, &cmd_hist, NULL}; static struct recent_command *cmd_tail = &cmd_hist; @@ -1638,17 +1638,16 @@ static void cmd_mark(void) static void *cmd_data (size_t *size) { - size_t length; - char *buffer; + struct strbuf buffer; + strbuf_init(&buffer); if (prefixcmp(command_buf.buf, "data ")) die("Expected 'data n' command, found: %s", command_buf.buf); if (!prefixcmp(command_buf.buf + 5, "<<")) { char *term = xstrdup(command_buf.buf + 5 + 2); - size_t sz = 8192, term_len = command_buf.len - 5 - 2; - length = 0; - buffer = xmalloc(sz); + size_t term_len = command_buf.len - 5 - 2; + for (;;) { read_line(&command_buf, stdin, '\n'); if (command_buf.eof) @@ -1656,21 +1655,18 @@ static void *cmd_data (size_t *size) if (term_len == command_buf.len && !strcmp(term, command_buf.buf)) break; - ALLOC_GROW(buffer, length + command_buf.len + 1, sz); - memcpy(buffer + length, - command_buf.buf, - command_buf.len); - length += command_buf.len; - buffer[length++] = '\n'; + strbuf_addbuf(&buffer, &command_buf); + strbuf_addch(&buffer, '\n'); } free(term); } else { - size_t n = 0; + size_t n = 0, length; + length = strtoul(command_buf.buf + 5, NULL, 10); - buffer = xmalloc(length); + while (n < length)
{ - size_t s = fread(buffer + n, 1, length - n, stdin); + size_t s = strbuf_fread(&buffer, length - n, stdin); if (!s && feof(stdin)) die("EOF in data (%lu bytes remaining)", (unsigned long)(length - n)); @@ -1679,8 +1675,8 @@ static void *cmd_data (size_t *size) } skip_optional_lf(); - *size = length; - return buffer; + *size = buffer.len; + return strbuf_detach(&buffer); } static int validate_raw_date(const char *src, char *result, int maxlen) From d52bc66152834dff3fb5f32a54f6ed57730f58c6 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 6 Sep 2007 13:20:08 +0200 Subject: [PATCH 032/232] mktree: Simplify write_tree() using strbuf API Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- mktree.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/mktree.c b/mktree.c index 86de5eb5f6..2e84889c02 100644 --- a/mktree.c +++ b/mktree.c @@ -44,30 +44,23 @@ static int ent_compare(const void *a_, const void *b_) static void write_tree(unsigned char *sha1) { - char *buffer; - unsigned long size, offset; + struct strbuf buf; + size_t size; int i; qsort(entries, used, sizeof(*entries), ent_compare); for (size = i = 0; i < used; i++) size += 32 + entries[i]->len; - buffer = xmalloc(size); - offset = 0; + strbuf_init(&buf); + strbuf_grow(&buf, size); for (i = 0; i < used; i++) { struct treeent *ent = entries[i]; - - if (offset + ent->len + 100 < size) { - size = alloc_nr(offset + ent->len + 100); - buffer = xrealloc(buffer, size); - } - offset += sprintf(buffer + offset, "%o ", ent->mode); - offset += sprintf(buffer + offset, "%s", ent->name); - buffer[offset++] = 0; - hashcpy((unsigned char*)buffer + offset, ent->sha1); - offset += 20; + strbuf_addf(&buf, "%o %s%c", ent->mode, ent->name, '\0'); + strbuf_add(&buf, ent->sha1, 20); } - write_sha1_file(buffer, offset, tree_type, sha1); + + write_sha1_file(buf.buf, buf.len, tree_type, sha1); } static const char mktree_usage[] = "git-mktree [-z]"; From 
af6eb82262e35687aa8f00d688e327cb845973fa Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 6 Sep 2007 13:20:09 +0200 Subject: [PATCH 033/232] Use strbuf API in apply, blame, commit-tree and diff Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- builtin-apply.c | 30 +++++++--------------- builtin-blame.c | 35 +++++++++---------------- builtin-commit-tree.c | 60 ++++++++++--------------------------------- diff.c | 25 ++++++------------ 4 files changed, 43 insertions(+), 107 deletions(-) diff --git a/builtin-apply.c b/builtin-apply.c index 976ec77041..90e328ef91 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -12,6 +12,7 @@ #include "blob.h" #include "delta.h" #include "builtin.h" +#include "strbuf.h" /* * --check turns on checking that the working tree matches the @@ -181,34 +182,21 @@ static void say_patch_name(FILE *output, const char *pre, struct patch *patch, c static void *read_patch_file(int fd, unsigned long *sizep) { - unsigned long size = 0, alloc = CHUNKSIZE; - void *buffer = xmalloc(alloc); + struct strbuf buf; - for (;;) { - ssize_t nr = alloc - size; - if (nr < 1024) { - alloc += CHUNKSIZE; - buffer = xrealloc(buffer, alloc); - nr = alloc - size; - } - nr = xread(fd, (char *) buffer + size, nr); - if (!nr) - break; - if (nr < 0) - die("git-apply: read returned %s", strerror(errno)); - size += nr; - } - *sizep = size; + strbuf_init(&buf); + if (strbuf_read(&buf, fd) < 0) + die("git-apply: read returned %s", strerror(errno)); + *sizep = buf.len; /* * Make sure that we have some slop in the buffer * so that we can do speculative "memcmp" etc, and * see to it that it is NUL-filled. 
*/ - if (alloc < size + SLOP) - buffer = xrealloc(buffer, size + SLOP); - memset((char *) buffer + size, 0, SLOP); - return buffer; + strbuf_grow(&buf, SLOP); + memset(buf.buf + buf.len, 0, SLOP); + return strbuf_detach(&buf); } static unsigned long linelen(const char *buffer, unsigned long size) diff --git a/builtin-blame.c b/builtin-blame.c index dc88a953a5..1b1e6da853 100644 --- a/builtin-blame.c +++ b/builtin-blame.c @@ -18,6 +18,7 @@ #include "cache-tree.h" #include "path-list.h" #include "mailmap.h" +#include "strbuf.h" static char blame_usage[] = "git-blame [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-p] [-w] [-L n,m] [-S ] [-M] [-C] [-C] [--contents ] [--incremental] [commit] [--] file\n" @@ -2001,11 +2002,10 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con struct commit *commit; struct origin *origin; unsigned char head_sha1[20]; - char *buf; + struct strbuf buf; const char *ident; int fd; time_t now; - unsigned long fin_size; int size, len; struct cache_entry *ce; unsigned mode; @@ -2023,9 +2023,11 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con origin = make_origin(commit, path); + strbuf_init(&buf); if (!contents_from || strcmp("-", contents_from)) { struct stat st; const char *read_from; + unsigned long fin_size; if (contents_from) { if (stat(contents_from, &st) < 0) @@ -2038,19 +2040,19 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con read_from = path; } fin_size = xsize_t(st.st_size); - buf = xmalloc(fin_size+1); mode = canon_mode(st.st_mode); switch (st.st_mode & S_IFMT) { case S_IFREG: fd = open(read_from, O_RDONLY); if (fd < 0) die("cannot open %s", read_from); - if (read_in_full(fd, buf, fin_size) != fin_size) + if (strbuf_read(&buf, fd) != xsize_t(st.st_size)) die("cannot read %s", read_from); break; case S_IFLNK: - if (readlink(read_from, buf, fin_size+1) != fin_size) + if (readlink(read_from, buf.buf, buf.alloc) != fin_size) die("cannot 
readlink %s", read_from); + buf.len = fin_size; break; default: die("unsupported file type %s", read_from); @@ -2059,26 +2061,13 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con else { /* Reading from stdin */ contents_from = "standard input"; - buf = NULL; - fin_size = 0; mode = 0; - while (1) { - ssize_t cnt = 8192; - buf = xrealloc(buf, fin_size + cnt); - cnt = xread(0, buf + fin_size, cnt); - if (cnt < 0) - die("read error %s from stdin", - strerror(errno)); - if (!cnt) - break; - fin_size += cnt; - } - buf = xrealloc(buf, fin_size + 1); + if (strbuf_read(&buf, 0) < 0) + die("read error %s from stdin", strerror(errno)); } - buf[fin_size] = 0; - origin->file.ptr = buf; - origin->file.size = fin_size; - pretend_sha1_file(buf, fin_size, OBJ_BLOB, origin->blob_sha1); + origin->file.ptr = buf.buf; + origin->file.size = buf.len; + pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1); commit->util = origin; /* diff --git a/builtin-commit-tree.c b/builtin-commit-tree.c index ccbcbe30da..bc9502c135 100644 --- a/builtin-commit-tree.c +++ b/builtin-commit-tree.c @@ -8,42 +8,13 @@ #include "tree.h" #include "builtin.h" #include "utf8.h" +#include "strbuf.h" #define BLOCKING (1ul << 14) /* * FIXME! Share the code with "write-tree.c" */ -static void init_buffer(char **bufp, unsigned int *sizep) -{ - *bufp = xmalloc(BLOCKING); - *sizep = 0; -} - -static void add_buffer(char **bufp, unsigned int *sizep, const char *fmt, ...) 
-{ - char one_line[2048]; - va_list args; - int len; - unsigned long alloc, size, newsize; - char *buf; - - va_start(args, fmt); - len = vsnprintf(one_line, sizeof(one_line), fmt, args); - va_end(args); - size = *sizep; - newsize = size + len + 1; - alloc = (size + 32767) & ~32767; - buf = *bufp; - if (newsize > alloc) { - alloc = (newsize + 32767) & ~32767; - buf = xrealloc(buf, alloc); - *bufp = buf; - } - *sizep = newsize - 1; - memcpy(buf + size, one_line, len); -} - static void check_valid(unsigned char *sha1, enum object_type expect) { enum object_type type = sha1_object_info(sha1, NULL); @@ -87,9 +58,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) int parents = 0; unsigned char tree_sha1[20]; unsigned char commit_sha1[20]; - char comment[1000]; - char *buffer; - unsigned int size; + struct strbuf buffer; int encoding_is_utf8; git_config(git_default_config); @@ -118,8 +87,9 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) /* Not having i18n.commitencoding is the same as having utf-8 */ encoding_is_utf8 = is_encoding_utf8(git_commit_encoding); - init_buffer(&buffer, &size); - add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1)); + strbuf_init(&buffer); + strbuf_grow(&buffer, 8192); /* should avoid reallocs for the headers */ + strbuf_addf(&buffer, "tree %s\n", sha1_to_hex(tree_sha1)); /* * NOTE! This ordering means that the same exact tree merged with a @@ -127,26 +97,24 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) * if everything else stays the same. 
*/ for (i = 0; i < parents; i++) - add_buffer(&buffer, &size, "parent %s\n", sha1_to_hex(parent_sha1[i])); + strbuf_addf(&buffer, "parent %s\n", sha1_to_hex(parent_sha1[i])); /* Person/date information */ - add_buffer(&buffer, &size, "author %s\n", git_author_info(1)); - add_buffer(&buffer, &size, "committer %s\n", git_committer_info(1)); + strbuf_addf(&buffer, "author %s\n", git_author_info(1)); + strbuf_addf(&buffer, "committer %s\n", git_committer_info(1)); if (!encoding_is_utf8) - add_buffer(&buffer, &size, - "encoding %s\n", git_commit_encoding); - add_buffer(&buffer, &size, "\n"); + strbuf_addf(&buffer, "encoding %s\n", git_commit_encoding); + strbuf_addch(&buffer, '\n'); /* And add the comment */ - while (fgets(comment, sizeof(comment), stdin) != NULL) - add_buffer(&buffer, &size, "%s", comment); + if (strbuf_read(&buffer, 0) < 0) + die("git-commit-tree: read returned %s", strerror(errno)); /* And check the encoding */ - buffer[size] = '\0'; - if (encoding_is_utf8 && !is_utf8(buffer)) + if (encoding_is_utf8 && !is_utf8(buffer.buf)) fprintf(stderr, commit_utf8_warn); - if (!write_sha1_file(buffer, size, commit_type, commit_sha1)) { + if (!write_sha1_file(buffer.buf, buffer.len, commit_type, commit_sha1)) { printf("%s\n", sha1_to_hex(commit_sha1)); return 0; } diff --git a/diff.c b/diff.c index 0d30d05263..c054b234b8 100644 --- a/diff.c +++ b/diff.c @@ -9,6 +9,7 @@ #include "xdiff-interface.h" #include "color.h" #include "attr.h" +#include "strbuf.h" #ifdef NO_FAST_WORKING_DIRECTORY #define FAST_WORKING_DIRECTORY 0 @@ -1545,26 +1546,16 @@ static int reuse_worktree_file(const char *name, const unsigned char *sha1, int static int populate_from_stdin(struct diff_filespec *s) { -#define INCREMENT 1024 - char *buf; - unsigned long size; - ssize_t got; + struct strbuf buf; - size = 0; - buf = NULL; - while (1) { - buf = xrealloc(buf, size + INCREMENT); - got = xread(0, buf + size, INCREMENT); - if (!got) - break; /* EOF */ - if (got < 0) - return error("error while 
reading from stdin %s", + strbuf_init(&buf); + if (strbuf_read(&buf, 0) < 0) + return error("error while reading from stdin %s", strerror(errno)); - size += got; - } + s->should_munmap = 0; - s->data = buf; - s->size = size; + s->size = buf.len; + s->data = strbuf_detach(&buf); s->should_free = 1; return 0; } From 19b358e8daafdfe5a5d25ff7972e041493b05156 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 6 Sep 2007 13:20:10 +0200 Subject: [PATCH 034/232] Use strbuf API in builtin-rerere.c Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- builtin-rerere.c | 56 ++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/builtin-rerere.c b/builtin-rerere.c index 29d057c98c..98d7702168 100644 --- a/builtin-rerere.c +++ b/builtin-rerere.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "cache.h" #include "path-list.h" +#include "strbuf.h" #include "xdiff/xdiff.h" #include "xdiff-interface.h" @@ -66,41 +67,20 @@ static int write_rr(struct path_list *rr, int out_fd) return commit_lock_file(&write_lock); } -struct buffer { - char *ptr; - int nr, alloc; -}; - -static void append_line(struct buffer *buffer, const char *line) -{ - int len = strlen(line); - - if (buffer->nr + len > buffer->alloc) { - buffer->alloc = alloc_nr(buffer->nr + len); - buffer->ptr = xrealloc(buffer->ptr, buffer->alloc); - } - memcpy(buffer->ptr + buffer->nr, line, len); - buffer->nr += len; -} - -static void clear_buffer(struct buffer *buffer) -{ - free(buffer->ptr); - buffer->ptr = NULL; - buffer->nr = buffer->alloc = 0; -} - static int handle_file(const char *path, unsigned char *sha1, const char *output) { SHA_CTX ctx; char buf[1024]; int hunk = 0, hunk_no = 0; - struct buffer minus = { NULL, 0, 0 }, plus = { NULL, 0, 0 }; - struct buffer *one = &minus, *two = &plus; + + struct strbuf minus, plus; + struct strbuf *one = &minus, *two = &plus; + FILE *f = fopen(path, "r"); FILE *out; + strbuf_init(&minus); + strbuf_init(&plus); + if (!f)
return error("Could not open %s", path); @@ -122,36 +102,36 @@ static int handle_file(const char *path, else if (!prefixcmp(buf, "=======")) hunk = 2; else if (!prefixcmp(buf, ">>>>>>> ")) { - int one_is_longer = (one->nr > two->nr); - int common_len = one_is_longer ? two->nr : one->nr; - int cmp = memcmp(one->ptr, two->ptr, common_len); + int one_is_longer = (one->len > two->len); + int common_len = one_is_longer ? two->len : one->len; + int cmp = memcmp(one->buf, two->buf, common_len); hunk_no++; hunk = 0; if ((cmp > 0) || ((cmp == 0) && one_is_longer)) { - struct buffer *swap = one; + struct strbuf *swap = one; one = two; two = swap; } if (out) { fputs("<<<<<<<\n", out); - fwrite(one->ptr, one->nr, 1, out); + fwrite(one->buf, one->len, 1, out); fputs("=======\n", out); - fwrite(two->ptr, two->nr, 1, out); + fwrite(two->buf, two->len, 1, out); fputs(">>>>>>>\n", out); } if (sha1) { - SHA1_Update(&ctx, one->ptr, one->nr); + SHA1_Update(&ctx, one->buf, one->len); SHA1_Update(&ctx, "\0", 1); - SHA1_Update(&ctx, two->ptr, two->nr); + SHA1_Update(&ctx, two->buf, two->len); SHA1_Update(&ctx, "\0", 1); } - clear_buffer(one); - clear_buffer(two); + strbuf_release(one); + strbuf_release(two); } else if (hunk == 1) - append_line(one, buf); + strbuf_addstr(one, buf); else if (hunk == 2) - append_line(two, buf); + strbuf_addstr(two, buf); else if (out) fputs(buf, out); } From 5242bcbb638f031818e9ebd4467c8e55d5a06bfb Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 6 Sep 2007 13:20:11 +0200 Subject: [PATCH 035/232] Use strbuf API in cache-tree.c Should even be marginally faster. 
Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- cache-tree.c | 59 ++++++++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 077f034369..76af6f5d99 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -1,4 +1,5 @@ #include "cache.h" +#include "strbuf.h" #include "tree.h" #include "cache-tree.h" @@ -235,8 +236,7 @@ static int update_one(struct cache_tree *it, int missing_ok, int dryrun) { - unsigned long size, offset; - char *buffer; + struct strbuf buffer; int i; if (0 <= it->entry_count && has_sha1_file(it->sha1)) @@ -293,9 +293,8 @@ static int update_one(struct cache_tree *it, /* * Then write out the tree object for this level. */ - size = 8192; - buffer = xmalloc(size); - offset = 0; + strbuf_init(&buffer); + strbuf_grow(&buffer, 8192); for (i = 0; i < entries; i++) { struct cache_entry *ce = cache[i]; @@ -332,15 +331,9 @@ static int update_one(struct cache_tree *it, if (!ce->ce_mode) continue; /* entry being removed */ - if (size < offset + entlen + 100) { - size = alloc_nr(offset + entlen + 100); - buffer = xrealloc(buffer, size); - } - offset += sprintf(buffer + offset, - "%o %.*s", mode, entlen, path + baselen); - buffer[offset++] = 0; - hashcpy((unsigned char*)buffer + offset, sha1); - offset += 20; + strbuf_grow(&buffer, entlen + 100); + strbuf_addf(&buffer, "%o %.*s%c", mode, entlen, path + baselen, '\0'); + strbuf_add(&buffer, sha1, 20); #if DEBUG fprintf(stderr, "cache-tree update-one %o %.*s\n", @@ -349,10 +342,10 @@ static int update_one(struct cache_tree *it, } if (dryrun) - hash_sha1_file(buffer, offset, tree_type, it->sha1); + hash_sha1_file(buffer.buf, buffer.len, tree_type, it->sha1); else - write_sha1_file(buffer, offset, tree_type, it->sha1); - free(buffer); + write_sha1_file(buffer.buf, buffer.len, tree_type, it->sha1); + strbuf_release(&buffer); it->entry_count = i; #if DEBUG fprintf(stderr, "cache-tree update-one (%d ent, %d subtree) %s\n", 
@@ -378,12 +371,10 @@ int cache_tree_update(struct cache_tree *it, return 0; } -static void *write_one(struct cache_tree *it, +static void write_one(struct cache_tree *it, char *path, int pathlen, - char *buffer, - unsigned long *size, - unsigned long *offset) + struct strbuf *buffer) { int i; @@ -393,13 +384,9 @@ static void *write_one(struct cache_tree *it, * tree-sha1 (missing if invalid) * subtree_nr "cache-tree" entries for subtrees. */ - if (*size < *offset + pathlen + 100) { - *size = alloc_nr(*offset + pathlen + 100); - buffer = xrealloc(buffer, *size); - } - *offset += sprintf(buffer + *offset, "%.*s%c%d %d\n", - pathlen, path, 0, - it->entry_count, it->subtree_nr); + strbuf_grow(buffer, pathlen + 100); + strbuf_add(buffer, path, pathlen); + strbuf_addf(buffer, "%c%d %d\n", 0, it->entry_count, it->subtree_nr); #if DEBUG if (0 <= it->entry_count) @@ -412,8 +399,7 @@ static void *write_one(struct cache_tree *it, #endif if (0 <= it->entry_count) { - hashcpy((unsigned char*)buffer + *offset, it->sha1); - *offset += 20; + strbuf_add(buffer, it->sha1, 20); } for (i = 0; i < it->subtree_nr; i++) { struct cache_tree_sub *down = it->down[i]; @@ -423,21 +409,20 @@ static void *write_one(struct cache_tree *it, prev->name, prev->namelen) <= 0) die("fatal - unsorted cache subtree"); } - buffer = write_one(down->cache_tree, down->name, down->namelen, - buffer, size, offset); + write_one(down->cache_tree, down->name, down->namelen, buffer); } - return buffer; } void *cache_tree_write(struct cache_tree *root, unsigned long *size_p) { char path[PATH_MAX]; - unsigned long size = 8192; - char *buffer = xmalloc(size); + struct strbuf buffer; - *size_p = 0; path[0] = 0; - return write_one(root, path, 0, buffer, &size, size_p); + strbuf_init(&buffer); + write_one(root, path, 0, &buffer); + *size_p = buffer.len; + return strbuf_detach(&buffer); } static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) From 059f446d57d51fbacdace3fbadf2414916c201dd Mon Sep 
17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Sep 2007 10:20:50 -0400 Subject: [PATCH 036/232] git-rebase: support --whitespace=