From 968e77a5f87c1a50983323f15296a6dda53a1098 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 17 Aug 2018 16:55:06 -0400
Subject: [PATCH 1/6] t/perf: factor boilerplate out of test_perf

About half of test_perf() is boilerplate preparing to run
_any_ test, and the other half is specifically running a
timing test. Let's split it into two functions, so that we
can reuse the boilerplate in future commits.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/perf-lib.sh | 65 ++++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 28 deletions(-)
diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh
index e4c343a6b7..a54be09516 100644
--- a/t/perf/perf-lib.sh
+++ b/t/perf/perf-lib.sh
@@ -179,8 +179,8 @@ exit $ret' >&3 2>&4
 	return "$eval_ret"
 }
 
-
-test_perf () {
+test_wrapper_ () {
+	test_wrapper_func_=$1; shift
 	test_start_
 	test "$#" = 3 && { test_prereq=$1; shift; } || test_prereq=
 	test "$#" = 2 ||
@@ -191,37 +191,46 @@ test_perf () {
 		base=$(basename "$0" .sh)
 		echo "$test_count" >>"$perf_results_dir"/$base.subtests
 		echo "$1" >"$perf_results_dir"/$base.$test_count.descr
-		if test -z "$verbose"; then
-			printf "%s" "perf $test_count - $1:"
-		else
-			echo "perf $test_count - $1:"
-		fi
-		for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do
-			say >&3 "running: $2"
-			if test_run_perf_ "$2"
-			then
-				if test -z "$verbose"; then
-					printf " %s" "$i"
-				else
-					echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:"
-				fi
-			else
-				test -z "$verbose" && echo
-				test_failure_ "$@"
-				break
-			fi
-		done
-		if test -z "$verbose"; then
-			echo " ok"
-		else
-			test_ok_ "$1"
-		fi
 		base="$perf_results_dir"/"$perf_results_prefix$(basename "$0" .sh)"."$test_count"
-		"$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times
+		"$test_wrapper_func_" "$@"
 	fi
+
 	test_finish_
 }
 
+test_perf_ () {
+	if test -z "$verbose"; then
+		printf "%s" "perf $test_count - $1:"
+	else
+		echo "perf $test_count - $1:"
+	fi
+	for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do
+		say >&3 "running: $2"
+		if test_run_perf_ "$2"
+		then
+			if test -z "$verbose"; then
+				printf " %s" "$i"
+			else
+				echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:"
+			fi
+		else
+			test -z "$verbose" && echo
+			test_failure_ "$@"
+			break
+		fi
+	done
+	if test -z "$verbose"; then
+		echo " ok"
+	else
+		test_ok_ "$1"
+	fi
+	"$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times
+}
+
+test_perf () {
+	test_wrapper_ test_perf_ "$@"
+}
+
 # We extend test_done to print timings at the end (./run disables this
 # and does it after running everything)
 test_at_end_hook_ () {

From 5a924a62bb0fc5524101aeec9086aee530c2244d Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 17 Aug 2018 16:55:24 -0400
Subject: [PATCH 2/6] t/perf: factor out percent calculations

This will let us reuse the code when we add new values to
aggregate besides times.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/aggregate.perl | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index bc865160e7..3181b087ab 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -19,21 +19,24 @@ sub get_times {
 	return ($rt, $4, $5);
 }
 
+sub relative_change {
+	my ($r, $firstr) = @_;
+	if ($firstr > 0) {
+		return sprintf "%+.1f%%", 100.0*($r-$firstr)/$firstr;
+	} elsif ($r == 0) {
+		return "=";
+	} else {
+		return "+inf";
+	}
+}
+
 sub format_times {
 	my ($r, $u, $s, $firstr) = @_;
 	if (!defined $r) {
 		return "<missing>";
 	}
 	my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;
-	if (defined $firstr) {
-		if ($firstr > 0) {
-			$out .= sprintf " %+.1f%%", 100.0*($r-$firstr)/$firstr;
-		} elsif ($r == 0) {
-			$out .= " =";
-		} else {
-			$out .= " +inf";
-		}
-	}
+	$out .= ' ' . relative_change($r, $firstr) if defined $firstr;
 	return $out;
 }
 

From 22bec79d1aef8fd82c7870e62f77b7817f0575a7 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 17 Aug 2018 16:56:37 -0400
Subject: [PATCH 3/6] t/perf: add infrastructure for measuring sizes

The main objective of scripts in the perf framework is to
run "test_perf", which measures the time it takes to run
some operation. However, it can also be interesting to see
the change in the output size of certain operations.

This patch introduces test_size, which records a single
numeric output from the test and shows it in the aggregated
output (with pretty printing and relative size comparison).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/README         | 25 ++++++++++++++++++++++
 t/perf/aggregate.perl | 48 ++++++++++++++++++++++++++++++++++++++-----
 t/perf/perf-lib.sh    | 13 ++++++++++++
 3 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/t/perf/README b/t/perf/README
index 21321a0f36..be12090c38 100644
--- a/t/perf/README
+++ b/t/perf/README
@@ -168,3 +168,28 @@ that
   While we have tried to make sure that it can cope with embedded
   whitespace and other special characters, it will not work with
   multi-line data.
+
+Rather than tracking the performance by run-time as `test_perf` does, you
+may also track output size by using `test_size`. The stdout of the
+function should be a single numeric value, which will be captured and
+shown in the aggregated output. For example:
+
+	test_perf 'time foo' '
+		./foo >foo.out
+	'
+
+	test_size 'output size'
+		wc -c <foo.out
+	'
+
+might produce output like:
+
+	Test                origin           HEAD
+	-------------------------------------------------------------
+	1234.1 time foo     0.37(0.79+0.02)  0.26(0.51+0.02) -29.7%
+	1234.2 output size             4.3M             3.6M -14.7%
+
+The item being measured (and its units) is up to the test; the context
+and the test title should make it clear to the user whether bigger or
+smaller numbers are better. Unlike test_perf, the test code will only be
+run once, since output sizes tend to be more deterministic than timings.
diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index 3181b087ab..494907a892 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -13,10 +13,16 @@ sub get_times {
 	my $line = <$fh>;
 	return undef if not defined $line;
 	close $fh or die "cannot close $name: $!";
-	$line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/
-		or die "bad input line: $line";
-	my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3;
-	return ($rt, $4, $5);
+	# times
+	if ($line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/) {
+		my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3;
+		return ($rt, $4, $5);
+	# size
+	} elsif ($line =~ /^\d+$/) {
+		return $&;
+	} else {
+		die "bad input line: $line";
+	}
 }
 
 sub relative_change {
@@ -32,9 +38,15 @@ sub relative_change {
 
 sub format_times {
 	my ($r, $u, $s, $firstr) = @_;
+	# no value means we did not finish the test
 	if (!defined $r) {
 		return "<missing>";
 	}
+	# a single value means we have a size, not times
+	if (!defined $u) {
+		return format_size($r, $firstr);
+	}
+	# otherwise, we have real/user/system times
 	my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;
 	$out .= ' ' . relative_change($r, $firstr) if defined $firstr;
 	return $out;
@@ -54,6 +66,25 @@ EOT
 	exit(1);
 }
 
+sub human_size {
+	my $n = shift;
+	my @units = ('', qw(K M G));
+	while ($n > 900 && @units > 1) {
+		$n /= 1000;
+		shift @units;
+	}
+	return $n unless length $units[0];
+	return sprintf '%.1f%s', $n, $units[0];
+}
+
+sub format_size {
+	my ($size, $first) = @_;
+	# match the width of a time: 0.00(0.00+0.00)
+	my $out = sprintf '%15s', human_size($size);
+	$out .= ' ' . relative_change($size, $first) if defined $first;
+	return $out;
+}
+
 my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests,
     $codespeed, $sortby, $subsection, $reponame);
 
@@ -184,7 +215,14 @@ sub print_default_results {
 		my $firstr;
 		for my $i (0..$#dirs) {
 			my $d = $dirs[$i];
-			$times{$prefixes{$d}.$t} = [get_times("$resultsdir/$prefixes{$d}$t.times")];
+			my $base = "$resultsdir/$prefixes{$d}$t";
+			$times{$prefixes{$d}.$t} = [];
+			foreach my $type (qw(times size)) {
+				if (-e "$base.$type") {
+					$times{$prefixes{$d}.$t} = [get_times("$base.$type")];
+					last;
+				}
+			}
 			my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
 			my $w = length format_times($r,$u,$s,$firstr);
 			$colwidth[$i] = $w if $w > $colwidth[$i];
diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh
index a54be09516..11d1922cf5 100644
--- a/t/perf/perf-lib.sh
+++ b/t/perf/perf-lib.sh
@@ -231,6 +231,19 @@ test_perf () {
 	test_wrapper_ test_perf_ "$@"
 }
 
+test_size_ () {
+	say >&3 "running: $2"
+	if test_eval_ "$2" 3>"$base".size; then
+		test_ok_ "$1"
+	else
+		test_failure_ "$@"
+	fi
+}
+
+test_size () {
+	test_wrapper_ test_size_ "$@"
+}
+
 # We extend test_done to print timings at the end (./run disables this
 # and does it after running everything)
 test_at_end_hook_ () {

From 198b349da808041869d16367b4e3328b0a23174f Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 17 Aug 2018 16:57:41 -0400
Subject: [PATCH 4/6] t/perf: add perf tests for fetches from a bitmapped
 server

A server with bitmapped packs can serve a clone very
quickly. However, fetches are not necessarily made any
faster, because we spend a lot less time in object traversal
(which is what bitmaps help with) and more time finding
deltas (because we may have to throw out on-disk deltas if
the client does not have the base).

As a first step to making this faster, this patch introduces
a new perf script to measure fetches into a repo of various
ages from a fully-bitmapped server.

We separately measure the work done by the server (in
pack-objects) and that done by the client (in index-pack).
Furthermore, we measure the size of the resulting pack.

Breaking it down like this (instead of just doing a regular
"git fetch") lets us see how much each side benefits from
any changes. And since we know the pack size, if we estimate
the network speed, then one could calculate a complete
wall-clock time for the operation (though the script does
not do this automatically).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/p5311-pack-bitmaps-fetch.sh | 45 ++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100755 t/perf/p5311-pack-bitmaps-fetch.sh

diff --git a/t/perf/p5311-pack-bitmaps-fetch.sh b/t/perf/p5311-pack-bitmaps-fetch.sh
new file mode 100755
index 0000000000..b04575951f
--- /dev/null
+++ b/t/perf/p5311-pack-bitmaps-fetch.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+test_description='performance of fetches from bitmapped packs'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+test_expect_success 'create bitmapped server repo' '
+	git config pack.writebitmaps true &&
+	git config pack.writebitmaphashcache true &&
+	git repack -ad
+'
+
+# simulate a fetch from a repository that last fetched N days ago, for
+# various values of N. We do so by following the first-parent chain,
+# and assume the first entry in the chain that is N days older than the current
+# HEAD is where the HEAD would have been then.
+for days in 1 2 4 8 16 32 64 128; do
+	title=$(printf '%10s' "($days days)")
+	test_expect_success "setup revs from $days days ago" '
+		now=$(git log -1 --format=%ct HEAD) &&
+		then=$(($now - ($days * 86400))) &&
+		tip=$(git rev-list -1 --first-parent --until=$then HEAD) &&
+		{
+			echo HEAD &&
+			echo ^$tip
+		} >revs
+	'
+
+	test_perf "server $title" '
+		git pack-objects --stdout --revs \
+				 --thin --delta-base-offset \
+				 <revs >tmp.pack
+	'
+
+	test_size "size   $title" '
+		wc -c <tmp.pack
+	'
+
+	test_perf "client $title" '
+		git index-pack --stdin --fix-thin <tmp.pack
+	'
+done
+
+test_done

From 30cdc33fba361528c317042535fad9d5f0a2c6e6 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 21 Aug 2018 15:07:01 -0400
Subject: [PATCH 5/6] pack-bitmap: save "have" bitmap from walk

When we do a bitmap walk, we save the result, which
represents (WANTs & ~HAVEs); i.e., every object we care
about visiting in our walk. However, we throw away the
haves bitmap, which can sometimes be useful, too. Save it
and provide an access function so code which has performed a
walk can query it.

A few notes on the accessor interface:

 - the bitmap code calls these "haves" because it grew out
   of the want/have negotiation for fetches. But really,
   these are simply the objects that would be flagged
   UNINTERESTING in a regular traversal. Let's use that
   more universal nomenclature for the external module
   interface. We may want to change the internal naming
   inside the bitmap code, but that's outside the scope of
   this patch.

 - it still uses a bare "sha1" rather than "oid". That's
   true of all of the bitmap code. And in this particular
   instance, our caller in pack-objects is dealing with the
   bare sha1 that comes from a packed REF_DELTA (we're
   pointing directly to the mmap'd pack on disk). That's
   something we'll have to deal with as we transition to a
   new hash, but we can wait and see how the caller ends up
   being fixed and adjust this interface accordingly.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pack-bitmap.c | 25 ++++++++++++++++++++++++-
 pack-bitmap.h |  7 +++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index f0a1937a1c..c3231ef9ef 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -86,6 +86,9 @@ struct bitmap_index {
 	/* Bitmap result of the last performed walk */
 	struct bitmap *result;
 
+	/* "have" bitmap from the last performed walk */
+	struct bitmap *haves;
+
 	/* Version of the bitmap index */
 	unsigned int version;
 
@@ -759,8 +762,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs)
 		bitmap_and_not(wants_bitmap, haves_bitmap);
 
 	bitmap_git->result = wants_bitmap;
+	bitmap_git->haves = haves_bitmap;
 
-	bitmap_free(haves_bitmap);
 	return bitmap_git;
 
 cleanup:
@@ -1114,5 +1117,25 @@ void free_bitmap_index(struct bitmap_index *b)
 	free(b->ext_index.objects);
 	free(b->ext_index.hashes);
 	bitmap_free(b->result);
+	bitmap_free(b->haves);
 	free(b);
 }
+
+int bitmap_has_sha1_in_uninteresting(struct bitmap_index *bitmap_git,
+				     const unsigned char *sha1)
+{
+	int pos;
+
+	if (!bitmap_git)
+		return 0; /* no bitmap loaded */
+	if (!bitmap_git->result)
+		BUG("failed to perform bitmap walk before querying");
+	if (!bitmap_git->haves)
+		return 0; /* walk had no "haves" */
+
+	pos = bitmap_position_packfile(bitmap_git, sha1);
+	if (pos < 0)
+		return 0;
+
+	return bitmap_get(bitmap_git->haves, pos);
+}
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 8a04741e12..c633bf5238 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -53,6 +53,13 @@ int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping
 			     khash_sha1 *reused_bitmaps, int show_progress);
 void free_bitmap_index(struct bitmap_index *);
 
+/*
+ * After a traversal has been performed on the bitmap_index, this can be
+ * queried to see if a particular object was reachable from any of the
+ * objects flagged as UNINTERESTING.
+ */
+int bitmap_has_sha1_in_uninteresting(struct bitmap_index *, const unsigned char *sha1);
+
 void bitmap_writer_show_progress(int show);
 void bitmap_writer_set_checksum(unsigned char *sha1);
 void bitmap_writer_build_type_index(struct packing_data *to_pack,

From 6a1e32d532c5948071e322cefc7052be6228adc3 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Tue, 21 Aug 2018 15:07:05 -0400
Subject: [PATCH 6/6] pack-objects: reuse on-disk deltas for thin "have"
 objects

When we serve a fetch, we pass the "wants" and "haves" from
the fetch negotiation to pack-objects. That tells us not
only which objects we need to send, but we also use the
boundary commits as "preferred bases": their trees and blobs
are candidates for delta bases, both for reusing on-disk
deltas and for finding new ones.

However, this misses some opportunities. Modulo some special
cases like shallow or partial clones, we know that every
object reachable from the "haves" could be a preferred base.
We don't use all of them for two reasons:

  1. It's expensive to traverse the whole history and
     enumerate all of the objects the other side has.

  2. The delta search is expensive, so we want to keep the
     number of candidate bases sane. The boundary commits
     are the most likely to work.

When we have reachability bitmaps, though, reason 1 no
longer applies. We can efficiently compute the set of
reachable objects on the other side (and in fact already did
so as part of the bitmap set-difference to get the list of
interesting objects). And using this set conveniently
covers the shallow and partial cases, since we have to
disable the use of bitmaps for those anyway.

The second reason argues against using these bases in the
search for new deltas. But there's one case where we can use
this information for free: when we have an existing on-disk
delta that we're considering reusing, we can do so if we
know the other side has the base object. This in fact saves
time during the delta search, because it's one less delta we
have to compute.

And that's exactly what this patch does: when we're
considering whether to reuse an on-disk delta, if bitmaps
tell us the other side has the object (and we're making a
thin-pack), then we reuse it.

Here are the results on p5311 using linux.git, which
simulates a client fetching after `N` days since their last
fetch:

 Test                         origin              HEAD
 --------------------------------------------------------------------------
 5311.3: server   (1 days)    0.27(0.27+0.04)     0.12(0.09+0.03) -55.6%
 5311.4: size     (1 days)               0.9M              237.0K -73.7%
 5311.5: client   (1 days)    0.04(0.05+0.00)     0.10(0.10+0.00) +150.0%
 5311.7: server   (2 days)    0.34(0.42+0.04)     0.13(0.10+0.03) -61.8%
 5311.8: size     (2 days)               1.5M              347.7K -76.5%
 5311.9: client   (2 days)    0.07(0.08+0.00)     0.16(0.15+0.01) +128.6%
 5311.11: server   (4 days)   0.56(0.77+0.08)     0.13(0.10+0.02) -76.8%
 5311.12: size     (4 days)              2.8M              566.6K -79.8%
 5311.13: client   (4 days)   0.13(0.15+0.00)     0.34(0.31+0.02) +161.5%
 5311.15: server   (8 days)   0.97(1.39+0.11)     0.30(0.25+0.05) -69.1%
 5311.16: size     (8 days)              4.3M                1.0M -76.0%
 5311.17: client   (8 days)   0.20(0.22+0.01)     0.53(0.52+0.01) +165.0%
 5311.19: server  (16 days)   1.52(2.51+0.12)     0.30(0.26+0.03) -80.3%
 5311.20: size    (16 days)              8.0M                2.0M -74.5%
 5311.21: client  (16 days)   0.40(0.47+0.03)     1.01(0.98+0.04) +152.5%
 5311.23: server  (32 days)   2.40(4.44+0.20)     0.31(0.26+0.04) -87.1%
 5311.24: size    (32 days)             14.1M                4.1M -70.9%
 5311.25: client  (32 days)   0.70(0.90+0.03)     1.81(1.75+0.06) +158.6%
 5311.27: server  (64 days)   11.76(26.57+0.29)   0.55(0.50+0.08) -95.3%
 5311.28: size    (64 days)             89.4M               47.4M -47.0%
 5311.29: client  (64 days)   5.71(9.31+0.27)     15.20(15.20+0.32) +166.2%
 5311.31: server (128 days)   16.15(36.87+0.40)   0.91(0.82+0.14) -94.4%
 5311.32: size   (128 days)            134.8M              100.4M -25.5%
 5311.33: client (128 days)   9.42(16.86+0.49)    25.34(25.80+0.46) +169.0%

In all cases we save CPU time on the server (sometimes
significant) and the resulting pack is smaller. We do spend
more CPU time on the client side, because it has to
reconstruct more deltas. But that's the right tradeoff to
make, since clients tend to outnumber servers. It just means
the thin pack mechanism is doing its job.

From the user's perspective, the end-to-end time of the
operation will generally be faster. E.g., in the 128-day
case, we saved 15s on the server at a cost of 16s on the
client. Since the resulting pack is 34MB smaller, this is a
net win if the network speed is less than 270Mbit/s. And
that's actually the worst case. The 64-day case saves just
over 11s at a cost of just under 11s. So it's a slight win
at any network speed, and the 40MB saved is pure bonus. That
trend continues for the smaller fetches.

The implementation itself is mostly straightforward, with
the new logic going into check_object(). But there are two
tricky bits.

The first is that check_object() needs access to the
relevant information (the thin flag and bitmap result). We
can do this by pushing these into program-lifetime globals.

The second is that the rest of the code assumes that any
reused delta will point to another "struct object_entry" as
its base. But of course the case we are interested in here
is the one where don't have such an entry!

I looked at a number of options that didn't quite work:

 - we could use a flag to signal a reused delta, but it's
   not a single bit. We have to actually store the oid of
   the base, which is normally done by pointing to the
   existing object_entry. And we'd have to modify all the
   code which looks at deltas.

 - we could add the reused bases to the end of the existing
   object_entry array. While this does create some extra
   work as later stages consider the extra entries, it's
   actually not too bad (we're not sending them, so they
   don't cost much in the delta search, and at most we'd
   have 2*N of them).

   But there's a more subtle problem. Adding to the existing
   array means we might need to grow it with realloc, which
   could move the earlier entries around. While many of the
   references to other entries are done by integer index,
   some (including ones on the stack) use pointers, which
   would become invalidated.

   This isn't insurmountable, but it would require quite a
   bit of refactoring (and it's hard to know that you've got
   it all, since it may work _most_ of the time and then
   fail subtly based on memory allocation patterns).

 - we could allocate a new one-off entry for the base. In
   fact, this is what an earlier version of this patch did.
   However, since the refactoring brought in by ad635e82d6
   (Merge branch 'nd/pack-objects-pack-struct', 2018-05-23),
   the delta_idx code requires that both entries be in the
   main packing list.

So taking all of those options into account, what I ended up
with is a separate list of "external bases" that are not
part of the main packing list. Each delta entry that points
to an external base has a single-bit flag to do so; we have a
little breathing room in the bitfield section of
object_entry.

This lets us limit the change primarily to the oe_delta()
and oe_set_delta_ext() functions. And as a bonus, most of
the rest of the code does not consider these dummy entries
at all, saving both runtime CPU and code complexity.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/pack-objects.c | 28 +++++++++++++++++++---------
 pack-objects.c         | 19 +++++++++++++++++++
 pack-objects.h         | 20 ++++++++++++++++++--
 3 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0d80dee2ba..1bd43432f7 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -40,6 +40,7 @@
 #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
 #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
 #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
 #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
 #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
@@ -59,6 +60,7 @@ static struct packing_data to_pack;
 
 static struct pack_idx_entry **written_list;
 static uint32_t nr_result, nr_written, nr_seen;
+static struct bitmap_index *bitmap_git;
 
 static int non_empty;
 static int reuse_delta = 1, reuse_object = 1;
@@ -79,6 +81,7 @@ static unsigned long pack_size_limit;
 static int depth = 50;
 static int delta_search_threads;
 static int pack_to_stdout;
+static int thin;
 static int num_preferred_base;
 static struct progress *progress_state;
 
@@ -1510,11 +1513,15 @@ static void check_object(struct object_entry *entry)
 			break;
 		}
 
-		if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
+		if (base_ref && (
+		    (base_entry = packlist_find(&to_pack, base_ref, NULL)) ||
+		    (thin &&
+		     bitmap_has_sha1_in_uninteresting(bitmap_git, base_ref)))) {
 			/*
 			 * If base_ref was set above that means we wish to
-			 * reuse delta data, and we even found that base
-			 * in the list of objects we want to pack. Goodie!
+			 * reuse delta data, and either we found that object in
+			 * the list of objects we want to pack, or it's one we
+			 * know the receiver has.
 			 *
 			 * Depth value does not matter - find_deltas() will
 			 * never consider reused delta as the base object to
@@ -1523,10 +1530,16 @@ static void check_object(struct object_entry *entry)
 			 */
 			oe_set_type(entry, entry->in_pack_type);
 			SET_SIZE(entry, in_pack_size); /* delta size */
-			SET_DELTA(entry, base_entry);
 			SET_DELTA_SIZE(entry, in_pack_size);
-			entry->delta_sibling_idx = base_entry->delta_child_idx;
-			SET_DELTA_CHILD(base_entry, entry);
+
+			if (base_entry) {
+				SET_DELTA(entry, base_entry);
+				entry->delta_sibling_idx = base_entry->delta_child_idx;
+				SET_DELTA_CHILD(base_entry, entry);
+			} else {
+				SET_DELTA_EXT(entry, base_ref);
+			}
+
 			unuse_pack(&w_curs);
 			return;
 		}
@@ -2954,7 +2967,6 @@ static int pack_options_allow_reuse(void)
 
 static int get_object_list_from_bitmap(struct rev_info *revs)
 {
-	struct bitmap_index *bitmap_git;
 	if (!(bitmap_git = prepare_bitmap_walk(revs)))
 		return -1;
 
@@ -2970,7 +2982,6 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
 	}
 
 	traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
-	free_bitmap_index(bitmap_git);
 	return 0;
 }
 
@@ -3118,7 +3129,6 @@ static int option_parse_unpack_unreachable(const struct option *opt,
 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 {
 	int use_internal_rev_list = 0;
-	int thin = 0;
 	int shallow = 0;
 	int all_progress_implied = 0;
 	struct argv_array rp = ARGV_ARRAY_INIT;
diff --git a/pack-objects.c b/pack-objects.c
index 92708522e7..9ae0cecd81 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -177,3 +177,22 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
 
 	return new_entry;
 }
+
+void oe_set_delta_ext(struct packing_data *pdata,
+		      struct object_entry *delta,
+		      const unsigned char *sha1)
+{
+	struct object_entry *base;
+
+	ALLOC_GROW(pdata->ext_bases, pdata->nr_ext + 1, pdata->alloc_ext);
+	base = &pdata->ext_bases[pdata->nr_ext++];
+	memset(base, 0, sizeof(*base));
+	hashcpy(base->idx.oid.hash, sha1);
+
+	/* These flags mark that we are not part of the actual pack output. */
+	base->preferred_base = 1;
+	base->filled = 1;
+
+	delta->ext_base = 1;
+	delta->delta_idx = base - pdata->ext_bases + 1;
+}
diff --git a/pack-objects.h b/pack-objects.h
index 08c6b57d49..3fd10b15c9 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -111,6 +111,7 @@ struct object_entry {
 	unsigned dfs_state:OE_DFS_STATE_BITS;
 	unsigned char in_pack_header_size;
 	unsigned depth:OE_DEPTH_BITS;
+	unsigned ext_base:1; /* delta_idx points outside packlist */
 
 	/*
 	 * pahole results on 64-bit linux (gcc and clang)
@@ -141,6 +142,14 @@ struct packing_data {
 	struct packed_git **in_pack_by_idx;
 	struct packed_git **in_pack;
 
+	/*
+	 * This list contains entries for bases which we know the other side
+	 * has (e.g., via reachability bitmaps), but which aren't in our
+	 * "objects" list.
+	 */
+	struct object_entry *ext_bases;
+	uint32_t nr_ext, alloc_ext;
+
 	uintmax_t oe_size_limit;
 };
 
@@ -228,9 +237,12 @@ static inline struct object_entry *oe_delta(
 		const struct packing_data *pack,
 		const struct object_entry *e)
 {
-	if (e->delta_idx)
+	if (!e->delta_idx)
+		return NULL;
+	if (e->ext_base)
+		return &pack->ext_bases[e->delta_idx - 1];
+	else
 		return &pack->objects[e->delta_idx - 1];
-	return NULL;
 }
 
 static inline void oe_set_delta(struct packing_data *pack,
@@ -243,6 +255,10 @@ static inline void oe_set_delta(struct packing_data *pack,
 		e->delta_idx = 0;
 }
 
+void oe_set_delta_ext(struct packing_data *pack,
+		      struct object_entry *e,
+		      const unsigned char *sha1);
+
 static inline struct object_entry *oe_delta_child(
 		const struct packing_data *pack,
 		const struct object_entry *e)