From 6fd1cc8f985ccd8b014e945a819482b267dae21f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 10 Feb 2022 13:28:09 +0100 Subject: [PATCH 1/2] fetch-pack: use commit-graph when computing cutoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During packfile negotiation we iterate over all refs announced by the remote side to check whether their IDs refer to commits already known to us. If a commit is known to us already, then its date is a potential cutoff point for commits we have in common with the remote side. There is potentially a lot of commits announced by the remote depending on how many refs there are in the remote repository, and for every one of them we need to search for it in our object database and, if found, parse the corresponding object to find out whether it is a candidate for the cutoff date. This can be sped up by trying to look up commits via the commit-graph first, which is a lot more efficient. Benchmarks in a repository with about 2,1 million refs and an up-to-date commit-graph show an almost 20% speedup when mirror-fetching: Benchmark 1: git fetch +refs/*:refs/* (v2.35.0) Time (mean ± σ): 115.587 s ± 2.009 s [User: 109.874 s, System: 11.305 s] Range (min … max): 113.584 s … 118.820 s 5 runs Benchmark 2: git fetch +refs/*:refs/* (HEAD) Time (mean ± σ): 96.859 s ± 0.624 s [User: 91.948 s, System: 10.980 s] Range (min … max): 96.180 s … 97.875 s 5 runs Summary 'git fetch +refs/*:refs/* (HEAD)' ran 1.19 ± 0.02 times faster than 'git fetch +refs/*:refs/* (v2.35.0)' Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fetch-pack.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index dd6ec449f2..c5967e228e 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -696,26 +696,30 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); for (ref = *refs; ref; ref = ref->next) { - struct object *o; + struct commit *commit; - if (!has_object_file_with_flags(&ref->old_oid, + commit = lookup_commit_in_graph(the_repository, &ref->old_oid); + if (!commit) { + struct object *o; + + if (!has_object_file_with_flags(&ref->old_oid, OBJECT_INFO_QUICK | - OBJECT_INFO_SKIP_FETCH_OBJECT)) - continue; - o = parse_object(the_repository, &ref->old_oid); - if (!o) - continue; + OBJECT_INFO_SKIP_FETCH_OBJECT)) + continue; + o = parse_object(the_repository, &ref->old_oid); + if (!o || o->type != OBJ_COMMIT) + continue; + + commit = (struct commit *)o; + } /* * We already have it -- which may mean that we were * in sync with the other side at some time after * that (it is OK if we guess wrong here). */ - if (o->type == OBJ_COMMIT) { - struct commit *commit = (struct commit *)o; - if (!cutoff || cutoff < commit->date) - cutoff = commit->date; - } + if (!cutoff || cutoff < commit->date) + cutoff = commit->date; } trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); From b18aaaa5e931d79d057f68ac0d7c3dd0377e5f03 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 10 Feb 2022 13:28:16 +0100 Subject: [PATCH 2/2] fetch: skip computing output width when not printing anything MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When updating references via git-fetch(1), then by default we report to the user which references have been changed. This output is formatted in a nice table such that the different columns are aligned. Because the first column contains abbreviated object IDs we thus need to iterate over all refs which have changed and compute the minimum length for their respective abbreviated hashes. While this effort makes sense in most cases, it is wasteful when the user passes the `--quiet` flag: we don't print the summary, but still compute the length. Skip computing the summary width when the user asked for us to be quiet. This gives us a speedup of nearly 10% when doing a mirror-fetch in a repository with thousands of references being updated: Benchmark 1: git fetch --quiet +refs/*:refs/* (HEAD~) Time (mean ± σ): 96.078 s ± 0.508 s [User: 91.378 s, System: 10.870 s] Range (min … max): 95.449 s … 96.760 s 5 runs Benchmark 2: git fetch --quiet +refs/*:refs/* (HEAD) Time (mean ± σ): 88.214 s ± 0.192 s [User: 83.274 s, System: 10.978 s] Range (min … max): 87.998 s … 88.446 s 5 runs Summary 'git fetch --quiet +refs/*:refs/* (HEAD)' ran 1.09 ± 0.01 times faster than 'git fetch --quiet +refs/*:refs/* (HEAD~)' Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fetch.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 5b3b18a72f..7ef305c66d 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1094,12 +1094,15 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, struct ref *rm; char *url; int want_status; - int summary_width = transport_summary_width(ref_map); + int summary_width = 0; rc = open_fetch_head(&fetch_head); if (rc) return -1; + if (verbosity >= 0) + summary_width = transport_summary_width(ref_map); + if (raw_url) url = transport_anonymize_url(raw_url); else @@ -1345,7 +1348,6 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, int url_len, i, result = 0; struct ref *ref, *stale_refs = get_stale_heads(rs, ref_map); char *url; - int summary_width = transport_summary_width(stale_refs); const char *dangling_msg = dry_run ? _(" (%s will become dangling)") : _(" (%s has become dangling)"); @@ -1374,6 +1376,8 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, } if (verbosity >= 0) { + int summary_width = transport_summary_width(stale_refs); + for (ref = stale_refs; ref; ref = ref->next) { struct strbuf sb = STRBUF_INIT; if (!shown_url) {