From 6462d5eb9a5b23ab9cff4e3c92ff930600562d8f Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 5 Nov 2019 10:56:19 -0800 Subject: [PATCH 1/3] fetch: remove fetch_if_missing=0 In fetch_pack() (and all functions it calls), pass OBJECT_INFO_SKIP_FETCH_OBJECT whenever we query an object that could be a tree or blob that we do not want to be lazy-fetched even if it is absent. Thus, the only lazy-fetches occurring for trees and blobs are when resolving deltas. Thus, we can remove fetch_if_missing=0 from builtin/fetch.c. Remove this, and also add a test ensuring that such objects are not lazy-fetched. (We might be able to remove fetch_if_missing=0 from other places too, but I have limited myself to builtin/fetch.c in this commit because I have not written tests for the other commands yet.) Note that commits and tags may still be lazy-fetched. I limited myself to objects that could be trees or blobs here because Git does not support creating such commit- and tag-excluding clones yet, and even if such a clone were manually created, Git does not have good support for fetching a single commit (when fetching a commit, it and all its ancestors would be sent). Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/fetch.c | 5 ++- fetch-pack.c | 3 +- t/t5616-partial-clone.sh | 70 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 863c858fde..5ff7367dd7 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1074,7 +1074,8 @@ static int check_exist_and_connected(struct ref *ref_map) * we need all direct targets to exist. */ for (r = rm; r; r = r->next) { - if (!has_object_file(&r->old_oid)) + if (!has_object_file_with_flags(&r->old_oid, + OBJECT_INFO_SKIP_FETCH_OBJECT)) return -1; } @@ -1822,8 +1823,6 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) } } - fetch_if_missing = 0; - if (remote) { if (filter_options.choice || has_promisor_remote()) fetch_one_setup_partial(remote); diff --git a/fetch-pack.c b/fetch-pack.c index 0130b44112..37178e2d34 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -673,7 +673,8 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, struct object *o; if (!has_object_file_with_flags(&ref->old_oid, - OBJECT_INFO_QUICK)) + OBJECT_INFO_QUICK | + OBJECT_INFO_SKIP_FETCH_OBJECT)) continue; o = parse_object(the_repository, &ref->old_oid); if (!o) diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 79f7b65f8c..171011d488 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -296,6 +296,76 @@ test_expect_success 'partial clone with unresolvable sparse filter fails cleanly test_i18ngrep "unable to parse sparse filter data in" err ' +setup_triangle () { + rm -rf big-blob.txt server client promisor-remote && + + printf "line %d\n" $(test_seq 1 100) >big-blob.txt && + + # Create a server with 2 commits: a commit with a big blob and a child + # commit with an incremental change. Also, create a partial clone + # client that only contains the first commit. + git init server && + git -C server config --local uploadpack.allowfilter 1 && + cp big-blob.txt server && + git -C server add big-blob.txt && + git -C server commit -m "initial" && + git clone --bare --filter=tree:0 "file://$(pwd)/server" client && + echo another line >>server/big-blob.txt && + git -C server commit -am "append line to big blob" && + + # Create a promisor remote that only contains the blob from the first + # commit, and set it as the promisor remote of client. Thus, whenever + # the client lazy fetches, the lazy fetch will succeed only if it is + # for this blob. + git init promisor-remote && + test_commit -C promisor-remote one && # so that ref advertisement is not empty + git -C promisor-remote config --local uploadpack.allowanysha1inwant 1 && + git -C promisor-remote hash-object -w --stdin hash && + grep "want $(cat hash)" trace +' + +test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' ' + setup_triangle && + + git -C server config --local protocol.version 2 && + git -C client config --local protocol.version 2 && + git -C promisor-remote config --local protocol.version 2 && + + # Exercise to make sure it works. Git will not fetch anything from the + # promisor remote other than for the big blob (because it needs to + # resolve the delta). + GIT_TRACE_PACKET="$(pwd)/trace" git -C client \ + fetch "file://$(pwd)/server" master && + + # Verify that protocol version 2 was used. + grep "fetch< version 2" trace && + + # Verify the assumption that the client needed to fetch the delta base + # to resolve the delta. + git hash-object big-blob.txt >hash && + grep "want $(cat hash)" trace +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From e362fadcd03753471cf8e7fc91d6d721b7423b8f Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 12 Nov 2019 16:34:19 -0800 Subject: [PATCH 2/3] clone: remove fetch_if_missing=0 Commit 6462d5eb9a ("fetch: remove fetch_if_missing=0", 2019-11-08) strove to remove the need for fetch_if_missing=0 from the fetching mechanism, so it is plausible to attempt removing fetch_if_missing=0 from clone as well. But doing so reveals a bug - when the server does not send an object directly pointed to by a ref, this should be an error, not a trigger for a lazy fetch. (This case in the fetching mechanism was covered by a test using "git clone", not "git fetch", which is why the aforementioned commit didn't uncover the bug.) The bug can be fixed by suppressing lazy-fetching during the connectivity check. Fix this bug, and remove fetch_if_missing from clone. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/clone.c | 3 --- connected.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index c46ee29f0a..8bf12ce5e3 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -927,8 +927,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct argv_array ref_prefixes = ARGV_ARRAY_INIT; - fetch_if_missing = 0; - packet_trace_identity("clone"); argc = parse_options(argc, argv, prefix, builtin_clone_options, builtin_clone_usage, 0); @@ -1265,7 +1263,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } junk_mode = JUNK_LEAVE_REPO; - fetch_if_missing = 1; err = checkout(submodule_progress); strbuf_release(&reflog_msg); diff --git a/connected.c b/connected.c index 36c4e5dedb..c337f5f7f4 100644 --- a/connected.c +++ b/connected.c @@ -62,7 +62,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data, * received the objects pointed to by each wanted ref. */ do { - if (!repo_has_object_file(the_repository, &oid)) + if (!repo_has_object_file_with_flags(the_repository, &oid, + OBJECT_INFO_SKIP_FETCH_OBJECT)) return 1; } while (!fn(cb_data, &oid)); return 0; From 603960b50edeb1f0afa694f2f0283e553c031129 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 12 Nov 2019 16:34:20 -0800 Subject: [PATCH 3/3] promisor-remote: remove fetch_if_missing=0 Commit 6462d5eb9a ("fetch: remove fetch_if_missing=0", 2019-11-08) strove to remove the need for fetch_if_missing=0 from the fetching mechanism, so it is plausible to attempt removing fetch_if_missing=0 from the lazy-fetching mechanism in promisor-remote as well. But doing so reveals a bug - when the server does not send an object pointed to by a tag object, an infinite loop occurs: Git attempts to fetch the missing object, which causes a deferencing of all refs (for negotiation), which causes a lazy fetch of that missing object, and so on. This bug is because of unnecessary use of the fetch negotiator during lazy fetching - it is not used after initialization, but it is still initialized (which causes the dereferencing of all refs). Thus, when the negotiator is not used during fetching, refrain from initializing it. Then, remove fetch_if_missing from promisor-remote. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- fetch-pack.c | 46 ++++++++++++++++++++++++++++++++-------------- promisor-remote.c | 3 --- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index 37178e2d34..490b111822 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -896,8 +896,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, struct object_id oid; const char *agent_feature; int agent_len; - struct fetch_negotiator negotiator; - fetch_negotiator_init(r, &negotiator); + struct fetch_negotiator negotiator_alloc; + struct fetch_negotiator *negotiator; + + if (args->no_dependents) { + negotiator = NULL; + } else { + negotiator = &negotiator_alloc; + fetch_negotiator_init(r, negotiator); + } sort_ref_list(&ref, ref_compare_name); QSORT(sought, nr_sought, cmp_ref_by_name); @@ -984,7 +991,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, die(_("Server does not support --deepen")); if (!args->no_dependents) { - mark_complete_and_common_ref(&negotiator, args, &ref); + mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); if (everything_local(args, &ref)) { packet_flush(fd[1]); @@ -993,7 +1000,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, } else { filter_refs(args, &ref, sought, nr_sought); } - if (find_common(&negotiator, args, fd, &oid, ref) < 0) + if (find_common(negotiator, args, fd, &oid, ref) < 0) if (!args->keep_pack) /* When cloning, it is not unusual to have * no common commit. @@ -1013,7 +1020,8 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, die(_("git fetch-pack: fetch failed.")); all_done: - negotiator.release(&negotiator); + if (negotiator) + negotiator->release(negotiator); return ref; } @@ -1231,7 +1239,8 @@ static int process_acks(struct fetch_negotiator *negotiator, struct commit *commit; oidset_insert(common, &oid); commit = lookup_commit(the_repository, &oid); - negotiator->ack(negotiator, commit); + if (negotiator) + negotiator->ack(negotiator, commit); } continue; } @@ -1383,8 +1392,16 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct packet_reader reader; int in_vain = 0, negotiation_started = 0; int haves_to_send = INITIAL_FLUSH; - struct fetch_negotiator negotiator; - fetch_negotiator_init(r, &negotiator); + struct fetch_negotiator negotiator_alloc; + struct fetch_negotiator *negotiator; + + if (args->no_dependents) { + negotiator = NULL; + } else { + negotiator = &negotiator_alloc; + fetch_negotiator_init(r, negotiator); + } + packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); @@ -1408,15 +1425,15 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, /* Filter 'ref' by 'sought' and those that aren't local */ if (!args->no_dependents) { - mark_complete_and_common_ref(&negotiator, args, &ref); + mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); if (everything_local(args, &ref)) state = FETCH_DONE; else state = FETCH_SEND_REQUEST; - mark_tips(&negotiator, args->negotiation_tips); - for_each_cached_alternate(&negotiator, + mark_tips(negotiator, args->negotiation_tips); + for_each_cached_alternate(negotiator, insert_one_alternate_object); } else { filter_refs(args, &ref, sought, nr_sought); @@ -1430,7 +1447,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, "negotiation_v2", the_repository); } - if (send_fetch_request(&negotiator, fd[1], args, ref, + if (send_fetch_request(negotiator, fd[1], args, ref, &common, &haves_to_send, &in_vain, reader.use_sideband)) @@ -1440,7 +1457,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, break; case FETCH_PROCESS_ACKS: /* Process ACKs/NAKs */ - switch (process_acks(&negotiator, &reader, &common)) { + switch (process_acks(negotiator, &reader, &common)) { case 2: state = FETCH_GET_PACK; break; @@ -1475,7 +1492,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, } } - negotiator.release(&negotiator); + if (negotiator) + negotiator->release(negotiator); oidset_clear(&common); return ref; } diff --git a/promisor-remote.c b/promisor-remote.c index 9bd5b79d59..9f338c945f 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -16,10 +16,8 @@ static int fetch_refs(const char *remote_name, struct ref *ref) { struct remote *remote; struct transport *transport; - int original_fetch_if_missing = fetch_if_missing; int res; - fetch_if_missing = 0; remote = remote_get(remote_name); if (!remote->url[0]) die(_("Remote with no URL")); @@ -28,7 +26,6 @@ static int fetch_refs(const char *remote_name, struct ref *ref) transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); res = transport_fetch_refs(transport, ref); - fetch_if_missing = original_fetch_if_missing; return res; }