clone: add clone.<url>.defaultObjectFilter config

Add a new configuration option that lets users specify a default
partial clone filter per URL pattern.  When cloning a repository
whose URL matches a configured pattern, git-clone automatically
applies the filter, equivalent to passing --filter on the command
line.

    [clone "https://github.com/"]
        defaultObjectFilter = blob:limit=5m

    [clone "https://internal.corp.com/large-project/"]
        defaultObjectFilter = blob:none

URL matching uses the existing urlmatch_config_entry() infrastructure,
following the same rules as http.<url>.* — you can match a domain,
a namespace path, or a specific project, and the most specific match
wins.

The config only affects the initial clone.  Once the clone completes,
the filter is recorded in remote.<name>.partialCloneFilter, so
subsequent fetches inherit it automatically.  An explicit --filter
flag on the command line takes precedence.

Only the URL-qualified form (clone.<url>.defaultObjectFilter) is
honored; a bare clone.defaultObjectFilter without a URL subsection
is ignored.

Signed-off-by: Alan Braithwaite <alan@braithwaite.dev>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Alan Braithwaite
2026-03-05 00:57:31 +00:00
committed by Junio C Hamano
parent 795c338de7
commit 5d76a10449
3 changed files with 167 additions and 0 deletions

View File

@@ -21,3 +21,29 @@ endif::[]
If a partial clone filter is provided (see `--filter` in
linkgit:git-rev-list[1]) and `--recurse-submodules` is used, also apply
the filter to submodules.
`clone.<url>.defaultObjectFilter`::
When set to a filter spec string (e.g., `blob:limit=1m`,
`blob:none`, `tree:0`), linkgit:git-clone[1] will automatically
use `--filter=<value>` when the clone URL matches `<url>`.
Objects matching the filter are excluded from the initial
transfer and lazily fetched on demand (e.g., during checkout).
Subsequent fetches inherit the filter via the per-remote config
that is written during the clone.
+
The URL matching follows the same rules as `http.<url>.*` (see
linkgit:git-config[1]). The most specific URL match wins. You can
match a complete domain, a namespace, or a specific project:
+
----
[clone "https://github.com/"]
defaultObjectFilter = blob:limit=5m
[clone "https://internal.corp.com/large-project/"]
defaultObjectFilter = blob:none
----
+
An explicit `--filter` option on the command line takes precedence
over this config. This setting only affects the initial clone; it has
no effect on later fetches into an existing repository. If the server
does not support object filtering, the setting is silently ignored.

View File

@@ -44,6 +44,7 @@
#include "path.h"
#include "pkt-line.h"
#include "list-objects-filter-options.h"
#include "urlmatch.h"
#include "hook.h"
#include "bundle.h"
#include "bundle-uri.h"
@@ -759,6 +760,65 @@ static int git_clone_config(const char *k, const char *v,
return git_default_config(k, v, ctx, cb);
}
/*
 * Callback state used while looking up clone.<url>.defaultObjectFilter.
 */
struct clone_filter_data {
	/* Filter spec collected from the config, or NULL if none was seen. */
	char *default_object_filter;
};
/*
 * Config callback that records the value of "clone.defaultobjectfilter".
 *
 * `cb` must point to a struct clone_filter_data.  Each time the key is
 * seen, the previously stored string (if any) is freed and replaced by
 * a freshly allocated copy of `value`.  Any other key is ignored.
 *
 * Returns 0 on success.  When the key is present without a value, the
 * result of config_error_nonbool() is returned instead of storing
 * anything.
 */
static int clone_filter_collect(const char *var, const char *value,
				const struct config_context *ctx UNUSED,
				void *cb)
{
	struct clone_filter_data *data = cb;

	if (strcmp(var, "clone.defaultobjectfilter"))
		return 0;

	/*
	 * A key written without "= <value>" is handed to config callbacks
	 * as a NULL value; passing that to xstrdup() would crash, so
	 * report it as a config error instead.
	 */
	if (!value)
		return config_error_nonbool(var);

	free(data->default_object_filter);
	data->default_object_filter = xstrdup(value);
	return 0;
}
/*
* Look up clone.<url>.defaultObjectFilter using the urlmatch
* infrastructure. Only URL-qualified forms are supported; a bare
* clone.defaultObjectFilter (without a URL) is ignored.
*/
static char *get_default_object_filter(const char *url)
{
struct urlmatch_config config = URLMATCH_CONFIG_INIT;
struct clone_filter_data data = { 0 };
struct string_list_item *item;
char *normalized_url;
config.section = "clone";
config.key = "defaultobjectfilter";
config.collect_fn = clone_filter_collect;
config.cascade_fn = git_clone_config;
config.cb = &data;
normalized_url = url_normalize(url, &config.url);
repo_config(the_repository, urlmatch_config_entry, &config);
free(normalized_url);
/*
* Reject the bare form clone.defaultObjectFilter (no URL
* subsection). urlmatch stores the best match in vars with
* hostmatch_len == 0 for non-URL-qualified entries; discard
* the result if that is what we got.
*/
item = string_list_lookup(&config.vars, "defaultobjectfilter");
if (item) {
const struct urlmatch_item *m = item->util;
if (!m->hostmatch_len && !m->pathmatch_len) {
FREE_AND_NULL(data.default_object_filter);
}
}
urlmatch_config_release(&config);
return data.default_object_filter;
}
static int write_one_config(const char *key, const char *value,
const struct config_context *ctx,
void *data)
@@ -1059,6 +1119,14 @@ int cmd_clone(int argc,
} else
die(_("repository '%s' does not exist"), repo_name);
if (!filter_options.choice) {
char *config_filter = get_default_object_filter(repo);
if (config_filter) {
parse_list_objects_filter(&filter_options, config_filter);
free(config_filter);
}
}
/* no need to be strict, transport_set_option() will validate it again */
if (option_depth && atoi(option_depth) < 1)
die(_("depth %s is not a positive number"), option_depth);

View File

@@ -723,6 +723,79 @@ test_expect_success 'after fetching descendants of non-promisor commits, gc work
git -C partial gc --prune=now
'
# Test clone.<url>.defaultObjectFilter config
# Build a source repository holding one small and one 100 KiB file, then
# publish it as a bare repository that accepts filtered fetches.
test_expect_success 'setup for clone.defaultObjectFilter tests' '
	git init default-filter-src &&
	(
		cd default-filter-src &&
		echo "small" >small.txt &&
		dd if=/dev/zero of=large.bin bs=1024 count=100 2>/dev/null &&
		git add . &&
		git commit -m "initial"
	) &&
	git clone --bare "file://$(pwd)/default-filter-src" default-filter-srv.bare &&
	git -C default-filter-srv.bare config --local uploadpack.allowfilter 1 &&
	git -C default-filter-srv.bare config --local uploadpack.allowanysha1inwant 1
'
# A URL-qualified defaultObjectFilter must turn the clone into a partial
# clone and record the (normalized) filter for the remote.
# test_cmp_config is used instead of test "$(git config ...)" so that a
# failing "git config" is noticed and a mismatch prints a diff.
test_expect_success 'clone with clone.<url>.defaultObjectFilter applies filter' '
	SERVER_URL="file://$(pwd)/default-filter-srv.bare" &&
	git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" clone \
		"$SERVER_URL" default-filter-clone &&
	test_cmp_config -C default-filter-clone true \
		--local remote.origin.promisor &&
	test_cmp_config -C default-filter-clone blob:limit=1024 \
		--local remote.origin.partialclonefilter
'
# An explicit --filter on the command line must win over the configured
# default.  test_cmp_config is used instead of test "$(git config ...)"
# so that a failing "git config" is noticed and a mismatch prints a diff.
test_expect_success 'clone with --filter overrides clone.<url>.defaultObjectFilter' '
	SERVER_URL="file://$(pwd)/default-filter-srv.bare" &&
	git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" \
		clone --filter=blob:none "$SERVER_URL" default-filter-override &&
	test_cmp_config -C default-filter-override blob:none \
		--local remote.origin.partialclonefilter
'
# The configured default also works with the blob:none filter spec.
# test_cmp_config is used instead of test "$(git config ...)" so that a
# failing "git config" is noticed and a mismatch prints a diff.
test_expect_success 'clone with clone.<url>.defaultObjectFilter=blob:none works' '
	SERVER_URL="file://$(pwd)/default-filter-srv.bare" &&
	git -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" clone \
		"$SERVER_URL" default-filter-blobnone &&
	test_cmp_config -C default-filter-blobnone true \
		--local remote.origin.promisor &&
	test_cmp_config -C default-filter-blobnone blob:none \
		--local remote.origin.partialclonefilter
'
# The configured default also works with the tree:0 filter spec.
# test_cmp_config is used instead of test "$(git config ...)" so that a
# failing "git config" is noticed and a mismatch prints a diff.
test_expect_success 'clone.<url>.defaultObjectFilter with tree:0 works' '
	SERVER_URL="file://$(pwd)/default-filter-srv.bare" &&
	git -c "clone.$SERVER_URL.defaultObjectFilter=tree:0" clone \
		"$SERVER_URL" default-filter-tree0 &&
	test_cmp_config -C default-filter-tree0 true \
		--local remote.origin.promisor &&
	test_cmp_config -C default-filter-tree0 tree:0 \
		--local remote.origin.partialclonefilter
'
# Two patterns match the clone URL; the longer (more specific) one must
# supply the filter.  test_cmp_config is used instead of
# test "$(git config ...)" so that a failing "git config" is noticed and
# a mismatch prints a diff.
test_expect_success 'most specific URL match wins for clone.defaultObjectFilter' '
	SERVER_URL="file://$(pwd)/default-filter-srv.bare" &&
	git \
		-c "clone.file://.defaultObjectFilter=blob:limit=1k" \
		-c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \
		clone "$SERVER_URL" default-filter-url-specific &&
	test_cmp_config -C default-filter-url-specific blob:none \
		--local remote.origin.partialclonefilter
'
# A pattern for an unrelated host must leave the clone unfiltered, i.e.
# no promisor setting is written for the remote.
test_expect_success 'non-matching URL does not apply clone.defaultObjectFilter' '
	SERVER_URL="file://$(pwd)/default-filter-srv.bare" &&
	git -c "clone.https://other.example.com/.defaultObjectFilter=blob:none" \
		clone "$SERVER_URL" default-filter-url-nomatch &&
	test_must_fail git -C default-filter-url-nomatch \
		config --local remote.origin.promisor
'
# The key without a URL subsection must not enable filtering, i.e. no
# promisor setting is written for the remote.
test_expect_success 'bare clone.defaultObjectFilter without URL is ignored' '
	SERVER_URL="file://$(pwd)/default-filter-srv.bare" &&
	git -c clone.defaultObjectFilter=blob:none \
		clone "$SERVER_URL" default-filter-bare-key &&
	test_must_fail git -C default-filter-bare-key \
		config --local remote.origin.promisor
'
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd