From a70a9bf6eec832b808a805de318601684b19f18e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 16 Aug 2024 12:44:57 +0200 Subject: [PATCH 1/7] config: fix constness of out parameter for `git_config_get_expiry()` The type of the out parameter of `git_config_get_expiry()` is a pointer to a constant string, which creates the impression that ownership of the returned data wasn't transferred to the caller. This isn't true though and thus quite misleading. Adapt the parameter to be of type `char **` and adjust callers accordingly. While at it, refactor `get_shared_index_expire_date()` to drop the static `shared_index_expire` variable. It is only used in that function, and furthermore we would only hit the code where we parse the expiry date a single time because we already use a static `prepared` variable to track whether we did parse it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 6 +++--- config.c | 4 ++-- config.h | 2 +- read-cache.c | 12 +++++++++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 72bac2554f..e7406bf667 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -167,9 +167,9 @@ static void gc_config(void) git_config_get_bool("gc.autodetach", &detach_auto); git_config_get_bool("gc.cruftpacks", &cruft_packs); git_config_get_ulong("gc.maxcruftsize", &max_cruft_size); - git_config_get_expiry("gc.pruneexpire", &prune_expire); - git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire); - git_config_get_expiry("gc.logexpiry", &gc_log_expire); + git_config_get_expiry("gc.pruneexpire", (char **) &prune_expire); + git_config_get_expiry("gc.worktreepruneexpire", (char **) &prune_worktrees_expire); + git_config_get_expiry("gc.logexpiry", (char **) &gc_log_expire); git_config_get_ulong("gc.bigpackthreshold", &big_pack_threshold); git_config_get_ulong("pack.deltacachesize", &max_delta_cache_size); diff --git a/config.c b/config.c index 6421894614..dfa4df1417 100644 --- a/config.c +++ b/config.c @@ -2766,9 +2766,9 @@ int git_config_get_pathname(const char *key, char **dest) return repo_config_get_pathname(the_repository, key, dest); } -int git_config_get_expiry(const char *key, const char **output) +int git_config_get_expiry(const char *key, char **output) { - int ret = git_config_get_string(key, (char **)output); + int ret = git_config_get_string(key, output); if (ret) return ret; if (strcmp(*output, "now")) { diff --git a/config.h b/config.h index 54b47dec9e..4801391c32 100644 --- a/config.h +++ b/config.h @@ -701,7 +701,7 @@ int git_config_get_split_index(void); int git_config_get_max_percent_split_change(void); /* This dies if the configured or default date is in the future */ -int git_config_get_expiry(const char *key, const char **output); +int git_config_get_expiry(const char *key, char **output); /* parse either "this many days" integer, or "5.days.ago" approxidate */ int git_config_get_expiry_in_days(const char *key, timestamp_t *, timestamp_t now); diff --git a/read-cache.c b/read-cache.c index 48bf24f87c..7f393ee687 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3176,18 +3176,24 @@ static int write_split_index(struct index_state *istate, return ret; } -static const char *shared_index_expire = "2.weeks.ago"; - static unsigned long get_shared_index_expire_date(void) { static unsigned long shared_index_expire_date; static int shared_index_expire_date_prepared; if (!shared_index_expire_date_prepared) { + const char *shared_index_expire = "2.weeks.ago"; + char *value = NULL; + git_config_get_expiry("splitindex.sharedindexexpire", - &shared_index_expire); + &value); + if (value) + shared_index_expire = value; + shared_index_expire_date = approxidate(shared_index_expire); shared_index_expire_date_prepared = 1; + + free(value); } return shared_index_expire_date; From d1ae15d68b44af4ef6e6caedb7aa2b2dcce03c02 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 16 Aug 2024 12:45:00 +0200 Subject: [PATCH 2/7] builtin/gc: refactor to read config into structure The git-gc(1) command knows to read a bunch of config keys to tweak its own behaviour. The values are parsed into global variables, which makes it hard to correctly manage the lifecycle of values that may require a memory allocation. Refactor the code to use a `struct gc_config` that gets populated and passed around. For one, this makes previously-implicit dependencies on these config values clear. Second, it will allow us to properly manage the lifecycle in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 255 +++++++++++++++++++++++++++++---------------------- 1 file changed, 143 insertions(+), 112 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index e7406bf667..eee7401647 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -49,23 +49,7 @@ static const char * const builtin_gc_usage[] = { NULL }; -static int pack_refs = 1; -static int prune_reflogs = 1; -static int cruft_packs = 1; -static unsigned long max_cruft_size; -static int aggressive_depth = 50; -static int aggressive_window = 250; -static int gc_auto_threshold = 6700; -static int gc_auto_pack_limit = 50; -static int detach_auto = 1; static timestamp_t gc_log_expire_time; -static const char *gc_log_expire = "1.day.ago"; -static const char *prune_expire = "2.weeks.ago"; -static const char *prune_worktrees_expire = "3.months.ago"; -static char *repack_filter; -static char *repack_filter_to; -static unsigned long big_pack_threshold; -static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE; static struct strvec reflog = STRVEC_INIT; static struct strvec repack = STRVEC_INIT; @@ -145,37 +129,71 @@ static int gc_config_is_timestamp_never(const char *var) return 0; } -static void gc_config(void) +struct gc_config { + int pack_refs; + int prune_reflogs; + int cruft_packs; + unsigned long max_cruft_size; + int aggressive_depth; + int aggressive_window; + int gc_auto_threshold; + int gc_auto_pack_limit; + int detach_auto; + const char *gc_log_expire; + const char *prune_expire; + const char *prune_worktrees_expire; + char *repack_filter; + char *repack_filter_to; + unsigned long big_pack_threshold; + unsigned long max_delta_cache_size; +}; + +#define GC_CONFIG_INIT { \ + .pack_refs = 1, \ + .prune_reflogs = 1, \ + .cruft_packs = 1, \ + .aggressive_depth = 50, \ + .aggressive_window = 250, \ + .gc_auto_threshold = 6700, \ + .gc_auto_pack_limit = 50, \ + .detach_auto = 1, \ + .gc_log_expire = "1.day.ago", \ + .prune_expire = "2.weeks.ago", \ + .prune_worktrees_expire = "3.months.ago", \ + .max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \ +} + +static void gc_config(struct gc_config *cfg) { const char *value; if (!git_config_get_value("gc.packrefs", &value)) { if (value && !strcmp(value, "notbare")) - pack_refs = -1; + cfg->pack_refs = -1; else - pack_refs = git_config_bool("gc.packrefs", value); + cfg->pack_refs = git_config_bool("gc.packrefs", value); } if (gc_config_is_timestamp_never("gc.reflogexpire") && gc_config_is_timestamp_never("gc.reflogexpireunreachable")) - prune_reflogs = 0; + cfg->prune_reflogs = 0; - git_config_get_int("gc.aggressivewindow", &aggressive_window); - git_config_get_int("gc.aggressivedepth", &aggressive_depth); - git_config_get_int("gc.auto", &gc_auto_threshold); - git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit); - git_config_get_bool("gc.autodetach", &detach_auto); - git_config_get_bool("gc.cruftpacks", &cruft_packs); - git_config_get_ulong("gc.maxcruftsize", &max_cruft_size); - git_config_get_expiry("gc.pruneexpire", (char **) &prune_expire); - git_config_get_expiry("gc.worktreepruneexpire", (char **) &prune_worktrees_expire); - git_config_get_expiry("gc.logexpiry", (char **) &gc_log_expire); + git_config_get_int("gc.aggressivewindow", &cfg->aggressive_window); + git_config_get_int("gc.aggressivedepth", &cfg->aggressive_depth); + git_config_get_int("gc.auto", &cfg->gc_auto_threshold); + git_config_get_int("gc.autopacklimit", &cfg->gc_auto_pack_limit); + git_config_get_bool("gc.autodetach", &cfg->detach_auto); + git_config_get_bool("gc.cruftpacks", &cfg->cruft_packs); + git_config_get_ulong("gc.maxcruftsize", &cfg->max_cruft_size); + git_config_get_expiry("gc.pruneexpire", (char **) &cfg->prune_expire); + git_config_get_expiry("gc.worktreepruneexpire", (char **) &cfg->prune_worktrees_expire); + git_config_get_expiry("gc.logexpiry", (char **) &cfg->gc_log_expire); - git_config_get_ulong("gc.bigpackthreshold", &big_pack_threshold); - git_config_get_ulong("pack.deltacachesize", &max_delta_cache_size); + git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold); + git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size); - git_config_get_string("gc.repackfilter", &repack_filter); - git_config_get_string("gc.repackfilterto", &repack_filter_to); + git_config_get_string("gc.repackfilter", &cfg->repack_filter); + git_config_get_string("gc.repackfilterto", &cfg->repack_filter_to); git_config(git_default_config, NULL); } @@ -206,7 +224,7 @@ struct maintenance_run_opts { enum schedule_priority schedule; }; -static int pack_refs_condition(void) +static int pack_refs_condition(UNUSED struct gc_config *cfg) { /* * The auto-repacking logic for refs is handled by the ref backends and @@ -216,7 +234,8 @@ static int pack_refs_condition(void) return 1; } -static int maintenance_task_pack_refs(MAYBE_UNUSED struct maintenance_run_opts *opts) +static int maintenance_task_pack_refs(MAYBE_UNUSED struct maintenance_run_opts *opts, + UNUSED struct gc_config *cfg) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -228,7 +247,7 @@ static int maintenance_task_pack_refs(MAYBE_UNUSED struct maintenance_run_opts * return run_command(&cmd); } -static int too_many_loose_objects(void) +static int too_many_loose_objects(struct gc_config *cfg) { /* * Quickly check if a "gc" is needed, by estimating how @@ -247,7 +266,7 @@ static int too_many_loose_objects(void) if (!dir) return 0; - auto_threshold = DIV_ROUND_UP(gc_auto_threshold, 256); + auto_threshold = DIV_ROUND_UP(cfg->gc_auto_threshold, 256); while ((ent = readdir(dir)) != NULL) { if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || ent->d_name[hexsz_loose] != '\0') @@ -283,12 +302,12 @@ static struct packed_git *find_base_packs(struct string_list *packs, return base; } -static int too_many_packs(void) +static int too_many_packs(struct gc_config *cfg) { struct packed_git *p; int cnt; - if (gc_auto_pack_limit <= 0) + if (cfg->gc_auto_pack_limit <= 0) return 0; for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) { @@ -302,7 +321,7 @@ static int too_many_packs(void) */ cnt++; } - return gc_auto_pack_limit < cnt; + return cfg->gc_auto_pack_limit < cnt; } static uint64_t total_ram(void) @@ -336,7 +355,8 @@ static uint64_t total_ram(void) return 0; } -static uint64_t estimate_repack_memory(struct packed_git *pack) +static uint64_t estimate_repack_memory(struct gc_config *cfg, + struct packed_git *pack) { unsigned long nr_objects = repo_approximate_object_count(the_repository); size_t os_cache, heap; @@ -373,7 +393,7 @@ static uint64_t estimate_repack_memory(struct packed_git *pack) */ heap += delta_base_cache_limit; /* and of course pack-objects has its own delta cache */ - heap += max_delta_cache_size; + heap += cfg->max_delta_cache_size; return os_cache + heap; } @@ -384,30 +404,31 @@ static int keep_one_pack(struct string_list_item *item, void *data UNUSED) return 0; } -static void add_repack_all_option(struct string_list *keep_pack) +static void add_repack_all_option(struct gc_config *cfg, + struct string_list *keep_pack) { - if (prune_expire && !strcmp(prune_expire, "now")) + if (cfg->prune_expire && !strcmp(cfg->prune_expire, "now")) strvec_push(&repack, "-a"); - else if (cruft_packs) { + else if (cfg->cruft_packs) { strvec_push(&repack, "--cruft"); - if (prune_expire) - strvec_pushf(&repack, "--cruft-expiration=%s", prune_expire); - if (max_cruft_size) + if (cfg->prune_expire) + strvec_pushf(&repack, "--cruft-expiration=%s", cfg->prune_expire); + if (cfg->max_cruft_size) strvec_pushf(&repack, "--max-cruft-size=%lu", - max_cruft_size); + cfg->max_cruft_size); } else { strvec_push(&repack, "-A"); - if (prune_expire) - strvec_pushf(&repack, "--unpack-unreachable=%s", prune_expire); + if (cfg->prune_expire) + strvec_pushf(&repack, "--unpack-unreachable=%s", cfg->prune_expire); } if (keep_pack) for_each_string_list(keep_pack, keep_one_pack, NULL); - if (repack_filter && *repack_filter) - strvec_pushf(&repack, "--filter=%s", repack_filter); - if (repack_filter_to && *repack_filter_to) - strvec_pushf(&repack, "--filter-to=%s", repack_filter_to); + if (cfg->repack_filter && *cfg->repack_filter) + strvec_pushf(&repack, "--filter=%s", cfg->repack_filter); + if (cfg->repack_filter_to && *cfg->repack_filter_to) + strvec_pushf(&repack, "--filter-to=%s", cfg->repack_filter_to); } static void add_repack_incremental_option(void) @@ -415,13 +436,13 @@ static void add_repack_incremental_option(void) strvec_push(&repack, "--no-write-bitmap-index"); } -static int need_to_gc(void) +static int need_to_gc(struct gc_config *cfg) { /* * Setting gc.auto to 0 or negative can disable the * automatic gc. */ - if (gc_auto_threshold <= 0) + if (cfg->gc_auto_threshold <= 0) return 0; /* @@ -430,13 +451,13 @@ static int need_to_gc(void) * we run "repack -A -d -l". Otherwise we tell the caller * there is no need. */ - if (too_many_packs()) { + if (too_many_packs(cfg)) { struct string_list keep_pack = STRING_LIST_INIT_NODUP; - if (big_pack_threshold) { - find_base_packs(&keep_pack, big_pack_threshold); - if (keep_pack.nr >= gc_auto_pack_limit) { - big_pack_threshold = 0; + if (cfg->big_pack_threshold) { + find_base_packs(&keep_pack, cfg->big_pack_threshold); + if (keep_pack.nr >= cfg->gc_auto_pack_limit) { + cfg->big_pack_threshold = 0; string_list_clear(&keep_pack, 0); find_base_packs(&keep_pack, 0); } @@ -445,7 +466,7 @@ static int need_to_gc(void) uint64_t mem_have, mem_want; mem_have = total_ram(); - mem_want = estimate_repack_memory(p); + mem_want = estimate_repack_memory(cfg, p); /* * Only allow 1/2 of memory for pack-objects, leave @@ -456,9 +477,9 @@ static int need_to_gc(void) string_list_clear(&keep_pack, 0); } - add_repack_all_option(&keep_pack); + add_repack_all_option(cfg, &keep_pack); string_list_clear(&keep_pack, 0); - } else if (too_many_loose_objects()) + } else if (too_many_loose_objects(cfg)) add_repack_incremental_option(); else return 0; @@ -585,7 +606,8 @@ done: return ret; } -static void gc_before_repack(struct maintenance_run_opts *opts) +static void gc_before_repack(struct maintenance_run_opts *opts, + struct gc_config *cfg) { /* * We may be called twice, as both the pre- and @@ -596,10 +618,10 @@ static void gc_before_repack(struct maintenance_run_opts *opts) if (done++) return; - if (pack_refs && maintenance_task_pack_refs(opts)) + if (cfg->pack_refs && maintenance_task_pack_refs(opts, cfg)) die(FAILED_RUN, "pack-refs"); - if (prune_reflogs) { + if (cfg->prune_reflogs) { struct child_process cmd = CHILD_PROCESS_INIT; cmd.git_cmd = 1; @@ -621,14 +643,15 @@ int cmd_gc(int argc, const char **argv, const char *prefix) timestamp_t dummy; struct child_process rerere_cmd = CHILD_PROCESS_INIT; struct maintenance_run_opts opts = {0}; + struct gc_config cfg = GC_CONFIG_INIT; struct option builtin_gc_options[] = { OPT__QUIET(&quiet, N_("suppress progress reporting")), - { OPTION_STRING, 0, "prune", &prune_expire, N_("date"), + { OPTION_STRING, 0, "prune", &cfg.prune_expire, N_("date"), N_("prune unreferenced objects"), - PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire }, - OPT_BOOL(0, "cruft", &cruft_packs, N_("pack unreferenced objects separately")), - OPT_MAGNITUDE(0, "max-cruft-size", &max_cruft_size, + PARSE_OPT_OPTARG, NULL, (intptr_t)cfg.prune_expire }, + OPT_BOOL(0, "cruft", &cfg.cruft_packs, N_("pack unreferenced objects separately")), + OPT_MAGNITUDE(0, "max-cruft-size", &cfg.max_cruft_size, N_("with --cruft, limit the size of new cruft packs")), OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")), OPT_BOOL_F(0, "auto", &opts.auto_flag, N_("enable auto-gc mode"), @@ -651,27 +674,27 @@ int cmd_gc(int argc, const char **argv, const char *prefix) strvec_pushl(&rerere, "rerere", "gc", NULL); /* default expiry time, overwritten in gc_config */ - gc_config(); - if (parse_expiry_date(gc_log_expire, &gc_log_expire_time)) - die(_("failed to parse gc.logExpiry value %s"), gc_log_expire); + gc_config(&cfg); + if (parse_expiry_date(cfg.gc_log_expire, &gc_log_expire_time)) + die(_("failed to parse gc.logExpiry value %s"), cfg.gc_log_expire); - if (pack_refs < 0) - pack_refs = !is_bare_repository(); + if (cfg.pack_refs < 0) + cfg.pack_refs = !is_bare_repository(); argc = parse_options(argc, argv, prefix, builtin_gc_options, builtin_gc_usage, 0); if (argc > 0) usage_with_options(builtin_gc_usage, builtin_gc_options); - if (prune_expire && parse_expiry_date(prune_expire, &dummy)) - die(_("failed to parse prune expiry value %s"), prune_expire); + if (cfg.prune_expire && parse_expiry_date(cfg.prune_expire, &dummy)) + die(_("failed to parse prune expiry value %s"), cfg.prune_expire); if (aggressive) { strvec_push(&repack, "-f"); - if (aggressive_depth > 0) - strvec_pushf(&repack, "--depth=%d", aggressive_depth); - if (aggressive_window > 0) - strvec_pushf(&repack, "--window=%d", aggressive_window); + if (cfg.aggressive_depth > 0) + strvec_pushf(&repack, "--depth=%d", cfg.aggressive_depth); + if (cfg.aggressive_window > 0) + strvec_pushf(&repack, "--window=%d", cfg.aggressive_window); } if (quiet) strvec_push(&repack, "-q"); @@ -680,16 +703,16 @@ int cmd_gc(int argc, const char **argv, const char *prefix) /* * Auto-gc should be least intrusive as possible. */ - if (!need_to_gc()) + if (!need_to_gc(&cfg)) return 0; if (!quiet) { - if (detach_auto) + if (cfg.detach_auto) fprintf(stderr, _("Auto packing the repository in background for optimum performance.\n")); else fprintf(stderr, _("Auto packing the repository for optimum performance.\n")); fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n")); } - if (detach_auto) { + if (cfg.detach_auto) { int ret = report_last_gc_error(); if (ret == 1) @@ -701,7 +724,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (lock_repo_for_gc(force, &pid)) return 0; - gc_before_repack(&opts); /* dies on failure */ + gc_before_repack(&opts, &cfg); /* dies on failure */ delete_tempfile(&pidfile); /* @@ -716,11 +739,11 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (keep_largest_pack != -1) { if (keep_largest_pack) find_base_packs(&keep_pack, 0); - } else if (big_pack_threshold) { - find_base_packs(&keep_pack, big_pack_threshold); + } else if (cfg.big_pack_threshold) { + find_base_packs(&keep_pack, cfg.big_pack_threshold); } - add_repack_all_option(&keep_pack); + add_repack_all_option(&cfg, &keep_pack); string_list_clear(&keep_pack, 0); } @@ -741,7 +764,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) atexit(process_log_file_at_exit); } - gc_before_repack(&opts); + gc_before_repack(&opts, &cfg); if (!repository_format_precious_objects) { struct child_process repack_cmd = CHILD_PROCESS_INIT; @@ -752,11 +775,11 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (run_command(&repack_cmd)) die(FAILED_RUN, repack.v[0]); - if (prune_expire) { + if (cfg.prune_expire) { struct child_process prune_cmd = CHILD_PROCESS_INIT; /* run `git prune` even if using cruft packs */ - strvec_push(&prune, prune_expire); + strvec_push(&prune, cfg.prune_expire); if (quiet) strvec_push(&prune, "--no-progress"); if (repo_has_promisor_remote(the_repository)) @@ -769,10 +792,10 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } } - if (prune_worktrees_expire) { + if (cfg.prune_worktrees_expire) { struct child_process prune_worktrees_cmd = CHILD_PROCESS_INIT; - strvec_push(&prune_worktrees, prune_worktrees_expire); + strvec_push(&prune_worktrees, cfg.prune_worktrees_expire); prune_worktrees_cmd.git_cmd = 1; strvec_pushv(&prune_worktrees_cmd.args, prune_worktrees.v); if (run_command(&prune_worktrees_cmd)) @@ -796,7 +819,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) !quiet && !daemonized ? COMMIT_GRAPH_WRITE_PROGRESS : 0, NULL); - if (opts.auto_flag && too_many_loose_objects()) + if (opts.auto_flag && too_many_loose_objects(&cfg)) warning(_("There are too many unreachable loose objects; " "run 'git prune' to remove them.")); @@ -892,7 +915,7 @@ static int dfs_on_ref(const char *refname UNUSED, return result; } -static int should_write_commit_graph(void) +static int should_write_commit_graph(struct gc_config *cfg) { int result; struct cg_auto_data data; @@ -929,7 +952,8 @@ static int run_write_commit_graph(struct maintenance_run_opts *opts) return !!run_command(&child); } -static int maintenance_task_commit_graph(struct maintenance_run_opts *opts) +static int maintenance_task_commit_graph(struct maintenance_run_opts *opts, + struct gc_config *cfg) { prepare_repo_settings(the_repository); if (!the_repository->settings.core_commit_graph) @@ -963,7 +987,8 @@ static int fetch_remote(struct remote *remote, void *cbdata) return !!run_command(&child); } -static int maintenance_task_prefetch(struct maintenance_run_opts *opts) +static int maintenance_task_prefetch(struct maintenance_run_opts *opts, + struct gc_config *cfg) { if (for_each_remote(fetch_remote, opts)) { error(_("failed to prefetch remotes")); @@ -973,7 +998,8 @@ static int maintenance_task_prefetch(struct maintenance_run_opts *opts) return 0; } -static int maintenance_task_gc(struct maintenance_run_opts *opts) +static int maintenance_task_gc(struct maintenance_run_opts *opts, + struct gc_config *cfg) { struct child_process child = CHILD_PROCESS_INIT; @@ -1021,7 +1047,7 @@ static int loose_object_count(const struct object_id *oid UNUSED, return 0; } -static int loose_object_auto_condition(void) +static int loose_object_auto_condition(struct gc_config *cfg) { int count = 0; @@ -1106,12 +1132,13 @@ static int pack_loose(struct maintenance_run_opts *opts) return result; } -static int maintenance_task_loose_objects(struct maintenance_run_opts *opts) +static int maintenance_task_loose_objects(struct maintenance_run_opts *opts, + struct gc_config *cfg) { return prune_packed(opts) || pack_loose(opts); } -static int incremental_repack_auto_condition(void) +static int incremental_repack_auto_condition(struct gc_config *cfg) { struct packed_git *p; int incremental_repack_auto_limit = 10; @@ -1230,7 +1257,8 @@ static int multi_pack_index_repack(struct maintenance_run_opts *opts) return 0; } -static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts) +static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts, + struct gc_config *cfg) { prepare_repo_settings(the_repository); if (!the_repository->settings.core_multi_pack_index) { @@ -1247,14 +1275,15 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts return 0; } -typedef int maintenance_task_fn(struct maintenance_run_opts *opts); +typedef int maintenance_task_fn(struct maintenance_run_opts *opts, + struct gc_config *cfg); /* * An auto condition function returns 1 if the task should run * and 0 if the task should NOT run. See needs_to_gc() for an * example. */ -typedef int maintenance_auto_fn(void); +typedef int maintenance_auto_fn(struct gc_config *cfg); struct maintenance_task { const char *name; @@ -1321,7 +1350,8 @@ static int compare_tasks_by_selection(const void *a_, const void *b_) return b->selected_order - a->selected_order; } -static int maintenance_run_tasks(struct maintenance_run_opts *opts) +static int maintenance_run_tasks(struct maintenance_run_opts *opts, + struct gc_config *cfg) { int i, found_selected = 0; int result = 0; @@ -1360,14 +1390,14 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) if (opts->auto_flag && (!tasks[i].auto_condition || - !tasks[i].auto_condition())) + !tasks[i].auto_condition(cfg))) continue; if (opts->schedule && tasks[i].schedule < opts->schedule) continue; trace2_region_enter("maintenance", tasks[i].name, r); - if (tasks[i].fn(opts)) { + if (tasks[i].fn(opts, cfg)) { error(_("task '%s' failed"), tasks[i].name); result = 1; } @@ -1404,7 +1434,6 @@ static void initialize_task_config(int schedule) { int i; struct strbuf config_name = STRBUF_INIT; - gc_config(); if (schedule) initialize_maintenance_strategy(); @@ -1468,6 +1497,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) { int i; struct maintenance_run_opts opts; + struct gc_config cfg = GC_CONFIG_INIT; struct option builtin_maintenance_run_options[] = { OPT_BOOL(0, "auto", &opts.auto_flag, N_("run tasks based on the state of the repository")), @@ -1496,12 +1526,13 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) if (opts.auto_flag && opts.schedule) die(_("use at most one of --auto and --schedule=")); + gc_config(&cfg); initialize_task_config(opts.schedule); if (argc != 0) usage_with_options(builtin_maintenance_run_usage, builtin_maintenance_run_options); - return maintenance_run_tasks(&opts); + return maintenance_run_tasks(&opts, &cfg); } static char *get_maintpath(void) From 0ce44e2293352bc3636d93c156fbc2a1ebac0e45 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 16 Aug 2024 12:45:03 +0200 Subject: [PATCH 3/7] builtin/gc: fix leaking config values We're leaking config values in git-gc(1) when those values are tracked as strings. Introduce a new `gc_config_release()` function that releases this memory to plug those leaks and release old values before populating the config fields via `git_config_string()` et al. Note that there is one small gotcha here with the "--prune" option. Next to passing a string, this option also accepts the "--no-prune" option that overrides the default or configured value. We thus need to discern between the option not having been passed by the user and the negative variant of it. This is done by using a simple sentinel value that lets us discern these cases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 110 +++++++++++++++++++++++++++++++++++------------ t/t5304-prune.sh | 1 + 2 files changed, 83 insertions(+), 28 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index eee7401647..a93cfa147e 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -139,9 +139,9 @@ struct gc_config { int gc_auto_threshold; int gc_auto_pack_limit; int detach_auto; - const char *gc_log_expire; - const char *prune_expire; - const char *prune_worktrees_expire; + char *gc_log_expire; + char *prune_expire; + char *prune_worktrees_expire; char *repack_filter; char *repack_filter_to; unsigned long big_pack_threshold; @@ -157,15 +157,25 @@ struct gc_config { .gc_auto_threshold = 6700, \ .gc_auto_pack_limit = 50, \ .detach_auto = 1, \ - .gc_log_expire = "1.day.ago", \ - .prune_expire = "2.weeks.ago", \ - .prune_worktrees_expire = "3.months.ago", \ + .gc_log_expire = xstrdup("1.day.ago"), \ + .prune_expire = xstrdup("2.weeks.ago"), \ + .prune_worktrees_expire = xstrdup("3.months.ago"), \ .max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \ } +static void gc_config_release(struct gc_config *cfg) +{ + free(cfg->gc_log_expire); + free(cfg->prune_expire); + free(cfg->prune_worktrees_expire); + free(cfg->repack_filter); + free(cfg->repack_filter_to); +} + static void gc_config(struct gc_config *cfg) { const char *value; + char *owned = NULL; if (!git_config_get_value("gc.packrefs", &value)) { if (value && !strcmp(value, "notbare")) @@ -185,15 +195,34 @@ static void gc_config(struct gc_config *cfg) git_config_get_bool("gc.autodetach", &cfg->detach_auto); git_config_get_bool("gc.cruftpacks", &cfg->cruft_packs); git_config_get_ulong("gc.maxcruftsize", &cfg->max_cruft_size); - git_config_get_expiry("gc.pruneexpire", (char **) &cfg->prune_expire); - git_config_get_expiry("gc.worktreepruneexpire", (char **) &cfg->prune_worktrees_expire); - git_config_get_expiry("gc.logexpiry", (char **) &cfg->gc_log_expire); + + if (!git_config_get_expiry("gc.pruneexpire", &owned)) { + free(cfg->prune_expire); + cfg->prune_expire = owned; + } + + if (!git_config_get_expiry("gc.worktreepruneexpire", &owned)) { + free(cfg->prune_worktrees_expire); + cfg->prune_worktrees_expire = owned; + } + + if (!git_config_get_expiry("gc.logexpiry", &owned)) { + free(cfg->gc_log_expire); + cfg->gc_log_expire = owned; + } git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold); git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size); - git_config_get_string("gc.repackfilter", &cfg->repack_filter); - git_config_get_string("gc.repackfilterto", &cfg->repack_filter_to); + if (!git_config_get_string("gc.repackfilter", &owned)) { + free(cfg->repack_filter); + cfg->repack_filter = owned; + } + + if (!git_config_get_string("gc.repackfilterto", &owned)) { + free(cfg->repack_filter_to); + cfg->repack_filter_to = owned; + } git_config(git_default_config, NULL); } @@ -644,12 +673,15 @@ int cmd_gc(int argc, const char **argv, const char *prefix) struct child_process rerere_cmd = CHILD_PROCESS_INIT; struct maintenance_run_opts opts = {0}; struct gc_config cfg = GC_CONFIG_INIT; + const char *prune_expire_sentinel = "sentinel"; + const char *prune_expire_arg = prune_expire_sentinel; + int ret; struct option builtin_gc_options[] = { OPT__QUIET(&quiet, N_("suppress progress reporting")), - { OPTION_STRING, 0, "prune", &cfg.prune_expire, N_("date"), + { OPTION_STRING, 0, "prune", &prune_expire_arg, N_("date"), N_("prune unreferenced objects"), - PARSE_OPT_OPTARG, NULL, (intptr_t)cfg.prune_expire }, + PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire_arg }, OPT_BOOL(0, "cruft", &cfg.cruft_packs, N_("pack unreferenced objects separately")), OPT_MAGNITUDE(0, "max-cruft-size", &cfg.max_cruft_size, N_("with --cruft, limit the size of new cruft packs")), @@ -673,8 +705,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) strvec_pushl(&prune_worktrees, "worktree", "prune", "--expire", NULL); strvec_pushl(&rerere, "rerere", "gc", NULL); - /* default expiry time, overwritten in gc_config */ gc_config(&cfg); + if (parse_expiry_date(cfg.gc_log_expire, &gc_log_expire_time)) die(_("failed to parse gc.logExpiry value %s"), cfg.gc_log_expire); @@ -686,6 +718,10 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (argc > 0) usage_with_options(builtin_gc_usage, builtin_gc_options); + if (prune_expire_arg != prune_expire_sentinel) { + free(cfg.prune_expire); + cfg.prune_expire = xstrdup_or_null(prune_expire_arg); + } if (cfg.prune_expire && parse_expiry_date(cfg.prune_expire, &dummy)) die(_("failed to parse prune expiry value %s"), cfg.prune_expire); @@ -703,8 +739,11 @@ int cmd_gc(int argc, const char **argv, const char *prefix) /* * Auto-gc should be least intrusive as possible. */ - if (!need_to_gc(&cfg)) - return 0; + if (!need_to_gc(&cfg)) { + ret = 0; + goto out; + } + if (!quiet) { if (cfg.detach_auto) fprintf(stderr, _("Auto packing the repository in background for optimum performance.\n")); @@ -713,17 +752,22 @@ int cmd_gc(int argc, const char **argv, const char *prefix) fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n")); } if (cfg.detach_auto) { - int ret = report_last_gc_error(); - - if (ret == 1) + ret = report_last_gc_error(); + if (ret == 1) { /* Last gc --auto failed. Skip this one. */ - return 0; - else if (ret) - /* an I/O error occurred, already reported */ - return ret; + ret = 0; + goto out; + + } else if (ret) { + /* an I/O error occurred, already reported */ + goto out; + } + + if (lock_repo_for_gc(force, &pid)) { + ret = 0; + goto out; + } - if (lock_repo_for_gc(force, &pid)) - return 0; gc_before_repack(&opts, &cfg); /* dies on failure */ delete_tempfile(&pidfile); @@ -749,8 +793,11 @@ int cmd_gc(int argc, const char **argv, const char *prefix) name = lock_repo_for_gc(force, &pid); if (name) { - if (opts.auto_flag) - return 0; /* be quiet on --auto */ + if (opts.auto_flag) { + ret = 0; + goto out; /* be quiet on --auto */ + } + die(_("gc is already running on machine '%s' pid %"PRIuMAX" (use --force if not)"), name, (uintmax_t)pid); } @@ -826,6 +873,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (!daemonized) unlink(git_path("gc.log")); +out: + gc_config_release(&cfg); return 0; } @@ -1511,6 +1560,8 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) PARSE_OPT_NONEG, task_option_parse), OPT_END() }; + int ret; + memset(&opts, 0, sizeof(opts)); opts.quiet = !isatty(2); @@ -1532,7 +1583,10 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) if (argc != 0) usage_with_options(builtin_maintenance_run_usage, builtin_maintenance_run_options); - return maintenance_run_tasks(&opts, &cfg); + + ret = maintenance_run_tasks(&opts, &cfg); + gc_config_release(&cfg); + return ret; } static char *get_maintpath(void) diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh index 1f1f664871..e641df0116 100755 --- a/t/t5304-prune.sh +++ b/t/t5304-prune.sh @@ -7,6 +7,7 @@ test_description='prune' GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh day=$((60*60*24)) From 9b6b994f90f4427a0ddef38594036055e7b0efa1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 16 Aug 2024 12:45:06 +0200 Subject: [PATCH 4/7] builtin/gc: stop processing log file on signal When detaching, git-gc(1) will redirect its stderr to a "gc.log" log file, which is then used to surface errors of a backgrounded process to the user. To ensure that the file is properly managed on abnormal exit paths, we install both signal and exit handlers that try to either commit the underlying lock file or roll it back in case there wasn't any error. This logic is severly broken when handling signals though, as we end up calling all kinds of functions that are not signal safe. This includes malloc(3P) via `git_path()`, fprintf(3P), fflush(3P) and many more functions. The consequence can be anything, from deadlocks to crashes. Unfortunately, we cannot really do much about this without a larger refactoring. The least-worst thing we can do is to not set up the signal handler in the first place. This will still cause us to remove the lockfile, as the underlying tempfile subsystem already knows to unlink locks when receiving a signal. But it may cause us to remove the lock even in the case where it would have contained actual errors, which is a change in behaviour. The consequence is that "gc.log" will not be committed, and thus subsequent calls to `git gc --auto` won't bail out because of this. Arguably though, it is better to retry garbage collection rather than having the process run into a potentially-corrupted state. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index a93cfa147e..f815557081 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -109,13 +109,6 @@ static void process_log_file_at_exit(void) process_log_file(); } -static void process_log_file_on_signal(int signo) -{ - process_log_file(); - sigchain_pop(signo); - raise(signo); -} - static int gc_config_is_timestamp_never(const char *var) { const char *value; @@ -807,7 +800,6 @@ int cmd_gc(int argc, const char **argv, const char *prefix) git_path("gc.log"), LOCK_DIE_ON_ERROR); dup2(get_lock_file_fd(&log_lock), 2); - sigchain_push_common(process_log_file_on_signal); atexit(process_log_file_at_exit); } From c7185df01bb68091d3bdea2fe192f4b34e169c9d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 16 Aug 2024 12:45:11 +0200 Subject: [PATCH 5/7] builtin/gc: add a `--detach` flag When running `git gc --auto`, the command will by default detach and continue running in the background. This behaviour can be tweaked via the `gc.autoDetach` config, but not via a command line switch. We need that in a subsequent commit though, where git-maintenance(1) will want to ask its git-gc(1) child process to not detach anymore. Add a `--[no-]detach` flag that does this for us. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-gc.txt | 5 ++- builtin/gc.c | 70 ++++++++++++++++++++++------------------ t/t6500-gc.sh | 45 +++++++++++++++++++++++--- 3 files changed, 84 insertions(+), 36 deletions(-) diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index b5561c458a..370e22faae 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -9,7 +9,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository SYNOPSIS -------- [verse] -'git gc' [--aggressive] [--auto] [--quiet] [--prune= | --no-prune] [--force] [--keep-largest-pack] +'git gc' [--aggressive] [--auto] [--[no-]detach] [--quiet] [--prune= | --no-prune] [--force] [--keep-largest-pack] DESCRIPTION ----------- @@ -53,6 +53,9 @@ configuration options such as `gc.auto` and `gc.autoPackLimit`, all other housekeeping tasks (e.g. rerere, working trees, reflog...) will be performed as well. +--[no-]detach:: + Run in the background if the system supports it. This option overrides + the `gc.autoDetach` config. --[no-]cruft:: When expiring unreachable objects, pack them separately into a diff --git a/builtin/gc.c b/builtin/gc.c index f815557081..269a77960f 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -242,9 +242,13 @@ static enum schedule_priority parse_schedule(const char *value) struct maintenance_run_opts { int auto_flag; + int detach; int quiet; enum schedule_priority schedule; }; +#define MAINTENANCE_RUN_OPTS_INIT { \ + .detach = -1, \ +} static int pack_refs_condition(UNUSED struct gc_config *cfg) { @@ -664,7 +668,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) int keep_largest_pack = -1; timestamp_t dummy; struct child_process rerere_cmd = CHILD_PROCESS_INIT; - struct maintenance_run_opts opts = {0}; + struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT; struct gc_config cfg = GC_CONFIG_INIT; const char *prune_expire_sentinel = "sentinel"; const char *prune_expire_arg = prune_expire_sentinel; @@ -681,6 +685,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")), OPT_BOOL_F(0, "auto", &opts.auto_flag, N_("enable auto-gc mode"), PARSE_OPT_NOCOMPLETE), + OPT_BOOL(0, "detach", &opts.detach, + N_("perform garbage collection in the background")), OPT_BOOL_F(0, "force", &force, N_("force running gc even if there may be another gc running"), PARSE_OPT_NOCOMPLETE), @@ -729,6 +735,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) strvec_push(&repack, "-q"); if (opts.auto_flag) { + if (cfg.detach_auto && opts.detach < 0) + opts.detach = 1; + /* * Auto-gc should be least intrusive as possible. */ @@ -738,38 +747,12 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } if (!quiet) { - if (cfg.detach_auto) + if (opts.detach > 0) fprintf(stderr, _("Auto packing the repository in background for optimum performance.\n")); else fprintf(stderr, _("Auto packing the repository for optimum performance.\n")); fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n")); } - if (cfg.detach_auto) { - ret = report_last_gc_error(); - if (ret == 1) { - /* Last gc --auto failed. Skip this one. */ - ret = 0; - goto out; - - } else if (ret) { - /* an I/O error occurred, already reported */ - goto out; - } - - if (lock_repo_for_gc(force, &pid)) { - ret = 0; - goto out; - } - - gc_before_repack(&opts, &cfg); /* dies on failure */ - delete_tempfile(&pidfile); - - /* - * failure to daemonize is ok, we'll continue - * in foreground - */ - daemonized = !daemonize(); - } } else { struct string_list keep_pack = STRING_LIST_INIT_NODUP; @@ -784,6 +767,33 @@ int cmd_gc(int argc, const char **argv, const char *prefix) string_list_clear(&keep_pack, 0); } + if (opts.detach > 0) { + ret = report_last_gc_error(); + if (ret == 1) { + /* Last gc --auto failed. Skip this one. */ + ret = 0; + goto out; + + } else if (ret) { + /* an I/O error occurred, already reported */ + goto out; + } + + if (lock_repo_for_gc(force, &pid)) { + ret = 0; + goto out; + } + + gc_before_repack(&opts, &cfg); /* dies on failure */ + delete_tempfile(&pidfile); + + /* + * failure to daemonize is ok, we'll continue + * in foreground + */ + daemonized = !daemonize(); + } + name = lock_repo_for_gc(force, &pid); if (name) { if (opts.auto_flag) { @@ -1537,7 +1547,7 @@ static int task_option_parse(const struct option *opt UNUSED, static int maintenance_run(int argc, const char **argv, const char *prefix) { int i; - struct maintenance_run_opts opts; + struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT; struct gc_config cfg = GC_CONFIG_INIT; struct option builtin_maintenance_run_options[] = { OPT_BOOL(0, "auto", &opts.auto_flag, @@ -1554,8 +1564,6 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) }; int ret; - memset(&opts, 0, sizeof(opts)); - opts.quiet = !isatty(2); for (i = 0; i < TASK__COUNT; i++) diff --git a/t/t6500-gc.sh b/t/t6500-gc.sh index 1b5909d1b7..5378455968 100755 --- a/t/t6500-gc.sh +++ b/t/t6500-gc.sh @@ -338,14 +338,14 @@ test_expect_success 'gc.maxCruftSize sets appropriate repack options' ' test_subcommand $cruft_max_size_opts --max-cruft-size=3145728 &1) + doesnt_matter=$(git gc "$@" 9>&1) } test_expect_success 'background auto gc does not run if gc.log is present and recent but does if it is old' ' @@ -361,7 +361,7 @@ test_expect_success 'background auto gc does not run if gc.log is present and re test-tool chmtime =-345600 .git/gc.log && git gc --auto && test_config gc.logexpiry 2.days && - run_and_wait_for_auto_gc && + run_and_wait_for_gc --auto && ls .git/objects/pack/pack-*.pack >packs && test_line_count = 1 packs ' @@ -391,11 +391,48 @@ test_expect_success 'background auto gc respects lock for all operations' ' printf "%d %s" "$shell_pid" "$hostname" >.git/gc.pid && # our gc should exit zero without doing anything - run_and_wait_for_auto_gc && + run_and_wait_for_gc --auto && (ls -1 .git/refs/heads .git/reftable >actual || true) && test_cmp expect actual ' +test_expect_success '--detach overrides gc.autoDetach=false' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + # Prepare the repository such that git-gc(1) ends up repacking. + test_commit "$(test_oid blob17_1)" && + test_commit "$(test_oid blob17_2)" && + git config gc.autodetach false && + git config gc.auto 2 && + + # Note that we cannot use `test_cmp` here to compare stderr + # because it may contain output from `set -x`. + run_and_wait_for_gc --auto --detach 2>actual && + test_grep "Auto packing the repository in background for optimum performance." actual + ) +' + +test_expect_success '--no-detach overrides gc.autoDetach=true' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + # Prepare the repository such that git-gc(1) ends up repacking. + test_commit "$(test_oid blob17_1)" && + test_commit "$(test_oid blob17_2)" && + git config gc.autodetach true && + git config gc.auto 2 && + + GIT_PROGRESS_DELAY=0 git gc --auto --no-detach 2>output && + test_grep "Auto packing the repository for optimum performance." output && + test_grep "Collecting referenced commits: 2, done." output + ) +' + # DO NOT leave a detached auto gc process running near the end of the # test script: it can run long enough in the background to racily # interfere with the cleanup in 'test_done'. From a6affd3343f143e5983036150bb8e95eb336cbc7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 16 Aug 2024 12:45:15 +0200 Subject: [PATCH 6/7] builtin/maintenance: add a `--detach` flag Same as the preceding commit, add a `--[no-]detach` flag to the git-maintenance(1) command. This will be used in a subsequent commit to fix backgrounding of that command when configured with a non-standard set of tasks. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 6 ++++++ t/t7900-maintenance.sh | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/builtin/gc.c b/builtin/gc.c index 269a77960f..63106e2028 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1426,6 +1426,10 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts, } free(lock_path); + /* Failure to daemonize is ok, we'll continue in foreground. */ + if (opts->detach > 0) + daemonize(); + for (i = 0; !found_selected && i < TASK__COUNT; i++) found_selected = tasks[i].selected_order >= 0; @@ -1552,6 +1556,8 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) struct option builtin_maintenance_run_options[] = { OPT_BOOL(0, "auto", &opts.auto_flag, N_("run tasks based on the state of the repository")), + OPT_BOOL(0, "detach", &opts.detach, + N_("perform maintenance in the background")), OPT_CALLBACK(0, "schedule", &opts.schedule, N_("frequency"), N_("run tasks based on frequency"), maintenance_opt_schedule), diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 8595489ceb..771525aa4b 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -908,4 +908,43 @@ test_expect_success 'failed schedule prevents config change' ' done ' +test_expect_success '--no-detach causes maintenance to not run in background' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + # Prepare the repository such that git-maintenance(1) ends up + # outputting something. + test_commit something && + git config set maintenance.gc.enabled false && + git config set maintenance.loose-objects.enabled true && + git config set maintenance.loose-objects.auto 1 && + git config set maintenance.incremental-repack.enabled true && + + # We have no better way to check whether or not the task ran in + # the background than to verify whether it output anything. The + # next testcase checks the reverse, making this somewhat safer. + git maintenance run --no-detach >out 2>&1 && + test_line_count = 1 out + ) +' + +test_expect_success '--detach causes maintenance to run in background' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + test_commit something && + git config set maintenance.gc.enabled false && + git config set maintenance.loose-objects.enabled true && + git config set maintenance.loose-objects.auto 1 && + git config set maintenance.incremental-repack.enabled true && + + git maintenance run --detach >out 2>&1 && + test_must_be_empty out + ) +' + test_done From 98077d06b28b97d508c389886ee5014056707a5e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 16 Aug 2024 12:45:17 +0200 Subject: [PATCH 7/7] run-command: fix detaching when running auto maintenance In the past, we used to execute `git gc --auto` as part of our automatic housekeeping routines. As git-gc(1) may require quite some time to perform the housekeeping, it knows to detach itself and run in the background so that the user can continue their work. Eventually, we refactored our automatic housekeeping to instead use the more flexible git-maintenance(1) command. The upside of this new infra is that the user can configure which maintenance tasks are performed, at least to a certain degree. So while it continues to run git-gc(1) by default, it can also be adapted to e.g. use git-multi-pack-index(1) for maintenance of the object database. The auto-detach of the new infra is somewhat broken though once the user configures non-standard tasks. The problem is essentially that we detach at the wrong level in the process hierarchy: git-maintenance(1) never detaches itself, but instead it continues to be git-gc(1) which does. When configured to only run the git-gc(1) maintenance task, then the result is basically the same as before. But when configured to run other tasks, then git-maintenance(1) will wait for these to run to completion. Even worse, it may be that git-gc(1) runs concurrently with other housekeeping tasks, stomping on each others feet. Fix this bug by asking git-gc(1) to not detach when it is being invoked via git-maintenance(1). Instead, git-maintenance(1) now respects a new config "maintenance.autoDetach", the equivalent of "gc.autoDetach", and detaches itself into the background when running as part of our auto maintenance. This should continue to behave the same for all users which use the git-gc(1) task, only. For others though, it means that we now properly perform all tasks in the background. The default behaviour of git-maintenance(1) when executed by the user does not change, it will remain in the foreground unless they pass the `--detach` option. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/config/gc.txt | 3 +- Documentation/config/maintenance.txt | 11 +++++++ builtin/gc.c | 1 + run-command.c | 12 +++++++- t/t5616-partial-clone.sh | 6 ++-- t/t7900-maintenance.sh | 43 ++++++++++++++++++++++------ 6 files changed, 62 insertions(+), 14 deletions(-) diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt index 664a3c2874..1d4f9470ea 100644 --- a/Documentation/config/gc.txt +++ b/Documentation/config/gc.txt @@ -40,7 +40,8 @@ use, it'll affect how the auto pack limit works. gc.autoDetach:: Make `git gc --auto` return immediately and run in the background - if the system supports it. Default is true. + if the system supports it. Default is true. This config variable acts + as a fallback in case `maintenance.autoDetach` is not set. gc.bigPackThreshold:: If non-zero, all non-cruft packs larger than this limit are kept diff --git a/Documentation/config/maintenance.txt b/Documentation/config/maintenance.txt index 69a4f05153..72a9d6cf81 100644 --- a/Documentation/config/maintenance.txt +++ b/Documentation/config/maintenance.txt @@ -3,6 +3,17 @@ maintenance.auto:: `git maintenance run --auto` after doing their normal work. Defaults to true. +maintenance.autoDetach:: + Many Git commands trigger automatic maintenance after they have + written data into the repository. This boolean config option + controls whether this automatic maintenance shall happen in the + foreground or whether the maintenance process shall detach and + continue to run in the background. ++ +If unset, the value of `gc.autoDetach` is used as a fallback. Defaults +to true if both are unset, meaning that the maintenance process will +detach. + maintenance.strategy:: This string config option provides a way to specify one of a few recommended schedules for background maintenance. This only affects diff --git a/builtin/gc.c b/builtin/gc.c index 63106e2028..bafee330a2 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1063,6 +1063,7 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts, strvec_push(&child.args, "--quiet"); else strvec_push(&child.args, "--no-quiet"); + strvec_push(&child.args, "--no-detach"); return run_command(&child); } diff --git a/run-command.c b/run-command.c index 45ba544932..94f2f3079f 100644 --- a/run-command.c +++ b/run-command.c @@ -1808,16 +1808,26 @@ void run_processes_parallel(const struct run_process_parallel_opts *opts) int prepare_auto_maintenance(int quiet, struct child_process *maint) { - int enabled; + int enabled, auto_detach; if (!git_config_get_bool("maintenance.auto", &enabled) && !enabled) return 0; + /* + * When `maintenance.autoDetach` isn't set, then we fall back to + * honoring `gc.autoDetach`. This is somewhat weird, but required to + * retain behaviour from when we used to run git-gc(1) here. + */ + if (git_config_get_bool("maintenance.autodetach", &auto_detach) && + git_config_get_bool("gc.autodetach", &auto_detach)) + auto_detach = 1; + maint->git_cmd = 1; maint->close_object_store = 1; strvec_pushl(&maint->args, "maintenance", "run", "--auto", NULL); strvec_push(&maint->args, quiet ? "--quiet" : "--no-quiet"); + strvec_push(&maint->args, auto_detach ? "--detach" : "--no-detach"); return 1; } diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 2da7291e37..8415884754 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -229,7 +229,7 @@ test_expect_success 'fetch --refetch triggers repacking' ' GIT_TRACE2_EVENT="$PWD/trace1.event" \ git -C pc1 fetch --refetch origin && - test_subcommand git maintenance run --auto --no-quiet /dev/null && GIT_TRACE2_EVENT="$(pwd)/run-no-quiet.txt" \ git maintenance run --no-quiet 2>/dev/null && - test_subcommand git gc --quiet ' ' git maintenance run --task=commit-graph 2>/dev/null && GIT_TRACE2_EVENT="$(pwd)/run-both.txt" \ git maintenance run --task=commit-graph --task=gc 2>/dev/null && - test_subcommand ! git gc --quiet