mirror of
https://github.com/git/git.git
synced 2026-01-11 02:32:58 +00:00
A few commands that parse --expire=<time> command line option behave
sillily when given nonsense input. For example
$ git prune --no-expire
Segmentation fault
$ git prune --expire=npw; echo $?
129
Both come from parse_opt_expiry_date_cb().
The former is because the function is not prepared to see arg==NULL
(for "--no-expire", that is the norm; "--expire" at the end of the
command line could be made to pass NULL if the option were declared to
take an optional argument, but we do not do that, so we do not have to
worry about that case).
The latter is because it does not check the value returned from the
underlying parse_expiry_date().
This seems to be a recent regression introduced while we attempted
to avoid spewing the entire usage message when given a correct
option but with an invalid value at 3bb0923f ("parse-options: do not
show usage upon invalid option value", 2018-03-22). Before that, we
didn't fail silently but showed the full usage help (which arguably is
not all that much better).
Also catch this error early when "git gc --prune=<expiration>" is
misspelled by doing a dummy parsing before the main body of "gc"
that is time consuming even begins. Otherwise, we'd spend time to
pack objects and then later have "git prune" first notice the error.
Aborting "gc" in the middle that way is not harmful but is ugly and
can be avoided.
Helped-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
493 lines
12 KiB
C
493 lines
12 KiB
C
/*
|
|
* git gc builtin command
|
|
*
|
|
* Cleanup unreachable files and optimize the repository.
|
|
*
|
|
* Copyright (c) 2007 James Bowes
|
|
*
|
|
* Based on git-gc.sh, which is
|
|
*
|
|
* Copyright (c) 2006 Shawn O. Pearce
|
|
*/
|
|
|
|
#include "builtin.h"
|
|
#include "config.h"
|
|
#include "tempfile.h"
|
|
#include "lockfile.h"
|
|
#include "parse-options.h"
|
|
#include "run-command.h"
|
|
#include "sigchain.h"
|
|
#include "argv-array.h"
|
|
#include "commit.h"
|
|
#include "packfile.h"
|
|
|
|
#define FAILED_RUN "failed to run %s"
|
|
|
|
static const char * const builtin_gc_usage[] = {
|
|
N_("git gc [<options>]"),
|
|
NULL
|
|
};
|
|
|
|
static int pack_refs = 1;
|
|
static int prune_reflogs = 1;
|
|
static int aggressive_depth = 50;
|
|
static int aggressive_window = 250;
|
|
static int gc_auto_threshold = 6700;
|
|
static int gc_auto_pack_limit = 50;
|
|
static int detach_auto = 1;
|
|
static timestamp_t gc_log_expire_time;
|
|
static const char *gc_log_expire = "1.day.ago";
|
|
static const char *prune_expire = "2.weeks.ago";
|
|
static const char *prune_worktrees_expire = "3.months.ago";
|
|
|
|
static struct argv_array pack_refs_cmd = ARGV_ARRAY_INIT;
|
|
static struct argv_array reflog = ARGV_ARRAY_INIT;
|
|
static struct argv_array repack = ARGV_ARRAY_INIT;
|
|
static struct argv_array prune = ARGV_ARRAY_INIT;
|
|
static struct argv_array prune_worktrees = ARGV_ARRAY_INIT;
|
|
static struct argv_array rerere = ARGV_ARRAY_INIT;
|
|
|
|
static struct tempfile *pidfile;
|
|
static struct lock_file log_lock;
|
|
|
|
static struct string_list pack_garbage = STRING_LIST_INIT_DUP;
|
|
|
|
static void clean_pack_garbage(void)
|
|
{
|
|
int i;
|
|
for (i = 0; i < pack_garbage.nr; i++)
|
|
unlink_or_warn(pack_garbage.items[i].string);
|
|
string_list_clear(&pack_garbage, 0);
|
|
}
|
|
|
|
static void report_pack_garbage(unsigned seen_bits, const char *path)
|
|
{
|
|
if (seen_bits == PACKDIR_FILE_IDX)
|
|
string_list_append(&pack_garbage, path);
|
|
}
|
|
|
|
static void process_log_file(void)
|
|
{
|
|
struct stat st;
|
|
if (fstat(get_lock_file_fd(&log_lock), &st)) {
|
|
/*
|
|
* Perhaps there was an i/o error or another
|
|
* unlikely situation. Try to make a note of
|
|
* this in gc.log along with any existing
|
|
* messages.
|
|
*/
|
|
int saved_errno = errno;
|
|
fprintf(stderr, _("Failed to fstat %s: %s"),
|
|
get_tempfile_path(log_lock.tempfile),
|
|
strerror(saved_errno));
|
|
fflush(stderr);
|
|
commit_lock_file(&log_lock);
|
|
errno = saved_errno;
|
|
} else if (st.st_size) {
|
|
/* There was some error recorded in the lock file */
|
|
commit_lock_file(&log_lock);
|
|
} else {
|
|
/* No error, clean up any old gc.log */
|
|
unlink(git_path("gc.log"));
|
|
rollback_lock_file(&log_lock);
|
|
}
|
|
}
|
|
|
|
/*
 * atexit() handler: flush stderr first, so that process_log_file()
 * sees everything written so far when it checks the log size.
 */
static void process_log_file_at_exit(void)
{
	fflush(stderr);

	process_log_file();
}
|
|
|
|
/*
 * Signal handler: settle the gc.log lock file, pop this handler off
 * the signal chain, then raise the same signal again.
 */
static void process_log_file_on_signal(int signo)
{
	process_log_file();

	sigchain_pop(signo);
	raise(signo);
}
|
|
|
|
static void gc_config(void)
|
|
{
|
|
const char *value;
|
|
|
|
if (!git_config_get_value("gc.packrefs", &value)) {
|
|
if (value && !strcmp(value, "notbare"))
|
|
pack_refs = -1;
|
|
else
|
|
pack_refs = git_config_bool("gc.packrefs", value);
|
|
}
|
|
|
|
git_config_get_int("gc.aggressivewindow", &aggressive_window);
|
|
git_config_get_int("gc.aggressivedepth", &aggressive_depth);
|
|
git_config_get_int("gc.auto", &gc_auto_threshold);
|
|
git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit);
|
|
git_config_get_bool("gc.autodetach", &detach_auto);
|
|
git_config_get_expiry("gc.pruneexpire", &prune_expire);
|
|
git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire);
|
|
git_config_get_expiry("gc.logexpiry", &gc_log_expire);
|
|
|
|
git_config(git_default_config, NULL);
|
|
}
|
|
|
|
static int too_many_loose_objects(void)
|
|
{
|
|
/*
|
|
* Quickly check if a "gc" is needed, by estimating how
|
|
* many loose objects there are. Because SHA-1 is evenly
|
|
* distributed, we can check only one and get a reasonable
|
|
* estimate.
|
|
*/
|
|
DIR *dir;
|
|
struct dirent *ent;
|
|
int auto_threshold;
|
|
int num_loose = 0;
|
|
int needed = 0;
|
|
|
|
if (gc_auto_threshold <= 0)
|
|
return 0;
|
|
|
|
dir = opendir(git_path("objects/17"));
|
|
if (!dir)
|
|
return 0;
|
|
|
|
auto_threshold = DIV_ROUND_UP(gc_auto_threshold, 256);
|
|
while ((ent = readdir(dir)) != NULL) {
|
|
if (strspn(ent->d_name, "0123456789abcdef") != 38 ||
|
|
ent->d_name[38] != '\0')
|
|
continue;
|
|
if (++num_loose > auto_threshold) {
|
|
needed = 1;
|
|
break;
|
|
}
|
|
}
|
|
closedir(dir);
|
|
return needed;
|
|
}
|
|
|
|
static int too_many_packs(void)
|
|
{
|
|
struct packed_git *p;
|
|
int cnt;
|
|
|
|
if (gc_auto_pack_limit <= 0)
|
|
return 0;
|
|
|
|
prepare_packed_git();
|
|
for (cnt = 0, p = packed_git; p; p = p->next) {
|
|
if (!p->pack_local)
|
|
continue;
|
|
if (p->pack_keep)
|
|
continue;
|
|
/*
|
|
* Perhaps check the size of the pack and count only
|
|
* very small ones here?
|
|
*/
|
|
cnt++;
|
|
}
|
|
return gc_auto_pack_limit < cnt;
|
|
}
|
|
|
|
static void add_repack_all_option(void)
|
|
{
|
|
if (prune_expire && !strcmp(prune_expire, "now"))
|
|
argv_array_push(&repack, "-a");
|
|
else {
|
|
argv_array_push(&repack, "-A");
|
|
if (prune_expire)
|
|
argv_array_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
|
|
}
|
|
}
|
|
|
|
static void add_repack_incremental_option(void)
|
|
{
|
|
argv_array_push(&repack, "--no-write-bitmap-index");
|
|
}
|
|
|
|
static int need_to_gc(void)
|
|
{
|
|
/*
|
|
* Setting gc.auto to 0 or negative can disable the
|
|
* automatic gc.
|
|
*/
|
|
if (gc_auto_threshold <= 0)
|
|
return 0;
|
|
|
|
/*
|
|
* If there are too many loose objects, but not too many
|
|
* packs, we run "repack -d -l". If there are too many packs,
|
|
* we run "repack -A -d -l". Otherwise we tell the caller
|
|
* there is no need.
|
|
*/
|
|
if (too_many_packs())
|
|
add_repack_all_option();
|
|
else if (too_many_loose_objects())
|
|
add_repack_incremental_option();
|
|
else
|
|
return 0;
|
|
|
|
if (run_hook_le(NULL, "pre-auto-gc", NULL))
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
/* return NULL on success, else hostname running the gc */
|
|
static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
|
|
{
|
|
static struct lock_file lock;
|
|
char my_host[HOST_NAME_MAX + 1];
|
|
struct strbuf sb = STRBUF_INIT;
|
|
struct stat st;
|
|
uintmax_t pid;
|
|
FILE *fp;
|
|
int fd;
|
|
char *pidfile_path;
|
|
|
|
if (is_tempfile_active(pidfile))
|
|
/* already locked */
|
|
return NULL;
|
|
|
|
if (xgethostname(my_host, sizeof(my_host)))
|
|
xsnprintf(my_host, sizeof(my_host), "unknown");
|
|
|
|
pidfile_path = git_pathdup("gc.pid");
|
|
fd = hold_lock_file_for_update(&lock, pidfile_path,
|
|
LOCK_DIE_ON_ERROR);
|
|
if (!force) {
|
|
static char locking_host[HOST_NAME_MAX + 1];
|
|
static char *scan_fmt;
|
|
int should_exit;
|
|
|
|
if (!scan_fmt)
|
|
scan_fmt = xstrfmt("%s %%%ds", "%"SCNuMAX, HOST_NAME_MAX);
|
|
fp = fopen(pidfile_path, "r");
|
|
memset(locking_host, 0, sizeof(locking_host));
|
|
should_exit =
|
|
fp != NULL &&
|
|
!fstat(fileno(fp), &st) &&
|
|
/*
|
|
* 12 hour limit is very generous as gc should
|
|
* never take that long. On the other hand we
|
|
* don't really need a strict limit here,
|
|
* running gc --auto one day late is not a big
|
|
* problem. --force can be used in manual gc
|
|
* after the user verifies that no gc is
|
|
* running.
|
|
*/
|
|
time(NULL) - st.st_mtime <= 12 * 3600 &&
|
|
fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&
|
|
/* be gentle to concurrent "gc" on remote hosts */
|
|
(strcmp(locking_host, my_host) || !kill(pid, 0) || errno == EPERM);
|
|
if (fp != NULL)
|
|
fclose(fp);
|
|
if (should_exit) {
|
|
if (fd >= 0)
|
|
rollback_lock_file(&lock);
|
|
*ret_pid = pid;
|
|
free(pidfile_path);
|
|
return locking_host;
|
|
}
|
|
}
|
|
|
|
strbuf_addf(&sb, "%"PRIuMAX" %s",
|
|
(uintmax_t) getpid(), my_host);
|
|
write_in_full(fd, sb.buf, sb.len);
|
|
strbuf_release(&sb);
|
|
commit_lock_file(&lock);
|
|
pidfile = register_tempfile(pidfile_path);
|
|
free(pidfile_path);
|
|
return NULL;
|
|
}
|
|
|
|
static int report_last_gc_error(void)
|
|
{
|
|
struct strbuf sb = STRBUF_INIT;
|
|
int ret = 0;
|
|
struct stat st;
|
|
char *gc_log_path = git_pathdup("gc.log");
|
|
|
|
if (stat(gc_log_path, &st)) {
|
|
if (errno == ENOENT)
|
|
goto done;
|
|
|
|
ret = error_errno(_("Can't stat %s"), gc_log_path);
|
|
goto done;
|
|
}
|
|
|
|
if (st.st_mtime < gc_log_expire_time)
|
|
goto done;
|
|
|
|
ret = strbuf_read_file(&sb, gc_log_path, 0);
|
|
if (ret > 0)
|
|
ret = error(_("The last gc run reported the following. "
|
|
"Please correct the root cause\n"
|
|
"and remove %s.\n"
|
|
"Automatic cleanup will not be performed "
|
|
"until the file is removed.\n\n"
|
|
"%s"),
|
|
gc_log_path, sb.buf);
|
|
strbuf_release(&sb);
|
|
done:
|
|
free(gc_log_path);
|
|
return ret;
|
|
}
|
|
|
|
static int gc_before_repack(void)
|
|
{
|
|
if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD))
|
|
return error(FAILED_RUN, pack_refs_cmd.argv[0]);
|
|
|
|
if (prune_reflogs && run_command_v_opt(reflog.argv, RUN_GIT_CMD))
|
|
return error(FAILED_RUN, reflog.argv[0]);
|
|
|
|
pack_refs = 0;
|
|
prune_reflogs = 0;
|
|
return 0;
|
|
}
|
|
|
|
int cmd_gc(int argc, const char **argv, const char *prefix)
|
|
{
|
|
int aggressive = 0;
|
|
int auto_gc = 0;
|
|
int quiet = 0;
|
|
int force = 0;
|
|
const char *name;
|
|
pid_t pid;
|
|
int daemonized = 0;
|
|
timestamp_t dummy;
|
|
|
|
struct option builtin_gc_options[] = {
|
|
OPT__QUIET(&quiet, N_("suppress progress reporting")),
|
|
{ OPTION_STRING, 0, "prune", &prune_expire, N_("date"),
|
|
N_("prune unreferenced objects"),
|
|
PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire },
|
|
OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")),
|
|
OPT_BOOL(0, "auto", &auto_gc, N_("enable auto-gc mode")),
|
|
OPT_BOOL(0, "force", &force, N_("force running gc even if there may be another gc running")),
|
|
OPT_END()
|
|
};
|
|
|
|
if (argc == 2 && !strcmp(argv[1], "-h"))
|
|
usage_with_options(builtin_gc_usage, builtin_gc_options);
|
|
|
|
argv_array_pushl(&pack_refs_cmd, "pack-refs", "--all", "--prune", NULL);
|
|
argv_array_pushl(&reflog, "reflog", "expire", "--all", NULL);
|
|
argv_array_pushl(&repack, "repack", "-d", "-l", NULL);
|
|
argv_array_pushl(&prune, "prune", "--expire", NULL);
|
|
argv_array_pushl(&prune_worktrees, "worktree", "prune", "--expire", NULL);
|
|
argv_array_pushl(&rerere, "rerere", "gc", NULL);
|
|
|
|
/* default expiry time, overwritten in gc_config */
|
|
gc_config();
|
|
if (parse_expiry_date(gc_log_expire, &gc_log_expire_time))
|
|
die(_("failed to parse gc.logexpiry value %s"), gc_log_expire);
|
|
|
|
if (pack_refs < 0)
|
|
pack_refs = !is_bare_repository();
|
|
|
|
argc = parse_options(argc, argv, prefix, builtin_gc_options,
|
|
builtin_gc_usage, 0);
|
|
if (argc > 0)
|
|
usage_with_options(builtin_gc_usage, builtin_gc_options);
|
|
|
|
if (prune_expire && parse_expiry_date(prune_expire, &dummy))
|
|
die(_("failed to parse prune expiry value %s"), prune_expire);
|
|
|
|
if (aggressive) {
|
|
argv_array_push(&repack, "-f");
|
|
if (aggressive_depth > 0)
|
|
argv_array_pushf(&repack, "--depth=%d", aggressive_depth);
|
|
if (aggressive_window > 0)
|
|
argv_array_pushf(&repack, "--window=%d", aggressive_window);
|
|
}
|
|
if (quiet)
|
|
argv_array_push(&repack, "-q");
|
|
|
|
if (auto_gc) {
|
|
/*
|
|
* Auto-gc should be least intrusive as possible.
|
|
*/
|
|
if (!need_to_gc())
|
|
return 0;
|
|
if (!quiet) {
|
|
if (detach_auto)
|
|
fprintf(stderr, _("Auto packing the repository in background for optimum performance.\n"));
|
|
else
|
|
fprintf(stderr, _("Auto packing the repository for optimum performance.\n"));
|
|
fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n"));
|
|
}
|
|
if (detach_auto) {
|
|
if (report_last_gc_error())
|
|
return -1;
|
|
|
|
if (lock_repo_for_gc(force, &pid))
|
|
return 0;
|
|
if (gc_before_repack())
|
|
return -1;
|
|
delete_tempfile(&pidfile);
|
|
|
|
/*
|
|
* failure to daemonize is ok, we'll continue
|
|
* in foreground
|
|
*/
|
|
daemonized = !daemonize();
|
|
}
|
|
} else
|
|
add_repack_all_option();
|
|
|
|
name = lock_repo_for_gc(force, &pid);
|
|
if (name) {
|
|
if (auto_gc)
|
|
return 0; /* be quiet on --auto */
|
|
die(_("gc is already running on machine '%s' pid %"PRIuMAX" (use --force if not)"),
|
|
name, (uintmax_t)pid);
|
|
}
|
|
|
|
if (daemonized) {
|
|
hold_lock_file_for_update(&log_lock,
|
|
git_path("gc.log"),
|
|
LOCK_DIE_ON_ERROR);
|
|
dup2(get_lock_file_fd(&log_lock), 2);
|
|
sigchain_push_common(process_log_file_on_signal);
|
|
atexit(process_log_file_at_exit);
|
|
}
|
|
|
|
if (gc_before_repack())
|
|
return -1;
|
|
|
|
if (!repository_format_precious_objects) {
|
|
if (run_command_v_opt(repack.argv, RUN_GIT_CMD))
|
|
return error(FAILED_RUN, repack.argv[0]);
|
|
|
|
if (prune_expire) {
|
|
argv_array_push(&prune, prune_expire);
|
|
if (quiet)
|
|
argv_array_push(&prune, "--no-progress");
|
|
if (run_command_v_opt(prune.argv, RUN_GIT_CMD))
|
|
return error(FAILED_RUN, prune.argv[0]);
|
|
}
|
|
}
|
|
|
|
if (prune_worktrees_expire) {
|
|
argv_array_push(&prune_worktrees, prune_worktrees_expire);
|
|
if (run_command_v_opt(prune_worktrees.argv, RUN_GIT_CMD))
|
|
return error(FAILED_RUN, prune_worktrees.argv[0]);
|
|
}
|
|
|
|
if (run_command_v_opt(rerere.argv, RUN_GIT_CMD))
|
|
return error(FAILED_RUN, rerere.argv[0]);
|
|
|
|
report_garbage = report_pack_garbage;
|
|
reprepare_packed_git();
|
|
if (pack_garbage.nr > 0)
|
|
clean_pack_garbage();
|
|
|
|
if (auto_gc && too_many_loose_objects())
|
|
warning(_("There are too many unreachable loose objects; "
|
|
"run 'git prune' to remove them."));
|
|
|
|
if (!daemonized)
|
|
unlink(git_path("gc.log"));
|
|
|
|
return 0;
|
|
}
|