parseopt: autocorrect mistyped subcommands

Try to autocorrect the mistyped mandatory subcommand before showing an
error and exiting. Subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL
are skipped.

In autocorrect_subcommand(), AUTOCORRECT_HINTONLY does the same as
AUTOCORRECT_NEVER, because builtins have a limited number of subcommands.
Those lists are currently not too large. Therefore, displaying all
subcommands via usage_with_options() is good enough here. This also
keeps the autocorrection handling simple.

Use a dynamic threshold for similar_enough() to check if the result is
usable. This can yield more accurate typo corrections. Even though
subcommands are often short, their lengths still vary across builtins,
and in the current implementation a single fixed threshold cannot work
well for both short and long subcommands at the same time.

Signed-off-by: Jiamu Sun <39@barroit.sh>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jiamu Sun
2026-03-10 20:41:05 +09:00
committed by Junio C Hamano
parent 46483152a4
commit bdd1ed4ad2

View File

@@ -6,6 +6,8 @@
#include "strbuf.h"
#include "string-list.h"
#include "utf8.h"
#include "autocorrect.h"
#include "levenshtein.h"
static int disallow_abbreviated_options;
@@ -622,13 +624,72 @@ static int parse_subcommand(const char *arg, const struct option *options)
return -1;
}
/*
 * Append the long_name of every OPTION_SUBCOMMAND entry found in
 * "options" to "list". Scanning stops at the OPTION_END entry.
 */
static void find_subcommands(struct string_list *list,
			     const struct option *options)
{
	const struct option *opt = options;

	while (opt->type != OPTION_END) {
		if (opt->type == OPTION_SUBCOMMAND)
			string_list_append(list, opt->long_name);
		opt++;
	}
}
/*
 * Decide whether an edit distance of "dist" is small enough to accept a
 * correction for the typed string "cmd".
 *
 * The cutoff depends on strlen(cmd): 1 when it is below 3, 3 when it is
 * below 6, and 6 otherwise. Returns 1 if "dist" is strictly below the
 * cutoff and 0 if it is not.
 */
static int similar_enough(const char *cmd, unsigned int dist)
{
	size_t typed_len = strlen(cmd);
	unsigned int limit;

	if (typed_len < 3)
		limit = 1;
	else if (typed_len < 6)
		limit = 3;
	else
		limit = 6;

	return dist < limit;
}
static const char *autocorrect_subcommand(const char *cmd,
struct string_list *cmds)
{
struct autocorr autocorr = { 0 };
unsigned int min = UINT_MAX;
unsigned int ties = 0;
struct string_list_item *cand;
struct string_list_item *best = NULL;
autocorr_resolve(&autocorr);
if (autocorr.mode == AUTOCORRECT_NEVER ||
autocorr.mode == AUTOCORRECT_HINTONLY)
return NULL;
for_each_string_list_item(cand, cmds) {
unsigned int dist = levenshtein(cmd, cand->string, 0, 2, 1, 3);
if (dist < min) {
min = dist;
best = cand;
ties = 0;
} else if (dist == min) {
ties++;
}
}
if (!ties && similar_enough(cmd, min)) {
fprintf_ln(stderr,
_("WARNING: You called a subcommand named '%s', which does not exist."),
cmd);
autocorr_confirm(&autocorr, best->string);
return best->string;
}
return NULL;
}
static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx,
const char *arg,
const struct option *options,
const char * const usagestr[])
{
int err = parse_subcommand(arg, options);
int err;
const char *assumed;
struct string_list cmds = STRING_LIST_INIT_NODUP;
err = parse_subcommand(arg, options);
if (!err)
return PARSE_OPT_SUBCOMMAND;
@@ -641,8 +702,17 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx,
if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL)
return PARSE_OPT_DONE;
error(_("unknown subcommand: `%s'"), arg);
usage_with_options(usagestr, options);
find_subcommands(&cmds, options);
assumed = autocorrect_subcommand(arg, &cmds);
if (!assumed) {
error(_("unknown subcommand: `%s'"), arg);
usage_with_options(usagestr, options);
}
string_list_clear(&cmds, 0);
parse_subcommand(assumed, options);
return PARSE_OPT_SUBCOMMAND;
}
static void check_typos(const char *arg, const struct option *options)