From c52f085a477c8eece87821c5bbc035e5a900eb12 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 28 Feb 2026 16:50:45 +0530 Subject: [PATCH] send-email: validate charset name in 8bit encoding prompt When a non-ASCII character is detected in the body or subject of the email, the user is prompted with, Which 8bit encoding should I declare [UTF-8]? foo After this the input string is validated by a regex, based on the fact that a charset name will be at least 4 characters long [1]. If the string is at least 4 characters long the email is sent; if not, a second prompt asks the user to confirm, Are you sure you want to use <foo> [y/N]? y This relies on a length-based regex heuristic to validate the user input, and can allow clearly invalid charset names to pass if the input is at least 4 characters long. Add a semantic validation of the charset name using Encode::find_encoding(), which comes from the Encode module bundled with Perl. If the encoding is not recognized, warn the user and ask for confirmation before proceeding. After this validation the length-based validation becomes redundant and also breaks the flow, so change the regex of valid input to accept any non-blank string. Make the encoding warning logic specific to the 8bit prompt, and also add a unique confirmation prompt, which reduces the load on ask() and improves maintainability. Additionally, the wording of the first prompt can confuse the user if not read properly or under any default assumptions for a yes/no prompt. Change the wording to make it explicitly clear to the user that the prompt needs a string input, UTF-8 being the default. The intended flow is, Declare which 8bit encoding to use [default: UTF-8]? foobar 'foobar' does not appear to be a valid charset name. Use it anyway [y/N]? 
[1]- https://github.com/git/git/commit/852a15d748034eec87adbee73a72689c8936fb8b Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- git-send-email.perl | 25 ++++++++++++++++++++++--- t/t9001-send-email.sh | 2 +- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/git-send-email.perl b/git-send-email.perl index cd4b316ddc..3186104709 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -23,6 +23,7 @@ use Getopt::Long; use Git::LoadCPAN::Error qw(:try); use Git; use Git::I18N; +use Encode qw(find_encoding); Getopt::Long::Configure qw/ pass_through /; @@ -1044,9 +1045,27 @@ if (!defined $auto_8bit_encoding && scalar %broken_encoding) { foreach my $f (sort keys %broken_encoding) { print " $f\n"; } - $auto_8bit_encoding = ask(__("Which 8bit encoding should I declare [UTF-8]? "), - valid_re => qr/.{4}/, confirm_only => 1, - default => "UTF-8"); + while (1) { + my $encoding = ask( + __("Declare which 8bit encoding to use [default: UTF-8]? "), + valid_re => qr/^\S+$/, + default => "UTF-8"); + next unless defined $encoding; + if (find_encoding($encoding)) { + $auto_8bit_encoding = $encoding; + last; + } + my $yesno = ask( + sprintf( + __("'%s' does not appear to be a valid charset name. Use it anyway [y/N]? "), + $encoding), + valid_re => qr/^(?:y|n)/i, + default => "n"); + if (defined $yesno && $yesno =~ /^y/i) { + $auto_8bit_encoding = $encoding; + last; + } + } } if (!$force) { diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index e56e0c8d77..24f6c76aee 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -1691,7 +1691,7 @@ test_expect_success $PREREQ 'asks about and fixes 8bit encodings' ' email-using-8bit >stdout && grep "do not declare a Content-Transfer-Encoding" stdout && grep email-using-8bit stdout && - grep "Which 8bit encoding" stdout && + grep "Declare which 8bit encoding to use" stdout && grep -E "Content|MIME" msgtxt1 >actual && test_cmp content-type-decl actual '