From 980013e90dd12ec1ff9eec23cebb0c845f198dab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 3 Mar 2024 11:13:25 +0100 Subject: [PATCH 1/4] t-ctype: allow NUL anywhere in the specification string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the custom function is_in() for looking up a character in the specification string with memchr(3) and sizeof. This is shorter, simpler and allows NUL anywhere in the string, which may come in handy if we ever want to support more character classes that contain it. Getting the string size using sizeof only works in a macro and with a string constant. Use ARRAY_SIZE and compile-time checks to make sure we are not passed a string pointer. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- t/unit-tests/t-ctype.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/t/unit-tests/t-ctype.c b/t/unit-tests/t-ctype.c index f315489984..35473c41d8 100644 --- a/t/unit-tests/t-ctype.c +++ b/t/unit-tests/t-ctype.c @@ -1,23 +1,13 @@ #include "test-lib.h" -static int is_in(const char *s, int ch) -{ - /* - * We can't find NUL using strchr. Accept it as the first - * character in the spec -- there are no empty classes. - */ - if (ch == '\0') - return ch == *s; - if (*s == '\0') - s++; - return !!strchr(s, ch); -} - /* Macro to test a character type */ #define TEST_CTYPE_FUNC(func, string) \ static void test_ctype_##func(void) { \ + size_t len = ARRAY_SIZE(string) - 1 + \ + BUILD_ASSERT_OR_ZERO(ARRAY_SIZE(string) > 0) + \ + BUILD_ASSERT_OR_ZERO(sizeof(string[0]) == sizeof(char)); \ for (int i = 0; i < 256; i++) { \ - if (!check_int(func(i), ==, is_in(string, i))) \ + if (!check_int(func(i), ==, !!memchr(string, i, len))) \ test_msg(" i: 0x%02x", i); \ } \ if (!check(!func(EOF))) \ From 752cb6ef816c8b2b37e38e418bd11698c00027e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 3 Mar 2024 11:13:26 +0100 Subject: [PATCH 2/4] t-ctype: simplify EOF check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EOF is not a member of any character class. If a classifier function returns a non-zero result for it, presumably by mistake, then the unit test check reports: # check "!iseof(EOF)" failed at t/unit-tests/t-ctype.c:53 # i: 0xffffffff (EOF) The numeric value of EOF is not particularly interesting in this context. Stop printing the second line. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- t/unit-tests/t-ctype.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/t/unit-tests/t-ctype.c b/t/unit-tests/t-ctype.c index 35473c41d8..f0d61d6eb2 100644 --- a/t/unit-tests/t-ctype.c +++ b/t/unit-tests/t-ctype.c @@ -10,8 +10,7 @@ static void test_ctype_##func(void) { \ if (!check_int(func(i), ==, !!memchr(string, i, len))) \ test_msg(" i: 0x%02x", i); \ } \ - if (!check(!func(EOF))) \ - test_msg(" i: 0x%02x (EOF)", EOF); \ + check(!func(EOF)); \ } #define TEST_CHAR_CLASS(class) TEST(test_ctype_##class(), #class " works") From 7a8d6c0a10e836d47e5531e321b5e1a944aa6123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 3 Mar 2024 11:13:27 +0100 Subject: [PATCH 3/4] t-ctype: align output of i MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unit test reports misclassified characters like this: # check "isdigit(i) == !!memchr("123456789", i, len)" failed at t/unit-tests/t-ctype.c:36 # left: 1 # right: 0 # i: 0x30 Reduce the indent of i to put its colon directly below the ones in the preceding lines for consistency. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- t/unit-tests/t-ctype.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/unit-tests/t-ctype.c b/t/unit-tests/t-ctype.c index f0d61d6eb2..02d8569aa3 100644 --- a/t/unit-tests/t-ctype.c +++ b/t/unit-tests/t-ctype.c @@ -8,7 +8,7 @@ static void test_ctype_##func(void) { \ BUILD_ASSERT_OR_ZERO(sizeof(string[0]) == sizeof(char)); \ for (int i = 0; i < 256; i++) { \ if (!check_int(func(i), ==, !!memchr(string, i, len))) \ - test_msg(" i: 0x%02x", i); \ + test_msg(" i: 0x%02x", i); \ } \ check(!func(EOF)); \ } From 6cf06e9c6e9b4e99fc5abb334172cc201a42d66b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 3 Mar 2024 11:13:28 +0100 Subject: [PATCH 4/4] t-ctype: avoid duplicating class names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TEST_CTYPE_FUNC defines a function for testing a character classifier, TEST_CHAR_CLASS calls it, causing the class name to be mentioned twice. Avoid the need to define a class-specific function by letting TEST_CHAR_CLASS do all the work. This is done by using the internal functions test__run_begin() and test__run_end(), but they do exist to be used in test macros after all. Alternatively we could unroll the loop to provide a very long expression that tests all 256 characters and EOF and hand that to TEST, but that seems awkward and hard to read. No change of behavior or output intended. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- t/unit-tests/t-ctype.c | 64 ++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/t/unit-tests/t-ctype.c b/t/unit-tests/t-ctype.c index 02d8569aa3..d6ac1fe678 100644 --- a/t/unit-tests/t-ctype.c +++ b/t/unit-tests/t-ctype.c @@ -1,19 +1,19 @@ #include "test-lib.h" -/* Macro to test a character type */ -#define TEST_CTYPE_FUNC(func, string) \ -static void test_ctype_##func(void) { \ +#define TEST_CHAR_CLASS(class, string) do { \ size_t len = ARRAY_SIZE(string) - 1 + \ BUILD_ASSERT_OR_ZERO(ARRAY_SIZE(string) > 0) + \ BUILD_ASSERT_OR_ZERO(sizeof(string[0]) == sizeof(char)); \ - for (int i = 0; i < 256; i++) { \ - if (!check_int(func(i), ==, !!memchr(string, i, len))) \ - test_msg(" i: 0x%02x", i); \ + int skip = test__run_begin(); \ + if (!skip) { \ + for (int i = 0; i < 256; i++) { \ + if (!check_int(class(i), ==, !!memchr(string, i, len)))\ + test_msg(" i: 0x%02x", i); \ + } \ + check(!class(EOF)); \ } \ - check(!func(EOF)); \ -} - -#define TEST_CHAR_CLASS(class) TEST(test_ctype_##class(), #class " works") + test__run_end(!skip, TEST_LOCATION(), #class " works"); \ +} while (0) #define DIGIT "0123456789" #define LOWER "abcdefghijklmnopqrstuvwxyz" @@ -33,37 +33,21 @@ static void test_ctype_##func(void) { \ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \ "\x7f" -TEST_CTYPE_FUNC(isdigit, DIGIT) -TEST_CTYPE_FUNC(isspace, " \n\r\t") -TEST_CTYPE_FUNC(isalpha, LOWER UPPER) -TEST_CTYPE_FUNC(isalnum, LOWER UPPER DIGIT) -TEST_CTYPE_FUNC(is_glob_special, "*?[\\") -TEST_CTYPE_FUNC(is_regex_special, "$()*+.?[\\^{|") -TEST_CTYPE_FUNC(is_pathspec_magic, "!\"#%&',-/:;<=>@_`~") -TEST_CTYPE_FUNC(isascii, ASCII) -TEST_CTYPE_FUNC(islower, LOWER) -TEST_CTYPE_FUNC(isupper, UPPER) -TEST_CTYPE_FUNC(iscntrl, CNTRL) -TEST_CTYPE_FUNC(ispunct, PUNCT) -TEST_CTYPE_FUNC(isxdigit, DIGIT "abcdefABCDEF") -TEST_CTYPE_FUNC(isprint, LOWER UPPER DIGIT PUNCT " ") - int cmd_main(int argc, const char **argv) { - /* Run all character type tests */ - TEST_CHAR_CLASS(isspace); - TEST_CHAR_CLASS(isdigit); - TEST_CHAR_CLASS(isalpha); - TEST_CHAR_CLASS(isalnum); - TEST_CHAR_CLASS(is_glob_special); - TEST_CHAR_CLASS(is_regex_special); - TEST_CHAR_CLASS(is_pathspec_magic); - TEST_CHAR_CLASS(isascii); - TEST_CHAR_CLASS(islower); - TEST_CHAR_CLASS(isupper); - TEST_CHAR_CLASS(iscntrl); - TEST_CHAR_CLASS(ispunct); - TEST_CHAR_CLASS(isxdigit); - TEST_CHAR_CLASS(isprint); + TEST_CHAR_CLASS(isspace, " \n\r\t"); + TEST_CHAR_CLASS(isdigit, DIGIT); + TEST_CHAR_CLASS(isalpha, LOWER UPPER); + TEST_CHAR_CLASS(isalnum, LOWER UPPER DIGIT); + TEST_CHAR_CLASS(is_glob_special, "*?[\\"); + TEST_CHAR_CLASS(is_regex_special, "$()*+.?[\\^{|"); + TEST_CHAR_CLASS(is_pathspec_magic, "!\"#%&',-/:;<=>@_`~"); + TEST_CHAR_CLASS(isascii, ASCII); + TEST_CHAR_CLASS(islower, LOWER); + TEST_CHAR_CLASS(isupper, UPPER); + TEST_CHAR_CLASS(iscntrl, CNTRL); + TEST_CHAR_CLASS(ispunct, PUNCT); + TEST_CHAR_CLASS(isxdigit, DIGIT "abcdefABCDEF"); + TEST_CHAR_CLASS(isprint, LOWER UPPER DIGIT PUNCT " "); return test_done(); }