From 5ec4c22e49839c2eeb319070cde14ddbea3d1adb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 15 Feb 2026 04:00:52 -0500 Subject: [PATCH 1/5] ref-filter: factor out refname component counting The "lstrip" and "rstrip" options to the %(refname) placeholder both accept a negative length, which asks us to keep that many path components (rather than stripping that many). The code to count components and convert the negative value to a positive was copied from lstrip to rstrip in 1a34728e6b (ref-filter: add an 'rstrip=' option to atoms which deal with refnames, 2017-01-10). Let's factor it out into a separate function. This reduces duplication and also makes the lstrip/rstrip functions much easier to follow, since the bulk of their code is now the actual stripping. Note that the computed "remaining" value is currently stored as a "long", so in theory that's what our function should return. But this is purely historical. When the variable was added in 0571979bd6 (tag: do not show ambiguous tag names as "tags/foo", 2016-01-25), we parsed the value from strtol(), and thus used a long. But these days we take "len" as an int, and also use an int to count up components. So let's just consistently use int here. This value could only overflow in a pathological case (e.g., 4GB worth of "a/a/...") and even then will not result in out-of-bounds memory access (we keep stripping until we run out of string to parse). The minimal Myers diff here is a little hard to read; with --patience the code movement is shown much more clearly. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index c318f9ca0e..ff14ac53de 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2173,12 +2173,8 @@ static inline char *copy_advance(char *dst, const char *src) return dst; } -static const char *lstrip_ref_components(const char *refname, int len) +static int normalize_component_count(const char *refname, int len) { - long remaining = len; - const char *start = xstrdup(refname); - const char *to_free = start; - if (len < 0) { int i; const char *p = refname; @@ -2192,8 +2188,16 @@ static const char *lstrip_ref_components(const char *refname, int len) * because we count the number of '/', but the number * of components is one more than the no of '/'). */ - remaining = i + len + 1; + len = i + len + 1; } + return len; +} + +static const char *lstrip_ref_components(const char *refname, int len) +{ + int remaining = normalize_component_count(refname, len); + const char *start = xstrdup(refname); + const char *to_free = start; while (remaining > 0) { switch (*start++) { @@ -2213,26 +2217,10 @@ static const char *lstrip_ref_components(const char *refname, int len) static const char *rstrip_ref_components(const char *refname, int len) { - long remaining = len; + int remaining = normalize_component_count(refname, len); const char *start = xstrdup(refname); const char *to_free = start; - if (len < 0) { - int i; - const char *p = refname; - - /* Find total no of '/' separated path-components */ - for (i = 0; p[i]; p[i] == '/' ? i++ : *p++) - ; - /* - * The number of components we need to strip is now - * the total minus the components to be left (Plus one - * because we count the number of '/', but the number - * of components is one more than the no of '/'). - */ - remaining = i + len + 1; - } - while (remaining-- > 0) { char *p = strrchr(start, '/'); if (!p) { From 87cb6dc9b0832683a31d0c3126ecad4ad444489c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 15 Feb 2026 04:02:23 -0500 Subject: [PATCH 2/5] ref-filter: simplify lstrip_ref_components() memory handling We're walking forward in the string, skipping path components from left-to-right. So when we've stripped as much as we want, the pointer we have is a complete NUL-terminated string and we can just return it (after duplicating it, of course). So there is no need for a temporary allocated string. But we do make an extra temporary copy due to f0062d3b74 (ref-filter: free item->value and item->value->s, 2018-10-18). This is probably from cargo-culting the technique used in rstrip_ref_components(), which _does_ need a separate string (since it is stripping from the end and ties off the temporary string with a NUL). Let's drop the extra allocation. This is slightly more efficient, but more importantly makes the code much simpler. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index ff14ac53de..f5f0cb4ad6 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2196,13 +2196,10 @@ static int normalize_component_count(const char *refname, int len) static const char *lstrip_ref_components(const char *refname, int len) { int remaining = normalize_component_count(refname, len); - const char *start = xstrdup(refname); - const char *to_free = start; while (remaining > 0) { - switch (*start++) { + switch (*refname++) { case '\0': - free((char *)to_free); return xstrdup(""); case '/': remaining--; @@ -2210,9 +2207,7 @@ static const char *lstrip_ref_components(const char *refname, int len) } } - start = xstrdup(start); - free((char *)to_free); - return start; + return xstrdup(refname); } static const char *rstrip_ref_components(const char *refname, int len) From 2ec30e71f44233b9afceda0f2992029674187674 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 15 Feb 2026 04:05:34 -0500 Subject: [PATCH 3/5] ref-filter: simplify rstrip_ref_components() memory handling We're stripping path components from the end of a string, which we do by assigning a NUL as we parse each component, shortening the string. This requires an extra temporary buffer to avoid munging our input string. But the way that we allocate the buffer is unusual. We have an extra "to_free" variable. Usually this is used when the access variable is conceptually const, like: const char *foo; char *to_free = NULL; if (...) foo = to_free = xstrdup(...); else foo = some_const_string; ... free(to_free); But that's not what's happening here. Our "start" variable always points to the allocated buffer, and to_free is redundant. Worse, it is marked as const itself, requiring a cast when we free it. Let's drop to_free entirely, and mark "start" as non-const, making the memory handling more clear. As a bonus, this also silences a warning from glibc-2.43 that our call to strrchr() implicitly strips away the const-ness of "start". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index f5f0cb4ad6..9589418c25 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2213,13 +2213,12 @@ static const char *lstrip_ref_components(const char *refname, int len) static const char *rstrip_ref_components(const char *refname, int len) { int remaining = normalize_component_count(refname, len); - const char *start = xstrdup(refname); - const char *to_free = start; + char *start = xstrdup(refname); while (remaining-- > 0) { char *p = strrchr(start, '/'); if (!p) { - free((char *)to_free); + free(start); return xstrdup(""); } else p[0] = '\0'; From fe732a8b9f1837189eb3533a262548a652c11e61 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 15 Feb 2026 04:07:44 -0500 Subject: [PATCH 4/5] ref-filter: avoid strrchr() in rstrip_ref_components() To strip path components from our refname string, we repeatedly call strrchr() to find the trailing slash, shortening the string each time by assigning NUL over it. This has two downsides: 1. Calling strrchr() in a loop is quadratic, since each call has to call strlen() under the hood to find the end of the string (even though we know exactly where it is from the last loop iteration). 2. We need a temporary buffer, since we're munging the string with NUL as we shorten it (which we must do, because strrchr() has no other way of knowing what we consider the end of the string). Using memrchr() would let us fix both of these, but it isn't portable. So instead, let's just open-code the string traversal from back to front as we loop. I doubt that the quadratic nature is a serious concern. You can see it in practice with something like: git init git commit --allow-empty -m foo echo "$(git rev-parse HEAD) refs/heads$(perl -e 'print "/a" x 500_000')" >.git/packed-refs time git for-each-ref --format='%(refname:rstrip=-1)' That takes ~5.5s to run on my machine before this patch, and ~11ms after. But I don't think there's a reasonable way for somebody to infect you with such a garbage ref, as the wire protocol is limited to 64k pkt-lines. The difference is measurable for me for a 32k-component ref (about 19ms vs 7ms), so perhaps you could create some chaos by pushing a lot of them. But we also run into filesystem limits (if the loose backend is in use), and in practice it seems like there are probably simpler and more effective ways to waste CPU. Likewise the extra allocation probably isn't really measurable. In fact, since our goal is to return an allocated string, we end up having to make the same allocation anyway (though it is sized to the result, rather than the input). My main goal was simplicity in avoiding the need to handle cleaning it up in the early return path. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 9589418c25..01512d50bd 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2213,17 +2213,15 @@ static const char *lstrip_ref_components(const char *refname, int len) static const char *rstrip_ref_components(const char *refname, int len) { int remaining = normalize_component_count(refname, len); - char *start = xstrdup(refname); + const char *end = refname + strlen(refname); - while (remaining-- > 0) { - char *p = strrchr(start, '/'); - if (!p) { - free(start); + while (remaining > 0) { + if (end == refname) return xstrdup(""); - } else - p[0] = '\0'; + if (*--end == '/') + remaining--; } - return start; + return xmemdupz(refname, end - refname); } static const char *show_ref(struct refname_atom *atom, const char *refname) From 8b0061b5c5008375ef0986b7aafedbd7d79da0f6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 20 Feb 2026 01:00:03 -0500 Subject: [PATCH 5/5] ref-filter: clarify lstrip/rstrip component counting When a strip option to the %(refname) placeholder is asked to leave N path components, we first count up the path components to know how many to remove. That happens with a loop like this: /* Find total no of '/' separated path-components */ for (i = 0; p[i]; p[i] == '/' ? i++ : *p++) ; which is a little hard to understand for two reasons. First, the dereference in "*p++" is seemingly useless, since nobody looks at the result. And static analyzers like Coverity will complain about that. But removing the "*" will cause gcc to complain with -Wint-conversion, since the two sides of the ternary do not match (one is a pointer and the other an int). Second, it is not clear what the meaning of "p" is at each iteration of the loop, as its position with respect to our walk over the string depends on how many slashes we've seen. The answer is that by itself, it doesn't really mean anything: "p + i" represents the current state of our walk, with "i" counting up slashes, and "p" by itself essentially meaningless. None of this behaves incorrectly, but ultimately the loop is just counting the slashes in the refname. We can do that much more simply with a for-loop iterating over the string and a separate slash counter. We can also drop the comment, which is somewhat misleading. We are counting slashes, not components (and a comment later in the function makes it clear that we must add one to compensate). In the new code it is obvious that we are counting slashes here. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ref-filter.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 01512d50bd..db83d3b32c 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2176,19 +2176,20 @@ static inline char *copy_advance(char *dst, const char *src) static int normalize_component_count(const char *refname, int len) { if (len < 0) { - int i; - const char *p = refname; + int slashes = 0; + + for (const char *p = refname; *p; p++) { + if (*p == '/') + slashes++; + } - /* Find total no of '/' separated path-components */ - for (i = 0; p[i]; p[i] == '/' ? i++ : *p++) - ; /* * The number of components we need to strip is now * the total minus the components to be left (Plus one * because we count the number of '/', but the number * of components is one more than the no of '/'). */ - len = i + len + 1; + len = slashes + len + 1; } return len; }