From e715f776820f22d0951e02947dd0c4f889e83df8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Aug 2025 16:59:29 -0400 Subject: [PATCH 1/5] describe: pass oid struct by const pointer We pass a "struct object_id" to describe_blob() by value. This isn't wrong, as an oid is composed only of copy-able values. But it's unusual; typically we pass structs by const pointer, including object_ids. Let's do so. It similarly makes sense for us to hold that pointer in the callback data (rather than yet another copy of the oid). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/describe.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index d7dd8139de..383d3e6b9a 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -490,7 +490,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) struct process_commit_data { struct object_id current_commit; - struct object_id looking_for; + const struct object_id *looking_for; struct strbuf *dst; struct rev_info *revs; }; @@ -505,7 +505,7 @@ static void process_object(struct object *obj, const char *path, void *data) { struct process_commit_data *pcd = data; - if (oideq(&pcd->looking_for, &obj->oid) && !pcd->dst->len) { + if (oideq(pcd->looking_for, &obj->oid) && !pcd->dst->len) { reset_revision_walk(); describe_commit(&pcd->current_commit, pcd->dst); strbuf_addf(pcd->dst, ":%s", path); @@ -514,7 +514,7 @@ static void process_object(struct object *obj, const char *path, void *data) } } -static void describe_blob(struct object_id oid, struct strbuf *dst) +static void describe_blob(const struct object_id *oid, struct strbuf *dst) { struct rev_info revs; struct strvec args = STRVEC_INIT; @@ -554,7 +554,7 @@ static void describe(const char *arg, int last_one) describe_commit(&oid, &sb); else if (odb_read_object_info(the_repository->objects, &oid, NULL) == OBJ_BLOB) - describe_blob(oid, &sb); + describe_blob(&oid, &sb); else die(_("%s is neither a commit nor blob"), arg); From db2664b6f7c88910b1ab21bcdbac87be098df8a2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Aug 2025 17:01:25 -0400 Subject: [PATCH 2/5] describe: error if blob not found If describe_blob() does not find the blob in question, it returns an empty strbuf, and we print an empty line. This differs from describe_commit(), which always either returns an answer or calls die() itself. As the blob function was bolted onto the command afterwards, I think its behavior is not intentional, and it is just a bug that it does not report an error. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/describe.c | 3 +++ t/t6120-describe.sh | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/builtin/describe.c b/builtin/describe.c index 383d3e6b9a..06e413d937 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -535,6 +535,9 @@ static void describe_blob(const struct object_id *oid, struct strbuf *dst) reset_revision_walk(); release_revisions(&revs); strvec_clear(&args); + + if (!dst->len) + die(_("blob '%s' not reachable from HEAD"), oid_to_hex(oid)); } static void describe(const char *arg, int last_one) diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh index 256ccaefb7..470631d17d 100755 --- a/t/t6120-describe.sh +++ b/t/t6120-describe.sh @@ -409,6 +409,12 @@ test_expect_success 'describe tag object' ' test_grep "fatal: test-blob-1 is neither a commit nor blob" actual ' +test_expect_success 'describe an unreachable blob' ' + blob=$(echo not-found-anywhere | git hash-object -w --stdin) && + test_must_fail git describe $blob 2>actual && + test_grep "blob .$blob. not reachable from HEAD" actual +' + test_expect_success ULIMIT_STACK_SIZE 'name-rev works in a deep repo' ' i=1 && while test $i -lt 8000 From c6478715a52b8f757e898e1d9f8f8d1732fafb24 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Aug 2025 17:01:54 -0400 Subject: [PATCH 3/5] describe: catch unborn branch in describe_blob() When describing a blob, we search for it by traversing from HEAD. We do this by feeding the name HEAD to setup_revisions(). But if we are on an unborn branch, this will fail with a confusing message: $ git describe $blob fatal: ambiguous argument 'HEAD': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git [...] -- [...]' It is OK for this to be an error (we cannot find $blob in an empty traversal, so we'd eventually complain about that). But the error message could be more helpful. Let's resolve HEAD ourselves and pass the resolved object id to setup_revisions(). If resolving fails, then we can print a more useful message. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/describe.c | 8 +++++++- t/t6120-describe.sh | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/builtin/describe.c b/builtin/describe.c index 06e413d937..f7bea3c8c5 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -518,10 +518,16 @@ static void describe_blob(const struct object_id *oid, struct strbuf *dst) { struct rev_info revs; struct strvec args = STRVEC_INIT; + struct object_id head_oid; struct process_commit_data pcd = { *null_oid(the_hash_algo), oid, dst, &revs}; + if (repo_get_oid(the_repository, "HEAD", &head_oid)) + die(_("cannot search for blob '%s' on an unborn branch"), + oid_to_hex(oid)); + strvec_pushl(&args, "internal: The first arg is not parsed", - "--objects", "--in-commit-order", "--reverse", "HEAD", + "--objects", "--in-commit-order", "--reverse", + oid_to_hex(&head_oid), NULL); repo_init_revisions(the_repository, &revs, NULL); diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh index 470631d17d..feec57bcbc 100755 --- a/t/t6120-describe.sh +++ b/t/t6120-describe.sh @@ -415,6 +415,14 @@ test_expect_success 'describe an unreachable blob' ' test_grep "blob .$blob. not reachable from HEAD" actual ' +test_expect_success 'describe blob on an unborn branch' ' + oldbranch=$(git symbolic-ref HEAD) && + test_when_finished "git symbolic-ref HEAD $oldbranch" && + git symbolic-ref HEAD refs/heads/does-not-exist && + test_must_fail git describe test-blob 2>actual && + test_grep "cannot search .* on an unborn branch" actual +' + test_expect_success ULIMIT_STACK_SIZE 'name-rev works in a deep repo' ' i=1 && while test $i -lt 8000 From 8cfd4ac215e3711757acef8043c1c3bf3689f606 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Aug 2025 02:30:34 -0400 Subject: [PATCH 4/5] describe: handle blob traversal with no commits When describing a blob, we traverse from HEAD, remembering each commit we saw, and then checking each blob to report the containing commit. But if we haven't seen any commits at all, we'll segfault (we store the "current" commit as an oid initialized to the null oid, causing lookup_commit_reference() to return NULL). This shouldn't be able to happen normally. We always start our traversal at HEAD, which must be a commit (a property which is enforced by the refs code). But you can trigger the segfault like this: blob=$(echo foo | git hash-object -w --stdin) echo $blob >.git/HEAD git describe $blob We can instead catch this case and return an empty result, which hits the usual "we didn't find $blob while traversing HEAD" error. This is a minor lie in that we did "find" the blob. And this even hints at a bigger problem in this code: what if the traversal pointed to the blob as _not_ part of a commit at all, but we had previously filled in the recorded "current commit"? One could imagine this happening due to a tag pointing directly to the blob in question. But that can't happen, because we only traverse from HEAD, never from any other refs. And the intent of the blob-describing code is to find blobs within commits. So I think this matches the original intent as closely as we can (and again, this segfault cannot be triggered without corrupting your repository!). The test here does not use the formula above, which works only for the files backend (and not reftables). Instead we use another loophole to create the bogus state using only Git commands. See the comment in the test for details. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/describe.c | 6 ++++-- t/t6120-describe.sh | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index f7bea3c8c5..72b2e1162c 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -507,8 +507,10 @@ static void process_object(struct object *obj, const char *path, void *data) if (oideq(pcd->looking_for, &obj->oid) && !pcd->dst->len) { reset_revision_walk(); - describe_commit(&pcd->current_commit, pcd->dst); - strbuf_addf(pcd->dst, ":%s", path); + if (!is_null_oid(&pcd->current_commit)) { + describe_commit(&pcd->current_commit, pcd->dst); + strbuf_addf(pcd->dst, ":%s", path); + } free_commit_list(pcd->revs->commits); pcd->revs->commits = NULL; } diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh index feec57bcbc..2c70cc561a 100755 --- a/t/t6120-describe.sh +++ b/t/t6120-describe.sh @@ -423,6 +423,22 @@ test_expect_success 'describe blob on an unborn branch' ' test_grep "cannot search .* on an unborn branch" actual ' +# This test creates a repository state that we generally try to disallow: HEAD +# is pointing to an object that is not a commit. The ref update code forbids +# non-commit writes directly to HEAD or to any branch in refs/heads/. But we +# can use the loophole of pointing HEAD to another non-branch ref (something we +# should forbid, but don't for historical reasons). +# +# Do not take this test as an endorsement of the loophole! If we ever tighten +# it, it is reasonable to just drop this test entirely. +test_expect_success 'describe blob on a non-commit HEAD' ' + oldbranch=$(git symbolic-ref HEAD) && + test_when_finished "git symbolic-ref HEAD $oldbranch" && + git symbolic-ref HEAD refs/tags/test-blob && + test_must_fail git describe test-blob 2>actual && + test_grep "blob .* not reachable from HEAD" actual +' + test_expect_success ULIMIT_STACK_SIZE 'name-rev works in a deep repo' ' i=1 && while test $i -lt 8000 From 7c10e48e81ae63974e3badf3b7df71df74a0640b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Aug 2025 17:04:17 -0400 Subject: [PATCH 5/5] describe: pass commit to describe_commit() There's a call in describe_commit() to lookup_commit_reference(), but we don't check the return value. If it returns NULL, we'll segfault as we immediately dereference the result. In practice this can never happen, since all callers pass an oid which came from a "struct commit" already. So we can make this more obvious by just taking that commit struct in the first place. Reported-by: Cheng Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/describe.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 72b2e1162c..04df89d56b 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -313,9 +313,9 @@ static void append_suffix(int depth, const struct object_id *oid, struct strbuf repo_find_unique_abbrev(the_repository, oid, abbrev)); } -static void describe_commit(struct object_id *oid, struct strbuf *dst) +static void describe_commit(struct commit *cmit, struct strbuf *dst) { - struct commit *cmit, *gave_up_on = NULL; + struct commit *gave_up_on = NULL; struct commit_list *list; struct commit_name *n; struct possible_tag all_matches[MAX_TAGS]; @@ -323,8 +323,6 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) unsigned long seen_commits = 0; unsigned int unannotated_cnt = 0; - cmit = lookup_commit_reference(the_repository, oid); - n = find_commit_name(&cmit->object.oid); if (n && (tags || all || n->prio == 2)) { /* @@ -332,7 +330,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) */ append_name(n, dst); if (n->misnamed || longformat) - append_suffix(0, n->tag ? get_tagged_oid(n->tag) : oid, dst); + append_suffix(0, n->tag ? get_tagged_oid(n->tag) : &cmit->object.oid, dst); if (suffix) strbuf_addstr(dst, suffix); return; @@ -489,7 +487,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) } struct process_commit_data { - struct object_id current_commit; + struct commit *current_commit; const struct object_id *looking_for; struct strbuf *dst; struct rev_info *revs; @@ -498,7 +496,7 @@ struct process_commit_data { static void process_commit(struct commit *commit, void *data) { struct process_commit_data *pcd = data; - pcd->current_commit = commit->object.oid; + pcd->current_commit = commit; } static void process_object(struct object *obj, const char *path, void *data) @@ -507,8 +505,8 @@ static void process_object(struct object *obj, const char *path, void *data) if (oideq(pcd->looking_for, &obj->oid) && !pcd->dst->len) { reset_revision_walk(); - if (!is_null_oid(&pcd->current_commit)) { - describe_commit(&pcd->current_commit, pcd->dst); + if (pcd->current_commit) { + describe_commit(pcd->current_commit, pcd->dst); strbuf_addf(pcd->dst, ":%s", path); } free_commit_list(pcd->revs->commits); @@ -521,7 +519,7 @@ static void describe_blob(const struct object_id *oid, struct strbuf *dst) struct rev_info revs; struct strvec args = STRVEC_INIT; struct object_id head_oid; - struct process_commit_data pcd = { *null_oid(the_hash_algo), oid, dst, &revs}; + struct process_commit_data pcd = { NULL, oid, dst, &revs}; if (repo_get_oid(the_repository, "HEAD", &head_oid)) die(_("cannot search for blob '%s' on an unborn branch"), @@ -562,7 +560,7 @@ static void describe(const char *arg, int last_one) cmit = lookup_commit_reference_gently(the_repository, &oid, 1); if (cmit) - describe_commit(&oid, &sb); + describe_commit(cmit, &sb); else if (odb_read_object_info(the_repository->objects, &oid, NULL) == OBJ_BLOB) describe_blob(&oid, &sb);