From 767ee993b7ea3175691444af67550817de7f6c73 Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Tue, 17 Feb 2026 20:31:30 -0600 Subject: [PATCH 1/3] contrib/subtree: capture additional test-cases Patch series e7b07376e5 (Merge branch 'rs/subtree-fixes', 2018-10-26) corrects several defects in `git subtree split`. The defects affect `split --rejoin` and merge commit processing. There is no test coverage for this, and e7b07376e5 did not introduce any. Convert the minimum working example [1] from the original patch submission [2] into test cases. [1]: https://gist.github.com/FoxFireX/1b794384612b7fd5e7cd157cff96269e [2]: <20180928183540.48968-1-roger.strain@swri.org> Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/t/t7900-subtree.sh | 110 +++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/contrib/subtree/t/t7900-subtree.sh b/contrib/subtree/t/t7900-subtree.sh index e7040718f2..3ee2f95d86 100755 --- a/contrib/subtree/t/t7900-subtree.sh +++ b/contrib/subtree/t/t7900-subtree.sh @@ -1575,6 +1575,116 @@ test_expect_success 'push split to subproj' ' ) ' +# --ignore-joins must ignore mainline content outside of the +# subtree. This test verifies that the logic in +# `find_existing_splits()` correctly handles a `git subtree add` +# In this test, the split history must not contain a commit titled +# +# Add 'sub/' from commit ... +# +# see: dd21d43b58 (subtree: make --ignore-joins pay +# attention to adds, 2018-09-28) +test_expect_success 'split --ignore-joins respects subtree add' ' + subtree_test_create_repo "$test_count" && + ( + cd "$test_count" && + test_commit main_must_not_be_in_subtree && + test_create_subtree_add . mksubtree sub sub1 && + test_commit sub/sub2 && + test_commit main_must_not_be_in_subtree2 && + git subtree split --prefix sub -b first_split --rejoin && + test_commit sub/sub3 && + no_ignore_joins="$(git subtree split --prefix sub -b no_ignore_joins)" && + ignore_joins="$(git subtree split --prefix sub --ignore-joins -b ignore_joins)" && + git checkout ignore_joins && + test_path_is_file sub1.t && + test_path_is_file sub2.t && + test_path_is_file sub3.t && + ! test_path_is_file main_must_not_be_in_subtree.t && + ! test_path_is_file main_must_not_be_in_subtree2.t && + test -z "$(git log -1 --grep "Add '''sub/''' from commit" ignore_joins)" && + test "$no_ignore_joins" = "$ignore_joins" && + test "$(git rev-list --count ignore_joins)" -eq 3; + ) +' + +# split excludes commits reachable from any previous --rejoin. +# These ignored commits can still be the basis for new work +# after the --rejoin. These commits must be processed, even +# if they are excluded. Otherwise, the split history will be +# incorrect. +# +# here, the merge +# +# git merge --no-ff new_work_based_on_prejoin +# +# doesn't contain any subtree changes and so should not end +# up in the split history. this subtree should be flat, +# with no merges. +# +# see: 315a84f9aa (subtree: use commits before rejoins for +# splits, 2018-09-28) +test_expect_success 'split links out-of-tree pre --rejoin commits with post --rejoin commits' ' + subtree_test_create_repo "$test_count" && + ( + cd "$test_count" && + test_commit main_must_not_be_in_subtree && + mkdir sub && + test_commit sub/sub1 && + test_commit sub/sub2 && + git subtree split --prefix sub --rejoin && + test "$(git rev-list --count HEAD)" -eq 6 && + git checkout sub/sub1 && + git checkout -b new_work_based_on_prejoin && + test_commit main_must_not_be_in_subtree2 && + git checkout main && + git merge --no-ff new_work_based_on_prejoin && + test_commit sub/sub3 && + git subtree split -d --prefix sub -b second_split && + git checkout second_split && + test_path_is_file sub1.t && + test_path_is_file sub2.t && + test_path_is_file sub3.t && + ! test_path_is_file main_must_not_be_in_subtree.t && + ! test_path_is_file main_must_not_be_in_subtree2.t && + test "$(git rev-list --count --merges second_split)" -eq 0 && + test "$(git rev-list --count second_split)" -eq 3; + ) +' + +# split must keep merge commits with unrelated histories, even +# if both parents are treesame. When deciding whether or not +# to eliminate a parent, copy_or_skip compares the merge-base +# of each parent. +# +# in the split_of_merges branch: +# +# * expect 4 commits +# * HEAD~ must be a merge +# +# see: 68f8ff8151 (subtree: improve decision on merges kept +# in split, 2018-09-28) +test_expect_success 'split preserves merges with unrelated history' ' + subtree_test_create_repo "$test_count" && + ( + cd "$test_count" && + test_commit main_must_not_be_in_subtree && + mkdir sub && + test_commit sub/sub1 && + git checkout --orphan new_history && + git checkout sub/sub1 -- . && + git add . && + git commit -m "treesame history but not a merge-base" && + git checkout main && + git merge --allow-unrelated-histories --no-ff new_history && + test "$(git rev-parse "HEAD^1^{tree}")" = "$(git rev-parse "HEAD^2^{tree}")" && + test_commit sub/sub2 && + git subtree split -d --prefix sub -b split_of_merges && + test "$(git rev-list --count split_of_merges)" -eq 4 && + test -n "$(git rev-list --merges HEAD~)"; + ) +' + # # This test covers 2 cases in subtree split copy_or_skip code # 1) Merges where one parent is a superset of the changes of the other From 715b406e47d51a6f4f6be3b0ed42cfbd59217258 Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Tue, 17 Feb 2026 20:31:31 -0600 Subject: [PATCH 2/3] contrib/subtree: test history depth Add history depth checks to some of the subtree unit tests. These checks were previously introduced as part of 28a7e27cff (contrib/subtree: detect rewritten subtree commits, 2026-01-09), which has since been reverted. Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/t/t7900-subtree.sh | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/contrib/subtree/t/t7900-subtree.sh b/contrib/subtree/t/t7900-subtree.sh index 3ee2f95d86..dad8dea63a 100755 --- a/contrib/subtree/t/t7900-subtree.sh +++ b/contrib/subtree/t/t7900-subtree.sh @@ -411,8 +411,9 @@ test_expect_success 'split sub dir/ with --rejoin' ' git fetch ./"sub proj" HEAD && git subtree merge --prefix="sub dir" FETCH_HEAD && split_hash=$(git subtree split --prefix="sub dir" --annotate="*") && - git subtree split --prefix="sub dir" --annotate="*" --rejoin && - test "$(last_commit_subject)" = "Split '\''sub dir/'\'' into commit '\''$split_hash'\''" + git subtree split --prefix="sub dir" --annotate="*" -b spl --rejoin && + test "$(last_commit_subject)" = "Split '\''sub dir/'\'' into commit '\''$split_hash'\''" && + test "$(git rev-list --count spl)" -eq 5 ) ' @@ -442,18 +443,25 @@ test_expect_success 'split with multiple subtrees' ' git -C "$test_count" subtree add --prefix=subADir FETCH_HEAD && git -C "$test_count" fetch ./subB HEAD && git -C "$test_count" subtree add --prefix=subBDir FETCH_HEAD && + test "$(git -C "$test_count" rev-list --count main)" -eq 7 && test_create_commit "$test_count" subADir/main-subA1 && test_create_commit "$test_count" subBDir/main-subB1 && git -C "$test_count" subtree split --prefix=subADir \ - --squash --rejoin -m "Sub A Split 1" && + --squash --rejoin -m "Sub A Split 1" -b a1 && + test "$(git -C "$test_count" rev-list --count main..a1)" -eq 1 && git -C "$test_count" subtree split --prefix=subBDir \ - --squash --rejoin -m "Sub B Split 1" && + --squash --rejoin -m "Sub B Split 1" -b b1 && + test "$(git -C "$test_count" rev-list --count main..b1)" -eq 1 && test_create_commit "$test_count" subADir/main-subA2 && test_create_commit "$test_count" subBDir/main-subB2 && git -C "$test_count" subtree split --prefix=subADir \ - --squash --rejoin -m "Sub A Split 2" && + --squash --rejoin -m "Sub A Split 2" -b a2 && + test "$(git -C "$test_count" rev-list --count main..a2)" -eq 2 && + test "$(git -C "$test_count" rev-list --count a1..a2)" -eq 1 && test "$(git -C "$test_count" subtree split --prefix=subBDir \ - --squash --rejoin -d -m "Sub B Split 1" 2>&1 | grep -w "\[1\]")" = "" + --squash --rejoin -d -m "Sub B Split 1" -b b2 2>&1 | grep -w "\[1\]")" = "" && + test "$(git -C "$test_count" rev-list --count main..b2)" -eq 2 && + test "$(git -C "$test_count" rev-list --count b1..b2)" -eq 1 ' # When subtree split-ing a directory that has other subtree @@ -477,6 +485,7 @@ do test_path_is_file subA/file1.t && test_path_is_file subA/subB/file2.t && git subtree split --prefix=subA --branch=bsplit && + test "$(git rev-list --count bsplit)" -eq 2 && git checkout bsplit && test_path_is_file file1.t && test_path_is_file subB/file2.t && @@ -489,6 +498,7 @@ do --prefix=subA/subB mksubtree && test_path_is_file subA/subB/file3.t && git subtree split --prefix=subA --branch=bsplit && + test "$(git rev-list --count bsplit)" -eq 3 && git checkout bsplit && test_path_is_file file1.t && test_path_is_file subB/file2.t && From 1f70684b517f4fcfeb7b998b0b7f3146ee8a8c75 Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Tue, 17 Feb 2026 20:31:32 -0600 Subject: [PATCH 3/3] contrib/subtree: process out-of-prefix subtrees `should_ignore_subtree_split_commit` detects subtrees which are outside of the current path --prefix and ignores them. This can speed up splits of repositories that have many subtrees. Since its inception [1], every iteration of this logic [2], [3] incorrectly excludes commits. This alters the split history. The split history and its commit hashes are API contract, so this is not permissible. While a commit from a different subtree may look like it doesn't contribute anything to a split, sometimes it does. Merge commits are a particular hot spot. For these, the pruning logic in `copy_or_skip` performs: 1. a check for "treesame" parents 2. two different common ancestry checks These checks operate on the **split history**, not the input history. The split history omits commits that do not affect the --prefix. This can significantly alter the ancestry of a merge. In order to determine if `copy_or_skip` will skip a merge, it is likely necessary to compute all the split history... which is what `should_ignore_subtree_split_commit` tries to avoid. To make this logic API-preserving, we could gate it behind a new CLI argument. The present implementation is actually a speed penalty in many cases, however, so this is not done here. Remove the `should_ignore_subtree_split_commit` logic. This fixes the regression reported in [4]. [1]: 98ba49ccc2 (subtree: fix split processing with multiple subtrees present, 2023-12-01) [2]: 83f9dad7d6 (contrib/subtree: fix split with squashed subtrees, 2025-09-09) [3]: 28a7e27cff (contrib/subtree: detect rewritten subtree commits, 2026-01-09) [4]: <20251230170719.845029-1-george@mail.dietrich.pub> Reported-by: George Reported-by: Christian Heusel Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 50 +--------------------- contrib/subtree/t/t7900-subtree.sh | 68 ++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 53 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 17106d1a72..ba9fb2ee5d 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -785,42 +785,6 @@ ensure_valid_ref_format () { die "fatal: '$1' does not look like a ref" } -# Usage: should_ignore_subtree_split_commit REV -# -# Check if REV is a commit from another subtree and should be -# ignored from processing for splits -should_ignore_subtree_split_commit () { - assert test $# = 1 - - git show \ - --no-patch \ - --no-show-signature \ - --format='%(trailers:key=git-subtree-dir,key=git-subtree-mainline)' \ - "$1" | - ( - have_mainline= - subtree_dir= - - while read -r trailer val - do - case "$trailer" in - git-subtree-dir:) - subtree_dir="${val%/}" ;; - git-subtree-mainline:) - have_mainline=y ;; - esac - done - - if test -n "${subtree_dir}" && - test -z "${have_mainline}" && - test "${subtree_dir}" != "$arg_prefix" - then - return 0 - fi - return 1 - ) -} - # Usage: process_split_commit REV PARENTS process_split_commit () { assert test $# = 2 @@ -1006,19 +970,7 @@ cmd_split () { eval "$grl" | while read rev parents do - if should_ignore_subtree_split_commit "$rev" - then - continue - fi - parsedparents='' - for parent in $parents - do - if ! should_ignore_subtree_split_commit "$parent" - then - parsedparents="$parsedparents$parent " - fi - done - process_split_commit "$rev" "$parsedparents" + process_split_commit "$rev" "$parents" done || exit $? latest_new=$(cache_get latest_new) || exit $? diff --git a/contrib/subtree/t/t7900-subtree.sh b/contrib/subtree/t/t7900-subtree.sh index dad8dea63a..05a774ad47 100755 --- a/contrib/subtree/t/t7900-subtree.sh +++ b/contrib/subtree/t/t7900-subtree.sh @@ -428,8 +428,7 @@ test_expect_success 'split sub dir/ with --rejoin' ' # - Perform 'split' on subtree B # - Create new commits with changes to subtree A and B # - Perform split on subtree A -# - Check that the commits in subtree B are not processed -# as part of the subtree A split +# - Check for expected history test_expect_success 'split with multiple subtrees' ' subtree_test_create_repo "$test_count" && subtree_test_create_repo "$test_count/subA" && @@ -458,8 +457,8 @@ test_expect_success 'split with multiple subtrees' ' --squash --rejoin -m "Sub A Split 2" -b a2 && test "$(git -C "$test_count" rev-list --count main..a2)" -eq 2 && test "$(git -C "$test_count" rev-list --count a1..a2)" -eq 1 && - test "$(git -C "$test_count" subtree split --prefix=subBDir \ - --squash --rejoin -d -m "Sub B Split 1" -b b2 2>&1 | grep -w "\[1\]")" = "" && + git -C "$test_count" subtree split --prefix=subBDir \ + --squash --rejoin -d -m "Sub B Split 1" -b b2 && test "$(git -C "$test_count" rev-list --count main..b2)" -eq 2 && test "$(git -C "$test_count" rev-list --count b1..b2)" -eq 1 ' @@ -507,6 +506,67 @@ do ' done +# Usually, +# +# git subtree merge -P subA --squash f00... +# +# makes two commits, in this order: +# +# 1. Squashed 'subA/' content from commit f00... +# 2. Merge commit (1) as 'subA' +# +# Commit 1 updates the subtree but does *not* rewrite paths. +# Commit 2 rewrites all trees to start with `subA/` +# +# Commit 1 either has no parents or depends only on other +# "Squashed 'subA/' content" commits. +# +# For merge without --squash, subtree produces just one commit: +# a merge commit with git-subtree trailers. +# +# In either case, if the user rebases these commits, they will +# still have the git-subtree-* trailers… but will NOT have +# the layout described above. +# +# Test that subsequent `git subtree split` are not confused by this. +test_expect_success 'split with rebased subtree commit' ' + subtree_test_create_repo "$test_count" && + ( + cd "$test_count" && + test_commit file0 && + test_create_subtree_add \ + . mksubtree subA file1 --squash && + test_path_is_file subA/file1.t && + mkdir subB && + test_commit subB/bfile && + git commit --amend -F - <<'EOF' && +Squashed '\''subB/'\'' content from commit '\''badf00da911bbe895347b4b236f5461d55dc9877'\'' + +Simulate a cherry-picked or rebased subtree commit. + +git-subtree-dir: subB +git-subtree-split: badf00da911bbe895347b4b236f5461d55dc9877 +EOF + test_commit subA/file2 && + test_commit subB/bfile2 && + git commit --amend -F - <<'EOF' && +Split '\''subB/'\'' into commit '\''badf00da911bbe895347b4b236f5461d55dc9877'\'' + +Simulate a cherry-picked or rebased subtree commit. + +git-subtree-dir: subB +git-subtree-mainline: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +git-subtree-split: badf00da911bbe895347b4b236f5461d55dc9877 +EOF + git subtree split --prefix=subA --branch=bsplit && + git checkout bsplit && + test_path_is_file file1.t && + test_path_is_file file2.t && + test "$(last_commit_subject)" = "subA/file2" && + test "$(git rev-list --count bsplit)" -eq 2 + ) +' + test_expect_success 'split sub dir/ with --rejoin from scratch' ' subtree_test_create_repo "$test_count" && test_create_commit "$test_count" main1 &&