From 864075ec433644e5908589fe79f8b3296f896867 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 14 Dec 2020 16:21:30 +0000 Subject: [PATCH 01/28] merge-ort: add basic data structures for handling renames This will grow later, but we only need a few fields for basic rename handling. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 414e7b7eea..1c1a7fa4bf 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -46,6 +46,25 @@ enum merge_side { MERGE_SIDE2 = 2 }; +struct rename_info { + /* + * pairs: pairing of filenames from diffcore_rename() + * + * Index 1 and 2 correspond to sides 1 & 2 as used in + * conflict_info.stages. Index 0 unused. + */ + struct diff_queue_struct pairs[3]; + + /* + * needed_limit: value needed for inexact rename detection to run + * + * If the current rename limit wasn't high enough for inexact + * rename detection to run, this records the limit needed. Otherwise, + * this value remains 0. + */ + int needed_limit; +}; + struct merge_options_internal { /* * paths: primary data structure in all of merge ort. @@ -113,6 +132,11 @@ struct merge_options_internal { */ struct strmap output; + /* + * renames: various data relating to rename detection + */ + struct rename_info renames; + /* * current_dir_name: temporary var used in collect_merge_info_callback() * From e1a124e8dc3d21afedc80cc2842d705c1d5a90c9 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 14 Dec 2020 16:21:31 +0000 Subject: [PATCH 02/28] merge-ort: add initial outline for basic rename detection Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 1c1a7fa4bf..8552f5e231 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -644,20 +644,72 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to regular rename detection ***/ +static int process_renames(struct merge_options *opt, + struct diff_queue_struct *renames) +{ + die("Not yet implemented."); +} + +static int compare_pairs(const void *a_, const void *b_) +{ + die("Not yet implemented."); +} + +/* Call diffcore_rename() to compute which files have changed on given side */ +static void detect_regular_renames(struct merge_options *opt, + struct tree *merge_base, + struct tree *side, + unsigned side_index) +{ + die("Not yet implemented."); +} + +/* + * Get information of all renames which occurred in 'side_pairs', discarding + * non-renames. + */ +static int collect_renames(struct merge_options *opt, + struct diff_queue_struct *result, + unsigned side_index) +{ + die("Not yet implemented."); +} + static int detect_and_process_renames(struct merge_options *opt, struct tree *merge_base, struct tree *side1, struct tree *side2) { - int clean = 1; + struct diff_queue_struct combined; + struct rename_info *renames = &opt->priv->renames; + int s, clean = 1; + + memset(&combined, 0, sizeof(combined)); + + detect_regular_renames(opt, merge_base, side1, MERGE_SIDE1); + detect_regular_renames(opt, merge_base, side2, MERGE_SIDE2); + + ALLOC_GROW(combined.queue, + renames->pairs[1].nr + renames->pairs[2].nr, + combined.alloc); + clean &= collect_renames(opt, &combined, MERGE_SIDE1); + clean &= collect_renames(opt, &combined, MERGE_SIDE2); + QSORT(combined.queue, combined.nr, compare_pairs); + + clean &= process_renames(opt, &combined); + + /* Free memory for renames->pairs[] and combined */ + for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) { + free(renames->pairs[s].queue); + DIFF_QUEUE_CLEAR(&renames->pairs[s]); + } + if (combined.nr) { + int i; + for (i = 0; i < combined.nr; i++) + diff_free_filepair(combined.queue[i]); + free(combined.queue); + } - /* - * Rename detection works by detecting file similarity. Here we use - * a really easy-to-implement scheme: files are similar IFF they have - * the same filename. Therefore, by this scheme, there are no renames. - * - * TODO: Actually implement a real rename detection scheme. - */ return clean; } From f39d05ca2637dfcbf0498824d06e7854427936a5 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 14 Dec 2020 16:21:32 +0000 Subject: [PATCH 03/28] merge-ort: implement detect_regular_renames() Based heavily on merge-recursive's get_diffpairs() function, and also includes the necessary paired call to diff_warn_rename_limit() so that users will be warned if merge.renameLimit is not sufficiently large for rename detection to run. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 8552f5e231..66f84d39b4 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -661,7 +661,33 @@ static void detect_regular_renames(struct merge_options *opt, struct tree *side, unsigned side_index) { - die("Not yet implemented."); + struct diff_options diff_opts; + struct rename_info *renames = &opt->priv->renames; + + repo_diff_setup(opt->repo, &diff_opts); + diff_opts.flags.recursive = 1; + diff_opts.flags.rename_empty = 0; + diff_opts.detect_rename = DIFF_DETECT_RENAME; + diff_opts.rename_limit = opt->rename_limit; + if (opt->rename_limit <= 0) + diff_opts.rename_limit = 1000; + diff_opts.rename_score = opt->rename_score; + diff_opts.show_rename_progress = opt->show_rename_progress; + diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; + diff_setup_done(&diff_opts); + diff_tree_oid(&merge_base->object.oid, &side->object.oid, "", + &diff_opts); + diffcore_std(&diff_opts); + + if (diff_opts.needed_rename_limit > renames->needed_limit) + renames->needed_limit = diff_opts.needed_rename_limit; + + renames->pairs[side_index] = diff_queued_diff; + + diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; + diff_queued_diff.nr = 0; + diff_queued_diff.queue = NULL; + diff_flush(&diff_opts); } /* @@ -1406,6 +1432,10 @@ void merge_switch_to_result(struct merge_options *opt, printf("%s", sb->buf); } string_list_clear(&olist, 0); + + /* Also include needed rename limit adjustment now */ + diff_warn_rename_limit("merge.renamelimit", + opti->renames.needed_limit, 0); } merge_finalize(opt, result); From 965a7bc21c6bb11fb15eb798fd472ec9deb5e3fa Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 14 Dec 2020 16:21:33 +0000 Subject: [PATCH 04/28] merge-ort: implement compare_pairs() and collect_renames() Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 66f84d39b4..10550c542b 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -652,7 +652,10 @@ static int process_renames(struct merge_options *opt, static int compare_pairs(const void *a_, const void *b_) { - die("Not yet implemented."); + const struct diff_filepair *a = *((const struct diff_filepair **)a_); + const struct diff_filepair *b = *((const struct diff_filepair **)b_); + + return strcmp(a->one->path, b->one->path); } /* Call diffcore_rename() to compute which files have changed on given side */ @@ -698,7 +701,35 @@ static int collect_renames(struct merge_options *opt, struct diff_queue_struct *result, unsigned side_index) { - die("Not yet implemented."); + int i, clean = 1; + struct diff_queue_struct *side_pairs; + struct rename_info *renames = &opt->priv->renames; + + side_pairs = &renames->pairs[side_index]; + + for (i = 0; i < side_pairs->nr; ++i) { + struct diff_filepair *p = side_pairs->queue[i]; + + if (p->status != 'R') { + diff_free_filepair(p); + continue; + } + + /* + * p->score comes back from diffcore_rename_extended() with + * the similarity of the renamed file. The similarity is + * was used to determine that the two files were related + * and are a rename, which we have already used, but beyond + * that we have no use for the similarity. So p->score is + * now irrelevant. However, process_renames() will need to + * know which side of the merge this rename was associated + * with, so overwrite p->score with that value. + */ + p->score = side_index; + result->queue[result->nr++] = p; + } + + return clean; } static int detect_and_process_renames(struct merge_options *opt, From c2d267df0215d49f06638463889465389134021a Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 14 Dec 2020 16:21:34 +0000 Subject: [PATCH 05/28] merge-ort: add basic outline for process_renames() Add code which determines which kind of special rename case each rename corresponds to, but leave the handling of each type unimplemented for now. Future commits will implement each one. There is some tenuous resemblance to merge-recursive's process_renames(), but comparing the two is very unlikely to yield any insights. merge-ort's process_renames() is a bit complex and I would prefer if I could simplify it more, but it is far easier to grok than merge-recursive's function of the same name in my opinion. Plus, merge-ort handles more rename conflict types than merge-recursive does. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 10550c542b..ebe275ef73 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -647,7 +647,103 @@ static int handle_content_merge(struct merge_options *opt, static int process_renames(struct merge_options *opt, struct diff_queue_struct *renames) { - die("Not yet implemented."); + int clean_merge = 1, i; + + for (i = 0; i < renames->nr; ++i) { + const char *oldpath = NULL, *newpath; + struct diff_filepair *pair = renames->queue[i]; + struct conflict_info *oldinfo = NULL, *newinfo = NULL; + struct strmap_entry *old_ent, *new_ent; + unsigned int old_sidemask; + int target_index, other_source_index; + int source_deleted, collision, type_changed; + + old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path); + oldpath = old_ent->key; + oldinfo = old_ent->value; + + new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path); + newpath = new_ent->key; + newinfo = new_ent->value; + + /* + * diff_filepairs have copies of pathnames, thus we have to + * use standard 'strcmp()' (negated) instead of '=='. + */ + if (i + 1 < renames->nr && + !strcmp(oldpath, renames->queue[i+1]->one->path)) { + /* Handle rename/rename(1to2) or rename/rename(1to1) */ + const char *pathnames[3]; + + pathnames[0] = oldpath; + pathnames[1] = newpath; + pathnames[2] = renames->queue[i+1]->two->path; + + if (!strcmp(pathnames[1], pathnames[2])) { + /* Both sides renamed the same way. */ + die("Not yet implemented"); + + /* We handled both renames, i.e. i+1 handled */ + i++; + /* Move to next rename */ + continue; + } + + /* This is a rename/rename(1to2) */ + die("Not yet implemented"); + + i++; /* We handled both renames, i.e. i+1 handled */ + continue; + } + + VERIFY_CI(oldinfo); + VERIFY_CI(newinfo); + target_index = pair->score; /* from collect_renames() */ + assert(target_index == 1 || target_index == 2); + other_source_index = 3 - target_index; + old_sidemask = (1 << other_source_index); /* 2 or 4 */ + source_deleted = (oldinfo->filemask == 1); + collision = ((newinfo->filemask & old_sidemask) != 0); + type_changed = !source_deleted && + (S_ISREG(oldinfo->stages[other_source_index].mode) != + S_ISREG(newinfo->stages[target_index].mode)); + if (type_changed && collision) { + /* special handling so later blocks can handle this */ + die("Not yet implemented"); + } + + assert(source_deleted || oldinfo->filemask & old_sidemask); + + /* Need to check for special types of rename conflicts... */ + if (collision && !source_deleted) { + /* collision: rename/add or rename/rename(2to1) */ + die("Not yet implemented"); + } else if (collision && source_deleted) { + /* rename/add/delete or rename/rename(2to1)/delete */ + die("Not yet implemented"); + } else { + /* a few different cases... */ + if (type_changed) { + /* rename vs. typechange */ + die("Not yet implemented"); + } else if (source_deleted) { + /* rename/delete */ + die("Not yet implemented"); + } else { + /* normal rename */ + die("Not yet implemented"); + } + } + + if (!type_changed) { + /* Mark the original as resolved by removal */ + oldinfo->merged.is_null = 1; + oldinfo->merged.clean = 1; + } + + } + + return clean_merge; } static int compare_pairs(const void *a_, const void *b_) From af1e56c49e61a39d5a7d78cecb97f350b5a3834a Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Dec 2020 18:28:01 +0000 Subject: [PATCH 06/28] merge-ort: add implementation of both sides renaming identically Implement rename/rename(1to1) handling, i.e. both sides of history renaming a file but renaming the same way. This code replaces the following from merge-recurisve.c: * all the 1to1 code in process_renames() * the RENAME_ONE_FILE_TO_ONE case of process_entry() Also, there is some shared code from merge-recursive.c for multiple different rename cases which we will no longer need for this case (or other rename cases): * handle_rename_normal() * setup_rename_conflict_info() The consolidation of four separate codepaths into one is made possible by a change in design: process_renames() tweaks the conflict_info entries within opt->priv->paths such that process_entry() can then handle all the non-rename conflict types (directory/file, modify/delete, etc.) orthogonally. This means we're much less likely to miss special implementation of some kind of combination of conflict types (see commits brought in by 66c62eaec6 ("Merge branch 'en/merge-tests'", 2020-11-18), especially commit ef52778708 ("merge tests: expect improved directory/file conflict handling in ort", 2020-10-26) for more details). That, together with letting worktree/index updating be handled orthogonally in the merge_switch_to_result() function, dramatically simplifies the code for various special rename cases. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index ebe275ef73..da3715baa6 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -674,14 +674,30 @@ static int process_renames(struct merge_options *opt, !strcmp(oldpath, renames->queue[i+1]->one->path)) { /* Handle rename/rename(1to2) or rename/rename(1to1) */ const char *pathnames[3]; + struct version_info merged; + struct conflict_info *base, *side1, *side2; pathnames[0] = oldpath; pathnames[1] = newpath; pathnames[2] = renames->queue[i+1]->two->path; + base = strmap_get(&opt->priv->paths, pathnames[0]); + side1 = strmap_get(&opt->priv->paths, pathnames[1]); + side2 = strmap_get(&opt->priv->paths, pathnames[2]); + + VERIFY_CI(base); + VERIFY_CI(side1); + VERIFY_CI(side2); + if (!strcmp(pathnames[1], pathnames[2])) { - /* Both sides renamed the same way. */ - die("Not yet implemented"); + /* Both sides renamed the same way */ + assert(side1 == side2); + memcpy(&side1->stages[0], &base->stages[0], + sizeof(merged)); + side1->filemask |= (1 << MERGE_BASE); + /* Mark base as resolved by removal */ + base->merged.is_null = 1; + base->merged.clean = 1; /* We handled both renames, i.e. i+1 handled */ i++; From 53e88a03539db70a7e990489784400b0b2916fa9 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Dec 2020 18:28:02 +0000 Subject: [PATCH 07/28] merge-ort: add implementation of both sides renaming differently Implement rename/rename(1to2) handling, i.e. both sides of history renaming a file and rename it differently. This code replaces the following from merge-recurisve.c: * all the 1to2 code in process_renames() * the RENAME_ONE_FILE_TO_TWO case of process_entry() * handle_rename_rename_1to2() Also, there is some shared code from merge-recursive.c for multiple different rename cases which we will no longer need for this case (or other rename cases): * handle_file_collision() * setup_rename_conflict_info() The consolidation of five separate codepaths into one is made possible by a change in design: process_renames() tweaks the conflict_info entries within opt->priv->paths such that process_entry() can then handle all the non-rename conflict types (directory/file, modify/delete, etc.) orthogonally. This means we're much less likely to miss special implementation of some kind of combination of conflict types (see commits brought in by 66c62eaec6 ("Merge branch 'en/merge-tests'", 2020-11-18), especially commit ef52778708 ("merge tests: expect improved directory/file conflict handling in ort", 2020-10-26) for more details). That, together with letting worktree/index updating be handled orthogonally in the merge_switch_to_result() function, dramatically simplifies the code for various special rename cases. To be fair, there is a _slight_ tweak to process_entry() here to make sure that the two different paths aren't marked as clean but are left in a conflicted state. So process_renames() and process_entry() aren't quite entirely orthogonal, but they are pretty close. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index da3715baa6..19477cfae6 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -676,6 +676,7 @@ static int process_renames(struct merge_options *opt, const char *pathnames[3]; struct version_info merged; struct conflict_info *base, *side1, *side2; + unsigned was_binary_blob = 0; pathnames[0] = oldpath; pathnames[1] = newpath; @@ -706,7 +707,58 @@ static int process_renames(struct merge_options *opt, } /* This is a rename/rename(1to2) */ - die("Not yet implemented"); + clean_merge = handle_content_merge(opt, + pair->one->path, + &base->stages[0], + &side1->stages[1], + &side2->stages[2], + pathnames, + 1 + 2 * opt->priv->call_depth, + &merged); + if (!clean_merge && + merged.mode == side1->stages[1].mode && + oideq(&merged.oid, &side1->stages[1].oid)) + was_binary_blob = 1; + memcpy(&side1->stages[1], &merged, sizeof(merged)); + if (was_binary_blob) { + /* + * Getting here means we were attempting to + * merge a binary blob. + * + * Since we can't merge binaries, + * handle_content_merge() just takes one + * side. But we don't want to copy the + * contents of one side to both paths. We + * used the contents of side1 above for + * side1->stages, let's use the contents of + * side2 for side2->stages below. + */ + oidcpy(&merged.oid, &side2->stages[2].oid); + merged.mode = side2->stages[2].mode; + } + memcpy(&side2->stages[2], &merged, sizeof(merged)); + + side1->path_conflict = 1; + side2->path_conflict = 1; + /* + * TODO: For renames we normally remove the path at the + * old name. It would thus seem consistent to do the + * same for rename/rename(1to2) cases, but we haven't + * done so traditionally and a number of the regression + * tests now encode an expectation that the file is + * left there at stage 1. If we ever decide to change + * this, add the following two lines here: + * base->merged.is_null = 1; + * base->merged.clean = 1; + * and remove the setting of base->path_conflict to 1. + */ + base->path_conflict = 1; + path_msg(opt, oldpath, 0, + _("CONFLICT (rename/rename): %s renamed to " + "%s in %s and to %s in %s."), + pathnames[0], + pathnames[1], opt->branch1, + pathnames[2], opt->branch2); i++; /* We handled both renames, i.e. i+1 handled */ continue; @@ -1291,13 +1343,13 @@ static void process_entry(struct merge_options *opt, int side = (ci->filemask == 4) ? 2 : 1; ci->merged.result.mode = ci->stages[side].mode; oidcpy(&ci->merged.result.oid, &ci->stages[side].oid); - ci->merged.clean = !ci->df_conflict; + ci->merged.clean = !ci->df_conflict && !ci->path_conflict; } else if (ci->filemask == 1) { /* Deleted on both sides */ ci->merged.is_null = 1; ci->merged.result.mode = 0; oidcpy(&ci->merged.result.oid, &null_oid); - ci->merged.clean = 1; + ci->merged.clean = !ci->path_conflict; } /* From 2e91ddd24e7240460fd2a9aee1962345ed858b6b Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Dec 2020 18:28:03 +0000 Subject: [PATCH 08/28] merge-ort: add implementation of rename/delete conflicts Implement rename/delete conflicts, i.e. one side renames a file and the other deletes the file. This code replaces the following from merge-recurisve.c: * the code relevant to RENAME_DELETE in process_renames() * the RENAME_DELETE case of process_entry() * handle_rename_delete() Also, there is some shared code from merge-recursive.c for multiple different rename cases which we will no longer need for this case (or other rename cases): * handle_change_delete() * setup_rename_conflict_info() The consolidation of five separate codepaths into one is made possible by a change in design: process_renames() tweaks the conflict_info entries within opt->priv->paths such that process_entry() can then handle all the non-rename conflict types (directory/file, modify/delete, etc.) orthogonally. This means we're much less likely to miss special implementation of some kind of combination of conflict types (see commits brought in by 66c62eaec6 ("Merge branch 'en/merge-tests'", 2020-11-18), especially commit ef52778708 ("merge tests: expect improved directory/file conflict handling in ort", 2020-10-26) for more details). That, together with letting worktree/index updating be handled orthogonally in the merge_switch_to_result() function, dramatically simplifies the code for various special rename cases. To be fair, there is a _slight_ tweak to process_entry() here, because rename/delete cases will also trigger the modify/delete codepath. However, we only want a modify/delete message to be printed for a rename/delete conflict if there is a content change in the renamed file in addition to the rename. So process_renames() and process_entry() aren't quite fully orthogonal, but they are pretty close. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 19477cfae6..a10c3f5046 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -657,6 +657,7 @@ static int process_renames(struct merge_options *opt, unsigned int old_sidemask; int target_index, other_source_index; int source_deleted, collision, type_changed; + const char *rename_branch = NULL, *delete_branch = NULL; old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path); oldpath = old_ent->key; @@ -779,6 +780,15 @@ static int process_renames(struct merge_options *opt, /* special handling so later blocks can handle this */ die("Not yet implemented"); } + if (source_deleted) { + if (target_index == 1) { + rename_branch = opt->branch1; + delete_branch = opt->branch2; + } else { + rename_branch = opt->branch2; + delete_branch = opt->branch1; + } + } assert(source_deleted || oldinfo->filemask & old_sidemask); @@ -790,13 +800,26 @@ static int process_renames(struct merge_options *opt, /* rename/add/delete or rename/rename(2to1)/delete */ die("Not yet implemented"); } else { - /* a few different cases... */ + /* + * a few different cases...start by copying the + * existing stage(s) from oldinfo over the newinfo + * and update the pathname(s). + */ + memcpy(&newinfo->stages[0], &oldinfo->stages[0], + sizeof(newinfo->stages[0])); + newinfo->filemask |= (1 << MERGE_BASE); + newinfo->pathnames[0] = oldpath; if (type_changed) { /* rename vs. typechange */ die("Not yet implemented"); } else if (source_deleted) { /* rename/delete */ - die("Not yet implemented"); + newinfo->path_conflict = 1; + path_msg(opt, newpath, 0, + _("CONFLICT (rename/delete): %s renamed" + " to %s in %s, but deleted in %s."), + oldpath, newpath, + rename_branch, delete_branch); } else { /* normal rename */ die("Not yet implemented"); @@ -1332,12 +1355,21 @@ static void process_entry(struct merge_options *opt, modify_branch = (side == 1) ? opt->branch1 : opt->branch2; delete_branch = (side == 1) ? opt->branch2 : opt->branch1; - path_msg(opt, path, 0, - _("CONFLICT (modify/delete): %s deleted in %s " - "and modified in %s. Version %s of %s left " - "in tree."), - path, delete_branch, modify_branch, - modify_branch, path); + if (ci->path_conflict && + oideq(&ci->stages[0].oid, &ci->stages[side].oid)) { + /* + * This came from a rename/delete; no action to take, + * but avoid printing "modify/delete" conflict notice + * since the contents were not modified. + */ + } else { + path_msg(opt, path, 0, + _("CONFLICT (modify/delete): %s deleted in %s " + "and modified in %s. Version %s of %s left " + "in tree."), + path, delete_branch, modify_branch, + modify_branch, path); + } } else if (ci->filemask == 2 || ci->filemask == 4) { /* Added on one side */ int side = (ci->filemask == 4) ? 2 : 1; From 35e47e3514b5129e6075a67fa9f4659da2f1a1a1 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Dec 2020 18:28:04 +0000 Subject: [PATCH 09/28] merge-ort: add implementation of rename collisions Implement rename/rename(2to1) and rename/add handling, i.e. a file is renamed into a location where another file is added (with that other file either being a plain add or itself coming from a rename). Note that rename collisions can also have a special case stacked on top: the file being renamed on one side of history is deleted on the other (yielding either a rename/add/delete conflict or perhaps a rename/rename(2to1)/delete[/delete]) conflict. One thing to note here is that when there is a double rename, the code in question only handles one of them at a time; a later iteration through the loop will handle the other. After they've both been handled, process_entry()'s normal add/add code can handle the collision. This code replaces the following from merge-recurisve.c: * all the 2to1 code in process_renames() * the RENAME_TWO_FILES_TO_ONE case of process_entry() * handle_rename_rename_2to1() * handle_rename_add() Also, there is some shared code from merge-recursive.c for multiple different rename cases which we will no longer need for this case (or other rename cases): * handle_file_collision() * setup_rename_conflict_info() The consolidation of six separate codepaths into one is made possible by a change in design: process_renames() tweaks the conflict_info entries within opt->priv->paths such that process_entry() can then handle all the non-rename conflict types (directory/file, modify/delete, etc.) orthogonally. This means we're much less likely to miss special implementation of some kind of combination of conflict types (see commits brought in by 66c62eaec6 ("Merge branch 'en/merge-tests'", 2020-11-18), especially commit ef52778708 ("merge tests: expect improved directory/file conflict handling in ort", 2020-10-26) for more details). That, together with letting worktree/index updating be handled orthogonally in the merge_switch_to_result() function, dramatically simplifies the code for various special rename cases. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index a10c3f5046..1c5b2f7e3b 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -795,10 +795,58 @@ static int process_renames(struct merge_options *opt, /* Need to check for special types of rename conflicts... */ if (collision && !source_deleted) { /* collision: rename/add or rename/rename(2to1) */ - die("Not yet implemented"); + const char *pathnames[3]; + struct version_info merged; + + struct conflict_info *base, *side1, *side2; + unsigned clean; + + pathnames[0] = oldpath; + pathnames[other_source_index] = oldpath; + pathnames[target_index] = newpath; + + base = strmap_get(&opt->priv->paths, pathnames[0]); + side1 = strmap_get(&opt->priv->paths, pathnames[1]); + side2 = strmap_get(&opt->priv->paths, pathnames[2]); + + VERIFY_CI(base); + VERIFY_CI(side1); + VERIFY_CI(side2); + + clean = handle_content_merge(opt, pair->one->path, + &base->stages[0], + &side1->stages[1], + &side2->stages[2], + pathnames, + 1 + 2 * opt->priv->call_depth, + &merged); + + memcpy(&newinfo->stages[target_index], &merged, + sizeof(merged)); + if (!clean) { + path_msg(opt, newpath, 0, + _("CONFLICT (rename involved in " + "collision): rename of %s -> %s has " + "content conflicts AND collides " + "with another path; this may result " + "in nested conflict markers."), + oldpath, newpath); + } } else if (collision && source_deleted) { - /* rename/add/delete or rename/rename(2to1)/delete */ - die("Not yet implemented"); + /* + * rename/add/delete or rename/rename(2to1)/delete: + * since oldpath was deleted on the side that didn't + * do the rename, there's not much of a content merge + * we can do for the rename. oldinfo->merged.is_null + * was already set, so we just leave things as-is so + * they look like an add/add conflict. + */ + + newinfo->path_conflict = 1; + path_msg(opt, newpath, 0, + _("CONFLICT (rename/delete): %s renamed " + "to %s in %s, but deleted in %s."), + oldpath, newpath, rename_branch, delete_branch); } else { /* * a few different cases...start by copying the From f1665e69188f54c941f9bf10ed388a72a91cc2a1 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Dec 2020 18:28:05 +0000 Subject: [PATCH 10/28] merge-ort: add implementation of normal rename handling Implement handling of normal renames. This code replaces the following from merge-recurisve.c: * the code relevant to RENAME_NORMAL in process_renames() * the RENAME_NORMAL case of process_entry() Also, there is some shared code from merge-recursive.c for multiple different rename cases which we will no longer need for this case (or other rename cases): * handle_rename_normal() * setup_rename_conflict_info() The consolidation of four separate codepaths into one is made possible by a change in design: process_renames() tweaks the conflict_info entries within opt->priv->paths such that process_entry() can then handle all the non-rename conflict types (directory/file, modify/delete, etc.) orthogonally. This means we're much less likely to miss special implementation of some kind of combination of conflict types (see commits brought in by 66c62eaec6 ("Merge branch 'en/merge-tests'", 2020-11-18), especially commit ef52778708 ("merge tests: expect improved directory/file conflict handling in ort", 2020-10-26) for more details). That, together with letting worktree/index updating be handled orthogonally in the merge_switch_to_result() function, dramatically simplifies the code for various special rename cases. (To be fair, the code for handling normal renames wasn't all that complicated beforehand, but it's still much simpler now.) Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 1c5b2f7e3b..26f357e524 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -870,7 +870,11 @@ static int process_renames(struct merge_options *opt, rename_branch, delete_branch); } else { /* normal rename */ - die("Not yet implemented"); + memcpy(&newinfo->stages[other_source_index], + &oldinfo->stages[other_source_index], + sizeof(newinfo->stages[0])); + newinfo->filemask |= (1 << other_source_index); + newinfo->pathnames[other_source_index] = oldpath; } } From 6fcccbd75556d1dcc80493f6eb51109f53a5dc83 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 15 Dec 2020 18:28:06 +0000 Subject: [PATCH 11/28] merge-ort: add implementation of type-changed rename handling Implement cases where renames are involved in type changes (i.e. the side of history that didn't rename the file changed its type from a regular file to a symlink or submodule). There was some code to handle this in merge-recursive but only in the special case when the renamed file had no content changes. The code here works differently -- it knows process_entry() can handle mode conflicts, so it does a few minimal tweaks to ensure process_entry() can just finish the job as needed. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 26f357e524..677c6a878c 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -777,8 +777,33 @@ static int process_renames(struct merge_options *opt, (S_ISREG(oldinfo->stages[other_source_index].mode) != S_ISREG(newinfo->stages[target_index].mode)); if (type_changed && collision) { - /* special handling so later blocks can handle this */ - die("Not yet implemented"); + /* + * special handling so later blocks can handle this... + * + * if type_changed && collision are both true, then this + * was really a double rename, but one side wasn't + * detected due to lack of break detection. I.e. + * something like + * orig: has normal file 'foo' + * side1: renames 'foo' to 'bar', adds 'foo' symlink + * side2: renames 'foo' to 'bar' + * In this case, the foo->bar rename on side1 won't be + * detected because the new symlink named 'foo' is + * there and we don't do break detection. But we detect + * this here because we don't want to merge the content + * of the foo symlink with the foo->bar file, so we + * have some logic to handle this special case. The + * easiest way to do that is make 'bar' on side1 not + * be considered a colliding file but the other part + * of a normal rename. If the file is very different, + * well we're going to get content merge conflicts + * anyway so it doesn't hurt. And if the colliding + * file also has a different type, that'll be handled + * by the content merge logic in process_entry() too. + * + * See also t6430, 'rename vs. rename/symlink' + */ + collision = 0; } if (source_deleted) { if (target_index == 1) { @@ -859,7 +884,11 @@ static int process_renames(struct merge_options *opt, newinfo->pathnames[0] = oldpath; if (type_changed) { /* rename vs. typechange */ - die("Not yet implemented"); + /* Mark the original as resolved by removal */ + memcpy(&oldinfo->stages[0].oid, &null_oid, + sizeof(oldinfo->stages[0].oid)); + oldinfo->stages[0].mode = 0; + oldinfo->filemask &= 0x06; } else if (source_deleted) { /* rename/delete */ newinfo->path_conflict = 1; From c09376d55ff41128f3966609e1c4edf3e9fee840 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 7 Jan 2021 21:35:49 +0000 Subject: [PATCH 12/28] merge-ort: add new data structures for directory rename detection Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index d36a92b59b..652ff730af 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -49,14 +49,42 @@ enum merge_side { }; struct rename_info { + /* + * All variables that are arrays of size 3 correspond to data tracked + * for the sides in enum merge_side. Index 0 is almost always unused + * because we often only need to track information for MERGE_SIDE1 and + * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames + * are determined relative to what changed since the MERGE_BASE). + */ + /* * pairs: pairing of filenames from diffcore_rename() - * - * Index 1 and 2 correspond to sides 1 & 2 as used in - * conflict_info.stages. Index 0 unused. */ struct diff_queue_struct pairs[3]; + /* + * dirs_removed: directories removed on a given side of history. + */ + struct strset dirs_removed[3]; + + /* + * dir_rename_count: tracking where parts of a directory were renamed to + * + * When files in a directory are renamed, they may not all go to the + * same location. Each strmap here tracks: + * old_dir => {new_dir => int} + * That is, dir_rename_count[side] is a strmap to a strintmap. + */ + struct strmap dir_rename_count[3]; + + /* + * dir_renames: computed directory renames + * + * This is a map of old_dir => new_dir and is derived in part from + * dir_rename_count. + */ + struct strmap dir_renames[3]; + /* * needed_limit: value needed for inexact rename detection to run * From f5d9fbc2e9fde9a9dc08de12ad5ed0c6d7e3fb0a Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 7 Jan 2021 21:35:50 +0000 Subject: [PATCH 13/28] merge-ort: initialize and free new directory rename data structures Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 652ff730af..2e6d41b0a0 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -308,8 +308,12 @@ static void free_strmap_strings(struct strmap *map) static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, int reinitialize) { + struct rename_info *renames = &opti->renames; + int i; void (*strmap_func)(struct strmap *, int) = reinitialize ? strmap_partial_clear : strmap_clear; + void (*strset_func)(struct strset *) = + reinitialize ? strset_partial_clear : strset_clear; /* * We marked opti->paths with strdup_strings = 0, so that we @@ -339,6 +343,23 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, string_list_clear(&opti->paths_to_free, 0); opti->paths_to_free.strdup_strings = 0; + /* Free memory used by various renames maps */ + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) { + struct hashmap_iter iter; + struct strmap_entry *entry; + + strset_func(&renames->dirs_removed[i]); + + strmap_for_each_entry(&renames->dir_rename_count[i], + &iter, entry) { + struct strintmap *counts = entry->value; + strintmap_clear(counts); + } + strmap_func(&renames->dir_rename_count[i], 1); + + strmap_func(&renames->dir_renames[i], 0); + } + if (!reinitialize) { struct hashmap_iter iter; struct strmap_entry *e; @@ -1812,6 +1833,9 @@ static struct commit *make_virtual_commit(struct repository *repo, static void merge_start(struct merge_options *opt, struct merge_result *result) { + struct rename_info *renames; + int i; + /* Sanity checks on opt */ assert(opt->repo); @@ -1846,6 +1870,17 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) /* Initialization of opt->priv, our internal merge data */ opt->priv = xcalloc(1, sizeof(*opt->priv)); + /* Initialization of various renames fields */ + renames = &opt->priv->renames; + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) { + strset_init_with_options(&renames->dirs_removed[i], + NULL, 0); + strmap_init_with_options(&renames->dir_rename_count[i], + NULL, 1); + strmap_init_with_options(&renames->dir_renames[i], + NULL, 0); + } + /* * Although we initialize opt->priv->paths with strdup_strings=0, * that's just to avoid making yet another copy of an allocated From eb3e3e1ddffa1348532a6b8d741c235e4c73017c Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 7 Jan 2021 21:35:51 +0000 Subject: [PATCH 14/28] merge-ort: collect which directories are removed in dirs_removed Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 2e6d41b0a0..999a7c91c5 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -480,6 +480,27 @@ static void setup_path_info(struct merge_options *opt, result->util = mi; } +static void collect_rename_info(struct merge_options *opt, + struct name_entry *names, + const char *dirname, + const char *fullname, + unsigned filemask, + unsigned dirmask, + unsigned match_mask) +{ + struct rename_info *renames = &opt->priv->renames; + + /* Update dirs_removed, as needed */ + if (dirmask == 1 || dirmask == 3 || dirmask == 5) { + /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */ + unsigned sides = (0x07 - dirmask)/2; + if (sides & 1) + strset_add(&renames->dirs_removed[1], fullname); + if (sides & 2) + strset_add(&renames->dirs_removed[2], fullname); + } +} + static int collect_merge_info_callback(int n, unsigned long mask, unsigned long dirmask, @@ -580,6 +601,12 @@ static int collect_merge_info_callback(int n, return mask; } + /* + * Gather additional information used in rename detection. + */ + collect_rename_info(opt, names, dirname, fullpath, + filemask, dirmask, match_mask); + /* * Record information about the path so we can resolve later in * process_entries. From 112e11126b963923d84f90f12dae9b96271ccedb Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:40 +0000 Subject: [PATCH 15/28] merge-ort: add outline for computing directory renames Port some directory rename handling changes from merge-recursive.c's detect_and_process_renames() to the same-named function of merge-ort.c. This does not yet add any use or handling of directory renames, just the outline for where we start to compute them. Thus, a future patch will add port additional changes to merge-ort's detect_and_process_renames(). Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 999a7c91c5..b4c1fe28a5 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,6 +721,18 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +static void get_provisional_directory_renames(struct merge_options *opt, + unsigned side, + int *clean) +{ + die("Not yet implemented!"); +} + +static void handle_directory_level_conflicts(struct merge_options *opt) +{ + die("Not yet implemented!"); +} + /*** Function Grouping: functions related to regular rename detection ***/ static int process_renames(struct merge_options *opt, @@ -1086,13 +1098,24 @@ static int detect_and_process_renames(struct merge_options *opt, { struct diff_queue_struct combined; struct rename_info *renames = &opt->priv->renames; - int s, clean = 1; + int need_dir_renames, s, clean = 1; memset(&combined, 0, sizeof(combined)); detect_regular_renames(opt, merge_base, side1, MERGE_SIDE1); detect_regular_renames(opt, merge_base, side2, MERGE_SIDE2); + need_dir_renames = + !opt->priv->call_depth && + (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE || + opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_CONFLICT); + + if (need_dir_renames) { + get_provisional_directory_renames(opt, MERGE_SIDE1, &clean); + get_provisional_directory_renames(opt, MERGE_SIDE2, &clean); + handle_directory_level_conflicts(opt); + } + ALLOC_GROW(combined.queue, renames->pairs[1].nr + renames->pairs[2].nr, combined.alloc); From 04264d4079a4423f32d85d7833d63faa5ace4e33 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:41 +0000 Subject: [PATCH 16/28] merge-ort: add outline of get_provisional_directory_renames() This function is based on merge-recursive.c's get_directory_renames(), except that the first half has been split out into a not-yet-implemented compute_rename_counts(). The primary difference here is our lack of the non_unique_new_dir boolean in our strmap. The lack of that field will at first cause us to fail testcase 2b of t6423; however, future optimizations will obviate the need for that ugly field so we have just left it out. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index b4c1fe28a5..2a1c62c533 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,11 +721,66 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +static void compute_rename_counts(struct diff_queue_struct *pairs, + struct strmap *dir_rename_count, + struct strset *dirs_removed) +{ + die("Not yet implemented!"); +} + static void get_provisional_directory_renames(struct merge_options *opt, unsigned side, int *clean) { - die("Not yet implemented!"); + struct hashmap_iter iter; + struct strmap_entry *entry; + struct rename_info *renames = &opt->priv->renames; + + compute_rename_counts(&renames->pairs[side], + &renames->dir_rename_count[side], + &renames->dirs_removed[side]); + /* + * Collapse + * dir_rename_count: old_directory -> {new_directory -> count} + * down to + * dir_renames: old_directory -> best_new_directory + * where best_new_directory is the one with the unique highest count. + */ + strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) { + const char *source_dir = entry->key; + struct strintmap *counts = entry->value; + struct hashmap_iter count_iter; + struct strmap_entry *count_entry; + int max = 0; + int bad_max = 0; + const char *best = NULL; + + strintmap_for_each_entry(counts, &count_iter, count_entry) { + const char *target_dir = count_entry->key; + intptr_t count = (intptr_t)count_entry->value; + + if (count == max) + bad_max = max; + else if (count > max) { + max = count; + best = target_dir; + } + } + + if (bad_max == max) { + path_msg(opt, source_dir, 0, + _("CONFLICT (directory rename split): " + "Unclear where to rename %s to; it was " + "renamed to multiple other directories, with " + "no destination getting a majority of the " + "files."), + source_dir); + *clean = 0; + } else { + strmap_put(&renames->dir_renames[side], + source_dir, (void*)best); + } + } } static void handle_directory_level_conflicts(struct merge_options *opt) From 9fe37e7bb9e251a3419d93f0090c17df05fafb4e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:42 +0000 Subject: [PATCH 17/28] merge-ort: copy get_renamed_dir_portion() from merge-recursive.c Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 2a1c62c533..eb609ab006 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,6 +721,110 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +MAYBE_UNUSED +static void get_renamed_dir_portion(const char *old_path, const char *new_path, + char **old_dir, char **new_dir) +{ + char *end_of_old, *end_of_new; + + /* Default return values: NULL, meaning no rename */ + *old_dir = NULL; + *new_dir = NULL; + + /* + * For + * "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c" + * the "e/foo.c" part is the same, we just want to know that + * "a/b/c/d" was renamed to "a/b/some/thing/else" + * so, for this example, this function returns "a/b/c/d" in + * *old_dir and "a/b/some/thing/else" in *new_dir. + */ + + /* + * If the basename of the file changed, we don't care. We want + * to know which portion of the directory, if any, changed. + */ + end_of_old = strrchr(old_path, '/'); + end_of_new = strrchr(new_path, '/'); + + /* + * If end_of_old is NULL, old_path wasn't in a directory, so there + * could not be a directory rename (our rule elsewhere that a + * directory which still exists is not considered to have been + * renamed means the root directory can never be renamed -- because + * the root directory always exists). + */ + if (end_of_old == NULL) + return; /* Note: *old_dir and *new_dir are still NULL */ + + /* + * If new_path contains no directory (end_of_new is NULL), then we + * have a rename of old_path's directory to the root directory. + */ + if (end_of_new == NULL) { + *old_dir = xstrndup(old_path, end_of_old - old_path); + *new_dir = xstrdup(""); + return; + } + + /* Find the first non-matching character traversing backwards */ + while (*--end_of_new == *--end_of_old && + end_of_old != old_path && + end_of_new != new_path) + ; /* Do nothing; all in the while loop */ + + /* + * If both got back to the beginning of their strings, then the + * directory didn't change at all, only the basename did. + */ + if (end_of_old == old_path && end_of_new == new_path && + *end_of_old == *end_of_new) + return; /* Note: *old_dir and *new_dir are still NULL */ + + /* + * If end_of_new got back to the beginning of its string, and + * end_of_old got back to the beginning of some subdirectory, then + * we have a rename/merge of a subdirectory into the root, which + * needs slightly special handling. + * + * Note: There is no need to consider the opposite case, with a + * rename/merge of the root directory into some subdirectory + * because as noted above the root directory always exists so it + * cannot be considered to be renamed. + */ + if (end_of_new == new_path && + end_of_old != old_path && end_of_old[-1] == '/') { + *old_dir = xstrndup(old_path, --end_of_old - old_path); + *new_dir = xstrdup(""); + return; + } + + /* + * We've found the first non-matching character in the directory + * paths. That means the current characters we were looking at + * were part of the first non-matching subdir name going back from + * the end of the strings. Get the whole name by advancing both + * end_of_old and end_of_new to the NEXT '/' character. That will + * represent the entire directory rename. + * + * The reason for the increment is cases like + * a/b/star/foo/whatever.c -> a/b/tar/foo/random.c + * After dropping the basename and going back to the first + * non-matching character, we're now comparing: + * a/b/s and a/b/ + * and we want to be comparing: + * a/b/star/ and a/b/tar/ + * but without the pre-increment, the one on the right would stay + * a/b/. + */ + end_of_old = strchr(++end_of_old, '/'); + end_of_new = strchr(++end_of_new, '/'); + + /* Copy the old and new directories into *old_dir and *new_dir. */ + *old_dir = xstrndup(old_path, end_of_old - old_path); + *new_dir = xstrndup(new_path, end_of_new - new_path); +} + static void compute_rename_counts(struct diff_queue_struct *pairs, struct strmap *dir_rename_count, struct strset *dirs_removed) From 2f620a4f1983efcb70dee55917240b08065f2c4e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:43 +0000 Subject: [PATCH 18/28] merge-ort: implement compute_rename_counts() This function is based on the first half of get_directory_renames() from merge-recursive.c; as part of the implementation, factor out a routine, increment_count(), to update the bookkeeping to track the number of items renamed into new directories. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index eb609ab006..8aa415c542 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,7 +721,6 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ -MAYBE_UNUSED static void get_renamed_dir_portion(const char *old_path, const char *new_path, char **old_dir, char **new_dir) { @@ -825,11 +824,62 @@ static void get_renamed_dir_portion(const char *old_path, const char *new_path, *new_dir = xstrndup(new_path, end_of_new - new_path); } +static void increment_count(struct strmap *dir_rename_count, + char *old_dir, + char *new_dir) +{ + struct strintmap *counts; + struct strmap_entry *e; + + /* Get the {new_dirs -> counts} mapping using old_dir */ + e = strmap_get_entry(dir_rename_count, old_dir); + if (e) { + counts = e->value; + } else { + counts = xmalloc(sizeof(*counts)); + strintmap_init_with_options(counts, 0, NULL, 1); + strmap_put(dir_rename_count, old_dir, counts); + } + + /* Increment the count for new_dir */ + strintmap_incr(counts, new_dir, 1); +} + static void compute_rename_counts(struct diff_queue_struct *pairs, struct strmap *dir_rename_count, struct strset *dirs_removed) { - die("Not yet implemented!"); + int i; + + for (i = 0; i < pairs->nr; ++i) { + char *old_dir, *new_dir; + struct diff_filepair *pair = pairs->queue[i]; + + /* File not part of directory rename if it wasn't renamed */ + if (pair->status != 'R') + continue; + + /* Get the old and new directory names */ + get_renamed_dir_portion(pair->one->path, pair->two->path, + &old_dir, &new_dir); + if (!old_dir) + /* Directory didn't change at all; ignore this one. */ + continue; + + /* + * Make dir_rename_count contain a map of a map: + * old_directory -> {new_directory -> count} + * In other words, for every pair look at the directories for + * the old filename and the new filename and count how many + * times that pairing occurs. + */ + if (strset_contains(dirs_removed, old_dir)) + increment_count(dir_rename_count, old_dir, new_dir); + + /* Free resources we don't need anymore */ + free(old_dir); + free(new_dir); + } } static void get_provisional_directory_renames(struct merge_options *opt, From 98d0d08128200e244a83d917b63567c30547c36d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:44 +0000 Subject: [PATCH 19/28] merge-ort: implement handle_directory_level_conflicts() This is modelled on the version of handle_directory_level_conflicts() from merge-recursive.c, but is massively simplified due to the following factors: * strmap API provides simplifications over using direct hashmap * we have a dirs_removed field in struct rename_info that we have an easy way to populate from collect_merge_info(); this was already used in compute_rename_counts() and thus we do not need to check for condition #2. * The removal of condition #2 by handling it earlier in the code also obviates the need to check for condition #3 -- if both sides renamed a directory, meaning that the directory no longer exists on either side, then neither side could have added any new files to that directory, and thus there are no files whose locations we need to move due to such a directory rename. In fact, the same logic that makes condition #3 irrelevant means condition #1 is also irrelevant so we could drop this function. However, it is cheap to check if both sides rename the same directory, and doing so can save future computation. So, simply remove any directories that both sides renamed from the list of directory renames. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 8aa415c542..6dea4206dc 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -939,7 +939,24 @@ static void get_provisional_directory_renames(struct merge_options *opt, static void handle_directory_level_conflicts(struct merge_options *opt) { - die("Not yet implemented!"); + struct hashmap_iter iter; + struct strmap_entry *entry; + struct string_list duplicated = STRING_LIST_INIT_NODUP; + struct rename_info *renames = &opt->priv->renames; + struct strmap *side1_dir_renames = &renames->dir_renames[MERGE_SIDE1]; + struct strmap *side2_dir_renames = &renames->dir_renames[MERGE_SIDE2]; + int i; + + strmap_for_each_entry(side1_dir_renames, &iter, entry) { + if (strmap_contains(side2_dir_renames, entry->key)) + string_list_append(&duplicated, entry->key); + } + + for (i = 0; i < duplicated.nr; i++) { + strmap_remove(side1_dir_renames, duplicated.items[i].string, 0); + strmap_remove(side2_dir_renames, duplicated.items[i].string, 0); + } + string_list_clear(&duplicated, 0); } /*** Function Grouping: functions related to regular rename detection ***/ From fa5e06d6902003f5f115b7497030b5c66537726e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:45 +0000 Subject: [PATCH 20/28] merge-ort: modify collect_renames() for directory rename handling collect_renames() is similar to merge-recursive.c's get_renames(), but lacks the directory rename handling found in the latter. Port that code structure over to merge-ort. This introduces three new die-not-yet-implemented functions that will be defined in future commits. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 4 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 6dea4206dc..c86ed85b09 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,6 +721,11 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +struct collision_info { + struct string_list source_files; + unsigned reported_already:1; +}; + static void get_renamed_dir_portion(const char *old_path, const char *new_path, char **old_dir, char **new_dir) { @@ -959,6 +964,31 @@ static void handle_directory_level_conflicts(struct merge_options *opt) string_list_clear(&duplicated, 0); } +static void compute_collisions(struct strmap *collisions, + struct strmap *dir_renames, + struct diff_queue_struct *pairs) +{ + die("Not yet implemented."); +} + +static char *check_for_directory_rename(struct merge_options *opt, + const char *path, + unsigned side_index, + struct strmap *dir_renames, + struct strmap *dir_rename_exclusions, + struct strmap *collisions, + int *clean_merge) +{ + die("Not yet implemented."); +} + +static void apply_directory_rename_modifications(struct merge_options *opt, + struct diff_filepair *pair, + char *new_path) +{ + die("Not yet implemented."); +} + /*** Function Grouping: functions related to regular rename detection ***/ static int process_renames(struct merge_options *opt, @@ -1284,22 +1314,44 @@ static void detect_regular_renames(struct merge_options *opt, */ static int collect_renames(struct merge_options *opt, struct diff_queue_struct *result, - unsigned side_index) + unsigned side_index, + struct strmap *dir_renames_for_side, + struct strmap *rename_exclusions) { int i, clean = 1; + struct strmap collisions; struct diff_queue_struct *side_pairs; + struct hashmap_iter iter; + struct strmap_entry *entry; struct rename_info *renames = &opt->priv->renames; side_pairs = &renames->pairs[side_index]; + compute_collisions(&collisions, dir_renames_for_side, side_pairs); for (i = 0; i < side_pairs->nr; ++i) { struct diff_filepair *p = side_pairs->queue[i]; + char *new_path; /* non-NULL only with directory renames */ - if (p->status != 'R') { + if (p->status != 'A' && p->status != 'R') { diff_free_filepair(p); continue; } + new_path = check_for_directory_rename(opt, p->two->path, + side_index, + dir_renames_for_side, + rename_exclusions, + &collisions, + &clean); + + if (p->status != 'R' && !new_path) { + diff_free_filepair(p); + continue; + } + + if (new_path) + apply_directory_rename_modifications(opt, p, new_path); + /* * p->score comes back from diffcore_rename_extended() with * the similarity of the renamed file. The similarity is @@ -1314,6 +1366,20 @@ static int collect_renames(struct merge_options *opt, result->queue[result->nr++] = p; } + /* Free each value in the collisions map */ + strmap_for_each_entry(&collisions, &iter, entry) { + struct collision_info *info = entry->value; + string_list_clear(&info->source_files, 0); + } + /* + * In compute_collisions(), we set collisions.strdup_strings to 0 + * so that we wouldn't have to make another copy of the new_path + * allocated by apply_dir_rename(). But now that we've used them + * and have no other references to these strings, it is time to + * deallocate them. + */ + free_strmap_strings(&collisions); + strmap_clear(&collisions, 1); return clean; } @@ -1345,8 +1411,12 @@ static int detect_and_process_renames(struct merge_options *opt, ALLOC_GROW(combined.queue, renames->pairs[1].nr + renames->pairs[2].nr, combined.alloc); - clean &= collect_renames(opt, &combined, MERGE_SIDE1); - clean &= collect_renames(opt, &combined, MERGE_SIDE2); + clean &= collect_renames(opt, &combined, MERGE_SIDE1, + &renames->dir_renames[2], + &renames->dir_renames[1]); + clean &= collect_renames(opt, &combined, MERGE_SIDE2, + &renames->dir_renames[1], + &renames->dir_renames[2]); QSORT(combined.queue, combined.nr, compare_pairs); clean &= process_renames(opt, &combined); From d9d015df4a3d794ec889dc70902d28f3ca87d817 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:46 +0000 Subject: [PATCH 21/28] merge-ort: implement compute_collisions() This is nearly a wholesale copy of compute_collisions() from merge-recursive.c, and the logic remains the same, but it has been tweaked slightly due to: * using strmap.h API (instead of direct hashmaps) * allocation/freeing of data structures were done separately in merge_start() and clear_or_reinit_internal_opts() in an earlier patch in this series * there is no non_unique_new_dir data field in merge-ort; that will be handled a different way It does depend on two new functions, apply_dir_rename() and check_dir_renamed() which were introduced with simple die-not-yet-implemented shells and will be implemented in subsequent patches. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index c86ed85b09..22028d57f3 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -726,6 +726,19 @@ struct collision_info { unsigned reported_already:1; }; +/* + * Return a new string that replaces the beginning portion (which matches + * rename_info->key), with rename_info->util.new_dir. In perl-speak: + * new_path_name = (old_path =~ s/rename_info->key/rename_info->value/); + * NOTE: + * Caller must ensure that old_path starts with rename_info->key + '/'. + */ +static char *apply_dir_rename(struct strmap_entry *rename_info, + const char *old_path) +{ + die("Not yet implemented!"); +} + static void get_renamed_dir_portion(const char *old_path, const char *new_path, char **old_dir, char **new_dir) { @@ -964,11 +977,64 @@ static void handle_directory_level_conflicts(struct merge_options *opt) string_list_clear(&duplicated, 0); } +static struct strmap_entry *check_dir_renamed(const char *path, + struct strmap *dir_renames) +{ + die("Not yet implemented!"); +} + static void compute_collisions(struct strmap *collisions, struct strmap *dir_renames, struct diff_queue_struct *pairs) { - die("Not yet implemented."); + int i; + + strmap_init_with_options(collisions, NULL, 0); + if (strmap_empty(dir_renames)) + return; + + /* + * Multiple files can be mapped to the same path due to directory + * renames done by the other side of history. Since that other + * side of history could have merged multiple directories into one, + * if our side of history added the same file basename to each of + * those directories, then all N of them would get implicitly + * renamed by the directory rename detection into the same path, + * and we'd get an add/add/.../add conflict, and all those adds + * from *this* side of history. This is not representable in the + * index, and users aren't going to easily be able to make sense of + * it. So we need to provide a good warning about what's + * happening, and fall back to no-directory-rename detection + * behavior for those paths. + * + * See testcases 9e and all of section 5 from t6043 for examples. + */ + for (i = 0; i < pairs->nr; ++i) { + struct strmap_entry *rename_info; + struct collision_info *collision_info; + char *new_path; + struct diff_filepair *pair = pairs->queue[i]; + + if (pair->status != 'A' && pair->status != 'R') + continue; + rename_info = check_dir_renamed(pair->two->path, dir_renames); + if (!rename_info) + continue; + + new_path = apply_dir_rename(rename_info, pair->two->path); + assert(new_path); + collision_info = strmap_get(collisions, new_path); + if (collision_info) { + free(new_path); + } else { + collision_info = xcalloc(1, + sizeof(struct collision_info)); + string_list_init(&collision_info->source_files, 0); + strmap_put(collisions, new_path, collision_info); + } + string_list_insert(&collision_info->source_files, + pair->two->path); + } } static char *check_for_directory_rename(struct merge_options *opt, From fbcfc0cc17019da292c4aeef0fe43a8980207beb Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:47 +0000 Subject: [PATCH 22/28] merge-ort: implement apply_dir_rename() and check_dir_renamed() Both of these are copied from merge-recursive.c, with just minor tweaks due to using strmap API and not having a non_unique_new_dir field. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 22028d57f3..db922272ed 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -736,7 +736,29 @@ struct collision_info { static char *apply_dir_rename(struct strmap_entry *rename_info, const char *old_path) { - die("Not yet implemented!"); + struct strbuf new_path = STRBUF_INIT; + const char *old_dir = rename_info->key; + const char *new_dir = rename_info->value; + int oldlen, newlen, new_dir_len; + + oldlen = strlen(old_dir); + if (*new_dir == '\0') + /* + * If someone renamed/merged a subdirectory into the root + * directory (e.g. 'some/subdir' -> ''), then we want to + * avoid returning + * '' + '/filename' + * as the rename; we need to make old_path + oldlen advance + * past the '/' character. + */ + oldlen++; + new_dir_len = strlen(new_dir); + newlen = new_dir_len + (strlen(old_path) - oldlen) + 1; + strbuf_grow(&new_path, newlen); + strbuf_add(&new_path, new_dir, new_dir_len); + strbuf_addstr(&new_path, &old_path[oldlen]); + + return strbuf_detach(&new_path, NULL); } static void get_renamed_dir_portion(const char *old_path, const char *new_path, @@ -980,7 +1002,18 @@ static void handle_directory_level_conflicts(struct merge_options *opt) static struct strmap_entry *check_dir_renamed(const char *path, struct strmap *dir_renames) { - die("Not yet implemented!"); + char *temp = xstrdup(path); + char *end; + struct strmap_entry *e = NULL; + + while ((end = strrchr(temp, '/'))) { + *end = '\0'; + e = strmap_get_entry(dir_renames, temp); + if (e) + break; + } + free(temp); + return e; } static void compute_collisions(struct strmap *collisions, From 47325e8533a35ee48e85f85543e4f054d21a36c1 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:48 +0000 Subject: [PATCH 23/28] merge-ort: implement check_for_directory_rename() This is copied from merge-recursive.c, with minor tweaks due to using strmap API and the fact that it can use opt->priv->paths to get all pathnames that exist instead of taking a tree object. This depends on a new function, handle_path_level_conflicts(), which just has a placeholder die-not-yet-implemented implementation for now; a subsequent patch will implement it. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index db922272ed..86d2214006 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -864,6 +864,21 @@ static void get_renamed_dir_portion(const char *old_path, const char *new_path, *new_dir = xstrndup(new_path, end_of_new - new_path); } +/* + * See if there is a directory rename for path, and if there are any file + * level conflicts on the given side for the renamed location. If there is + * a rename and there are no conflicts, return the new name. Otherwise, + * return NULL. + */ +static char *handle_path_level_conflicts(struct merge_options *opt, + const char *path, + unsigned side_index, + struct strmap_entry *rename_info, + struct strmap *collisions) +{ + die("Not yet implemented"); +} + static void increment_count(struct strmap *dir_rename_count, char *old_dir, char *new_dir) @@ -1078,7 +1093,57 @@ static char *check_for_directory_rename(struct merge_options *opt, struct strmap *collisions, int *clean_merge) { - die("Not yet implemented."); + char *new_path = NULL; + struct strmap_entry *rename_info; + struct strmap_entry *otherinfo = NULL; + const char *new_dir; + + if (strmap_empty(dir_renames)) + return new_path; + rename_info = check_dir_renamed(path, dir_renames); + if (!rename_info) + return new_path; + /* old_dir = rename_info->key; */ + new_dir = rename_info->value; + + /* + * This next part is a little weird. We do not want to do an + * implicit rename into a directory we renamed on our side, because + * that will result in a spurious rename/rename(1to2) conflict. An + * example: + * Base commit: dumbdir/afile, otherdir/bfile + * Side 1: smrtdir/afile, otherdir/bfile + * Side 2: dumbdir/afile, dumbdir/bfile + * Here, while working on Side 1, we could notice that otherdir was + * renamed/merged to dumbdir, and change the diff_filepair for + * otherdir/bfile into a rename into dumbdir/bfile. However, Side + * 2 will notice the rename from dumbdir to smrtdir, and do the + * transitive rename to move it from dumbdir/bfile to + * smrtdir/bfile. That gives us bfile in dumbdir vs being in + * smrtdir, a rename/rename(1to2) conflict. We really just want + * the file to end up in smrtdir. And the way to achieve that is + * to not let Side1 do the rename to dumbdir, since we know that is + * the source of one of our directory renames. + * + * That's why otherinfo and dir_rename_exclusions is here. + * + * As it turns out, this also prevents N-way transient rename + * confusion; See testcases 9c and 9d of t6043. + */ + otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir); + if (otherinfo) { + path_msg(opt, rename_info->key, 1, + _("WARNING: Avoiding applying %s -> %s rename " + "to %s, because %s itself was renamed."), + rename_info->key, new_dir, path, new_dir); + return NULL; + } + + new_path = handle_path_level_conflicts(opt, path, side_index, + rename_info, collisions); + *clean_merge &= (new_path != NULL); + + return new_path; } static void apply_directory_rename_modifications(struct merge_options *opt, From bea433655a79b7fbc04444508c55f13ece942a21 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:49 +0000 Subject: [PATCH 24/28] merge-ort: implement handle_path_level_conflicts() This is copied from merge-recursive.c, with minor tweaks due to: * using strmap API * merge-ort not using the non_unique_new_dir field, since it'll obviate its need entirely later with performance improvements * adding a new path_in_way() function that uses opt->priv->paths instead of doing an expensive tree_has_path() lookup to see if a tree has a given path. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 86d2214006..ad8ecb7e44 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -864,6 +864,16 @@ static void get_renamed_dir_portion(const char *old_path, const char *new_path, *new_dir = xstrndup(new_path, end_of_new - new_path); } +static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask) +{ + struct merged_info *mi = strmap_get(paths, path); + struct conflict_info *ci; + if (!mi) + return 0; + INITIALIZE_CI(ci, mi); + return mi->clean || (side_mask & (ci->filemask | ci->dirmask)); +} + /* * See if there is a directory rename for path, and if there are any file * level conflicts on the given side for the renamed location. If there is @@ -876,7 +886,67 @@ static char *handle_path_level_conflicts(struct merge_options *opt, struct strmap_entry *rename_info, struct strmap *collisions) { - die("Not yet implemented"); + char *new_path = NULL; + struct collision_info *c_info; + int clean = 1; + struct strbuf collision_paths = STRBUF_INIT; + + /* + * entry has the mapping of old directory name to new directory name + * that we want to apply to path. + */ + new_path = apply_dir_rename(rename_info, path); + if (!new_path) + BUG("Failed to apply directory rename!"); + + /* + * The caller needs to have ensured that it has pre-populated + * collisions with all paths that map to new_path. Do a quick check + * to ensure that's the case. + */ + c_info = strmap_get(collisions, new_path); + if (c_info == NULL) + BUG("c_info is NULL"); + + /* + * Check for one-sided add/add/.../add conflicts, i.e. + * where implicit renames from the other side doing + * directory rename(s) can affect this side of history + * to put multiple paths into the same location. Warn + * and bail on directory renames for such paths. + */ + if (c_info->reported_already) { + clean = 0; + } else if (path_in_way(&opt->priv->paths, new_path, 1 << side_index)) { + c_info->reported_already = 1; + strbuf_add_separated_string_list(&collision_paths, ", ", + &c_info->source_files); + path_msg(opt, new_path, 0, + _("CONFLICT (implicit dir rename): Existing file/dir " + "at %s in the way of implicit directory rename(s) " + "putting the following path(s) there: %s."), + new_path, collision_paths.buf); + clean = 0; + } else if (c_info->source_files.nr > 1) { + c_info->reported_already = 1; + strbuf_add_separated_string_list(&collision_paths, ", ", + &c_info->source_files); + path_msg(opt, new_path, 0, + _("CONFLICT (implicit dir rename): Cannot map more " + "than one path to %s; implicit directory renames " + "tried to put these paths there: %s"), + new_path, collision_paths.buf); + clean = 0; + } + + /* Free memory we no longer need */ + strbuf_release(&collision_paths); + if (!clean && new_path) { + free(new_path); + return NULL; + } + + return new_path; } static void increment_count(struct strmap *dir_rename_count, From 05b85c6eeb1710ff83f11f71abefa2064e50e61b Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:50 +0000 Subject: [PATCH 25/28] merge-ort: add a new toplevel_dir field Due to the string-equality-iff-pointer-equality requirements placed on merged_info.directory_name, apply_directory_rename_modifications() will need to have access to the exact toplevel directory name string pointer and can't just use a new empty string. Store it in a field that we can use. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index ad8ecb7e44..5b3b56dc1c 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -168,12 +168,15 @@ struct merge_options_internal { struct rename_info renames; /* - * current_dir_name: temporary var used in collect_merge_info_callback() + * current_dir_name, toplevel_dir: temporary vars * - * Used to set merged_info.directory_name; see documentation for that - * variable and the requirements placed on that field. + * These are used in collect_merge_info_callback(), and will set the + * various merged_info.directory_name for the various paths we get; + * see documentation for that variable and the requirements placed on + * that field. */ const char *current_dir_name; + const char *toplevel_dir; /* call_depth: recursion level counter for merging merge bases */ int call_depth; @@ -682,10 +685,10 @@ static int collect_merge_info(struct merge_options *opt, int ret; struct tree_desc t[3]; struct traverse_info info; - const char *toplevel_dir_placeholder = ""; - opt->priv->current_dir_name = toplevel_dir_placeholder; - setup_traverse_info(&info, toplevel_dir_placeholder); + opt->priv->toplevel_dir = ""; + opt->priv->current_dir_name = opt->priv->toplevel_dir; + setup_traverse_info(&info, opt->priv->toplevel_dir); info.fn = collect_merge_info_callback; info.data = opt; info.show_all_errors = 1; From 089d82bc18514d78513a85e95de257f74da18205 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:51 +0000 Subject: [PATCH 26/28] merge-ort: implement apply_directory_rename_modifications() This function roughly follows the same outline as the function of the same name from merge-recursive.c, but the code diverges in multiple ways due to some special considerations: * merge-ort's version needs to update opt->priv->paths with any new paths (and opt->priv->paths points to struct conflict_infos which track quite a bit of metadata for each path); merge-recursive's version would directly update the index * merge-ort requires that opt->priv->paths has any leading directories of any relevant files also be included in the set of paths. And due to pointer equality requirements on merged_info.directory_name, we have to be careful how we compute and insert these. * due to the above requirements on opt->priv->paths, merge-ort's version starts with a long comment to explain all the special considerations that need to be handled * merge-ort can use the full data stored in opt->priv->paths to avoid making expensive get_tree_entry() calls to regather the necessary data. * due to messages being deferred automatically in merge-ort, this is the best place to handle conflict messages whereas in merge-recursive.c they are deferred manually so that processing of entries does all the printing Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 5b3b56dc1c..1152d0ae21 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1223,7 +1223,173 @@ static void apply_directory_rename_modifications(struct merge_options *opt, struct diff_filepair *pair, char *new_path) { - die("Not yet implemented."); + /* + * The basic idea is to get the conflict_info from opt->priv->paths + * at old path, and insert it into new_path; basically just this: + * ci = strmap_get(&opt->priv->paths, old_path); + * strmap_remove(&opt->priv->paths, old_path, 0); + * strmap_put(&opt->priv->paths, new_path, ci); + * However, there are some factors complicating this: + * - opt->priv->paths may already have an entry at new_path + * - Each ci tracks its containing directory, so we need to + * update that + * - If another ci has the same containing directory, then + * the two char*'s MUST point to the same location. See the + * comment in struct merged_info. strcmp equality is not + * enough; we need pointer equality. + * - opt->priv->paths must hold the parent directories of any + * entries that are added. So, if this directory rename + * causes entirely new directories, we must recursively add + * parent directories. + * - For each parent directory added to opt->priv->paths, we + * also need to get its parent directory stored in its + * conflict_info->merged.directory_name with all the same + * requirements about pointer equality. + */ + struct string_list dirs_to_insert = STRING_LIST_INIT_NODUP; + struct conflict_info *ci, *new_ci; + struct strmap_entry *entry; + const char *branch_with_new_path, *branch_with_dir_rename; + const char *old_path = pair->two->path; + const char *parent_name; + const char *cur_path; + int i, len; + + entry = strmap_get_entry(&opt->priv->paths, old_path); + old_path = entry->key; + ci = entry->value; + VERIFY_CI(ci); + + /* Find parent directories missing from opt->priv->paths */ + cur_path = new_path; + while (1) { + /* Find the parent directory of cur_path */ + char *last_slash = strrchr(cur_path, '/'); + if (last_slash) { + parent_name = xstrndup(cur_path, last_slash - cur_path); + } else { + parent_name = opt->priv->toplevel_dir; + break; + } + + /* Look it up in opt->priv->paths */ + entry = strmap_get_entry(&opt->priv->paths, parent_name); + if (entry) { + free((char*)parent_name); + parent_name = entry->key; /* reuse known pointer */ + break; + } + + /* Record this is one of the directories we need to insert */ + string_list_append(&dirs_to_insert, parent_name); + cur_path = parent_name; + } + + /* Traverse dirs_to_insert and insert them into opt->priv->paths */ + for (i = dirs_to_insert.nr-1; i >= 0; --i) { + struct conflict_info *dir_ci; + char *cur_dir = dirs_to_insert.items[i].string; + + dir_ci = xcalloc(1, sizeof(*dir_ci)); + + dir_ci->merged.directory_name = parent_name; + len = strlen(parent_name); + /* len+1 because of trailing '/' character */ + dir_ci->merged.basename_offset = (len > 0 ? len+1 : len); + dir_ci->dirmask = ci->filemask; + strmap_put(&opt->priv->paths, cur_dir, dir_ci); + + parent_name = cur_dir; + } + + /* + * We are removing old_path from opt->priv->paths. old_path also will + * eventually need to be freed, but it may still be used by e.g. + * ci->pathnames. So, store it in another string-list for now. + */ + string_list_append(&opt->priv->paths_to_free, old_path); + + assert(ci->filemask == 2 || ci->filemask == 4); + assert(ci->dirmask == 0); + strmap_remove(&opt->priv->paths, old_path, 0); + + branch_with_new_path = (ci->filemask == 2) ? opt->branch1 : opt->branch2; + branch_with_dir_rename = (ci->filemask == 2) ? opt->branch2 : opt->branch1; + + /* Now, finally update ci and stick it into opt->priv->paths */ + ci->merged.directory_name = parent_name; + len = strlen(parent_name); + ci->merged.basename_offset = (len > 0 ? len+1 : len); + new_ci = strmap_get(&opt->priv->paths, new_path); + if (!new_ci) { + /* Place ci back into opt->priv->paths, but at new_path */ + strmap_put(&opt->priv->paths, new_path, ci); + } else { + int index; + + /* A few sanity checks */ + VERIFY_CI(new_ci); + assert(ci->filemask == 2 || ci->filemask == 4); + assert((new_ci->filemask & ci->filemask) == 0); + assert(!new_ci->merged.clean); + + /* Copy stuff from ci into new_ci */ + new_ci->filemask |= ci->filemask; + if (new_ci->dirmask) + new_ci->df_conflict = 1; + index = (ci->filemask >> 1); + new_ci->pathnames[index] = ci->pathnames[index]; + new_ci->stages[index].mode = ci->stages[index].mode; + oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid); + + free(ci); + ci = new_ci; + } + + if (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE) { + /* Notify user of updated path */ + if (pair->status == 'A') + path_msg(opt, new_path, 1, + _("Path updated: %s added in %s inside a " + "directory that was renamed in %s; moving " + "it to %s."), + old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + else + path_msg(opt, new_path, 1, + _("Path updated: %s renamed to %s in %s, " + "inside a directory that was renamed in %s; " + "moving it to %s."), + pair->one->path, old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + } else { + /* + * opt->detect_directory_renames has the value + * MERGE_DIRECTORY_RENAMES_CONFLICT, so mark these as conflicts. + */ + ci->path_conflict = 1; + if (pair->status == 'A') + path_msg(opt, new_path, 0, + _("CONFLICT (file location): %s added in %s " + "inside a directory that was renamed in %s, " + "suggesting it should perhaps be moved to " + "%s."), + old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + else + path_msg(opt, new_path, 0, + _("CONFLICT (file location): %s renamed to %s " + "in %s, inside a directory that was renamed " + "in %s, suggesting it should perhaps be " + "moved to %s."), + pair->one->path, old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + } + + /* + * Finally, record the new location. + */ + pair->two->path = new_path; } /*** Function Grouping: functions related to regular rename detection ***/ From 1b6b902d95a5b2c5677b6351c670202dae7cd230 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:52 +0000 Subject: [PATCH 27/28] merge-ort: process_renames() now needs more defensiveness Since directory rename detection adds new paths to opt->priv->paths and removes old ones, process_renames() needs to now check whether pair->one->path actually exists in opt->priv->paths instead of just assuming it does. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 1152d0ae21..7314f9c69c 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1410,12 +1410,28 @@ static int process_renames(struct merge_options *opt, const char *rename_branch = NULL, *delete_branch = NULL; old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path); - oldpath = old_ent->key; - oldinfo = old_ent->value; - new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path); - newpath = new_ent->key; - newinfo = new_ent->value; + if (old_ent) { + oldpath = old_ent->key; + oldinfo = old_ent->value; + } + newpath = pair->two->path; + if (new_ent) { + newpath = new_ent->key; + newinfo = new_ent->value; + } + + /* + * If pair->one->path isn't in opt->priv->paths, that means + * that either directory rename detection removed that + * path, or a parent directory of oldpath was resolved and + * we don't even need the rename; in either case, we can + * skip it. If oldinfo->merged.clean, then the other side + * of history had no changes to oldpath and we don't need + * the rename and can skip it. + */ + if (!oldinfo || oldinfo->merged.clean) + continue; /* * diff_filepairs have copies of pathnames, thus we have to From 203c872c4f29e08867d572eca4f18cd1f39f9e4b Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:53 +0000 Subject: [PATCH 28/28] merge-ort: fix a directory rename detection bug As noted in commit 902c521a35 ("t6423: more involved directory rename test", 2020-10-15), when we have a case where * dir/subdir/ has several files * almost all files in dir/subdir/ are renamed to folder/subdir/ * one of the files in dir/subdir/ is renamed to folder/subdir/newsubdir/ * the other side of history (that doesn't do the renames) adds a new file to dir/subdir/ Then for the majority of the file renames, the directory rename of dir/subdir/ -> folder/subdir/ is actually not represented that way but as dir/ -> folder/ We also had one rename that was represented as dir/subdir/ -> folder/subdir/newsubdir/ Now, since there's a new file in dir/subdir/, where does it go? Well, there's only one rule for dir/subdir/, so the code previously noted that this rule had the "majority" of the one "relevant" rename and thus erroneously used it to place the file in folder/subdir/newsubdir/. We really want the heavy weight associated with dir/ -> folder/ to also be treated as dir/subdir/ -> folder/subdir/, so that we correctly place the file in folder/subdir/. Add a bunch of logic to make sure that we use all relevant renamings in directory rename detection. Note that testcase 12f of t6423 still fails after this, but it gets further than merge-recursive does. There are some performance related bits in that testcase (the region_enter messages) that do not yet succeed, but the rest of the testcase works after this patch. Subsequent patch series will fix up the performance side. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 198 +++++++++++++++++++++------------------------------- 1 file changed, 81 insertions(+), 117 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 7314f9c69c..3192b27625 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -764,109 +764,6 @@ static char *apply_dir_rename(struct strmap_entry *rename_info, return strbuf_detach(&new_path, NULL); } -static void get_renamed_dir_portion(const char *old_path, const char *new_path, - char **old_dir, char **new_dir) -{ - char *end_of_old, *end_of_new; - - /* Default return values: NULL, meaning no rename */ - *old_dir = NULL; - *new_dir = NULL; - - /* - * For - * "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c" - * the "e/foo.c" part is the same, we just want to know that - * "a/b/c/d" was renamed to "a/b/some/thing/else" - * so, for this example, this function returns "a/b/c/d" in - * *old_dir and "a/b/some/thing/else" in *new_dir. - */ - - /* - * If the basename of the file changed, we don't care. We want - * to know which portion of the directory, if any, changed. - */ - end_of_old = strrchr(old_path, '/'); - end_of_new = strrchr(new_path, '/'); - - /* - * If end_of_old is NULL, old_path wasn't in a directory, so there - * could not be a directory rename (our rule elsewhere that a - * directory which still exists is not considered to have been - * renamed means the root directory can never be renamed -- because - * the root directory always exists). - */ - if (end_of_old == NULL) - return; /* Note: *old_dir and *new_dir are still NULL */ - - /* - * If new_path contains no directory (end_of_new is NULL), then we - * have a rename of old_path's directory to the root directory. - */ - if (end_of_new == NULL) { - *old_dir = xstrndup(old_path, end_of_old - old_path); - *new_dir = xstrdup(""); - return; - } - - /* Find the first non-matching character traversing backwards */ - while (*--end_of_new == *--end_of_old && - end_of_old != old_path && - end_of_new != new_path) - ; /* Do nothing; all in the while loop */ - - /* - * If both got back to the beginning of their strings, then the - * directory didn't change at all, only the basename did. - */ - if (end_of_old == old_path && end_of_new == new_path && - *end_of_old == *end_of_new) - return; /* Note: *old_dir and *new_dir are still NULL */ - - /* - * If end_of_new got back to the beginning of its string, and - * end_of_old got back to the beginning of some subdirectory, then - * we have a rename/merge of a subdirectory into the root, which - * needs slightly special handling. - * - * Note: There is no need to consider the opposite case, with a - * rename/merge of the root directory into some subdirectory - * because as noted above the root directory always exists so it - * cannot be considered to be renamed. - */ - if (end_of_new == new_path && - end_of_old != old_path && end_of_old[-1] == '/') { - *old_dir = xstrndup(old_path, --end_of_old - old_path); - *new_dir = xstrdup(""); - return; - } - - /* - * We've found the first non-matching character in the directory - * paths. That means the current characters we were looking at - * were part of the first non-matching subdir name going back from - * the end of the strings. Get the whole name by advancing both - * end_of_old and end_of_new to the NEXT '/' character. That will - * represent the entire directory rename. - * - * The reason for the increment is cases like - * a/b/star/foo/whatever.c -> a/b/tar/foo/random.c - * After dropping the basename and going back to the first - * non-matching character, we're now comparing: - * a/b/s and a/b/ - * and we want to be comparing: - * a/b/star/ and a/b/tar/ - * but without the pre-increment, the one on the right would stay - * a/b/. - */ - end_of_old = strchr(++end_of_old, '/'); - end_of_new = strchr(++end_of_new, '/'); - - /* Copy the old and new directories into *old_dir and *new_dir. */ - *old_dir = xstrndup(old_path, end_of_old - old_path); - *new_dir = xstrndup(new_path, end_of_new - new_path); -} - static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask) { struct merged_info *mi = strmap_get(paths, path); @@ -952,6 +849,14 @@ static char *handle_path_level_conflicts(struct merge_options *opt, return new_path; } +static void dirname_munge(char *filename) +{ + char *slash = strrchr(filename, '/'); + if (!slash) + slash = filename; + *slash = '\0'; +} + static void increment_count(struct strmap *dir_rename_count, char *old_dir, char *new_dir) @@ -973,6 +878,76 @@ static void increment_count(struct strmap *dir_rename_count, strintmap_incr(counts, new_dir, 1); } +static void update_dir_rename_counts(struct strmap *dir_rename_count, + struct strset *dirs_removed, + const char *oldname, + const char *newname) +{ + char *old_dir = xstrdup(oldname); + char *new_dir = xstrdup(newname); + char new_dir_first_char = new_dir[0]; + int first_time_in_loop = 1; + + while (1) { + dirname_munge(old_dir); + dirname_munge(new_dir); + + /* + * When renaming + * "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c" + * then this suggests that both + * a/b/c/d/e/ => a/b/some/thing/else/e/ + * a/b/c/d/ => a/b/some/thing/else/ + * so we want to increment counters for both. We do NOT, + * however, also want to suggest that there was the following + * rename: + * a/b/c/ => a/b/some/thing/ + * so we need to quit at that point. + * + * Note the when first_time_in_loop, we only strip off the + * basename, and we don't care if that's different. + */ + if (!first_time_in_loop) { + char *old_sub_dir = strchr(old_dir, '\0')+1; + char *new_sub_dir = strchr(new_dir, '\0')+1; + if (!*new_dir) { + /* + * Special case when renaming to root directory, + * i.e. when new_dir == "". In this case, we had + * something like + * a/b/subdir => subdir + * and so dirname_munge() sets things up so that + * old_dir = "a/b\0subdir\0" + * new_dir = "\0ubdir\0" + * We didn't have a '/' to overwrite a '\0' onto + * in new_dir, so we have to compare differently. + */ + if (new_dir_first_char != old_sub_dir[0] || + strcmp(old_sub_dir+1, new_sub_dir)) + break; + } else { + if (strcmp(old_sub_dir, new_sub_dir)) + break; + } + } + + if (strset_contains(dirs_removed, old_dir)) + increment_count(dir_rename_count, old_dir, new_dir); + else + break; + + /* If we hit toplevel directory ("") for old or new dir, quit */ + if (!*old_dir || !*new_dir) + break; + + first_time_in_loop = 0; + } + + /* Free resources we don't need anymore */ + free(old_dir); + free(new_dir); +} + static void compute_rename_counts(struct diff_queue_struct *pairs, struct strmap *dir_rename_count, struct strset *dirs_removed) @@ -980,20 +955,12 @@ static void compute_rename_counts(struct diff_queue_struct *pairs, int i; for (i = 0; i < pairs->nr; ++i) { - char *old_dir, *new_dir; struct diff_filepair *pair = pairs->queue[i]; /* File not part of directory rename if it wasn't renamed */ if (pair->status != 'R') continue; - /* Get the old and new directory names */ - get_renamed_dir_portion(pair->one->path, pair->two->path, - &old_dir, &new_dir); - if (!old_dir) - /* Directory didn't change at all; ignore this one. */ - continue; - /* * Make dir_rename_count contain a map of a map: * old_directory -> {new_directory -> count} @@ -1001,12 +968,9 @@ static void compute_rename_counts(struct diff_queue_struct *pairs, * the old filename and the new filename and count how many * times that pairing occurs. */ - if (strset_contains(dirs_removed, old_dir)) - increment_count(dir_rename_count, old_dir, new_dir); - - /* Free resources we don't need anymore */ - free(old_dir); - free(new_dir); + update_dir_rename_counts(dir_rename_count, dirs_removed, + pair->one->path, + pair->two->path); } }