From 663c5ad0356f5501e2db7a39a7abf76e8db26b7d Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 17 Nov 2021 11:20:23 +0000 Subject: [PATCH 1/3] diff histogram: intern strings Histogram is the only diff algorithm not to call xdl_classify_record(). xdl_classify_record() ensures that the hash values of two strings that are not equal differ which means that it is not necessary to use xdl_recmatch() when comparing lines, all that is necessary is to compare the hash values. This gives a 7% reduction in the runtime of "git log --patch" when using the histogram diff algorithm. Test HEAD^ HEAD ----------------------------------------------------------------------------- 4000.1: log -3000 (baseline) 0.18(0.14+0.04) 0.19(0.17+0.02) +5.6% 4000.2: log --raw -3000 (tree-only) 0.99(0.77+0.21) 0.98(0.78+0.20) -1.0% 4000.3: log -p -3000 (Myers) 4.84(4.31+0.51) 4.81(4.15+0.64) -0.6% 4000.4: log -p -3000 --histogram 6.34(5.86+0.46) 5.87(5.19+0.66) -7.4% 4000.5: log -p -3000 --patience 5.39(4.60+0.76) 5.35(4.60+0.73) -0.7% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xhistogram.c | 5 ++--- xdiff/xprepare.c | 24 ++++++++---------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index e694bfd9e3..6c1c88a69a 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -91,9 +91,8 @@ struct region { static int cmp_recs(xpparam_t const *xpp, xrecord_t *r1, xrecord_t *r2) { - return r1->ha == r2->ha && - xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, - xpp->flags); + return r1->ha == r2->ha; + } #define CMP_ENV(xpp, env, s1, l1, s2, l2) \ diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index abeb8fb84e..7fae0727a0 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -181,15 +181,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) goto abort; - if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) - hbits = hsize = 0; - else { - hbits = xdl_hashbits((unsigned int) narec); - hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) - goto abort; - memset(rhash, 0, hsize * sizeof(xrecord_t *)); - } + hbits = xdl_hashbits((unsigned int) narec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { @@ -208,9 +204,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ crec->size = (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; - - if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && - xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) + if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) goto abort; } } @@ -279,8 +273,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, enl1 = xdl_guess_lines(mf1, sample) + 1; enl2 = xdl_guess_lines(mf2, sample) + 1; - if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF && - xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) + if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) return -1; if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { @@ -305,8 +298,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, return -1; } - if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) - xdl_free_classifier(&cf); + xdl_free_classifier(&cf); return 0; } From b82dd3f7f20c4d9cc68543871c3634cb32ea0789 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 17 Nov 2021 11:20:24 +0000 Subject: [PATCH 2/3] xdiff: avoid unnecessary memory allocations rindex and ha are only used by xdl_cleanup_records() which is not called by the histogram or patience algorithms. The perf test results show a small reduction in run time but that is probably within the noise. Test HEAD^ HEAD ----------------------------------------------------------------------------- 4000.1: log -3000 (baseline) 0.19(0.17+0.02) 0.19(0.12+0.07) +0.0% 4000.2: log --raw -3000 (tree-only) 0.98(0.78+0.20) 0.98(0.81+0.16) +0.0% 4000.3: log -p -3000 (Myers) 4.81(4.15+0.64) 4.81(4.23+0.56) +0.0% 4000.4: log -p -3000 --histogram 5.87(5.19+0.66) 5.83(5.11+0.70) -0.7% 4000.5: log -p -3000 --patience 5.35(4.60+0.73) 5.31(4.61+0.69) -0.7% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 7fae0727a0..4527a4a07c 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -213,10 +213,13 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ goto abort; memset(rchg, 0, (nrec + 2) * sizeof(char)); - if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) - goto abort; - if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) - goto abort; + if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && + (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) { + if (!(rindex = xdl_malloc((nrec + 1) * sizeof(*rindex)))) + goto abort; + if (!(ha = xdl_malloc((nrec + 1) * sizeof(*ha)))) + goto abort; + } xdf->nrec = nrec; xdf->recs = recs; From 6b13bc32322dc64e52ef29c9f527746ad30d0ad8 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 17 Nov 2021 11:20:25 +0000 Subject: [PATCH 3/3] xdiff: simplify comparison Now that the histogram algorithm calls xdl_classify_record() it is no longer necessary to use xdl_recmatch() to compare lines, it is sufficient just to compare the hash values. This has a negligible effect on performance. Test HEAD~1 HEAD ----------------------------------------------------------------------------- 4000.1: log -3000 (baseline) 0.19(0.12+0.07) 0.18(0.14+0.04) -5.3% 4000.2: log --raw -3000 (tree-only) 0.98(0.81+0.16) 0.98(0.79+0.18) +0.0% 4000.3: log -p -3000 (Myers) 4.81(4.23+0.56) 4.80(4.26+0.53) -0.2% 4000.4: log -p -3000 --histogram 5.83(5.11+0.70) 5.82(5.15+0.65) -0.2% 4000.5: log -p -3000 --patience 5.31(4.61+0.69) 5.30(4.54+0.75) -0.2% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index a4542c05b6..6a3b9280be 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -392,10 +392,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags) { - return (rec1->ha == rec2->ha && - xdl_recmatch(rec1->ptr, rec1->size, - rec2->ptr, rec2->size, - flags)); + return (rec1->ha == rec2->ha); } /*