From c27afcbfd0f440f410758432e2fe11a16fb2b360 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 26 Jan 2026 10:48:51 +0000 Subject: [PATCH 1/2] xdiff: remove "line_hash" field from xrecord_t Prior to commit 6a26019c81 (xdiff: split xrecord_t.ha into line_hash and minimal_perfect_hash, 2025-11-18) the "ha" field of xrecord_t initially held the "line_hash" value and once the line had been interned that field was updated to hold the "minimal_perfect_hash". The "line_hash" is only used to intern the line so there is no point in storing it after all the input lines have been interned. Removing the "line_hash" field from xrecord_t and storing it in xdlclass_t where it is actually used makes it clearer that it is a temporary value and it should not be used once we've calculated the "minimal_perfect_hash". This also reduces the size of xrecord_t by 25% on 64-bit platforms and 40% on 32-bit platforms. While the struct is small we create one instance per input line so any saving is welcome. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 12 +++++++----- xdiff/xtypes.h | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 34c82e4f8e..08e5d3f4df 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -34,6 +34,7 @@ #define INVESTIGATE 2 typedef struct s_xdlclass { + uint64_t line_hash; struct s_xdlclass *next; xrecord_t rec; long idx; @@ -92,13 +93,14 @@ static void xdl_free_classifier(xdlclassifier_t *cf) { } -static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) { +static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec, + uint64_t line_hash) { size_t hi; xdlclass_t *rcrec; - hi = XDL_HASHLONG(rec->line_hash, cf->hbits); + hi = XDL_HASHLONG(line_hash, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) - if (rcrec->rec.line_hash == rec->line_hash && + if (rcrec->line_hash == line_hash && 
xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size, (const char *)rec->ptr, (long)rec->size, cf->flags)) break; @@ -112,6 +114,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t if (XDL_ALLOC_GROW(cf->rcrecs, cf->count, cf->alloc)) return -1; cf->rcrecs[rcrec->idx] = rcrec; + rcrec->line_hash = line_hash; rcrec->rec = *rec; rcrec->len1 = rcrec->len2 = 0; rcrec->next = cf->rchash[hi]; @@ -158,8 +161,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ crec = &xdf->recs[xdf->nrec++]; crec->ptr = prev; crec->size = cur - prev; - crec->line_hash = hav; - if (xdl_classify_record(pass, cf, crec) < 0) + if (xdl_classify_record(pass, cf, crec, hav) < 0) goto abort; } } diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 979586f20a..50aee779be 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -41,7 +41,6 @@ typedef struct s_chastore { typedef struct s_xrecord { uint8_t const *ptr; size_t size; - uint64_t line_hash; size_t minimal_perfect_hash; } xrecord_t; From 5086213bd2f44fdc793fd8a081fd1c40a3267c44 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Mon, 26 Jan 2026 10:48:52 +0000 Subject: [PATCH 2/2] xdiff: remove unused data from xdlclass_t Prior to commit 6d507bd41a (xdiff: delete fields ha, line, size in xdlclass_t in favor of an xrecord_t, 2025-09-26) xdlclass_t carried a copy of all the fields in xrecord_t. That commit embedded xrecord_t in xdlclass_t to make it easier to change the types of the fields in xrecord_t. However commit 6a26019c81 (xdiff: split xrecord_t.ha into line_hash and minimal_perfect_hash, 2025-11-18) added the "minimal_perfect_hash" field to xrecord_t which is not used by xdlclass_t. To avoid wasting space stop copying the whole of xrecord_t and just copy the pointer and length that we need to intern the line. Together with the previous commit this effectively reverts 6d507bd41a. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 08e5d3f4df..cd4fc405eb 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -36,7 +36,8 @@ typedef struct s_xdlclass { uint64_t line_hash; struct s_xdlclass *next; - xrecord_t rec; + const uint8_t *ptr; + size_t size; long idx; long len1, len2; } xdlclass_t; @@ -101,7 +102,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t hi = XDL_HASHLONG(line_hash, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) if (rcrec->line_hash == line_hash && - xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size, + xdl_recmatch((const char *)rcrec->ptr, (long)rcrec->size, (const char *)rec->ptr, (long)rec->size, cf->flags)) break; @@ -115,7 +116,8 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t return -1; cf->rcrecs[rcrec->idx] = rcrec; rcrec->line_hash = line_hash; - rcrec->rec = *rec; + rcrec->ptr = rec->ptr; + rcrec->size = rec->size; rcrec->len1 = rcrec->len2 = 0; rcrec->next = cf->rchash[hi]; cf->rchash[hi] = rcrec;