From 38fd0721d0a2a1a723bc28fc0817e3571987b1ef Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 27 Feb 2006 23:38:28 -0500 Subject: [PATCH] diff-delta: allow reusing of the reference buffer index When a reference buffer is used multiple times then its index can be computed only once and reused multiple times. This patch adds an extra pointer to a pointer argument (from_index) to diff_delta() for this. If from_index is NULL then everything is like before. If from_index is non NULL and *from_index is NULL then the index is created and its location stored to *from_index. In this case the caller has the responsibility to free the memory pointed to by *from_index. If from_index and *from_index are non NULL then the index is reused as is. This currently saves about 10% of CPU time to repack the git archive. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- delta.h | 3 ++- diff-delta.c | 41 +++++++++++++++++++++++++++-------------- diffcore-break.c | 2 +- diffcore-rename.c | 2 +- pack-objects.c | 11 ++++++++--- test-delta.c | 2 +- 6 files changed, 40 insertions(+), 21 deletions(-) diff --git a/delta.h b/delta.h index a15350dab..00fef0b8d 100644 --- a/delta.h +++ b/delta.h @@ -4,7 +4,8 @@ /* handling of delta buffers */ extern void *diff_delta(void *from_buf, unsigned long from_size, void *to_buf, unsigned long to_size, - unsigned long *delta_size, unsigned long max_size); + unsigned long *delta_size, unsigned long max_size, + void **from_index); extern void *patch_delta(void *src_buf, unsigned long src_size, void *delta_buf, unsigned long delta_size, unsigned long *dst_size); diff --git a/diff-delta.c b/diff-delta.c index 0730b24df..dcd3f5572 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -30,8 +30,7 @@ struct index { static struct index ** delta_index(const unsigned char *buf, unsigned long bufsize, - unsigned long trg_bufsize, - unsigned int *hash_shift) + unsigned long trg_bufsize) { unsigned long hsize; unsigned int i, hshift, hlimit, *hash_count; @@ -44,14 +43,17 @@ static struct index ** delta_index(const unsigned char *buf, for (i = 8; (1 << i) < hsize && i < 24; i += 2); hsize = 1 << i; hshift = (i - 8) / 2; - *hash_shift = hshift; - /* allocate lookup index */ - mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry)); + /* + * Allocate lookup index. Note the first hash pointer + * is used to store the hash shift value. + */ + mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry)); if (!mem) return NULL; hash = mem; - entry = mem + hsize * sizeof(*hash); + *hash++ = (void *)hshift; + entry = mem + (1 + hsize) * sizeof(*hash); memset(hash, 0, hsize * sizeof(*hash)); /* allocate an array to count hash entries */ @@ -107,7 +109,7 @@ static struct index ** delta_index(const unsigned char *buf, } free(hash_count); - return hash; + return hash-1; } /* provide the size of the copy opcode given the block offset and size */ @@ -121,7 +123,8 @@ static struct index ** delta_index(const unsigned char *buf, void *diff_delta(void *from_buf, unsigned long from_size, void *to_buf, unsigned long to_size, unsigned long *delta_size, - unsigned long max_size) + unsigned long max_size, + void **from_index) { unsigned int i, outpos, outsize, inscnt, hash_shift; const unsigned char *ref_data, *ref_top, *data, *top; @@ -130,9 +133,16 @@ void *diff_delta(void *from_buf, unsigned long from_size, if (!from_size || !to_size) return NULL; - hash = delta_index(from_buf, from_size, to_size, &hash_shift); - if (!hash) - return NULL; + if (from_index && *from_index) { + hash = *from_index; + } else { + hash = delta_index(from_buf, from_size, to_size); + if (!hash) + return NULL; + if (from_index) + *from_index = hash; + } + hash_shift = (unsigned int)(*hash++); outpos = 0; outsize = 8192; @@ -140,7 +150,8 @@ void *diff_delta(void *from_buf, unsigned long from_size, outsize = max_size + MAX_OP_SIZE + 1; out = malloc(outsize); if (!out) { - free(hash); + if (!from_index) + free(hash-1); return NULL; } @@ -241,7 +252,8 @@ void *diff_delta(void *from_buf, unsigned long from_size, out = realloc(out, outsize); if (!out) { free(tmp); - free(hash); + if (!from_index) + free(hash-1); return NULL; } } @@ -250,7 +262,8 @@ void *diff_delta(void *from_buf, unsigned long from_size, if (inscnt) out[outpos - inscnt - 1] = inscnt; - free(hash); + if (!from_index) + free(hash-1); *delta_size = outpos; return out; } diff --git a/diffcore-break.c b/diffcore-break.c index 95b5eb492..34f1ed073 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -71,7 +71,7 @@ static int should_break(struct diff_filespec *src, delta = diff_delta(src->data, src->size, dst->data, dst->size, - &delta_size, 0); + &delta_size, 0, NULL); if (!delta) return 0; /* error but caught downstream */ diff --git a/diffcore-rename.c b/diffcore-rename.c index ffd126af0..099d2a236 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -168,7 +168,7 @@ static int estimate_similarity(struct diff_filespec *src, delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE; delta = diff_delta(src->data, src->size, dst->data, dst->size, - &delta_size, delta_limit); + &delta_size, delta_limit, NULL); if (!delta) /* If delta_limit is exceeded, we have too much differences */ return 0; diff --git a/pack-objects.c b/pack-objects.c index 136a7f5aa..d6a346360 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -204,7 +204,7 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e if (!otherbuf) die("unable to read %s", sha1_to_hex(entry->delta->sha1)); delta_buf = diff_delta(otherbuf, othersize, - buf, size, &delta_size, 0); + buf, size, &delta_size, 0, NULL); if (!delta_buf || delta_size != entry->delta_size) die("delta size changed"); free(buf); @@ -810,6 +810,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr struct unpacked { struct object_entry *entry; void *data; + void **delta_index; }; /* @@ -891,7 +892,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de if (sizediff >= max_size) return -1; delta_buf = diff_delta(old->data, oldsize, - cur->data, size, &delta_size, max_size); + cur->data, size, &delta_size, + max_size, old->delta_index); if (!delta_buf) return 0; cur_entry->delta = old_entry; @@ -948,6 +950,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) */ continue; + free(n->delta_index); free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); @@ -974,8 +977,10 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (progress) fputc('\n', stderr); - for (i = 0; i < window; ++i) + for (i = 0; i < window; ++i) { + free(array[i].delta_index); free(array[i].data); + } free(array); } diff --git a/test-delta.c b/test-delta.c index 1be8ee0c7..89eb68ed2 100644 --- a/test-delta.c +++ b/test-delta.c @@ -63,7 +63,7 @@ int main(int argc, char *argv[]) if (argv[1][1] == 'd') out_buf = diff_delta(from_buf, from_size, data_buf, data_size, - &out_size, 0); + &out_size, 0, NULL); else out_buf = patch_delta(from_buf, from_size, data_buf, data_size, -- 2.26.2