diff-delta: allow reusing of the reference buffer index
authorNicolas Pitre <nico@cam.org>
Tue, 28 Feb 2006 04:38:28 +0000 (23:38 -0500)
committerJunio C Hamano <junkio@cox.net>
Thu, 2 Mar 2006 05:53:22 +0000 (21:53 -0800)
When a reference buffer is used multiple times then its index can be
computed only once and reused multiple times.  This patch adds an extra
pointer to a pointer argument (from_index) to diff_delta() for this.

If from_index is NULL then everything is like before.

If from_index is non NULL and *from_index is NULL then the index is
created and its location stored to *from_index.  In this case the caller
has the responsibility to free the memory pointed to by *from_index.

If from_index and *from_index are non NULL then the index is reused as
is.

This currently saves about 10% of CPU time to repack the git archive.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
delta.h
diff-delta.c
diffcore-break.c
diffcore-rename.c
pack-objects.c
test-delta.c

diff --git a/delta.h b/delta.h
index a15350dabcd497d4cb21261721706eca072de034..00fef0b8d739702c6cf664b35679c4897c0ca13f 100644 (file)
--- a/delta.h
+++ b/delta.h
@@ -4,7 +4,8 @@
 /* handling of delta buffers */
 extern void *diff_delta(void *from_buf, unsigned long from_size,
                        void *to_buf, unsigned long to_size,
-                       unsigned long *delta_size, unsigned long max_size);
+                       unsigned long *delta_size, unsigned long max_size,
+                       void **from_index);
 extern void *patch_delta(void *src_buf, unsigned long src_size,
                         void *delta_buf, unsigned long delta_size,
                         unsigned long *dst_size);
index 0730b24df8fe679d16e1cd03d066b4d7f33e73cd..dcd3f5572e2682d1ef94b030efe1682dc3f76b67 100644 (file)
@@ -30,8 +30,7 @@ struct index {
 
 static struct index ** delta_index(const unsigned char *buf,
                                   unsigned long bufsize,
-                                  unsigned long trg_bufsize,
-                                  unsigned int *hash_shift)
+                                  unsigned long trg_bufsize)
 {
        unsigned long hsize;
        unsigned int i, hshift, hlimit, *hash_count;
@@ -44,14 +43,17 @@ static struct index ** delta_index(const unsigned char *buf,
        for (i = 8; (1 << i) < hsize && i < 24; i += 2);
        hsize = 1 << i;
        hshift = (i - 8) / 2;
-       *hash_shift = hshift;
 
-       /* allocate lookup index */
-       mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
+       /*
+        * Allocate lookup index.  Note the first hash pointer
+        * is used to store the hash shift value.
+        */
+       mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
        if (!mem)
                return NULL;
        hash = mem;
-       entry = mem + hsize * sizeof(*hash);
+       *hash++ = (void *)hshift;
+       entry = mem + (1 + hsize) * sizeof(*hash);
        memset(hash, 0, hsize * sizeof(*hash));
 
        /* allocate an array to count hash entries */
@@ -107,7 +109,7 @@ static struct index ** delta_index(const unsigned char *buf,
        }
        free(hash_count);
 
-       return hash;
+       return hash-1;
 }
 
 /* provide the size of the copy opcode given the block offset and size */
@@ -121,7 +123,8 @@ static struct index ** delta_index(const unsigned char *buf,
 void *diff_delta(void *from_buf, unsigned long from_size,
                 void *to_buf, unsigned long to_size,
                 unsigned long *delta_size,
-                unsigned long max_size)
+                unsigned long max_size,
+                void **from_index)
 {
        unsigned int i, outpos, outsize, inscnt, hash_shift;
        const unsigned char *ref_data, *ref_top, *data, *top;
@@ -130,9 +133,16 @@ void *diff_delta(void *from_buf, unsigned long from_size,
 
        if (!from_size || !to_size)
                return NULL;
-       hash = delta_index(from_buf, from_size, to_size, &hash_shift);
-       if (!hash)
-               return NULL;
+       if (from_index && *from_index) {
+               hash = *from_index;
+       } else {
+               hash = delta_index(from_buf, from_size, to_size);
+               if (!hash)
+                       return NULL;
+               if (from_index)
+                       *from_index = hash;
+       }
+       hash_shift = (unsigned int)(*hash++);
 
        outpos = 0;
        outsize = 8192;
@@ -140,7 +150,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
                outsize = max_size + MAX_OP_SIZE + 1;
        out = malloc(outsize);
        if (!out) {
-               free(hash);
+               if (!from_index)
+                       free(hash-1);
                return NULL;
        }
 
@@ -241,7 +252,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
                                out = realloc(out, outsize);
                        if (!out) {
                                free(tmp);
-                               free(hash);
+                               if (!from_index)
+                                       free(hash-1);
                                return NULL;
                        }
                }
@@ -250,7 +262,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
        if (inscnt)
                out[outpos - inscnt - 1] = inscnt;
 
-       free(hash);
+       if (!from_index)
+               free(hash-1);
        *delta_size = outpos;
        return out;
 }
index 95b5eb492e3aa1a282347b875df04da31f276399..34f1ed07319c058c9cc8a99b902727fa1f305a02 100644 (file)
@@ -71,7 +71,7 @@ static int should_break(struct diff_filespec *src,
 
        delta = diff_delta(src->data, src->size,
                           dst->data, dst->size,
-                          &delta_size, 0);
+                          &delta_size, 0, NULL);
        if (!delta)
                return 0; /* error but caught downstream */
 
index ffd126af0d2cd3539e0f186e83c7671132758e6b..099d2a2367d79dd06e068eeb20177a3ec1d12d52 100644 (file)
@@ -168,7 +168,7 @@ static int estimate_similarity(struct diff_filespec *src,
        delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
        delta = diff_delta(src->data, src->size,
                           dst->data, dst->size,
-                          &delta_size, delta_limit);
+                          &delta_size, delta_limit, NULL);
        if (!delta)
                /* If delta_limit is exceeded, we have too much differences */
                return 0;
index 136a7f5aad61e8a90866ee7d0b4bc22e387f0396..d6a3463604bf3e61d8cf8b7d55947c229def8f07 100644 (file)
@@ -204,7 +204,7 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e
        if (!otherbuf)
                die("unable to read %s", sha1_to_hex(entry->delta->sha1));
         delta_buf = diff_delta(otherbuf, othersize,
-                              buf, size, &delta_size, 0);
+                              buf, size, &delta_size, 0, NULL);
         if (!delta_buf || delta_size != entry->delta_size)
                die("delta size changed");
         free(buf);
@@ -810,6 +810,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
 struct unpacked {
        struct object_entry *entry;
        void *data;
+       void **delta_index;
 };
 
 /*
@@ -891,7 +892,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
        if (sizediff >= max_size)
                return -1;
        delta_buf = diff_delta(old->data, oldsize,
-                              cur->data, size, &delta_size, max_size);
+                              cur->data, size, &delta_size,
+                              max_size, old->delta_index);
        if (!delta_buf)
                return 0;
        cur_entry->delta = old_entry;
@@ -948,6 +950,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
                         */
                        continue;
 
+               free(n->delta_index);
                free(n->data);
                n->entry = entry;
                n->data = read_sha1_file(entry->sha1, type, &size);
@@ -974,8 +977,10 @@ static void find_deltas(struct object_entry **list, int window, int depth)
        if (progress)
                fputc('\n', stderr);
 
-       for (i = 0; i < window; ++i)
+       for (i = 0; i < window; ++i) {
+               free(array[i].delta_index);
                free(array[i].data);
+       }
        free(array);
 }
 
index 1be8ee0c721ec35372fee4e686d9664b143b4345..89eb68ed21545e4001c08dd21b5e907e22b979bb 100644 (file)
@@ -63,7 +63,7 @@ int main(int argc, char *argv[])
        if (argv[1][1] == 'd')
                out_buf = diff_delta(from_buf, from_size,
                                     data_buf, data_size,
-                                    &out_size, 0);
+                                    &out_size, 0, NULL);
        else
                out_buf = patch_delta(from_buf, from_size,
                                      data_buf, data_size,