Try using Geert similarity code in pack-objects.
author Junio C Hamano <junkio@cox.net>
Mon, 17 Apr 2006 05:03:01 +0000 (22:03 -0700)
committer Junio C Hamano <junkio@cox.net>
Mon, 17 Apr 2006 05:03:01 +0000 (22:03 -0700)
It appears the fingerprinting itself is too expensive to be worth doing
for this purpose.  A failed experiment.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Makefile
pack-objects.c

index 69ca05b2f96dac2ac72faf3a6ecf5328558f8156..aa499ed52fbfce3641cbd9ec2f737489f8202d67 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -204,7 +204,7 @@ DIFF_OBJS = \
        diffcore-delta.o log-tree.o
 
 LIB_OBJS = \
-       blob.o commit.o connect.o csum-file.o \
+       blob.o commit.o connect.o csum-file.o gsimm.o rabinpoly.o \
        date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \
        object.o pack-check.o patch-delta.o path.o pkt-line.o \
        quote.o read-cache.o refs.o run-command.o \
index 09f4f2c944d95bb145823c0b07f4a7f302bcab1f..18f8f82571f82e9618b87797065be36b7087ce74 100644 (file)
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -8,6 +8,8 @@
 #include "pack.h"
 #include "csum-file.h"
 #include "tree-walk.h"
+#include "rabinpoly.h"
+#include "gsimm.h"
 #include <sys/time.h>
 #include <signal.h>
 
@@ -993,6 +995,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
 
 struct unpacked {
        struct object_entry *entry;
+       unsigned char fingerprint[MD_LENGTH];
        void *data;
 };
 
@@ -1041,6 +1044,9 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
        if (old_entry->depth >= max_depth)
                return 0;
 
+       if (gb_simm_score(cur->fingerprint, old->fingerprint) < 0.4)
+               return 0;
+
        /*
         * NOTE!
         *
@@ -1077,6 +1083,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
        unsigned processed = 0;
        unsigned last_percent = 999;
 
+       rabin_reset ();
        memset(array, 0, array_size);
        i = nr_objects;
        idx = 0;
@@ -1115,6 +1122,8 @@ static void find_deltas(struct object_entry **list, int window, int depth)
                if (size != entry->size)
                        die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 
+               gb_simm_process(n->data, size, n->fingerprint);
+
                j = window;
                while (--j > 0) {
                        unsigned int other_idx = idx + j;
@@ -1124,6 +1133,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
                        m = array + other_idx;
                        if (!m->entry)
                                break;
+
                        if (try_delta(n, m, depth) < 0)
                                break;
                }