Speed up git notes lookup
authorJohannes Schindelin <Johannes.Schindelin@gmx.de>
Fri, 9 Oct 2009 10:21:59 +0000 (12:21 +0200)
committerJunio C Hamano <gitster@pobox.com>
Tue, 20 Oct 2009 01:59:42 +0000 (18:59 -0700)
To avoid looking up each and every commit in the notes ref's tree
object, which is very expensive, speed things up by slurping the tree
object's contents into a hash_map.

The idea for the hashmap singleton is from David Reiss, initial
benchmarking by Jeff King.

Note: the implementation allows for arbitrary entries in the notes
tree object, ignoring those that do not reference a valid object.  This
allows you to annotate arbitrary branches, or objects.

This patch has been improved by the following contributions:
- Junio C Hamano: fixed an obvious error in initialize_hash_map()

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
notes.c

diff --git a/notes.c b/notes.c
index 66379ffd22ea2d08f6abc070ddf02a9c1bf45ab5..2b66723f5fe52b4c32830904748679dd62a3cc47 100644 (file)
--- a/notes.c
+++ b/notes.c
 #include "refs.h"
 #include "utf8.h"
 #include "strbuf.h"
+#include "tree-walk.h"
+
+struct entry {
+       unsigned char commit_sha1[20];
+       unsigned char notes_sha1[20];
+};
+
+struct hash_map {
+       struct entry *entries;
+       off_t count, size;
+};
 
 static int initialized;
+static struct hash_map hash_map;
+
+static int hash_index(struct hash_map *map, const unsigned char *sha1)
+{
+       int i = ((*(unsigned int *)sha1) % map->size);
+
+       for (;;) {
+               unsigned char *current = map->entries[i].commit_sha1;
+
+               if (!hashcmp(sha1, current))
+                       return i;
+
+               if (is_null_sha1(current))
+                       return -1 - i;
+
+               if (++i == map->size)
+                       i = 0;
+       }
+}
+
+static void add_entry(const unsigned char *commit_sha1,
+               const unsigned char *notes_sha1)
+{
+       int index;
+
+       if (hash_map.count + 1 > hash_map.size >> 1) {
+               int i, old_size = hash_map.size;
+               struct entry *old = hash_map.entries;
+
+               hash_map.size = old_size ? old_size << 1 : 64;
+               hash_map.entries = (struct entry *)
+                       xcalloc(sizeof(struct entry), hash_map.size);
+
+               for (i = 0; i < old_size; i++)
+                       if (!is_null_sha1(old[i].commit_sha1)) {
+                               index = -1 - hash_index(&hash_map,
+                                               old[i].commit_sha1);
+                               memcpy(hash_map.entries + index, old + i,
+                                       sizeof(struct entry));
+                       }
+               free(old);
+       }
+
+       index = hash_index(&hash_map, commit_sha1);
+       if (index < 0) {
+               index = -1 - index;
+               hash_map.count++;
+       }
+
+       hashcpy(hash_map.entries[index].commit_sha1, commit_sha1);
+       hashcpy(hash_map.entries[index].notes_sha1, notes_sha1);
+}
+
+static void initialize_hash_map(const char *notes_ref_name)
+{
+       unsigned char sha1[20], commit_sha1[20];
+       unsigned mode;
+       struct tree_desc desc;
+       struct name_entry entry;
+       void *buf;
+
+       if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) ||
+           get_tree_entry(commit_sha1, "", sha1, &mode))
+               return;
+
+       buf = fill_tree_descriptor(&desc, sha1);
+       if (!buf)
+               die("Could not read %s for notes-index", sha1_to_hex(sha1));
+
+       while (tree_entry(&desc, &entry))
+               if (!get_sha1(entry.path, commit_sha1))
+                       add_entry(commit_sha1, entry.sha1);
+       free(buf);
+}
+
+static unsigned char *lookup_notes(const unsigned char *commit_sha1)
+{
+       int index;
+
+       if (!hash_map.size)
+               return NULL;
+
+       index = hash_index(&hash_map, commit_sha1);
+       if (index < 0)
+               return NULL;
+       return hash_map.entries[index].notes_sha1;
+}
 
 void get_commit_notes(const struct commit *commit, struct strbuf *sb,
                const char *output_encoding)
 {
        static const char utf8[] = "utf-8";
-       struct strbuf name = STRBUF_INIT;
-       unsigned char sha1[20];
+       unsigned char *sha1;
        char *msg, *msg_p;
        unsigned long linelen, msglen;
        enum object_type type;
@@ -23,17 +120,12 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb,
                        notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT);
                else if (!notes_ref_name)
                        notes_ref_name = GIT_NOTES_DEFAULT_REF;
-               if (notes_ref_name && read_ref(notes_ref_name, sha1))
-                       notes_ref_name = NULL;
+               initialize_hash_map(notes_ref_name);
                initialized = 1;
        }
 
-       if (!notes_ref_name)
-               return;
-
-       strbuf_addf(&name, "%s:%s", notes_ref_name,
-                       sha1_to_hex(commit->object.sha1));
-       if (get_sha1(name.buf, sha1))
+       sha1 = lookup_notes(commit->object.sha1);
+       if (!sha1)
                return;
 
        if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen ||