refs: Use binary search to lookup refs faster
author Julian Phillips <julian@quantumfyre.co.uk>
Thu, 29 Sep 2011 22:11:42 +0000 (23:11 +0100)
committer Junio C Hamano <gitster@pobox.com>
Fri, 30 Sep 2011 19:28:34 +0000 (12:28 -0700)
Currently we linearly search through lists of refs when we need to
find a specific ref.  This can be very slow if we need to look up a
large number of refs.  By changing to a binary search we can make this
faster.

In order to be able to use a binary search we need to change from
using linked lists to arrays, which we can manage using ALLOC_GROW.

We can now also use the standard library qsort function to sort the
refs arrays.

Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
refs.c

diff --git a/refs.c b/refs.c
index d021595578338788cb65eaeb455dd655dba2497f..5835b40b0cb1e707323aaa0d4c837a7ab21d99d0 100644 (file)
--- a/refs.c
+++ b/refs.c
@@ -8,14 +8,18 @@
 #define REF_KNOWS_PEELED 04
 #define REF_BROKEN 010
 
-struct ref_list {
-       struct ref_list *next;
+struct ref_entry {
        unsigned char flag; /* ISSYMREF? ISPACKED? */
        unsigned char sha1[20];
        unsigned char peeled[20];
        char name[FLEX_ARRAY];
 };
 
+struct ref_array {
+       int nr, alloc;
+       struct ref_entry **refs;
+};
+
 static const char *parse_ref_line(char *line, unsigned char *sha1)
 {
        /*
@@ -44,108 +48,80 @@ static const char *parse_ref_line(char *line, unsigned char *sha1)
        return line;
 }
 
-static struct ref_list *add_ref(const char *name, const unsigned char *sha1,
-                               int flag, struct ref_list *list,
-                               struct ref_list **new_entry)
+static void add_ref(const char *name, const unsigned char *sha1,
+                   int flag, struct ref_array *refs,
+                   struct ref_entry **new_entry)
 {
        int len;
-       struct ref_list *entry;
+       struct ref_entry *entry;
 
        /* Allocate it and add it in.. */
        len = strlen(name) + 1;
-       entry = xmalloc(sizeof(struct ref_list) + len);
+       entry = xmalloc(sizeof(struct ref_entry) + len);
        hashcpy(entry->sha1, sha1);
        hashclr(entry->peeled);
        memcpy(entry->name, name, len);
        entry->flag = flag;
-       entry->next = list;
        if (new_entry)
                *new_entry = entry;
-       return entry;
+       ALLOC_GROW(refs->refs, refs->nr + 1, refs->alloc);
+       refs->refs[refs->nr++] = entry;
 }
 
-/* merge sort the ref list */
-static struct ref_list *sort_ref_list(struct ref_list *list)
+static int ref_entry_cmp(const void *a, const void *b)
 {
-       int psize, qsize, last_merge_count, cmp;
-       struct ref_list *p, *q, *l, *e;
-       struct ref_list *new_list = list;
-       int k = 1;
-       int merge_count = 0;
+       struct ref_entry *one = *(struct ref_entry **)a;
+       struct ref_entry *two = *(struct ref_entry **)b;
+       return strcmp(one->name, two->name);
+}
 
-       if (!list)
-               return list;
+static void sort_ref_array(struct ref_array *array)
+{
+       int i = 0, j = 1;
 
-       do {
-               last_merge_count = merge_count;
-               merge_count = 0;
+       /* Nothing to sort unless there are at least two entries */
+       if (array->nr < 2)
+               return;
 
-               psize = 0;
+       qsort(array->refs, array->nr, sizeof(*array->refs), ref_entry_cmp);
 
-               p = new_list;
-               q = new_list;
-               new_list = NULL;
-               l = NULL;
+       /* Remove any duplicates from the ref_array */
+       for (; j < array->nr; j++) {
+               struct ref_entry *a = array->refs[i];
+               struct ref_entry *b = array->refs[j];
+               if (!strcmp(a->name, b->name)) {
+                       if (hashcmp(a->sha1, b->sha1))
+                               die("Duplicated ref, and SHA1s don't match: %s",
+                                   a->name);
+                       warning("Duplicated ref: %s", a->name);
+                       continue;
+               }
+               i++;
+               array->refs[i] = array->refs[j];
+       }
+       array->nr = i + 1;
+}
 
-               while (p) {
-                       merge_count++;
+static struct ref_entry *search_ref_array(struct ref_array *array, const char *name)
+{
+       struct ref_entry *e, **r;
+       int len;
 
-                       while (psize < k && q->next) {
-                               q = q->next;
-                               psize++;
-                       }
-                       qsize = k;
-
-                       while ((psize > 0) || (qsize > 0 && q)) {
-                               if (qsize == 0 || !q) {
-                                       e = p;
-                                       p = p->next;
-                                       psize--;
-                               } else if (psize == 0) {
-                                       e = q;
-                                       q = q->next;
-                                       qsize--;
-                               } else {
-                                       cmp = strcmp(q->name, p->name);
-                                       if (cmp < 0) {
-                                               e = q;
-                                               q = q->next;
-                                               qsize--;
-                                       } else if (cmp > 0) {
-                                               e = p;
-                                               p = p->next;
-                                               psize--;
-                                       } else {
-                                               if (hashcmp(q->sha1, p->sha1))
-                                                       die("Duplicated ref, and SHA1s don't match: %s",
-                                                           q->name);
-                                               warning("Duplicated ref: %s", q->name);
-                                               e = q;
-                                               q = q->next;
-                                               qsize--;
-                                               free(e);
-                                               e = p;
-                                               p = p->next;
-                                               psize--;
-                                       }
-                               }
+       if (name == NULL)
+               return NULL;
 
-                               e->next = NULL;
+       len = strlen(name) + 1;
+       e = xmalloc(sizeof(struct ref_entry) + len);
+       memcpy(e->name, name, len);
 
-                               if (l)
-                                       l->next = e;
-                               if (!new_list)
-                                       new_list = e;
-                               l = e;
-                       }
+       r = bsearch(&e, array->refs, array->nr, sizeof(*array->refs), ref_entry_cmp);
 
-                       p = q;
-               };
+       free(e);
 
-               k = k * 2;
-       } while ((last_merge_count != merge_count) || (last_merge_count != 1));
+       if (r == NULL)
+               return NULL;
 
-       return new_list;
+       return *r;
 }
 
 /*
@@ -155,38 +131,37 @@ static struct ref_list *sort_ref_list(struct ref_list *list)
 static struct cached_refs {
        char did_loose;
        char did_packed;
-       struct ref_list *loose;
-       struct ref_list *packed;
+       struct ref_array loose;
+       struct ref_array packed;
 } cached_refs, submodule_refs;
-static struct ref_list *current_ref;
+static struct ref_entry *current_ref;
 
-static struct ref_list *extra_refs;
+static struct ref_array extra_refs;
 
-static void free_ref_list(struct ref_list *list)
+static void free_ref_array(struct ref_array *array)
 {
-       struct ref_list *next;
-       for ( ; list; list = next) {
-               next = list->next;
-               free(list);
-       }
+       int i;
+       for (i = 0; i < array->nr; i++)
+               free(array->refs[i]);
+       free(array->refs);
+       array->nr = array->alloc = 0;
+       array->refs = NULL;
 }
 
 static void invalidate_cached_refs(void)
 {
        struct cached_refs *ca = &cached_refs;
 
-       if (ca->did_loose && ca->loose)
-               free_ref_list(ca->loose);
-       if (ca->did_packed && ca->packed)
-               free_ref_list(ca->packed);
-       ca->loose = ca->packed = NULL;
+       if (ca->did_loose)
+               free_ref_array(&ca->loose);
+       if (ca->did_packed)
+               free_ref_array(&ca->packed);
        ca->did_loose = ca->did_packed = 0;
 }
 
 static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
 {
-       struct ref_list *list = NULL;
-       struct ref_list *last = NULL;
+       struct ref_entry *last = NULL;
        char refline[PATH_MAX];
        int flag = REF_ISPACKED;
 
@@ -205,7 +180,7 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
 
                name = parse_ref_line(refline, sha1);
                if (name) {
-                       list = add_ref(name, sha1, flag, list, &last);
+                       add_ref(name, sha1, flag, &cached_refs->packed, &last);
                        continue;
                }
                if (last &&
@@ -215,21 +190,20 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
                    !get_sha1_hex(refline + 1, sha1))
                        hashcpy(last->peeled, sha1);
        }
-       cached_refs->packed = sort_ref_list(list);
+       sort_ref_array(&cached_refs->packed);
 }
 
 void add_extra_ref(const char *name, const unsigned char *sha1, int flag)
 {
-       extra_refs = add_ref(name, sha1, flag, extra_refs, NULL);
+       add_ref(name, sha1, flag, &extra_refs, NULL);
 }
 
 void clear_extra_refs(void)
 {
-       free_ref_list(extra_refs);
-       extra_refs = NULL;
+       free_ref_array(&extra_refs);
 }
 
-static struct ref_list *get_packed_refs(const char *submodule)
+static struct ref_array *get_packed_refs(const char *submodule)
 {
        const char *packed_refs_file;
        struct cached_refs *refs;
@@ -237,7 +211,7 @@ static struct ref_list *get_packed_refs(const char *submodule)
        if (submodule) {
                packed_refs_file = git_path_submodule(submodule, "packed-refs");
                refs = &submodule_refs;
-               free_ref_list(refs->packed);
+               free_ref_array(&refs->packed);
        } else {
                packed_refs_file = git_path("packed-refs");
                refs = &cached_refs;
@@ -245,18 +219,17 @@ static struct ref_list *get_packed_refs(const char *submodule)
 
        if (!refs->did_packed || submodule) {
                FILE *f = fopen(packed_refs_file, "r");
-               refs->packed = NULL;
                if (f) {
                        read_packed_refs(f, refs);
                        fclose(f);
                }
                refs->did_packed = 1;
        }
-       return refs->packed;
+       return &refs->packed;
 }
 
-static struct ref_list *get_ref_dir(const char *submodule, const char *base,
-                                   struct ref_list *list)
+static void get_ref_dir(const char *submodule, const char *base,
+                       struct ref_array *array)
 {
        DIR *dir;
        const char *path;
@@ -299,7 +272,7 @@ static struct ref_list *get_ref_dir(const char *submodule, const char *base,
                        if (stat(refdir, &st) < 0)
                                continue;
                        if (S_ISDIR(st.st_mode)) {
-                               list = get_ref_dir(submodule, ref, list);
+                               get_ref_dir(submodule, ref, array);
                                continue;
                        }
                        if (submodule) {
@@ -314,12 +287,11 @@ static struct ref_list *get_ref_dir(const char *submodule, const char *base,
                                        hashclr(sha1);
                                        flag |= REF_BROKEN;
                                }
-                       list = add_ref(ref, sha1, flag, list, NULL);
+                       add_ref(ref, sha1, flag, array, NULL);
                }
                free(ref);
                closedir(dir);
        }
-       return list;
 }
 
 struct warn_if_dangling_data {
@@ -356,21 +328,21 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname)
        for_each_rawref(warn_if_dangling_symref, &data);
 }
 
-static struct ref_list *get_loose_refs(const char *submodule)
+static struct ref_array *get_loose_refs(const char *submodule)
 {
        if (submodule) {
-               free_ref_list(submodule_refs.loose);
-               submodule_refs.loose = get_ref_dir(submodule, "refs", NULL);
-               submodule_refs.loose = sort_ref_list(submodule_refs.loose);
-               return submodule_refs.loose;
+               free_ref_array(&submodule_refs.loose);
+               get_ref_dir(submodule, "refs", &submodule_refs.loose);
+               sort_ref_array(&submodule_refs.loose);
+               return &submodule_refs.loose;
        }
 
        if (!cached_refs.did_loose) {
-               cached_refs.loose = get_ref_dir(NULL, "refs", NULL);
-               cached_refs.loose = sort_ref_list(cached_refs.loose);
+               get_ref_dir(NULL, "refs", &cached_refs.loose);
+               sort_ref_array(&cached_refs.loose);
                cached_refs.did_loose = 1;
        }
-       return cached_refs.loose;
+       return &cached_refs.loose;
 }
 
 /* We allow "recursive" symbolic refs. Only within reason, though */
@@ -381,8 +353,8 @@ static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refna
 {
        FILE *f;
        struct cached_refs refs;
-       struct ref_list *ref;
-       int retval;
+       struct ref_entry *ref;
+       int retval = -1;
 
        strcpy(name + pathlen, "packed-refs");
        f = fopen(name, "r");
@@ -390,17 +362,12 @@ static int resolve_gitlink_packed_ref(char *name, int pathlen, const char *refna
                return -1;
        read_packed_refs(f, &refs);
        fclose(f);
-       ref = refs.packed;
-       retval = -1;
-       while (ref) {
-               if (!strcmp(ref->name, refname)) {
-                       retval = 0;
-                       memcpy(result, ref->sha1, 20);
-                       break;
-               }
-               ref = ref->next;
+       ref = search_ref_array(&refs.packed, refname);
+       if (ref != NULL) {
+               memcpy(result, ref->sha1, 20);
+               retval = 0;
        }
-       free_ref_list(refs.packed);
+       free_ref_array(&refs.packed);
        return retval;
 }
 
@@ -501,15 +468,13 @@ const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int *
                git_snpath(path, sizeof(path), "%s", ref);
                /* Special case: non-existing file. */
                if (lstat(path, &st) < 0) {
-                       struct ref_list *list = get_packed_refs(NULL);
-                       while (list) {
-                               if (!strcmp(ref, list->name)) {
-                                       hashcpy(sha1, list->sha1);
-                                       if (flag)
-                                               *flag |= REF_ISPACKED;
-                                       return ref;
-                               }
-                               list = list->next;
+                       struct ref_array *packed = get_packed_refs(NULL);
+                       struct ref_entry *r = search_ref_array(packed, ref);
+                       if (r != NULL) {
+                               hashcpy(sha1, r->sha1);
+                               if (flag)
+                                       *flag |= REF_ISPACKED;
+                               return ref;
                        }
                        if (reading || errno != ENOENT)
                                return NULL;
@@ -584,7 +549,7 @@ int read_ref(const char *ref, unsigned char *sha1)
 
 #define DO_FOR_EACH_INCLUDE_BROKEN 01
 static int do_one_ref(const char *base, each_ref_fn fn, int trim,
-                     int flags, void *cb_data, struct ref_list *entry)
+                     int flags, void *cb_data, struct ref_entry *entry)
 {
        if (strncmp(base, entry->name, trim))
                return 0;
@@ -630,18 +595,12 @@ int peel_ref(const char *ref, unsigned char *sha1)
                return -1;
 
        if ((flag & REF_ISPACKED)) {
-               struct ref_list *list = get_packed_refs(NULL);
+               struct ref_array *array = get_packed_refs(NULL);
+               struct ref_entry *r = search_ref_array(array, ref);
 
-               while (list) {
-                       if (!strcmp(list->name, ref)) {
-                               if (list->flag & REF_KNOWS_PEELED) {
-                                       hashcpy(sha1, list->peeled);
-                                       return 0;
-                               }
-                               /* older pack-refs did not leave peeled ones */
-                               break;
-                       }
-                       list = list->next;
+               if (r != NULL && r->flag & REF_KNOWS_PEELED) {
+                       hashcpy(sha1, r->peeled);
+                       return 0;
                }
        }
 
@@ -660,36 +619,39 @@ fallback:
 static int do_for_each_ref(const char *submodule, const char *base, each_ref_fn fn,
                           int trim, int flags, void *cb_data)
 {
-       int retval = 0;
-       struct ref_list *packed = get_packed_refs(submodule);
-       struct ref_list *loose = get_loose_refs(submodule);
+       int retval = 0, i, p = 0, l = 0;
+       struct ref_array *packed = get_packed_refs(submodule);
+       struct ref_array *loose = get_loose_refs(submodule);
 
-       struct ref_list *extra;
+       struct ref_array *extra = &extra_refs;
 
-       for (extra = extra_refs; extra; extra = extra->next)
-               retval = do_one_ref(base, fn, trim, flags, cb_data, extra);
+       for (i = 0; i < extra->nr; i++)
+               retval = do_one_ref(base, fn, trim, flags, cb_data, extra->refs[i]);
 
-       while (packed && loose) {
-               struct ref_list *entry;
-               int cmp = strcmp(packed->name, loose->name);
+       while (p < packed->nr && l < loose->nr) {
+               struct ref_entry *entry;
+               int cmp = strcmp(packed->refs[p]->name, loose->refs[l]->name);
                if (!cmp) {
-                       packed = packed->next;
+                       p++;
                        continue;
                }
                if (cmp > 0) {
-                       entry = loose;
-                       loose = loose->next;
+                       entry = loose->refs[l++];
                } else {
-                       entry = packed;
-                       packed = packed->next;
+                       entry = packed->refs[p++];
                }
                retval = do_one_ref(base, fn, trim, flags, cb_data, entry);
                if (retval)
                        goto end_each;
        }
 
-       for (packed = packed ? packed : loose; packed; packed = packed->next) {
-               retval = do_one_ref(base, fn, trim, flags, cb_data, packed);
+       if (l < loose->nr) {
+               p = l;
+               packed = loose;
+       }
+
+       for (; p < packed->nr; p++) {
+               retval = do_one_ref(base, fn, trim, flags, cb_data, packed->refs[p]);
                if (retval)
                        goto end_each;
        }
@@ -980,24 +942,24 @@ static int remove_empty_directories(const char *file)
 }
 
 static int is_refname_available(const char *ref, const char *oldref,
-                               struct ref_list *list, int quiet)
-{
-       int namlen = strlen(ref); /* e.g. 'foo/bar' */
-       while (list) {
-               /* list->name could be 'foo' or 'foo/bar/baz' */
-               if (!oldref || strcmp(oldref, list->name)) {
-                       int len = strlen(list->name);
+                               struct ref_array *array, int quiet)
+{
+       int i, namlen = strlen(ref); /* e.g. 'foo/bar' */
+       for (i = 0; i < array->nr; i++ ) {
+               struct ref_entry *entry = array->refs[i];
+               /* entry->name could be 'foo' or 'foo/bar/baz' */
+               if (!oldref || strcmp(oldref, entry->name)) {
+                       int len = strlen(entry->name);
                        int cmplen = (namlen < len) ? namlen : len;
-                       const char *lead = (namlen < len) ? list->name : ref;
-                       if (!strncmp(ref, list->name, cmplen) &&
+                       const char *lead = (namlen < len) ? entry->name : ref;
+                       if (!strncmp(ref, entry->name, cmplen) &&
                            lead[cmplen] == '/') {
                                if (!quiet)
                                        error("'%s' exists; cannot create '%s'",
-                                             list->name, ref);
+                                             entry->name, ref);
                                return 0;
                        }
                }
-               list = list->next;
        }
        return 1;
 }
@@ -1104,18 +1066,13 @@ static struct lock_file packlock;
 
 static int repack_without_ref(const char *refname)
 {
-       struct ref_list *list, *packed_ref_list;
-       int fd;
-       int found = 0;
+       struct ref_array *packed;
+       struct ref_entry *ref;
+       int fd, i;
 
-       packed_ref_list = get_packed_refs(NULL);
-       for (list = packed_ref_list; list; list = list->next) {
-               if (!strcmp(refname, list->name)) {
-                       found = 1;
-                       break;
-               }
-       }
-       if (!found)
+       packed = get_packed_refs(NULL);
+       ref = search_ref_array(packed, refname);
+       if (ref == NULL)
                return 0;
        fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), 0);
        if (fd < 0) {
@@ -1123,17 +1080,19 @@ static int repack_without_ref(const char *refname)
                return error("cannot delete '%s' from packed refs", refname);
        }
 
-       for (list = packed_ref_list; list; list = list->next) {
+       for (i = 0; i < packed->nr; i++) {
                char line[PATH_MAX + 100];
                int len;
 
-               if (!strcmp(refname, list->name))
+               ref = packed->refs[i];
+
+               if (!strcmp(refname, ref->name))
                        continue;
                len = snprintf(line, sizeof(line), "%s %s\n",
-                              sha1_to_hex(list->sha1), list->name);
+                              sha1_to_hex(ref->sha1), ref->name);
                /* this should not happen but just being defensive */
                if (len > sizeof(line))
-                       die("too long a refname '%s'", list->name);
+                       die("too long a refname '%s'", ref->name);
                write_or_die(fd, line, len);
        }
        return commit_lock_file(&packlock);