index-pack: support multithreaded delta resolving

author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>

Sun, 6 May 2012 12:31:55 +0000 (19:31 +0700)

committer Junio C Hamano <gitster@pobox.com>

Mon, 7 May 2012 22:48:15 +0000 (15:48 -0700)
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sun, 6 May 2012 12:31:55 +0000 (19:31 +0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 7 May 2012 22:48:15 +0000 (15:48 -0700)
diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt

index 909687fed4269d8ad2e02b90d5a1f56fbcfde40e..39e6d0ddd84eeeacf43f41e0807cc1e305089ed0 100644 (file)
--- a/Documentation/git-index-pack.txt
+++ b/Documentation/git-index-pack.txt
@@ -74,6 +74,16 @@ OPTIONS
  --strict::
         Die, if the pack contains broken objects or links.
  
+--threads=<n>::
+       Specifies the number of threads to spawn when resolving
+       deltas. This requires that index-pack be compiled with
+       pthreads; otherwise this option is ignored with a warning.
+       This is meant to reduce delta resolution time on multiprocessor
+       machines. The required amount of memory for the delta base
+       cache is however multiplied by the number of threads.
+       Specifying 0 will cause git to auto-detect the number of CPUs
+       and use at most 3 threads.
+
  
  Note
  ----
diff --git a/Makefile b/Makefile

index cf2c40b44f8383d002235400660d76ef7f6de33c..e41955ff955693ddc78722c14fff2a6e98663a46 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2160,7 +2160,7 @@ builtin/branch.o builtin/checkout.o builtin/clone.o builtin/reset.o branch.o tra
  builtin/bundle.o bundle.o transport.o: bundle.h
  builtin/bisect--helper.o builtin/rev-list.o bisect.o: bisect.h
  builtin/clone.o builtin/fetch-pack.o transport.o: fetch-pack.h
-builtin/grep.o builtin/pack-objects.o transport-helper.o thread-utils.o: thread-utils.h
+builtin/index-pack.o builtin/grep.o builtin/pack-objects.o transport-helper.o thread-utils.o: thread-utils.h
  builtin/send-pack.o transport.o: send-pack.h
  builtin/log.o builtin/shortlog.o: shortlog.h
  builtin/prune.o builtin/reflog.o reachable.o: reachable.h
diff --git a/builtin/index-pack.c b/builtin/index-pack.c

index a4be4a65670600c6c412ef6d9b51713899cfab2a..d4685c50d7b4a4fec0c210725689d4d72029bda5 100644 (file)
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -9,6 +9,7 @@
  #include "progress.h"
  #include "fsck.h"
  #include "exec_cmd.h"
+#include "thread-utils.h"
  
  static const char index_pack_usage[] =
  "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
@@ -38,6 +39,14 @@ struct base_data {
         int ofs_first, ofs_last;
  };
  
+struct thread_local { /* per-thread state used while resolving deltas */
+#ifndef NO_PTHREADS
+       pthread_t thread; /* handle filled in by pthread_create() in resolve_deltas() */
+#endif
+       struct base_data *base_cache; /* head of this thread's base_data chain (see link_base_data) */
+       size_t base_cache_used; /* sum of c->size for chain entries that currently hold data */
+};
+
  /*
   * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
   * to memcmp() only the first 20 bytes.
@@ -54,11 +63,11 @@ struct delta_entry {
  
  static struct object_entry *objects;
  static struct delta_entry *deltas;
-static struct base_data *base_cache;
-static size_t base_cache_used;
+static struct thread_local nothread_data;
  static int nr_objects;
  static int nr_deltas;
  static int nr_resolved_deltas;
+static int nr_threads;
  
  static int from_stdin;
  static int strict;
@@ -75,6 +84,77 @@ static git_SHA_CTX input_ctx;
  static uint32_t input_crc32;
  static int input_fd, output_fd, pack_fd;
  
+#ifndef NO_PTHREADS
+
+static struct thread_local *thread_data;
+static int nr_dispatched;
+static int threads_active;
+
+static pthread_mutex_t read_mutex;
+#define read_lock()            lock_mutex(&read_mutex)
+#define read_unlock()          unlock_mutex(&read_mutex)
+
+static pthread_mutex_t counter_mutex;
+#define counter_lock()         lock_mutex(&counter_mutex)
+#define counter_unlock()       unlock_mutex(&counter_mutex)
+
+static pthread_mutex_t work_mutex;
+#define work_lock()            lock_mutex(&work_mutex)
+#define work_unlock()          unlock_mutex(&work_mutex)
+
+static pthread_key_t key;
+
+static inline void lock_mutex(pthread_mutex_t *mutex)
+{ /* Acquire "mutex"; does nothing unless threads_active is set. */
+       if (threads_active)
+               pthread_mutex_lock(mutex);
+}
+
+static inline void unlock_mutex(pthread_mutex_t *mutex)
+{ /* Release "mutex"; does nothing unless threads_active is set. */
+       if (threads_active)
+               pthread_mutex_unlock(mutex);
+}
+
+/*
+ * Mutexes and condition variables can't be statically initialized on Windows.
+ */
+static void init_thread(void)
+{ /* Create the three mutexes and the TLS key, allocate nr_threads slots, enable locking. */
+       init_recursive_mutex(&read_mutex);
+       pthread_mutex_init(&counter_mutex, NULL);
+       pthread_mutex_init(&work_mutex, NULL);
+       pthread_key_create(&key, NULL); /* NULL: no destructor is registered for the key */
+       thread_data = xcalloc(nr_threads, sizeof(*thread_data));
+       threads_active = 1; /* from here on lock_mutex()/unlock_mutex() really lock */
+}
+
+static void cleanup_thread(void)
+{ /* Undo init_thread(); does nothing when threads_active is not set. */
+       if (!threads_active)
+               return;
+       threads_active = 0; /* get_thread_data() returns &nothread_data again after this */
+       pthread_mutex_destroy(&read_mutex);
+       pthread_mutex_destroy(&counter_mutex);
+       pthread_mutex_destroy(&work_mutex);
+       pthread_key_delete(key);
+       free(thread_data); /* NOTE(review): thread_data is left dangling, not reset to NULL */
+}
+
+#else
+
+#define read_lock()
+#define read_unlock()
+
+#define counter_lock()
+#define counter_unlock()
+
+#define work_lock()
+#define work_unlock()
+
+#endif
+
+
  static int mark_link(struct object *obj, int type, void *data)
  {
         if (!obj)
@@ -223,6 +303,25 @@ static NORETURN void bad_object(unsigned long offset, const char *format, ...)
         die("pack has bad object at offset %lu: %s", offset, buf);
  }
  
+static inline struct thread_local *get_thread_data(void)
+{ /* Return the calling thread's state, or &nothread_data when threads are not active. */
+#ifndef NO_PTHREADS
+       if (threads_active)
+               return pthread_getspecific(key); /* value bound by set_thread_data() */
+       assert(!threads_active &&
+              "This should only be reached when all threads are gone");
+#endif
+       return &nothread_data;
+}
+
+#ifndef NO_PTHREADS
+static void set_thread_data(struct thread_local *data)
+{ /* Bind "data" to the calling thread so get_thread_data() returns it; no-op unless threads_active. */
+       if (threads_active)
+               pthread_setspecific(key, data);
+}
+#endif
+
  static struct base_data *alloc_base_data(void)
  {
         struct base_data *base = xmalloc(sizeof(struct base_data));
@@ -237,15 +336,16 @@ static void free_base_data(struct base_data *c)
         if (c->data) {
                 free(c->data);
                 c->data = NULL;
-               base_cache_used -= c->size;
+               get_thread_data()->base_cache_used -= c->size;
         }
  }
  
  static void prune_base_data(struct base_data *retain)
  {
         struct base_data *b;
-       for (b = base_cache;
-            base_cache_used > delta_base_cache_limit && b;
+       struct thread_local *data = get_thread_data();
+       for (b = data->base_cache;
+            data->base_cache_used > delta_base_cache_limit && b;
              b = b->child) {
                 if (b->data && b != retain)
                         free_base_data(b);
@@ -257,12 +357,12 @@ static void link_base_data(struct base_data *base, struct base_data *c)
         if (base)
                 base->child = c;
         else
-               base_cache = c;
+               get_thread_data()->base_cache = c;
  
         c->base = base;
         c->child = NULL;
         if (c->data)
-               base_cache_used += c->size;
+               get_thread_data()->base_cache_used += c->size;
         prune_base_data(c);
  }
  
@@ -272,7 +372,7 @@ static void unlink_base_data(struct base_data *c)
         if (base)
                 base->child = NULL;
         else
-               base_cache = NULL;
+               get_thread_data()->base_cache = NULL;
         free_base_data(c);
  }
  
@@ -461,19 +561,24 @@ static void sha1_object(const void *data, unsigned long size,
                         enum object_type type, unsigned char *sha1)
  {
         hash_sha1_file(data, size, typename(type), sha1);
+       read_lock();
         if (has_sha1_file(sha1)) {
                 void *has_data;
                 enum object_type has_type;
                 unsigned long has_size;
                 has_data = read_sha1_file(sha1, &has_type, &has_size);
+               read_unlock();
                 if (!has_data)
                         die("cannot read existing object %s", sha1_to_hex(sha1));
                 if (size != has_size || type != has_type ||
                     memcmp(data, has_data, size) != 0)
                         die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1));
                 free(has_data);
-       }
+       } else
+               read_unlock();
+
         if (strict) {
+               read_lock();
                 if (type == OBJ_BLOB) {
                         struct blob *blob = lookup_blob(sha1);
                         if (blob)
@@ -507,6 +612,7 @@ static void sha1_object(const void *data, unsigned long size,
                         }
                         obj->flags |= FLAG_CHECKED;
                 }
+               read_unlock();
         }
  }
  
@@ -552,7 +658,7 @@ static void *get_base_data(struct base_data *c)
                 if (!delta_nr) {
                         c->data = get_data_from_pack(obj);
                         c->size = obj->size;
-                       base_cache_used += c->size;
+                       get_thread_data()->base_cache_used += c->size;
                         prune_base_data(c);
                 }
                 for (; delta_nr > 0; delta_nr--) {
@@ -568,7 +674,7 @@ static void *get_base_data(struct base_data *c)
                         free(raw);
                         if (!c->data)
                                 bad_object(obj->idx.offset, "failed to apply delta");
-                       base_cache_used += c->size;
+                       get_thread_data()->base_cache_used += c->size;
                         prune_base_data(c);
                 }
                 free(delta);
@@ -596,7 +702,9 @@ static void resolve_delta(struct object_entry *delta_obj,
                 bad_object(delta_obj->idx.offset, "failed to apply delta");
         sha1_object(result->data, result->size, delta_obj->real_type,
                     delta_obj->idx.sha1);
+       counter_lock();
         nr_resolved_deltas++;
+       counter_unlock();
  }
  
  static struct base_data *find_unresolved_deltas_1(struct base_data *base,
@@ -690,6 +798,30 @@ static void resolve_base(struct object_entry *obj)
         find_unresolved_deltas(base_obj);
  }
  
+#ifndef NO_PTHREADS
+/*
+ * Worker entry point for the threaded second pass.
+ *
+ * "data" is this thread's struct thread_local slot; it is registered
+ * with set_thread_data() so that get_thread_data() returns it on this
+ * thread.  The loop claims the next non-delta object under work_lock
+ * and hands it to resolve_base(), until nr_dispatched reaches
+ * nr_objects.  Always returns NULL.
+ */
+static void *threaded_second_pass(void *data)
+{
+       set_thread_data(data);
+       for (;;) {
+               int i;
+               /*
+                * nr_resolved_deltas is written under counter_lock in
+                * resolve_delta(), so it must be read under the same
+                * lock; work_lock alone does not order this read
+                * against those writes.
+                */
+               counter_lock();
+               display_progress(progress, nr_resolved_deltas);
+               counter_unlock();
+               work_lock();
+               /* skip delta entries; only non-delta objects are dispatched */
+               while (nr_dispatched < nr_objects &&
+                      is_delta_type(objects[nr_dispatched].type))
+                       nr_dispatched++;
+               if (nr_dispatched >= nr_objects) {
+                       work_unlock();
+                       break;
+               }
+               i = nr_dispatched++;
+               work_unlock();
+
+               resolve_base(&objects[i]);
+       }
+       return NULL;
+}
+#endif
+
  /*
   * First pass:
   * - find locations of all objects;
@@ -758,6 +890,24 @@ static void resolve_deltas(void)
  
         if (verbose)
                 progress = start_progress("Resolving deltas", nr_deltas);
+
+#ifndef NO_PTHREADS
+       nr_dispatched = 0;
+       if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
+               init_thread();
+               for (i = 0; i < nr_threads; i++) {
+                       int ret = pthread_create(&thread_data[i].thread, NULL,
+                                                threaded_second_pass, thread_data + i);
+                       if (ret)
+                               die("unable to create thread: %s", strerror(ret));
+               }
+               for (i = 0; i < nr_threads; i++)
+                       pthread_join(thread_data[i].thread, NULL);
+               cleanup_thread();
+               return;
+       }
+#endif
+
         for (i = 0; i < nr_objects; i++) {
                 struct object_entry *obj = &objects[i];
  
@@ -1016,6 +1166,18 @@ static int git_index_pack_config(const char *k, const char *v, void *cb)
                         die("bad pack.indexversion=%"PRIu32, opts->version);
                 return 0;
         }
+       if (!strcmp(k, "pack.threads")) {
+               nr_threads = git_config_int(k, v);
+               if (nr_threads < 0)
+                       die("invalid number of threads specified (%d)",
+                           nr_threads);
+#ifdef NO_PTHREADS
+               if (nr_threads != 1)
+                       warning("no threads support, ignoring %s", k);
+               nr_threads = 1;
+#endif
+               return 0;
+       }
         return git_default_config(k, v, cb);
  }
  
@@ -1174,6 +1336,17 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                                 keep_msg = "";
                         } else if (!prefixcmp(arg, "--keep=")) {
                                 keep_msg = arg + 7;
+                       } else if (!prefixcmp(arg, "--threads=")) {
+                               char *end;
+                               nr_threads = strtoul(arg+10, &end, 0);
+                               if (!arg[10] || *end || nr_threads < 0)
+                                       usage(index_pack_usage);
+#ifdef NO_PTHREADS
+                               if (nr_threads != 1)
+                                       warning("no threads support, "
+                                               "ignoring %s", arg);
+                               nr_threads = 1;
+#endif
                         } else if (!prefixcmp(arg, "--pack_header=")) {
                                 struct pack_header *hdr;
                                 char *c;
@@ -1245,6 +1418,15 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
         if (strict)
                 opts.flags |= WRITE_IDX_STRICT;
  
+#ifndef NO_PTHREADS
+       if (!nr_threads) {
+               nr_threads = online_cpus();
+               /* An experiment showed that more threads does not mean faster */
+               if (nr_threads > 3)
+                       nr_threads = 3;
+       }
+#endif
+
         curr_pack = open_pack_file(pack_name);
         parse_pack_header();
         objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
diff --git a/t/perf/p5302-pack-index.sh b/t/perf/p5302-pack-index.sh

new file mode 100755 (executable)

index 0000000..6cb5b0d
--- /dev/null
+++ b/t/perf/p5302-pack-index.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+test_description="Tests index-pack performance"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+
+test_expect_success 'repack' '
+       git repack -ad &&
+       PACK=`ls .git/objects/pack/*.pack | head -n1` &&
+       test -f "$PACK" &&
+       export PACK
+'
+
+test_perf 'index-pack 0 threads' '
+       GIT_DIR=t1 git index-pack --threads=1 --stdin < $PACK
+'
+
+test_perf 'index-pack 1 thread ' '
+       GIT_DIR=t2 GIT_FORCE_THREADS=1 git index-pack --threads=1 --stdin < $PACK
+'
+
+test_perf 'index-pack 2 threads' '
+       GIT_DIR=t3 git index-pack --threads=2 --stdin < $PACK
+'
+
+test_perf 'index-pack 4 threads' '
+       GIT_DIR=t4 git index-pack --threads=4 --stdin < $PACK
+'
+
+test_perf 'index-pack 8 threads' '
+       GIT_DIR=t5 git index-pack --threads=8 --stdin < $PACK
+'
+
+test_perf 'index-pack default number of threads' '
+       GIT_DIR=t6 git index-pack --stdin < $PACK
+'
+
+test_done
author	Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
	Sun, 6 May 2012 12:31:55 +0000 (19:31 +0700)
committer	Junio C Hamano <gitster@pobox.com>
	Mon, 7 May 2012 22:48:15 +0000 (15:48 -0700)
Documentation/git-index-pack.txt		patch \| blob \| history
Makefile		patch \| blob \| history
builtin/index-pack.c		patch \| blob \| history
t/perf/p5302-pack-index.sh	[new file with mode: 0755]	patch \| blob