let pack-objects do the writing of unreachable objects as loose objects
authorNicolas Pitre <nico@cam.org>
Wed, 14 May 2008 05:33:53 +0000 (01:33 -0400)
committerJunio C Hamano <gitster@pobox.com>
Wed, 14 May 2008 05:45:44 +0000 (22:45 -0700)
Commit ccc1297226b184c40459e9d373cc9eebfb7bd898 changed the behavior
of 'git repack -A' so unreachable objects are stored as loose objects.
However it did so in a naive and inn efficient way by making packs
about to be deleted inaccessible and feeding their content through
'git unpack-objects'.  While this works, there are major flaws with
this approach:

- It is unacceptably sloooooooooooooow.

  In the Linux kernel repository with no actual unreachable objects,
  doing 'git repack -A -d' before:

real    2m33.220s
user    2m21.675s
sys     0m3.510s

  And with this change:

real    0m36.849s
user    0m24.365s
sys     0m1.950s

  For reference, here's the timing for 'git repack -a -d':

real    0m35.816s
user    0m22.571s
sys     0m2.011s

  This is explained by the fact that 'git unpack-objects' was used to
  unpack _every_ objects even if (almost) 100% of them were thrown away.

- There is a black out period.

  Between the removal of the .idx file for the redundant pack and the
  completion of its unpacking, the unreachable objects become completely
  unaccessible.  This is not a big issue as we're talking about unreachable
  objects, but some consistency is always good.

- There is no way to easily set a sensible mtime for the newly created
  unreachable loose objects.

So, while having a command called "pack-objects" to perform object
unpacking looks really odd, this is probably the best compromize to be
able to solve the above issues in an efficient way.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin-pack-objects.c
git-repack.sh

index 777f272668c39931b330be555f41399adfbce397..5a10119fbd4f62c7a94c49c8d91c966f177331c1 100644 (file)
@@ -28,7 +28,8 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\
        [--window=N] [--window-memory=N] [--depth=N] \n\
        [--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
        [--threads=N] [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
-       [--stdout | base-name] [--include-tag] [--keep-unreachable] \n\
+       [--stdout | base-name] [--include-tag] \n\
+       [--keep-unreachable | --unpack-unreachable] \n\
        [<ref-list | <object-list]";
 
 struct object_entry {
@@ -65,7 +66,7 @@ static struct pack_idx_entry **written_list;
 static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
 
 static int non_empty;
-static int no_reuse_delta, no_reuse_object, keep_unreachable, include_tag;
+static int no_reuse_delta, no_reuse_object, keep_unreachable, unpack_unreachable, include_tag;
 static int local;
 static int incremental;
 static int allow_ofs_delta;
@@ -1905,6 +1906,32 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs)
        free(in_pack.array);
 }
 
+static void loosen_unused_packed_objects(struct rev_info *revs)
+{
+       struct packed_git *p;
+       uint32_t i;
+       const unsigned char *sha1;
+
+       for (p = packed_git; p; p = p->next) {
+               for (i = 0; i < revs->num_ignore_packed; i++) {
+                       if (matches_pack_name(p, revs->ignore_packed[i]))
+                               break;
+               }
+               if (revs->num_ignore_packed <= i)
+                       continue;
+
+               if (open_pack_index(p))
+                       die("cannot open pack index");
+
+               for (i = 0; i < p->num_objects; i++) {
+                       sha1 = nth_packed_object_sha1(p, i);
+                       if (!locate_object_entry(sha1))
+                               if (force_object_loose(sha1, p->mtime))
+                                       die("unable to force loose object");
+               }
+       }
+}
+
 static void get_object_list(int ac, const char **av)
 {
        struct rev_info revs;
@@ -1939,6 +1966,8 @@ static void get_object_list(int ac, const char **av)
 
        if (keep_unreachable)
                add_objects_in_unpacked_packs(&revs);
+       if (unpack_unreachable)
+               loosen_unused_packed_objects(&revs);
 }
 
 static int adjust_perm(const char *path, mode_t mode)
@@ -2073,6 +2102,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                        keep_unreachable = 1;
                        continue;
                }
+               if (!strcmp("--unpack-unreachable", arg)) {
+                       unpack_unreachable = 1;
+                       continue;
+               }
                if (!strcmp("--include-tag", arg)) {
                        include_tag = 1;
                        continue;
@@ -2138,6 +2171,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        if (!pack_to_stdout && thin)
                die("--thin cannot be used to build an indexable pack.");
 
+       if (keep_unreachable && unpack_unreachable)
+               die("--keep-unreachable and --unpack-unreachable are incompatible.");
+
 #ifdef THREADED_DELTA_SEARCH
        if (!delta_search_threads)      /* --threads=0 means autodetect */
                delta_search_threads = online_cpus();
index a0e06ed0768f65241b59cfdec7b0f6b833580c53..607f217b78997605b0cca7a9df8c9876022c41c4 100755 (executable)
@@ -8,7 +8,7 @@ OPTIONS_SPEC="\
 git-repack [options]
 --
 a               pack everything in a single pack
-A               same as -a, and keep unreachable objects too
+A               same as -a, and turn unreachable objects loose
 d               remove redundant packs, and run git-prune-packed
 f               pass --no-reuse-delta to git-pack-objects
 q,quiet         be quiet
@@ -22,7 +22,7 @@ max-pack-size=  maximum size of each packfile
 SUBDIRECTORY_OK='Yes'
 . git-sh-setup
 
-no_update_info= all_into_one= remove_redundant= keep_unreachable=
+no_update_info= all_into_one= remove_redundant= unpack_unreachable=
 local= quiet= no_reuse= extra=
 while test $# != 0
 do
@@ -30,7 +30,7 @@ do
        -n)     no_update_info=t ;;
        -a)     all_into_one=t ;;
        -A)     all_into_one=t
-               keep_unreachable=t ;;
+               unpack_unreachable=--unpack-unreachable ;;
        -d)     remove_redundant=t ;;
        -q)     quiet=-q ;;
        -f)     no_reuse=--no-reuse-object ;;
@@ -78,6 +78,9 @@ case ",$all_into_one," in
        if test -z "$args"
        then
                args='--unpacked --incremental'
+       elif test -n "$unpack_unreachable"
+       then
+               args="$args $unpack_unreachable"
        fi
        ;;
 esac
@@ -127,18 +130,7 @@ then
                  do
                        case " $fullbases " in
                        *" $e "*) ;;
-                       *)
-                               rm -f "$e.idx" "$e.keep"
-                               if test -n "$keep_unreachable" &&
-                                  test -f "$e.pack"
-                               then
-                                       git unpack-objects < "$e.pack" || {
-                                               echo >&2 "Failed unpacking unreachable objects from redundant pack file $e.pack"
-                                               exit 1
-                                       }
-                               fi
-                               rm -f "$e.pack"
-                       ;;
+                       *)      rm -f "$e.pack" "$e.idx" "$e.keep" ;;
                        esac
                  done
                )