process_{tree,blob}: show objects without buffering
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 11 Apr 2009 00:27:58 +0000 (17:27 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 13 Apr 2009 00:28:31 +0000 (17:28 -0700)
Here's a less trivial thing, and slightly more dubious one.

I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.

Now, there are possible downsides to this:

 - the "buffer using object_array" _can_ in theory result in at least
   better I-cache usage (two tight loops rather than one more spread out
   one). I don't think this is a real issue, but in theory..

 - this _does_ change the order of the objects printed. Instead of doing a
   "process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
   over the commits (which puts all the root trees _first_ in the object
   list, this patch just adds them to the list of pending objects, and
   then we'll traverse them in that order (and thus show each root tree
   object together with the objects we discover under it)

   I _think_ the new ordering actually makes more sense, but the object
   ordering is actually a subtle thing when it comes to packing
   efficiency, so any change in order is going to have implications for
   packing. Good or bad, I dunno.

 - There may be some reason why we did it that odd way with the object
   array, that I have simply forgotten.

Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.

This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...

Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.

Or maybe there _used_ to be a reason, and no longer is.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin-pack-objects.c
builtin-rev-list.c
list-objects.c
list-objects.h
revision.c
revision.h
upload-pack.c

index a6adc8c271e20374b86462391016897f1d17a0f3..dde8cc3f01a73f422e6cd025382bf6662804ea11 100644 (file)
@@ -1856,13 +1856,17 @@ static void show_commit(struct commit *commit)
        commit->object.flags |= OBJECT_ADDED;
 }
 
-static void show_object(struct object_array_entry *p)
+static void show_object(struct object *obj, const char *name)
 {
-       add_preferred_base_object(p->name);
-       add_object_entry(p->item->sha1, p->item->type, p->name, 0);
-       p->item->flags |= OBJECT_ADDED;
-       free((char *)p->name);
-       p->name = NULL;
+       add_preferred_base_object(name);
+       add_object_entry(obj->sha1, obj->type, name, 0);
+       obj->flags |= OBJECT_ADDED;
+
+       /*
+        * We will have generated the hash from the name,
+        * but not saved a pointer to it - we can free it
+        */
+       free((char *)name);
 }
 
 static void show_edge(struct commit *commit)
index facaff288dba2789f0637c4554bd130440e2a3da..759e6714ce37abc82921f5c42c0cac5fd53a3222 100644 (file)
@@ -169,27 +169,27 @@ static void finish_commit(struct commit *commit)
        commit->buffer = NULL;
 }
 
-static void finish_object(struct object_array_entry *p)
+static void finish_object(struct object *obj, const char *name)
 {
-       if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
-               die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
+       if (obj->type == OBJ_BLOB && !has_sha1_file(obj->sha1))
+               die("missing blob object '%s'", sha1_to_hex(obj->sha1));
 }
 
-static void show_object(struct object_array_entry *p)
+static void show_object(struct object *obj, const char *name)
 {
        /* An object with name "foo\n0000000..." can be used to
         * confuse downstream "git pack-objects" very badly.
         */
-       const char *ep = strchr(p->name, '\n');
+       const char *ep = strchr(name, '\n');
 
-       finish_object(p);
+       finish_object(obj, name);
        if (ep) {
-               printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
-                      (int) (ep - p->name),
-                      p->name);
+               printf("%s %.*s\n", sha1_to_hex(obj->sha1),
+                      (int) (ep - name),
+                      name);
        }
        else
-               printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name);
+               printf("%s %s\n", sha1_to_hex(obj->sha1), name);
 }
 
 static void show_edge(struct commit *commit)
index dd243c7c662c2f3fe9463b616bb00bed2cc503a7..5a4af62bdc8b939939511ff66151343cda25a45d 100644 (file)
@@ -10,7 +10,7 @@
 
 static void process_blob(struct rev_info *revs,
                         struct blob *blob,
-                        struct object_array *p,
+                        show_object_fn show,
                         struct name_path *path,
                         const char *name)
 {
@@ -23,7 +23,7 @@ static void process_blob(struct rev_info *revs,
        if (obj->flags & (UNINTERESTING | SEEN))
                return;
        obj->flags |= SEEN;
-       add_object(obj, p, path, name);
+       show(obj, path_name(path, name));
 }
 
 /*
@@ -50,7 +50,7 @@ static void process_blob(struct rev_info *revs,
  */
 static void process_gitlink(struct rev_info *revs,
                            const unsigned char *sha1,
-                           struct object_array *p,
+                           show_object_fn show,
                            struct name_path *path,
                            const char *name)
 {
@@ -59,7 +59,7 @@ static void process_gitlink(struct rev_info *revs,
 
 static void process_tree(struct rev_info *revs,
                         struct tree *tree,
-                        struct object_array *p,
+                        show_object_fn show,
                         struct name_path *path,
                         const char *name)
 {
@@ -77,7 +77,7 @@ static void process_tree(struct rev_info *revs,
        if (parse_tree(tree) < 0)
                die("bad tree object %s", sha1_to_hex(obj->sha1));
        obj->flags |= SEEN;
-       add_object(obj, p, path, name);
+       show(obj, path_name(path, name));
        me.up = path;
        me.elem = name;
        me.elem_len = strlen(name);
@@ -88,14 +88,14 @@ static void process_tree(struct rev_info *revs,
                if (S_ISDIR(entry.mode))
                        process_tree(revs,
                                     lookup_tree(entry.sha1),
-                                    p, &me, entry.path);
+                                    show, &me, entry.path);
                else if (S_ISGITLINK(entry.mode))
                        process_gitlink(revs, entry.sha1,
-                                       p, &me, entry.path);
+                                       show, &me, entry.path);
                else
                        process_blob(revs,
                                     lookup_blob(entry.sha1),
-                                    p, &me, entry.path);
+                                    show, &me, entry.path);
        }
        free(tree->buffer);
        tree->buffer = NULL;
@@ -134,16 +134,20 @@ void mark_edges_uninteresting(struct commit_list *list,
        }
 }
 
+static void add_pending_tree(struct rev_info *revs, struct tree *tree)
+{
+       add_pending_object(revs, &tree->object, "");
+}
+
 void traverse_commit_list(struct rev_info *revs,
                          void (*show_commit)(struct commit *),
-                         void (*show_object)(struct object_array_entry *))
+                         void (*show_object)(struct object *, const char *))
 {
        int i;
        struct commit *commit;
-       struct object_array objects = { 0, 0, NULL };
 
        while ((commit = get_revision(revs)) != NULL) {
-               process_tree(revs, commit->tree, &objects, NULL, "");
+               add_pending_tree(revs, commit->tree);
                show_commit(commit);
        }
        for (i = 0; i < revs->pending.nr; i++) {
@@ -154,25 +158,22 @@ void traverse_commit_list(struct rev_info *revs,
                        continue;
                if (obj->type == OBJ_TAG) {
                        obj->flags |= SEEN;
-                       add_object_array(obj, name, &objects);
+                       show_object(obj, name);
                        continue;
                }
                if (obj->type == OBJ_TREE) {
-                       process_tree(revs, (struct tree *)obj, &objects,
+                       process_tree(revs, (struct tree *)obj, show_object,
                                     NULL, name);
                        continue;
                }
                if (obj->type == OBJ_BLOB) {
-                       process_blob(revs, (struct blob *)obj, &objects,
+                       process_blob(revs, (struct blob *)obj, show_object,
                                     NULL, name);
                        continue;
                }
                die("unknown pending object %s (%s)",
                    sha1_to_hex(obj->sha1), name);
        }
-       for (i = 0; i < objects.nr; i++)
-               show_object(&objects.objects[i]);
-       free(objects.objects);
        if (revs->pending.nr) {
                free(revs->pending.objects);
                revs->pending.nr = 0;
index 0f41391ecc00eac324ea76de7654781c4fce094e..13b0dd998ebfd5b870e398c705a19781979cb8e8 100644 (file)
@@ -2,7 +2,7 @@
 #define LIST_OBJECTS_H
 
 typedef void (*show_commit_fn)(struct commit *);
-typedef void (*show_object_fn)(struct object_array_entry *);
+typedef void (*show_object_fn)(struct object *, const char *);
 typedef void (*show_edge_fn)(struct commit *);
 
 void traverse_commit_list(struct rev_info *revs, show_commit_fn, show_object_fn);
index 45fd7a366055d254529c53b21a06c6340427fa49..f95104b08081b0827ef5183f8b90f387665f0fb2 100644 (file)
@@ -14,7 +14,7 @@
 
 volatile show_early_output_fn_t show_early_output;
 
-static char *path_name(struct name_path *path, const char *name)
+char *path_name(struct name_path *path, const char *name)
 {
        struct name_path *p;
        char *n, *m;
index 91f194478bb91d381ab2b2440215144d8bb8d18d..6fcfb8ce0c956a07a77be73f04375567d7f0cd37 100644 (file)
@@ -141,6 +141,8 @@ struct name_path {
        const char *elem;
 };
 
+char *path_name(struct name_path *path, const char *name);
+
 extern void add_object(struct object *obj,
                       struct object_array *p,
                       struct name_path *path,
index e5adbc011e0ab71eeb06a42c4bd40cbea0bf3fa2..bdbd67bc1d0198e54c8b6607d65ef98e72bde03f 100644 (file)
@@ -78,20 +78,20 @@ static void show_commit(struct commit *commit)
        commit->buffer = NULL;
 }
 
-static void show_object(struct object_array_entry *p)
+static void show_object(struct object *obj, const char *name)
 {
        /* An object with name "foo\n0000000..." can be used to
         * confuse downstream git-pack-objects very badly.
         */
-       const char *ep = strchr(p->name, '\n');
+       const char *ep = strchr(name, '\n');
        if (ep) {
-               fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(p->item->sha1),
-                      (int) (ep - p->name),
-                      p->name);
+               fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(obj->sha1),
+                      (int) (ep - name),
+                      name);
        }
        else
                fprintf(pack_pipe, "%s %s\n",
-                               sha1_to_hex(p->item->sha1), p->name);
+                               sha1_to_hex(obj->sha1), name);
 }
 
 static void show_edge(struct commit *commit)