git-pickaxe: swap comparison loop used for -C
authorJunio C Hamano <junkio@cox.net>
Sat, 21 Oct 2006 10:30:53 +0000 (03:30 -0700)
committerJunio C Hamano <junkio@cox.net>
Sat, 21 Oct 2006 10:30:53 +0000 (03:30 -0700)
When assigning blames for code movements across file boundaries,
we used to iterate over blame entries (i.e. groups of lines to
be blamed) in the outer loop and compared each entry with paths
in the parent commit in an inner loop.  This meant that we
opened the blob data from each path number of times.

Reorganize the loop so that we read the same path only once, and
compare it against all relevant blame entries.

This should perform better, but seems to give mixed results,
though.

Signed-off-by: Junio C Hamano <junkio@cox.net>
builtin-pickaxe.c

index cf474b0d11e1508b7f885f3886abfdeaeee30b0c..663b96dac55cff41cd5ed223568508297a9f8788 100644 (file)
@@ -737,11 +737,27 @@ static int find_copy_in_parent(struct scoreboard *sb,
        struct diff_options diff_opts;
        const char *paths[1];
        struct blame_entry *e;
-       int i;
+       int i, j;
+       struct blame_list {
+               struct blame_entry *ent;
+               struct blame_entry split[3];
+       } *blame_list;
+       int num_ents;
 
-       if (find_last_in_target(sb, target) < 0)
+       /* Count the number of entries the target is suspected for,
+        * and prepare a list of entry and the best split.
+        */
+       for (e = sb->ent, num_ents = 0; e; e = e->next)
+               if (!e->guilty && !cmp_suspect(e->suspect, target))
+                       num_ents++;
+       if (!num_ents)
                return 1; /* nothing remains for this target */
 
+       blame_list = xcalloc(num_ents, sizeof(struct blame_list));
+       for (e = sb->ent, i = 0; e; e = e->next)
+               if (!e->guilty && !cmp_suspect(e->suspect, target))
+                       blame_list[i++].ent = e;
+
        diff_setup(&diff_opts);
        diff_opts.recursive = 1;
        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
@@ -761,52 +777,45 @@ static int find_copy_in_parent(struct scoreboard *sb,
                       "", &diff_opts);
        diffcore_std(&diff_opts);
 
-
-       /*
-        * NEEDSWORK: This loop is wasteful in that it opens the same
-        * blob in the parent number of times.  We should swap the
-        * loop inside out, which would require keeping track of
-        * "best blame so far" for blame entries that the current
-        * "target" is being suspected.
-        */
-
-       for (e = sb->ent; e; e = e->next) {
-               struct blame_entry split[3];
-               if (e->guilty || cmp_suspect(e->suspect, target))
+       for (i = 0; i < diff_queued_diff.nr; i++) {
+               struct diff_filepair *p = diff_queued_diff.queue[i];
+               struct origin *norigin;
+               mmfile_t file_p;
+               char type[10];
+               char *blob;
+               struct blame_entry this[3];
+
+               if (!DIFF_FILE_VALID(p->one))
+                       continue; /* does not exist in parent */
+               if (porigin && !strcmp(p->one->path, porigin->path))
+                       /* find_move already dealt with this path */
                        continue;
 
-               memset(split, 0, sizeof(split));
-               for (i = 0; i < diff_queued_diff.nr; i++) {
-                       struct diff_filepair *p = diff_queued_diff.queue[i];
-                       struct origin *norigin;
-                       mmfile_t file_p;
-                       char type[10];
-                       char *blob;
-                       struct blame_entry this[3];
+               norigin = get_origin(sb, parent, p->one->path);
+               hashcpy(norigin->blob_sha1, p->one->sha1);
+               blob = read_sha1_file(norigin->blob_sha1, type,
+                                     (unsigned long *) &file_p.size);
+               file_p.ptr = blob;
+               if (!file_p.ptr)
+                       continue;
 
-                       if (!DIFF_FILE_VALID(p->one))
-                               continue; /* does not exist in parent */
-                       if (porigin && !strcmp(p->one->path, porigin->path))
-                               /* find_move already dealt with this path */
-                               continue;
-                       norigin = get_origin(sb, parent, p->one->path);
-                       hashcpy(norigin->blob_sha1, p->one->sha1);
-                       blob = read_sha1_file(norigin->blob_sha1, type,
-                                             (unsigned long *) &file_p.size);
-                       file_p.ptr = blob;
-                       if (!file_p.ptr) {
-                               free(blob);
-                               continue;
-                       }
-                       find_copy_in_blob(sb, e, norigin, this, &file_p);
-                       copy_split_if_better(sb, split, this);
-                       free(blob);
+               for (j = 0; j < num_ents; j++) {
+                       find_copy_in_blob(sb, blame_list[j].ent, norigin,
+                                         this, &file_p);
+                       copy_split_if_better(sb, blame_list[j].split,
+                                            this);
                }
+               free(blob);
+       }
+       diff_flush(&diff_opts);
+
+       for (j = 0; j < num_ents; j++) {
+               struct blame_entry *split = blame_list[j].split;
                if (split[1].suspect &&
                    blame_copy_score < ent_score(sb, &split[1]))
-                       split_blame(sb, split, e);
+                       split_blame(sb, split, blame_list[j].ent);
        }
-       diff_flush(&diff_opts);
+       free(blame_list);
 
        return 0;
 }