git-diff: squelch "empty" diffs
author Junio C Hamano <gitster@pobox.com>
Fri, 3 Aug 2007 20:33:31 +0000 (13:33 -0700)
committer Junio C Hamano <gitster@pobox.com>
Tue, 14 Aug 2007 08:55:00 +0000 (01:55 -0700)
When you start editing a working tree file but your edit later ends
up identical to the original (this can also happen when you run a
wholesale regexp replace with something like "perl -i" that does not
actually modify many of the paths), "git diff" between the index and the
working tree outputs many "empty" diffs that show "diff --git" headers
and nothing else, because these paths are stat-dirty.  While it was a
way to warn the user that the earlier action of the user made the index
ineffective as an optimization mechanism, it was felt to be too loud a
warning even for experienced users, and it also confused people new to
git.

This replaces the "empty" diffs with a single warning message at the
end.  Having many such paths hurts performance, and you can run
"git-update-index --refresh" to update the lstat(2) information recorded
in the index in such a case.  "git-status" does so as a side effect, and
that is more familiar to the end-user, so we recommend it to them.

The change affects only "git diff" that outputs patch text, because that
is where the annoyance of too many "empty" diffs is most strongly felt,
and because the warning message can be safely ignored by downstream
tools without getting mistaken as part of the patch.  For the low-level
"git diff-files" and "git diff-index", the traditional behaviour is
retained.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin-diff.c
diff.c
diff.h

index 8dc17b0dd7af33f83aebeae88d4e616e76da52eb..6ed7b6842ec533902427f2d47790d57aa5082365 100644 (file)
@@ -222,6 +222,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
        prefix = setup_git_directory_gently(&nongit);
        git_config(git_diff_ui_config);
        init_revisions(&rev, prefix);
+       rev.diffopt.skip_stat_unmatch = 1;
 
        if (!setup_diff_no_index(&rev, argc, argv, nongit, prefix))
                argc = 0;
@@ -344,5 +345,12 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
                                             ent, ents);
        if (rev.diffopt.exit_with_status)
                result = rev.diffopt.has_changes;
+
+       if ((rev.diffopt.output_format & DIFF_FORMAT_PATCH)
+           && (1 < rev.diffopt.skip_stat_unmatch))
+               printf("Warning: %d path%s touched but unmodified. "
+                      "Consider running git-status.\n",
+                      rev.diffopt.skip_stat_unmatch - 1,
+                      rev.diffopt.skip_stat_unmatch == 2 ? "" : "s");
        return result;
 }
diff --git a/diff.c b/diff.c
index a5fc56bdad5d96b2a4e0e1140206b8367a257867..f884de77ac2a0175e317628e25ec9ee3aaa902f7 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -3143,11 +3143,63 @@ static void diffcore_apply_filter(const char *filter)
        *q = outq;
 }
 
+static void diffcore_skip_stat_unmatch(struct diff_options *diffopt)
+{
+       int i;
+       struct diff_queue_struct *q = &diff_queued_diff;
+       struct diff_queue_struct outq;
+       outq.queue = NULL;
+       outq.nr = outq.alloc = 0;
+
+       for (i = 0; i < q->nr; i++) {
+               struct diff_filepair *p = q->queue[i];
+
+               /*
+                * 1. Entries that come from stat info dirtiness
+                *    always have both sides (iow, not create/delete),
+                *    one side of the object name is unknown, with
+                *    the same mode and size.  Keep the ones that
+                *    do not match these criteria.  They have real
+                *    differences.
+                *
+                * 2. At this point, the file is known to be modified,
+                *    with the same mode and size, and the object
+                *    name of one side is unknown.  Need to compare
+                *    the contents to see if they are identical.
+                */
+               if (!DIFF_FILE_VALID(p->one) || /* (1) */
+                   !DIFF_FILE_VALID(p->two) ||
+                   (p->one->sha1_valid && p->two->sha1_valid) ||
+                   (p->one->mode != p->two->mode) ||
+                   diff_populate_filespec(p->one, 1) ||
+                   diff_populate_filespec(p->two, 1) ||
+                   (p->one->size != p->two->size) ||
+
+                   diff_populate_filespec(p->one, 0) || /* (2) */
+                   diff_populate_filespec(p->two, 0) ||
+                   memcmp(p->one->data, p->two->data, p->one->size))
+                       diff_q(&outq, p);
+               else {
+                       /*
+                        * The caller can subtract 1 from skip_stat_unmatch
+                        * to determine how many paths were dirty only
+                        * due to stat info mismatch.
+                        */
+                       diffopt->skip_stat_unmatch++;
+                       diff_free_filepair(p);
+               }
+       }
+       free(q->queue); /* every pair was either requeued or freed above */
+       *q = outq; /* the filtered queue replaces the original one */
+}
+
 void diffcore_std(struct diff_options *options)
 {
        if (options->quiet)
                return;
 
+       if (options->skip_stat_unmatch && !options->find_copies_harder)
+               diffcore_skip_stat_unmatch(options);
        if (options->break_opt != -1)
                diffcore_break(options->break_opt);
        if (options->detect_rename)
diff --git a/diff.h b/diff.h
index 9fd6d447d4c62e158c941a736e547c7e516cdd93..de21f8ecd045a299907e6f6d873ad4c120ca30da 100644 (file)
--- a/diff.h
+++ b/diff.h
@@ -65,6 +65,7 @@ struct diff_options {
        int context;
        int break_opt;
        int detect_rename;
+       int skip_stat_unmatch;
        int line_termination;
        int output_format;
        int pickaxe_opts;