git-diff --numstat -z: make it machine readable
authorJunio C Hamano <gitster@pobox.com>
Wed, 12 Dec 2007 07:46:30 +0000 (23:46 -0800)
committerJunio C Hamano <gitster@pobox.com>
Wed, 12 Dec 2007 18:59:22 +0000 (10:59 -0800)
The "-z" format is all about machine parsability, but showing renamed
paths as "common/{a => b}/suffix" makes it impossible.  The scripts would
never have successfully parsed "--numstat -z -M" in the old format.

This fixes the output format in a (hopefully minimally) backward
incompatible way.

 * The output without -z is not changed.  This has given a good way for
   humans to view added and deleted lines separately, and showing the
   path in combined, shorter way would preserve readability.

 * The output with -z is unchanged for paths that do not involve renames.
   Existing scripts that do not pass -M/-C are not affected at all.

 * The output with -z for a renamed path is shown in a format that can
   easily be distinguished from an unrenamed path.

This is based on Jakub Narebski's patch.  Bugs and documentation typos
are mine.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/diff-format.txt
diff.c

index 2c3a4c433b2a6d2b0846243a4f1dbebeed45236e..400cbb3b1c120b93278472678ee7bdb87a74f95b 100644 (file)
@@ -84,3 +84,64 @@ all parents.
 
 
 include::diff-generate-patch.txt[]
+
+
+other diff formats
+------------------
+
+The `--summary` option describes newly added, deleted, renamed and
+copied files.  The `--stat` option adds diffstat(1) graph to the
+output.  These options can be combined with other options, such as
+`-p`, and are meant for human consumption.
+
+When showing a change that involves a rename or a copy, `--stat` output
+formats the pathnames compactly by combining common prefix and suffix of
+the pathnames.  For example, a change that moves `arch/i386/Makefile` to
+`arch/x86/Makefile` while modifying 4 lines will be shown like this:
+
+------------------------------------
+arch/{i386 => x86}/Makefile    |   4 +--
+------------------------------------
+
+The `--numstat` option gives the diffstat(1) information but is designed
+for easier machine consumption.  An entry in `--numstat` output looks
+like this:
+
+----------------------------------------
+1      2       README
+3      1       arch/{i386 => x86}/Makefile
+----------------------------------------
+
+That is, from left to right:
+
+. the number of added lines;
+. a tab;
+. the number of deleted lines;
+. a tab;
+. pathname (possibly with rename/copy information);
+. a newline.
+
+When `-z` output option is in effect, the output is formatted this way:
+
+----------------------------------------
+1      2       README NUL
+3      1       NUL arch/i386/Makefile NUL arch/x86/Makefile NUL
+----------------------------------------
+
+That is:
+
+. the number of added lines;
+. a tab;
+. the number of deleted lines;
+. a tab;
+. a NUL (only exists if renamed/copied);
+. pathname in preimage;
+. a NUL (only exists if renamed/copied);
+. pathname in postimage (only exists if renamed/copied);
+. a NUL.
+
+The extra `NUL` before the preimage path in renamed case is to allow
+scripts that read the output to tell if the current record being read is
+a single-path record or a rename/copy record without reading ahead.
+After reading added and deleted lines, reading up to `NUL` would yield
+the pathname, but if that is `NUL`, the record will show two paths.
diff --git a/diff.c b/diff.c
index e74a30399eed872b3fb09015f84787103463b454..d97ebc501af20929a07e754cd74bea06c7114811 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -734,7 +734,9 @@ struct diffstat_t {
        int nr;
        int alloc;
        struct diffstat_file {
+               char *from_name;
                char *name;
+               char *print_name;
                unsigned is_unmerged:1;
                unsigned is_binary:1;
                unsigned is_renamed:1;
@@ -755,11 +757,14 @@ static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
        }
        diffstat->files[diffstat->nr++] = x;
        if (name_b) {
-               x->name = pprint_rename(name_a, name_b);
+               x->from_name = xstrdup(name_a);
+               x->name = xstrdup(name_b);
                x->is_renamed = 1;
        }
-       else
+       else {
+               x->from_name = NULL;
                x->name = xstrdup(name_a);
+       }
        return x;
 }
 
@@ -803,6 +808,28 @@ static void show_graph(char ch, int cnt, const char *set, const char *reset)
        printf("%s", reset);
 }
 
+static void fill_print_name(struct diffstat_file *file)
+{
+       char *pname;
+
+       if (file->print_name)
+               return;
+
+       if (!file->is_renamed) {
+               struct strbuf buf;
+               strbuf_init(&buf, 0);
+               if (quote_c_style(file->name, &buf, NULL, 0)) {
+                       pname = strbuf_detach(&buf, NULL);
+               } else {
+                       pname = file->name;
+                       strbuf_release(&buf);
+               }
+       } else {
+               pname = pprint_rename(file->from_name, file->name);
+       }
+       file->print_name = pname;
+}
+
 static void show_stats(struct diffstat_t* data, struct diff_options *options)
 {
        int i, len, add, del, total, adds = 0, dels = 0;
@@ -836,19 +863,8 @@ static void show_stats(struct diffstat_t* data, struct diff_options *options)
        for (i = 0; i < data->nr; i++) {
                struct diffstat_file *file = data->files[i];
                int change = file->added + file->deleted;
-
-               if (!file->is_renamed) {  /* renames are already quoted by pprint_rename */
-                       struct strbuf buf;
-                       strbuf_init(&buf, 0);
-                       if (quote_c_style(file->name, &buf, NULL, 0)) {
-                               free(file->name);
-                               file->name = strbuf_detach(&buf, NULL);
-                       } else {
-                               strbuf_release(&buf);
-                       }
-               }
-
-               len = strlen(file->name);
+               fill_print_name(file);
+               len = strlen(file->print_name);
                if (max_len < len)
                        max_len = len;
 
@@ -873,7 +889,7 @@ static void show_stats(struct diffstat_t* data, struct diff_options *options)
 
        for (i = 0; i < data->nr; i++) {
                const char *prefix = "";
-               char *name = data->files[i]->name;
+               char *name = data->files[i]->print_name;
                int added = data->files[i]->added;
                int deleted = data->files[i]->deleted;
                int name_len;
@@ -901,17 +917,17 @@ static void show_stats(struct diffstat_t* data, struct diff_options *options)
                        printf("%s%d%s", add_c, added, reset);
                        printf(" bytes");
                        printf("\n");
-                       goto free_diffstat_file;
+                       continue;
                }
                else if (data->files[i]->is_unmerged) {
                        show_name(prefix, name, len, reset, set);
                        printf("  Unmerged\n");
-                       goto free_diffstat_file;
+                       continue;
                }
                else if (!data->files[i]->is_renamed &&
                         (added + deleted == 0)) {
                        total_files--;
-                       goto free_diffstat_file;
+                       continue;
                }
 
                /*
@@ -933,11 +949,7 @@ static void show_stats(struct diffstat_t* data, struct diff_options *options)
                show_graph('+', add, add_c, reset);
                show_graph('-', del, del_c, reset);
                putchar('\n');
-       free_diffstat_file:
-               free(data->files[i]->name);
-               free(data->files[i]);
        }
-       free(data->files);
        printf("%s %d files changed, %d insertions(+), %d deletions(-)%s\n",
               set, total_files, adds, dels, reset);
 }
@@ -962,11 +974,7 @@ static void show_shortstats(struct diffstat_t* data)
                                dels += deleted;
                        }
                }
-               free(data->files[i]->name);
-               free(data->files[i]);
        }
-       free(data->files);
-
        printf(" %d files changed, %d insertions(+), %d deletions(-)\n",
               total_files, adds, dels);
 }
@@ -975,6 +983,9 @@ static void show_numstat(struct diffstat_t* data, struct diff_options *options)
 {
        int i;
 
+       if (data->nr == 0)
+               return;
+
        for (i = 0; i < data->nr; i++) {
                struct diffstat_file *file = data->files[i];
 
@@ -982,15 +993,39 @@ static void show_numstat(struct diffstat_t* data, struct diff_options *options)
                        printf("-\t-\t");
                else
                        printf("%d\t%d\t", file->added, file->deleted);
-               if (!file->is_renamed) {
-                       write_name_quoted(file->name, stdout, options->line_termination);
+               if (options->line_termination) {
+                       fill_print_name(file);
+                       if (!file->is_renamed)
+                               write_name_quoted(file->name, stdout,
+                                                 options->line_termination);
+                       else {
+                               fputs(file->print_name, stdout);
+                               putchar(options->line_termination);
+                       }
                } else {
-                       fputs(file->name, stdout);
-                       putchar(options->line_termination);
+                       if (file->is_renamed) {
+                               putchar('\0');
+                               write_name_quoted(file->from_name, stdout, '\0');
+                       }
+                       write_name_quoted(file->name, stdout, '\0');
                }
        }
 }
 
+static void free_diffstat_info(struct diffstat_t *diffstat)
+{
+       int i;
+       for (i = 0; i < diffstat->nr; i++) {
+               struct diffstat_file *f = diffstat->files[i];
+               if (f->name != f->print_name)
+                       free(f->print_name);
+               free(f->name);
+               free(f->from_name);
+               free(f);
+       }
+       free(diffstat->files);
+}
+
 struct checkdiff_t {
        struct xdiff_emit_state xm;
        const char *filename;
@@ -2943,8 +2978,9 @@ void diff_flush(struct diff_options *options)
                        show_numstat(&diffstat, options);
                if (output_format & DIFF_FORMAT_DIFFSTAT)
                        show_stats(&diffstat, options);
-               else if (output_format & DIFF_FORMAT_SHORTSTAT)
+               if (output_format & DIFF_FORMAT_SHORTSTAT)
                        show_shortstats(&diffstat);
+               free_diffstat_info(&diffstat);
                separator++;
        }