Unify whitespace checking
authorWincent Colaiuta <win@wincent.com>
Thu, 13 Dec 2007 13:32:29 +0000 (14:32 +0100)
committerJunio C Hamano <gitster@pobox.com>
Fri, 14 Dec 2007 07:43:58 +0000 (23:43 -0800)
This commit unifies three separate places where whitespace checking was
performed:

 - the whitespace checking previously done in builtin-apply.c is
extracted into a function in ws.c

 - the equivalent logic in "git diff" is removed

 - the emit_line_with_ws() function is also removed because that also
rechecks the whitespace, and its functionality is rolled into ws.c

The new function is called check_and_emit_line() and it does two things:
checks a line for whitespace errors and optionally emits it. The checking
is based on lines of content rather than patch lines (in other words, the
caller must strip the leading "+" or "-"); this was suggested by Junio on
the mailing list to allow for a future extension to "git show" to display
whitespace errors in blobs.

At the same time we teach it to report all classes of whitespace errors
found for a given line rather than reporting only the first found error.

Signed-off-by: Wincent Colaiuta <win@wincent.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin-apply.c
cache.h
diff.c
t/t4015-diff-whitespace.sh
ws.c

index f2e9a332ca0e148366f49b2825ed8826576f1bad..ab393f32e9740c00b5e8fb40076516de7924c16f 100644 (file)
@@ -900,56 +900,22 @@ static int find_header(char *line, unsigned long size, int *hdrsize, struct patc
 
 static void check_whitespace(const char *line, int len, unsigned ws_rule)
 {
-       const char *err = "Adds trailing whitespace";
-       int seen_space = 0;
-       int i;
-
-       /*
-        * We know len is at least two, since we have a '+' and we
-        * checked that the last character was a '\n' before calling
-        * this function.  That is, an addition of an empty line would
-        * check the '+' here.  Sneaky...
-        */
-       if ((ws_rule & WS_TRAILING_SPACE) && isspace(line[len-2]))
-               goto error;
-
-       /*
-        * Make sure that there is no space followed by a tab in
-        * indentation.
-        */
-       if (ws_rule & WS_SPACE_BEFORE_TAB) {
-               err = "Space in indent is followed by a tab";
-               for (i = 1; i < len; i++) {
-                       if (line[i] == '\t') {
-                               if (seen_space)
-                                       goto error;
-                       }
-                       else if (line[i] == ' ')
-                               seen_space = 1;
-                       else
-                               break;
-               }
-       }
-
-       /*
-        * Make sure that the indentation does not contain more than
-        * 8 spaces.
-        */
-       if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
-           (8 < len) && !strncmp("+        ", line, 9)) {
-               err = "Indent more than 8 places with spaces";
-               goto error;
-       }
-       return;
+       char *err;
+       unsigned result = check_and_emit_line(line + 1, len - 1, ws_rule,
+           NULL, NULL, NULL, NULL);
+       if (!result)
+               return;
 
- error:
        whitespace_error++;
        if (squelch_whitespace_errors &&
            squelch_whitespace_errors < whitespace_error)
                ;
-       else
+       else {
+               err = whitespace_error_string(result);
                fprintf(stderr, "%s.\n%s:%d:%.*s\n",
-                       err, patch_input_file, linenr, len-2, line+1);
+                    err, patch_input_file, linenr, len - 2, line + 1);
+               free(err);
+       }
 }
 
 /*
diff --git a/cache.h b/cache.h
index 27d90fe543b97eac91f61bf96f170c32d735e44d..39331c28be15605f4b943cea22a7a8caa5b501c2 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -655,6 +655,10 @@ void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, i
 extern unsigned whitespace_rule_cfg;
 extern unsigned whitespace_rule(const char *);
 extern unsigned parse_whitespace_rule(const char *);
+extern unsigned check_and_emit_line(const char *line, int len, unsigned ws_rule,
+    FILE *stream, const char *set,
+    const char *reset, const char *ws);
+extern char *whitespace_error_string(unsigned ws);
 
 /* ls-files */
 int pathspec_match(const char **spec, char *matched, const char *filename, int skiplen);
diff --git a/diff.c b/diff.c
index 3e46ff8a75c5840a5f3374aa76e93cfb468c86f1..577c773c3e9919015a15563669ed0e6f73fb9d3f 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -486,88 +486,9 @@ const char *diff_get_color(int diff_use_color, enum color_diff ix)
 
 static void emit_line(const char *set, const char *reset, const char *line, int len)
 {
-       if (len > 0 && line[len-1] == '\n')
-               len--;
        fputs(set, stdout);
        fwrite(line, len, 1, stdout);
-       puts(reset);
-}
-
-static void emit_line_with_ws(int nparents,
-                             const char *set, const char *reset, const char *ws,
-                             const char *line, int len, unsigned ws_rule)
-{
-       int col0 = nparents;
-       int last_tab_in_indent = -1;
-       int last_space_in_indent = -1;
-       int i;
-       int tail = len;
-       int need_highlight_leading_space = 0;
-       /*
-        * The line is a newly added line.  Does it have funny leading
-        * whitespaces?  In indent, SP should never precede a TAB.  In
-        * addition, under "indent with non tab" rule, there should not
-        * be more than 8 consecutive spaces.
-        */
-       for (i = col0; i < len; i++) {
-               if (line[i] == '\t') {
-                       last_tab_in_indent = i;
-                       if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
-                           0 <= last_space_in_indent)
-                               need_highlight_leading_space = 1;
-               }
-               else if (line[i] == ' ')
-                       last_space_in_indent = i;
-               else
-                       break;
-       }
-       if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
-           0 <= last_space_in_indent &&
-           last_tab_in_indent < 0 &&
-           8 <= (i - col0)) {
-               last_tab_in_indent = i;
-               need_highlight_leading_space = 1;
-       }
-       fputs(set, stdout);
-       fwrite(line, col0, 1, stdout);
        fputs(reset, stdout);
-       if (((i == len) || line[i] == '\n') && i != col0) {
-               /* The whole line was indent */
-               emit_line(ws, reset, line + col0, len - col0);
-               return;
-       }
-       i = col0;
-       if (need_highlight_leading_space) {
-               while (i < last_tab_in_indent) {
-                       if (line[i] == ' ') {
-                               fputs(ws, stdout);
-                               putchar(' ');
-                               fputs(reset, stdout);
-                       }
-                       else
-                               putchar(line[i]);
-                       i++;
-               }
-       }
-       tail = len - 1;
-       if (line[tail] == '\n' && i < tail)
-               tail--;
-       if (ws_rule & WS_TRAILING_SPACE) {
-               while (i < tail) {
-                       if (!isspace(line[tail]))
-                               break;
-                       tail--;
-               }
-       }
-       if ((i < tail && line[tail + 1] != '\n')) {
-               /* This has whitespace between tail+1..len */
-               fputs(set, stdout);
-               fwrite(line + i, tail - i + 1, 1, stdout);
-               fputs(reset, stdout);
-               emit_line(ws, reset, line + tail + 1, len - tail - 1);
-       }
-       else
-               emit_line(set, reset, line + i, len - i);
 }
 
 static void emit_add_line(const char *reset, struct emit_callback *ecbdata, const char *line, int len)
@@ -577,9 +498,13 @@ static void emit_add_line(const char *reset, struct emit_callback *ecbdata, cons
 
        if (!*ws)
                emit_line(set, reset, line, len);
-       else
-               emit_line_with_ws(ecbdata->nparents, set, reset, ws,
-                                 line, len, ecbdata->ws_rule);
+       else {
+               /* Emit just the prefix, then the rest. */
+               emit_line(set, reset, line, ecbdata->nparents);
+               (void)check_and_emit_line(line + ecbdata->nparents,
+                   len - ecbdata->nparents, ecbdata->ws_rule,
+                   stdout, set, reset, ws);
+       }
 }
 
 static void fn_out_consume(void *priv, char *line, unsigned long len)
@@ -1040,45 +965,20 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
        const char *ws = diff_get_color(data->color_diff, DIFF_WHITESPACE);
        const char *reset = diff_get_color(data->color_diff, DIFF_RESET);
        const char *set = diff_get_color(data->color_diff, DIFF_FILE_NEW);
+       char *err;
 
        if (line[0] == '+') {
-               int i, spaces = 0, space_before_tab = 0, white_space_at_end = 0;
-
-               /* check space before tab */
-               for (i = 1; i < len; i++) {
-                       if (line[i] == ' ')
-                               spaces++;
-                       else if (line[i] == '\t') {
-                               if (spaces) {
-                                       space_before_tab = 1;
-                                       break;
-                               }
-                       }
-                       else
-                               break;
-               }
-
-               /* check whitespace at line end */
-               if (line[len - 1] == '\n')
-                       len--;
-               if (isspace(line[len - 1]))
-                       white_space_at_end = 1;
-
-               if (space_before_tab || white_space_at_end) {
-                       data->status = 1;
-                       printf("%s:%d: %s", data->filename, data->lineno, ws);
-                       if (space_before_tab) {
-                               printf("space before tab");
-                               if (white_space_at_end)
-                                       putchar(',');
-                       }
-                       if (white_space_at_end)
-                               printf("whitespace at end");
-                       printf(":%s ", reset);
-                       emit_line_with_ws(1, set, reset, ws, line, len,
-                                         data->ws_rule);
-               }
-
+               data->status = check_and_emit_line(line + 1, len - 1,
+                   data->ws_rule, NULL, NULL, NULL, NULL);
+               if (!data->status)
+                       return;
+               err = whitespace_error_string(data->status);
+               printf("%s:%d: %s%s:%s ", data->filename, data->lineno,
+                   ws, err, reset);
+               free(err);
+               emit_line(set, reset, line, 1);
+               (void)check_and_emit_line(line + 1, len - 1, data->ws_rule,
+                   stdout, set, reset, ws);
                data->lineno++;
        } else if (line[0] == ' ')
                data->lineno++;
index 757a27abdbbffe27637dc68126d59fc1ea128119..595cd60623e02159c726cfca933dc99705d90d41 100755 (executable)
@@ -121,7 +121,7 @@ test_expect_success 'check mixed spaces and tabs in indent' '
 
        # This is indented with SP HT SP.
        echo "   foo();" > x &&
-       git diff --check | grep "space before tab"
+       git diff --check | grep "Space in indent is followed by a tab"
 
 '
 
diff --git a/ws.c b/ws.c
index 52c10caf3523b877ef7fa77f7af3c64de3055b4f..d7d1522f8a177639d3269e5e9a83be39a14a7d89 100644 (file)
--- a/ws.c
+++ b/ws.c
@@ -94,3 +94,108 @@ unsigned whitespace_rule(const char *pathname)
                return whitespace_rule_cfg;
        }
 }
+
+/* The returned string should be freed by the caller. */
+char *whitespace_error_string(unsigned ws)
+{
+       struct strbuf err;
+       strbuf_init(&err, 0);
+       if (ws & WS_TRAILING_SPACE)
+               strbuf_addstr(&err, "Adds trailing whitespace");
+       if (ws & WS_SPACE_BEFORE_TAB) {
+               if (err.len)
+                       strbuf_addstr(&err, ", ");
+               strbuf_addstr(&err, "Space in indent is followed by a tab");
+       }
+       if (ws & WS_INDENT_WITH_NON_TAB) {
+               if (err.len)
+                       strbuf_addstr(&err, ", ");
+               strbuf_addstr(&err, "Indent more than 8 places with spaces");
+       }
+       return strbuf_detach(&err, NULL);
+}
+
+/* If stream is non-NULL, emits the line after checking. */
+unsigned check_and_emit_line(const char *line, int len, unsigned ws_rule,
+                            FILE *stream, const char *set,
+                            const char *reset, const char *ws)
+{
+       unsigned result = 0;
+       int leading_space = -1;
+       int trailing_whitespace = -1;
+       int trailing_newline = 0;
+       int i;
+
+       /* Logic is simpler if we temporarily ignore the trailing newline. */
+       if (len > 0 && line[len - 1] == '\n') {
+               trailing_newline = 1;
+               len--;
+       }
+
+       /* Check for trailing whitespace. */
+       if (ws_rule & WS_TRAILING_SPACE) {
+               for (i = len - 1; i >= 0; i--) {
+                       if (isspace(line[i])) {
+                               trailing_whitespace = i;
+                               result |= WS_TRAILING_SPACE;
+                       }
+                       else
+                               break;
+               }
+       }
+
+       /* Check for space before tab in initial indent. */
+       for (i = 0; i < len; i++) {
+               if (line[i] == '\t') {
+                       if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
+                           (leading_space != -1))
+                               result |= WS_SPACE_BEFORE_TAB;
+                       break;
+               }
+               else if (line[i] == ' ')
+                       leading_space = i;
+               else
+                       break;
+       }
+
+       /* Check for indent using non-tab. */
+       if ((ws_rule & WS_INDENT_WITH_NON_TAB) && leading_space >= 8)
+               result |= WS_INDENT_WITH_NON_TAB;
+
+       if (stream) {
+               /* Highlight errors in leading whitespace. */
+               if ((result & WS_SPACE_BEFORE_TAB) ||
+                   (result & WS_INDENT_WITH_NON_TAB)) {
+                       fputs(ws, stream);
+                       fwrite(line, leading_space + 1, 1, stream);
+                       fputs(reset, stream);
+                       leading_space++;
+               }
+               else
+                       leading_space = 0;
+
+               /* Now the rest of the line starts at leading_space.
+                * The non-highlighted part ends at trailing_whitespace. */
+               if (trailing_whitespace == -1)
+                       trailing_whitespace = len;
+
+               /* Emit non-highlighted (middle) segment. */
+               if (trailing_whitespace - leading_space > 0) {
+                       fputs(set, stream);
+                       fwrite(line + leading_space,
+                           trailing_whitespace - leading_space, 1, stream);
+                       fputs(reset, stream);
+               }
+
+               /* Highlight errors in trailing whitespace. */
+               if (trailing_whitespace != len) {
+                       fputs(ws, stream);
+                       fwrite(line + trailing_whitespace,
+                           len - trailing_whitespace, 1, stream);
+                       fputs(reset, stream);
+               }
+               if (trailing_newline)
+                       fputc('\n', stream);
+       }
+       return result;
+}