color-words: make regex configurable via attributes
authorThomas Rast <trast@student.ethz.ch>
Sat, 17 Jan 2009 16:29:48 +0000 (17:29 +0100)
committerJunio C Hamano <gitster@pobox.com>
Sat, 17 Jan 2009 18:44:21 +0000 (10:44 -0800)
Make the --color-words splitting regular expression configurable via
the diff driver's 'wordregex' attribute.  The user can then set the
driver on a file in .gitattributes.  If a regex is given on the
command line, it overrides the driver's setting.

We also provide built-in regexes for the languages that already had
funcname patterns, and add an appropriate diff driver entry for C/++.
(The patterns are designed to run UTF-8 sequences into a single chunk
to make sure they remain readable.)

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/diff-options.txt
Documentation/gitattributes.txt
diff.c
t/t4034-diff-words.sh
userdiff.c
userdiff.h

index 8689a92d8d10cd98ba7d0d5381e7cc855971413e..1edb82e8e1c0dd9ebb4f246bd4d7f1f68477ea02 100644 (file)
@@ -102,6 +102,10 @@ differences.  You may want to append `|[^[:space:]]` to your regular
 expression to make sure that it matches all non-whitespace characters.
 A match that contains a newline is silently truncated(!) at the
 newline.
++
+The regex can also be set via a diff driver, see
+linkgit:gitattributes[1]; giving it explicitly overrides any diff
+driver setting.
 
 --no-renames::
        Turn off rename detection, even when the configuration
index 8af22eccac85492416d36259baa60594c9b8382f..ba3ba12730a3c4ef1c7651d5f86645a8db98b79f 100644 (file)
@@ -317,6 +317,8 @@ patterns are available:
 
 - `bibtex` suitable for files with BibTeX coded references.
 
+- `cpp` suitable for source code in the C and C++ languages.
+
 - `html` suitable for HTML/XHTML documents.
 
 - `java` suitable for source code in the Java language.
@@ -334,6 +336,25 @@ patterns are available:
 - `tex` suitable for source code for LaTeX documents.
 
 
+Customizing word diff
+^^^^^^^^^^^^^^^^^^^^^
+
+You can customize the rules that `git diff --color-words` uses to
+split words in a line, by specifying an appropriate regular expression
+in the "diff.*.wordregex" configuration variable.  For example, in TeX
+a backslash followed by a sequence of letters forms a command, but
+several such commands can be run together without intervening
+whitespace.  To separate them, use a regular expression such as
+
+------------------------
+[diff "tex"]
+       wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+"
+------------------------
+
+A built-in pattern is provided for all languages listed in the
+previous section.
+
+
 Performing text diffs of binary files
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/diff.c b/diff.c
index 00c661f82e744b3114db6cbd4a94d9c44d10a564..9fcde963dbc6b2951ef3c5adca9f8ddb46023b8c 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe
        return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
 }
 
+static const char *userdiff_word_regex(struct diff_filespec *one)
+{
+       diff_filespec_load_driver(one);
+       return one->driver->word_regex;
+}
+
 void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
 {
        if (!options->a_prefix)
@@ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a,
                        ecbdata.diff_words =
                                xcalloc(1, sizeof(struct diff_words_data));
                        ecbdata.diff_words->file = o->file;
+                       if (!o->word_regex)
+                               o->word_regex = userdiff_word_regex(one);
+                       if (!o->word_regex)
+                               o->word_regex = userdiff_word_regex(two);
                        if (o->word_regex) {
                                ecbdata.diff_words->word_regex = (regex_t *)
                                        xmalloc(sizeof(regex_t));
index 48734863013734002deac08b20b1442322cbfbd3..744221bef9927e9ff4f53a4893de23659ca51277 100755 (executable)
@@ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' '
 
 '
 
+test_expect_success 'set a diff driver' '
+       git config diff.testdriver.wordregex "[^[:space:]]" &&
+       cat <<EOF > .gitattributes
+pre diff=testdriver
+post diff=testdriver
+EOF
+'
+
+test_expect_success 'option overrides default' '
+
+       word_diff --color-words="[a-z]+"
+
+'
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+h(4)<GREEN>,hh[44]<RESET>
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa )<RESET>
+EOF
+
+test_expect_success 'use default supplied by driver' '
+
+       word_diff --color-words
+
+'
+
 echo 'aaa (aaa)' > pre
 echo 'aaa (aaa) aaa' > post
 
@@ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' '
 
        word_diff --color-words="a+"
 
+
 '
 
 echo '(:' > pre
index 3681062ebfef85af08d71ed6e1ff734804906d6a..2b555094856ec9eeb95d9fbb6fcbb48de209525c 100644 (file)
@@ -6,14 +6,20 @@ static struct userdiff_driver *drivers;
 static int ndrivers;
 static int drivers_alloc;
 
-#define FUNCNAME(name, pattern) \
-       { name, NULL, -1, { pattern, REG_EXTENDED } }
+#define PATTERNS(name, pattern, wordregex)                     \
+       { name, NULL, -1, { pattern, REG_EXTENDED }, wordregex }
 static struct userdiff_driver builtin_drivers[] = {
-FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
-FUNCNAME("java",
+PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
+        "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("java",
         "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
-        "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
-FUNCNAME("objc",
+        "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+        "[a-zA-Z_][a-zA-Z0-9_]*"
+        "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+        "|[-+*/<>%&^|=!]="
+        "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"
+        "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("objc",
         /* Negate C statements that can look like functions */
         "!^[ \t]*(do|for|if|else|return|switch|while)\n"
         /* Objective-C methods */
@@ -21,20 +27,60 @@ FUNCNAME("objc",
         /* C functions */
         "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n"
         /* Objective-C class/protocol definitions */
-        "^(@(implementation|interface|protocol)[ \t].*)$"),
-FUNCNAME("pascal",
+        "^(@(implementation|interface|protocol)[ \t].*)$",
+        /* -- */
+        "[a-zA-Z_][a-zA-Z0-9_]*"
+        "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+        "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
+        "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("pascal",
         "^((procedure|function|constructor|destructor|interface|"
                "implementation|initialization|finalization)[ \t]*.*)$"
         "\n"
-        "^(.*=[ \t]*(class|record).*)$"),
-FUNCNAME("php", "^[\t ]*((function|class).*)"),
-FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
-FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
-FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
-FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
+        "^(.*=[ \t]*(class|record).*)$",
+        /* -- */
+        "[a-zA-Z_][a-zA-Z0-9_]*"
+        "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
+        "|<>|<=|>=|:=|\\.\\."
+        "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("php", "^[\t ]*((function|class).*)",
+        /* -- */
+        "[a-zA-Z_][a-zA-Z0-9_]*"
+        "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
+        "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"
+        "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
+        /* word regex: identifiers, numbers, operators, else one non-space char or a run of high bytes */
+        "[a-zA-Z_][a-zA-Z0-9_]*"
+        "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?"
+        "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"
+        "|[^[:space:]]|[\x80-\xff]+"),
+        /* -- */
+PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
+        /* word regex: (@|@@|$)identifiers, numbers, ?x literals, operators, else one non-space char or a run of high bytes */
+        "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
+        "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
+        "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"
+        "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+        "[={}\"]|[^={}\" \t]+"),
+PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
+        "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"),
+PATTERNS("cpp",
+        /* Jump targets or access declarations */
+        "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n"
+        /* C/++ functions/methods at top level */
+        "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n"
+        /* compound type at top level */
+        "^((struct|class|enum)[^;]*)$",
+        /* -- */
+        "[a-zA-Z_][a-zA-Z0-9_]*"
+        "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+        "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
+        "|[^[:space:]]|[\x80-\xff]+"),
 { "default", NULL, -1, { NULL, 0 } },
 };
-#undef FUNCNAME
+#undef PATTERNS
 
 static struct userdiff_driver driver_true = {
        "diff=true",
@@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v)
                return parse_string(&drv->external, k, v);
        if ((drv = parse_driver(k, v, "textconv")))
                return parse_string(&drv->textconv, k, v);
+       if ((drv = parse_driver(k, v, "wordregex")))
+               return parse_string(&drv->word_regex, k, v);
 
        return 0;
 }
index ba2945770b379f51aa8da45d112a2ef896ec4c10..c3151594f5c0643fead757accc27bf1093cf4a68 100644 (file)
@@ -11,6 +11,7 @@ struct userdiff_driver {
        const char *external;
        int binary;
        struct userdiff_funcname funcname;
+       const char *word_regex;
        const char *textconv;
 };