grep: support newline separated pattern list
author René Scharfe <rene.scharfe@lsrfire.ath.cx>
Sun, 20 May 2012 14:33:07 +0000 (16:33 +0200)
committer Junio C Hamano <gitster@pobox.com>
Sun, 20 May 2012 22:25:46 +0000 (15:25 -0700)
Currently, patterns that contain newline characters don't match anything
when given to git grep.  Regular grep(1) interprets patterns as lists of
newline separated search strings instead.

Implement this functionality by creating and inserting extra grep_pat
structures for patterns consisting of multiple lines when appending to
the pattern lists.  For simplicity, all pattern strings are duplicated.
The original pattern is truncated in place to make it contain only the
first line.

Requested-by: Torne (Richard Coles) <torne@google.com>
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-grep.txt
grep.c
grep.h
t/t7810-grep.sh

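diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index e150c77cffb5d082822bd65c7bfbeb1f431fc42f..c3306f3a3b855318397e8d7f4174943f589eeb3e 100644 (file)
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt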
@@ -29,7 +29,9 @@ SYNOPSIS
 DESCRIPTION
 -----------
 Look for specified patterns in the tracked files in the work tree, blobs
-registered in the index file, or blobs in given tree objects.
+registered in the index file, or blobs in given tree objects.  Patterns
+are lists of one or more search expressions separated by newline
+characters.  An empty string as search expression matches all lines.
 
 
 CONFIGURATION
diff --git a/grep.c b/grep.c
index c35a7ce57d323fdc1e64fdcc33f90cf977e62342..02258039d919d62f0ad08eb5933765c7f9a5d602 100644 (file)
--- a/grep.c
+++ b/grep.c
@@ -9,7 +9,7 @@ static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
                                        enum grep_header_field field)
 {
        struct grep_pat *p = xcalloc(1, sizeof(*p));
-       p->pattern = pat;
+       p->pattern = xmemdupz(pat, patlen);
        p->patternlen = patlen;
        p->origin = origin;
        p->no = no;
@@ -23,6 +23,36 @@ static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
        **tail = p;
        *tail = &p->next;
        p->next = NULL;
+
+       switch (p->token) {
+       case GREP_PATTERN: /* atom */
+       case GREP_PATTERN_HEAD:
+       case GREP_PATTERN_BODY:
+               for (;;) {
+                       struct grep_pat *new_pat;
+                       size_t len = 0;
+                       char *cp = p->pattern + p->patternlen, *nl = NULL;
+                       while (++len <= p->patternlen) {
+                               if (*(--cp) == '\n') {
+                                       nl = cp;
+                                       break;
+                               }
+                       }
+                       if (!nl)
+                               break;
+                       new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
+                                                 p->no, p->token, p->field);
+                       new_pat->next = p->next;
+                       if (!p->next)
+                               *tail = &new_pat->next;
+                       p->next = new_pat;
+                       *nl = '\0';
+                       p->patternlen -= len;
+               }
+               break;
+       default:
+               break;
+       }
 }
 
 void append_header_grep_pattern(struct grep_opt *opt,
@@ -408,6 +438,7 @@ void free_grep_patterns(struct grep_opt *opt)
                                free_pcre_regexp(p);
                        else
                                regfree(&p->regexp);
+                       free(p->pattern);
                        break;
                default:
                        break;
diff --git a/grep.h b/grep.h
index cd055cdfa8cac903382d592f1ec7e2a22bf7f897..5b083affe13da91614a32107032d3f42096568e8 100644 (file)
--- a/grep.h
+++ b/grep.h
@@ -35,7 +35,7 @@ struct grep_pat {
        const char *origin;
        int no;
        enum grep_pat_token token;
-       const char *pattern;
+       char *pattern;
        size_t patternlen;
        enum grep_header_field field;
        regex_t regexp;
diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh
index 6379ad60bcb9b5b56eb773b5a987745356c24df6..bc9a522085c794ccfcb9648c19e6e4c0c1ed6d19 100755 (executable)
--- a/t/t7810-grep.sh
+++ b/t/t7810-grep.sh
@@ -322,6 +322,11 @@ test_expect_success 'grep -f, multiple patterns' '
        test_cmp expected actual
 '
 
+test_expect_success 'grep, multiple patterns' '
+       git grep "$(cat patterns)" >actual &&
+       test_cmp expected actual
+'
+
 cat >expected <<EOF
 file:foo mmap bar
 file:foo_mmap bar