git-grep: Learn PCRE
author Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Mon, 9 May 2011 21:52:05 +0000 (23:52 +0200)
committer Junio C Hamano <gitster@pobox.com>
Mon, 9 May 2011 23:29:33 +0000 (16:29 -0700)
This patch teaches git-grep the --perl-regexp/-P options (naming
borrowed from GNU grep) in order to allow specifying PCRE regexes on the
command line.

PCRE regexes have a number of features which make them handier to use
than POSIX regexes, like consistent escaping rules, extended character
classes, ungreedy matching, etc.

git isn't built with PCRE support automatically. The USE_LIBPCRE environment
variable must be set (e.g. `make USE_LIBPCRE=YesPlease`).

Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-grep.txt
Makefile
builtin/grep.c
contrib/completion/git-completion.bash
grep.c
grep.h

index 4a5837881dc46358e47412e418bf0bceac3ddc3f..e150c77cffb5d082822bd65c7bfbeb1f431fc42f 100644 (file)
@@ -12,6 +12,7 @@ SYNOPSIS
 'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
           [-v | --invert-match] [-h|-H] [--full-name]
           [-E | --extended-regexp] [-G | --basic-regexp]
+          [-P | --perl-regexp]
           [-F | --fixed-strings] [-n | --line-number]
           [-l | --files-with-matches] [-L | --files-without-match]
           [(-O | --open-files-in-pager) [<pager>]]
@@ -97,6 +98,11 @@ OPTIONS
        Use POSIX extended/basic regexp for patterns.  Default
        is to use basic regexp.
 
+-P::
+--perl-regexp::
+       Use Perl-compatible regexp for patterns. Requires libpcre to be
+       compiled in.
+
 -F::
 --fixed-strings::
        Use fixed strings for patterns (don't interpret pattern
index cbc3fce2d573ac313ee1b8f19749432cff3b31b5..fea55c04dd7de83a4d5e7f055b83747aa0d714d4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,12 @@ all::
 # Define NO_OPENSSL environment variable if you do not have OpenSSL.
 # This also implies BLK_SHA1.
 #
+# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
+# able to use Perl-compatible regular expressions.
+#
+# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
 # Define NO_CURL if you do not have libcurl installed.  git-http-pull and
 # git-http-push are not built, and you cannot use http:// and https://
 # transports.
@@ -1248,6 +1254,15 @@ ifdef NO_LIBGEN_H
        COMPAT_OBJS += compat/basename.o
 endif
 
+ifdef USE_LIBPCRE
+       BASIC_CFLAGS += -DUSE_LIBPCRE
+       ifdef LIBPCREDIR
+               BASIC_CFLAGS += -I$(LIBPCREDIR)/include
+               EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+       endif
+       EXTLIBS += -lpcre
+endif
+
 ifdef NO_CURL
        BASIC_CFLAGS += -DNO_CURL
        REMOTE_CURL_PRIMARY =
index 10a1f65310f28f2014bab3f3295205abf6dc59ad..6831975104b35aa1c94d251d525f7f11702a757b 100644 (file)
@@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
                        REG_EXTENDED),
                OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
                        "interpret patterns as fixed strings"),
+               OPT_BOOLEAN('P', "perl-regexp", &opt.pcre,
+                               "use Perl-compatible regular expressions"),
                OPT_GROUP(""),
                OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
                OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
index 3dc9cbe9f9d4a22db004047bf00165c4834a9b3b..2facd08d1639115188d8e0bcec35c47f4ac793c4 100755 (executable)
@@ -1487,6 +1487,7 @@ _git_grep ()
                        --text --ignore-case --word-regexp --invert-match
                        --full-name --line-number
                        --extended-regexp --basic-regexp --fixed-strings
+                       --perl-regexp
                        --files-with-matches --name-only
                        --files-without-match
                        --max-depth
diff --git a/grep.c b/grep.c
index 870d10cf6969d8b78bb6a2d2ea5262c1ddef0f62..d03d9e24c23eff2d60ae7226a412f3ccf66670fd 100644 (file)
--- a/grep.c
+++ b/grep.c
@@ -74,6 +74,69 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p,
        die("%s'%s': %s", where, p->pattern, error);
 }
 
+#ifdef USE_LIBPCRE
+static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
+{
+       const char *error;
+       int erroffset;
+       int options = 0;
+
+       if (opt->ignore_case)
+               options |= PCRE_CASELESS;
+
+       p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
+                       NULL);
+       if (!p->pcre_regexp)
+               compile_regexp_failed(p, error);
+
+       p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error);
+       if (!p->pcre_extra_info && error)
+               die("%s", error);
+}
+
+static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
+               regmatch_t *match, int eflags)
+{
+       int ovector[30], ret, flags = 0;
+
+       if (eflags & REG_NOTBOL)
+               flags |= PCRE_NOTBOL;
+
+       ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
+                       0, flags, ovector, ARRAY_SIZE(ovector));
+       if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
+               die("pcre_exec failed with error code %d", ret);
+       if (ret > 0) {
+               ret = 0;
+               match->rm_so = ovector[0];
+               match->rm_eo = ovector[1];
+       }
+
+       return ret;
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+       pcre_free(p->pcre_regexp);
+       pcre_free(p->pcre_extra_info);
+}
+#else /* !USE_LIBPCRE */
+static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
+{
+       die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
+}
+
+static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
+               regmatch_t *match, int eflags)
+{
+       return 1;
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+}
+#endif /* !USE_LIBPCRE */
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
        int err;
@@ -85,6 +148,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
        if (p->fixed)
                return;
 
+       if (opt->pcre) {
+               compile_pcre_regexp(p, opt);
+               return;
+       }
+
        err = regcomp(&p->regexp, p->pattern, opt->regflags);
        if (err) {
                char errbuf[1024];
@@ -327,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt)
                case GREP_PATTERN: /* atom */
                case GREP_PATTERN_HEAD:
                case GREP_PATTERN_BODY:
-                       regfree(&p->regexp);
+                       if (p->pcre_regexp)
+                               free_pcre_regexp(p);
+                       else
+                               regfree(&p->regexp);
                        break;
                default:
                        break;
@@ -426,6 +497,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
 
        if (p->fixed)
                hit = !fixmatch(p, line, eol, match);
+       else if (p->pcre_regexp)
+               hit = !pcrematch(p, line, eol, match, eflags);
        else
                hit = !regmatch(&p->regexp, line, eol, match, eflags);
 
diff --git a/grep.h b/grep.h
index 06621fe663545af52fbc42827a8374ab5bd42f38..cd055cdfa8cac903382d592f1ec7e2a22bf7f897 100644 (file)
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,12 @@
 #ifndef GREP_H
 #define GREP_H
 #include "color.h"
+#ifdef USE_LIBPCRE
+#include <pcre.h>
+#else
+typedef int pcre;
+typedef int pcre_extra;
+#endif
 
 enum grep_pat_token {
        GREP_PATTERN,
@@ -33,6 +39,8 @@ struct grep_pat {
        size_t patternlen;
        enum grep_header_field field;
        regex_t regexp;
+       pcre *pcre_regexp;
+       pcre_extra *pcre_extra_info;
        unsigned fixed:1;
        unsigned ignore_case:1;
        unsigned word_regexp:1;
@@ -83,6 +91,7 @@ struct grep_opt {
 #define GREP_BINARY_TEXT       2
        int binary;
        int extended;
+       int pcre;
        int relative;
        int pathname;
        int null_following_name;