pathspec: apply "*.c" optimization from exclude
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sat, 24 Nov 2012 04:33:50 +0000 (11:33 +0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 26 Nov 2012 19:13:13 +0000 (11:13 -0800)
When a pattern contains only a single asterisk as wildcard,
e.g. "foo*bar", after literally comparing the leading part "foo" with
the string, we can compare the tail of the string and make sure it
matches "bar", instead of running fnmatch() on "*bar" against the
remainder of the string.

-O2 build on linux-2.6, without the patch:

$ time git rev-list --quiet HEAD -- '*.c'

real    0m40.770s
user    0m40.290s
sys     0m0.256s

With the patch:

$ time ~/w/git/git rev-list --quiet HEAD -- '*.c'

real    0m34.288s
user    0m33.997s
sys     0m0.205s

The above command is not supposed to be widely popular. It's chosen
because it exercises pathspec matching a lot. The point is it cuts
down matching time for popular patterns like *.c, which could be used
as pathspec in other places.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
cache.h
dir.c
dir.h
tree-walk.c

diff --git a/cache.h b/cache.h
index bf031f133c939ee48fa2e93a0ae3433342453c23..babf9e54c902d7a70a326815d6c73d7900cdc63c 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -473,6 +473,8 @@ extern int index_name_is_other(const struct index_state *, const char *, int);
 extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
 extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
 
+#define PATHSPEC_ONESTAR 1     /* the pathspec pattern satisfies GFNM_ONESTAR */
+
 struct pathspec {
        const char **raw; /* get_pathspec() result, not freed by free_pathspec() */
        int nr;
@@ -483,6 +485,7 @@ struct pathspec {
                const char *match;
                int len;
                int nowildcard_len;
+               int flags;
        } *items;
 };
 
diff --git a/dir.c b/dir.c
index f81e1d291758a462f434f555cd77f471faeecc5b..9afd388604c05b5c7f7c34cd19f1e6ff2a14fdce 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -46,6 +46,13 @@ inline int git_fnmatch(const char *pattern, const char *string,
                pattern += prefix;
                string += prefix;
        }
+       if (flags & GFNM_ONESTAR) {
+               int pattern_len = strlen(++pattern);
+               int string_len = strlen(string);
+               return string_len < pattern_len ||
+                      strcmp(pattern,
+                             string + string_len - pattern_len);
+       }
        return fnmatch(pattern, string, fnm_flags);
 }
 
@@ -246,7 +253,9 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
        }
 
        if (item->nowildcard_len < item->len &&
-           !git_fnmatch(match, name, 0, item->nowildcard_len - prefix))
+           !git_fnmatch(match, name,
+                        item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
+                        item->nowildcard_len - prefix))
                return MATCHED_FNMATCH;
 
        return 0;
@@ -1446,8 +1455,13 @@ int init_pathspec(struct pathspec *pathspec, const char **paths)
                item->match = path;
                item->len = strlen(path);
                item->nowildcard_len = simple_length(path);
-               if (item->nowildcard_len < item->len)
+               item->flags = 0;
+               if (item->nowildcard_len < item->len) {
                        pathspec->has_wildcard = 1;
+                       if (path[item->nowildcard_len] == '*' &&
+                           no_wildcard(path + item->nowildcard_len + 1))
+                               item->flags |= PATHSPEC_ONESTAR;
+               }
        }
 
        qsort(pathspec->items, pathspec->nr,
diff --git a/dir.h b/dir.h
index 0e8ae84628c2983c2bad9d636c5dec208d24c17e..ab5af42b2eedcf7045abd0b6029e84ba804f6057 100644 (file)
--- a/dir.h
+++ b/dir.h
@@ -143,6 +143,7 @@ extern int fnmatch_icase(const char *pattern, const char *string, int flags);
  * The prefix part of pattern must not contains wildcards.
  */
 #define GFNM_PATHNAME 1                /* similar to FNM_PATHNAME */
+#define GFNM_ONESTAR  2                /* there is only _one_ wildcard, a star */
 
 extern int git_fnmatch(const char *pattern, const char *string,
                       int flags, int prefix);
diff --git a/tree-walk.c b/tree-walk.c
index 2fcf3c02da2377bd3cdf8df4bb235e852517683d..585899ea24c8f51e58e737e361bf64497ee3d3f6 100644 (file)
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -628,7 +628,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
 
                        if (item->nowildcard_len < item->len) {
                                if (!git_fnmatch(match + baselen, entry->path,
-                                                0, item->nowildcard_len - baselen))
+                                                item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
+                                                item->nowildcard_len - baselen))
                                        return entry_interesting;
 
                                /*
@@ -654,7 +655,8 @@ match_wildcards:
                strbuf_add(base, entry->path, pathlen);
 
                if (!git_fnmatch(match, base->buf + base_offset,
-                                0, item->nowildcard_len)) {
+                                item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
+                                item->nowildcard_len)) {
                        strbuf_setlen(base, base_offset + baselen);
                        return entry_interesting;
                }