wildmatch: advance faster in <asterisk> + <literal> patterns
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Tue, 1 Jan 2013 02:44:10 +0000 (09:44 +0700)
committer Junio C Hamano <gitster@pobox.com>
Tue, 1 Jan 2013 23:32:37 +0000 (15:32 -0800)
Normally when we match "*X" on "abcX", we call dowild("X", "abcX"),
dowild("X", "bcX"), dowild("X", "cX") and dowild("X", "X"). Only the
last call may have a chance of matching. By skipping the text before
"X", we can eliminate the first three useless calls.

compat, '*/*/*' on linux-2.6.git file list 2000 times, before:
wildmatch 7s 985049us
fnmatch   2s 735541us or 34.26% faster

and after:
wildmatch 4s 492549us
fnmatch   0s 888263us or 19.77% slower

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
t/t3070-wildmatch.sh
wildmatch.c

index 97f1daff8e075c8877814737205b9a5003bdc3c8..4c37057ddf4a6796c88dba45f0123da1a44fb4af 100755 (executable)
@@ -207,6 +207,11 @@ match 0 x foo '*/*/*'
 match 0 x foo/bar '*/*/*'
 match 1 x foo/bba/arr '*/*/*'
 match 0 x foo/bb/aa/rr '*/*/*'
+match 1 x foo/bb/aa/rr '**/**/**'
+match 1 x abcXdefXghi '*X*i'
+match 0 x ab/cXd/efXg/hi '*X*i'
+match 1 x ab/cXd/efXg/hi '*/*X*/*/*i'
+match 1 x ab/cXd/efXg/hi '**/*X*/**/*i'
 
 pathmatch 1 foo foo
 pathmatch 0 foo fo
@@ -226,5 +231,8 @@ pathmatch 0 foo '*/*/*'
 pathmatch 0 foo/bar '*/*/*'
 pathmatch 1 foo/bba/arr '*/*/*'
 pathmatch 1 foo/bb/aa/rr '*/*/*'
+pathmatch 1 abcXdefXghi '*X*i'
+pathmatch 1 ab/cXd/efXg/hi '*/*X*/*/*i'
+pathmatch 1 ab/cXd/efXg/hi '*Xg*i'
 
 test_done
index bb425220b07bbc2c35d2d7ec28b13fd6c53b4ac4..7192bdc1b880728a81b33a38091cca8de6a30445 100644 (file)
@@ -133,6 +133,29 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags)
                        while (1) {
                                if (t_ch == '\0')
                                        break;
+                               /*
+                                * Try to advance faster when an asterisk is
+                                * followed by a literal. We know in this case
+                                * that the string before the literal
+                                * must belong to "*".
+                                * If match_slash is false, do not look past
+                                * the first slash as it cannot belong to '*'.
+                                */
+                               if (!is_glob_special(*p)) {
+                                       p_ch = *p;
+                                       if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
+                                               p_ch = tolower(p_ch);
+                                       while ((t_ch = *text) != '\0' &&
+                                              (match_slash || t_ch != '/')) {
+                                               if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
+                                                       t_ch = tolower(t_ch);
+                                               if (t_ch == p_ch)
+                                                       break;
+                                               text++;
+                                       }
+                                       if (t_ch != p_ch)
+                                               return WM_NOMATCH;
+                               }
                                if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
                                        if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
                                                return matched;