lib: Add regexp expansion for tags and paths
author: David Bremner <david@tethera.net>
Wed, 8 Mar 2017 02:32:26 +0000 (22:32 -0400)
committer: David Bremner <david@tethera.net>
Tue, 9 May 2017 10:44:29 +0000 (07:44 -0300)
From a UI perspective this looks similar to what was already provided
for from, subject, and mid, but the implementation is quite
different. It uses the database's list of terms to construct a term-based
query equivalent to the passed regular expression.

lib/database.cc
lib/regexp-fields.cc
test/T650-regexp-query.sh

index 49b3849c32586124db4364c4c6815a25316f2bc7..5b13f5417118ce5282b5b148941e65cd22637cde 100644 (file)
@@ -259,12 +259,15 @@ prefix_t prefix_table[] = {
     { "file-direntry",         "XFDIRENTRY",   NOTMUCH_FIELD_NO_FLAGS },
     { "directory-direntry",    "XDDIRENTRY",   NOTMUCH_FIELD_NO_FLAGS },
     { "thread",                        "G",            NOTMUCH_FIELD_EXTERNAL },
-    { "tag",                   "K",            NOTMUCH_FIELD_EXTERNAL },
-    { "is",                    "K",            NOTMUCH_FIELD_EXTERNAL },
+    { "tag",                   "K",            NOTMUCH_FIELD_EXTERNAL |
+                                               NOTMUCH_FIELD_PROCESSOR },
+    { "is",                    "K",            NOTMUCH_FIELD_EXTERNAL |
+                                               NOTMUCH_FIELD_PROCESSOR },
     { "id",                    "Q",            NOTMUCH_FIELD_EXTERNAL },
     { "mid",                   "Q",            NOTMUCH_FIELD_EXTERNAL |
                                                NOTMUCH_FIELD_PROCESSOR },
-    { "path",                  "P",            NOTMUCH_FIELD_EXTERNAL },
+    { "path",                  "P",            NOTMUCH_FIELD_EXTERNAL|
+                                               NOTMUCH_FIELD_PROCESSOR },
     { "property",              "XPROPERTY",    NOTMUCH_FIELD_EXTERNAL },
     /*
      * Unconditionally add ':' to reduce potential ambiguity with
@@ -272,7 +275,8 @@ prefix_t prefix_table[] = {
      * letters. See Xapian document termprefixes.html for related
      * discussion.
      */
-    { "folder",                        "XFOLDER:",     NOTMUCH_FIELD_EXTERNAL },
+    { "folder",                        "XFOLDER:",     NOTMUCH_FIELD_EXTERNAL |
+                                               NOTMUCH_FIELD_PROCESSOR },
 #if HAVE_XAPIAN_FIELD_PROCESSOR
     { "date",                  NULL,           NOTMUCH_FIELD_EXTERNAL |
                                                NOTMUCH_FIELD_PROCESSOR },
index 7ae55e70016c1f5e26c8b3c47dc1a5a5624e5b2d..084bc8c019999cefe7b7d0dff01d27463d6cc4a7 100644 (file)
@@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix)
     else if (prefix == "mid")
        return NOTMUCH_VALUE_MESSAGE_ID;
     else
-       throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
+       return Xapian::BAD_VALUENO;
 }
 
 RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
@@ -156,15 +156,35 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
-    if (str.size () == 0)
-       return Xapian::Query(Xapian::Query::OP_AND_NOT,
+    if (str.empty ()) {
+       if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+           return Xapian::Query(Xapian::Query::OP_AND_NOT,
                             Xapian::Query::MatchAll,
                             Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
+       } else {
+           return Xapian::Query (term_prefix);
+       }
+    }
 
     if (str.at (0) == '/') {
-       if (str.at (str.size () - 1) == '/'){
-           RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2));
-           return Xapian::Query (postings->release ());
+       if (str.length() > 1 && str.at (str.size () - 1) == '/'){
+           std::string regexp_str = str.substr(1,str.size () - 2);
+           if (slot != Xapian::BAD_VALUENO) {
+               RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+               return Xapian::Query (postings->release ());
+           } else {
+               std::vector<std::string> terms;
+               regex_t regexp;
+
+               compile_regex(regexp, regexp_str.c_str ());
+               for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+                    it != notmuch->xapian_db->allterms_end (); ++it) {
+                   if (regexec (&regexp, (*it).c_str () + term_prefix.size(),
+                                0, NULL, 0) == 0)
+                       terms.push_back(*it);
+               }
+               return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end());
+           }
        } else {
            throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
        }
index 27fc9ab9831dba70672a167e28f0f541215e2b89..b7bdda118e97850f4c8c25bb9607c313fd4b1123 100755 (executable)
@@ -2,13 +2,54 @@
 test_description='regular expression searches'
 . ./test-lib.sh || exit 1
 
-add_email_corpus
-
-
 if [ $NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR -eq 0 ]; then
     test_done
 fi
 
+add_message '[dir]=bad' '[subject]="To the bone"'
+add_message '[dir]=.' '[subject]="Top level"'
+add_message '[dir]=bad/news' '[subject]="Bears"'
+mkdir -p "${MAIL_DIR}/duplicate/bad/news"
+cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news"
+
+add_message '[dir]=things' '[subject]="These are a few"'
+add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"'
+add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"'
+
+test_begin_subtest "empty path:// search"
+notmuch search 'path:""' > EXPECTED
+notmuch search 'path:/^$/' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty folder:// search"
+notmuch search 'folder:""' > EXPECTED
+notmuch search 'folder:/^$/' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "unanchored folder:// specification"
+output=$(notmuch search folder:/bad/ | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
+
+test_begin_subtest "anchored folder:// search"
+output=$(notmuch search 'folder:/^bad$/' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)"
+
+test_begin_subtest "unanchored path:// specification"
+output=$(notmuch search path:/bad/ | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
+
+test_begin_subtest "anchored path:// search"
+output=$(notmuch search 'path:/^bad$/' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)"
+
+# Use "standard" corpus from here on.
+rm -rf $MAIL_DIR
+add_email_corpus
+
 notmuch search --output=messages from:cworth > cworth.msg-ids
 
 # these headers will generate no document terms
@@ -120,4 +161,15 @@ thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older ver
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "unanchored tag search"
+notmuch search tag:signed or tag:inbox > EXPECTED
+notmuch search tag:/i/ > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+notmuch tag +testsi '*'
+test_begin_subtest "anchored tag search"
+notmuch search tag:signed > EXPECTED
+notmuch search tag:/^si/ > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done