1 Return-Path: <bremner@tethera.net>
\r
2 X-Original-To: notmuch@notmuchmail.org
\r
3 Delivered-To: notmuch@notmuchmail.org
\r
4 Received: from localhost (localhost [127.0.0.1])
\r
5 by arlo.cworth.org (Postfix) with ESMTP id C682C6DE0130
\r
6 for <notmuch@notmuchmail.org>; Mon, 6 Jun 2016 19:06:20 -0700 (PDT)
\r
7 X-Virus-Scanned: Debian amavisd-new at cworth.org
\r
11 X-Spam-Status: No, score=-0.011 tagged_above=-999 required=5
\r
12 tests=[AWL=-0.000, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01]
\r
14 Received: from arlo.cworth.org ([127.0.0.1])
\r
15 by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024)
\r
16 with ESMTP id rI7TzeRsZYMf for <notmuch@notmuchmail.org>;
\r
17 Mon, 6 Jun 2016 19:06:10 -0700 (PDT)
\r
18 Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197])
\r
19 by arlo.cworth.org (Postfix) with ESMTPS id C2C196DE00DA
\r
20 for <notmuch@notmuchmail.org>; Mon, 6 Jun 2016 19:06:10 -0700 (PDT)
\r
21 Received: from remotemail by fethera.tethera.net with local (Exim 4.84)
\r
22 (envelope-from <bremner@tethera.net>)
\r
23 id 1bA6Om-0000AV-CX; Mon, 06 Jun 2016 22:05:44 -0400
\r
24 Received: (nullmailer pid 7230 invoked by uid 1000);
\r
25 Tue, 07 Jun 2016 02:05:54 -0000
\r
26 From: David Bremner <david@tethera.net>
\r
27 To: Austin Clements <aclements@csail.mit.edu>,
\r
28 David Bremner <david@tethera.net>
\r
29 Cc: sfischme@uwaterloo.ca, Gaute Hope <eg@gaute.vetsj.com>,
\r
30 notmuch <notmuch@notmuchmail.org>
\r
31 Subject: [PATCH] WIP: regexp matching in subjects
\r
32 Date: Mon, 6 Jun 2016 23:05:49 -0300
\r
33 Message-Id: <1465265149-7174-1-git-send-email-david@tethera.net>
\r
34 X-Mailer: git-send-email 2.8.1
\r
36 <CAH-f9WtC6CeVecfg8wFZUVc8K2rUfzsP72xo97sJX2y_mLW6-g@mail.gmail.com>
\r
38 <CAH-f9WtC6CeVecfg8wFZUVc8K2rUfzsP72xo97sJX2y_mLW6-g@mail.gmail.com>
\r
40 Content-Type: text/plain; charset=UTF-8
\r
41 Content-Transfer-Encoding: 8bit
\r
42 X-BeenThere: notmuch@notmuchmail.org
\r
43 X-Mailman-Version: 2.1.20
\r
45 List-Id: "Use and development of the notmuch mail system."
\r
46 <notmuch.notmuchmail.org>
\r
47 List-Unsubscribe: <https://notmuchmail.org/mailman/options/notmuch>,
\r
48 <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>
\r
49 List-Archive: <http://notmuchmail.org/pipermail/notmuch/>
\r
50 List-Post: <mailto:notmuch@notmuchmail.org>
\r
51 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>
\r
52 List-Subscribe: <https://notmuchmail.org/mailman/listinfo/notmuch>,
\r
53 <mailto:notmuch-request@notmuchmail.org?subject=subscribe>
\r
54 X-List-Received-Date: Tue, 07 Jun 2016 02:06:20 -0000
\r
56 the idea is that you can run
\r
58 % notmuch search 'subject:rx:<your-favourite-regexp>'
\r
62 % notmuch search subject:"your usual phrase search"
\r
64 This should also work with bindings.
\r
67 Here is Austin's "hack", crammed into the field processor framework.
\r
68 I seem to have broken one of the existing subject search tests with my
\r
69 recursive query parsing. I didn't have time to figure out why, yet.
\r
71 lib/Makefile.local | 2 ++
\r
72 lib/database-private.h | 1 +
\r
73 lib/database.cc | 5 +++
\r
74 lib/regexp-ps.cc | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++
\r
75 lib/regexp-ps.h | 37 ++++++++++++++++++++
\r
76 lib/subject-fp.cc | 41 ++++++++++++++++++++++
\r
77 lib/subject-fp.h | 43 +++++++++++++++++++++++
\r
78 7 files changed, 221 insertions(+)
\r
79 create mode 100644 lib/regexp-ps.cc
\r
80 create mode 100644 lib/regexp-ps.h
\r
81 create mode 100644 lib/subject-fp.cc
\r
82 create mode 100644 lib/subject-fp.h
\r
84 diff --git a/lib/Makefile.local b/lib/Makefile.local
\r
85 index beb9635..0e7311f 100644
\r
86 --- a/lib/Makefile.local
\r
87 +++ b/lib/Makefile.local
\r
88 @@ -51,6 +51,8 @@ libnotmuch_cxx_srcs = \
\r
90 $(dir)/query-fp.cc \
\r
92 + $(dir)/regexp-ps.cc \
\r
93 + $(dir)/subject-fp.cc \
\r
96 libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o)
\r
97 diff --git a/lib/database-private.h b/lib/database-private.h
\r
98 index ca71a92..5de0b81 100644
\r
99 --- a/lib/database-private.h
\r
100 +++ b/lib/database-private.h
\r
101 @@ -186,6 +186,7 @@ struct _notmuch_database {
\r
102 #if HAVE_XAPIAN_FIELD_PROCESSOR
\r
103 Xapian::FieldProcessor *date_field_processor;
\r
104 Xapian::FieldProcessor *query_field_processor;
\r
105 + Xapian::FieldProcessor *subject_field_processor;
\r
107 Xapian::ValueRangeProcessor *last_mod_range_processor;
\r
109 diff --git a/lib/database.cc b/lib/database.cc
\r
110 index 86bf261..adfbb81 100644
\r
111 --- a/lib/database.cc
\r
112 +++ b/lib/database.cc
\r
114 #include "database-private.h"
\r
115 #include "parse-time-vrp.h"
\r
116 #include "query-fp.h"
\r
117 +#include "subject-fp.h"
\r
118 #include "string-util.h"
\r
120 #include <iostream>
\r
121 @@ -1008,6 +1009,8 @@ notmuch_database_open_verbose (const char *path,
\r
122 notmuch->query_parser->add_boolean_prefix("date", notmuch->date_field_processor);
\r
123 notmuch->query_field_processor = new QueryFieldProcessor (*notmuch->query_parser, notmuch);
\r
124 notmuch->query_parser->add_boolean_prefix("query", notmuch->query_field_processor);
\r
125 + notmuch->subject_field_processor = new SubjectFieldProcessor (*notmuch->query_parser, notmuch);
\r
126 + notmuch->query_parser->add_boolean_prefix("subject", notmuch->subject_field_processor);
\r
128 notmuch->last_mod_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, "lastmod:");
\r
130 @@ -1027,6 +1030,8 @@ notmuch_database_open_verbose (const char *path,
\r
132 for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++) {
\r
133 prefix_t *prefix = &PROBABILISTIC_PREFIX[i];
\r
134 + if (strcmp (prefix->name, "subject") == 0)
\r
136 notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
\r
138 } catch (const Xapian::Error &error) {
\r
139 diff --git a/lib/regexp-ps.cc b/lib/regexp-ps.cc
\r
140 new file mode 100644
\r
141 index 0000000..540c7d6
\r
143 +++ b/lib/regexp-ps.cc
\r
145 +/* regexp-ps.cc - regular expression posting source
\r
147 + * This file is part of notmuch.
\r
149 + * Copyright © 2016 David Bremner
\r
151 + * This program is free software: you can redistribute it and/or modify
\r
152 + * it under the terms of the GNU General Public License as published by
\r
153 + * the Free Software Foundation, either version 3 of the License, or
\r
154 + * (at your option) any later version.
\r
156 + * This program is distributed in the hope that it will be useful,
\r
157 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
158 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
159 + * GNU General Public License for more details.
\r
161 + * You should have received a copy of the GNU General Public License
\r
162 + * along with this program. If not, see https://www.gnu.org/licenses/ .
\r
164 + * Author: Austin Clements <aclements@csail.mit.edu>
\r
165 + * David Bremner <david@tethera.net>
\r
168 +#include "regexp-ps.h"
\r
170 +RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
\r
173 + int r = regcomp (&regexp_, regexp.c_str (), REG_EXTENDED | REG_NOSUB);
\r
176 + /* XXX Report a query syntax error using regerror */
\r
177 + throw "regcomp failed";
\r
180 +RegexpPostingSource::~RegexpPostingSource ()
\r
182 + regfree (&regexp_);
\r
186 +RegexpPostingSource::init (const Xapian::Database &db)
\r
189 + it_ = db_.valuestream_begin (slot_);
\r
190 + end_ = db.valuestream_end (slot_);
\r
191 + started_ = false;
\r
195 +RegexpPostingSource::get_termfreq_min () const
\r
201 +RegexpPostingSource::get_termfreq_est () const
\r
203 + return get_termfreq_max () / 2;
\r
207 +RegexpPostingSource::get_termfreq_max () const
\r
209 + return db_.get_value_freq (slot_);
\r
213 +RegexpPostingSource::get_docid () const
\r
215 + return it_.get_docid ();
\r
219 +RegexpPostingSource::at_end () const
\r
221 + return it_ == end_;
\r
225 +RegexpPostingSource::next (unused (double min_wt))
\r
227 + if (started_ && ! at_end ())
\r
231 + for (; ! at_end (); ++it_) {
\r
232 + std::string value = *it_;
\r
233 + if (regexec (&regexp_, value.c_str (), 0, NULL, 0) == 0)
\r
237 diff --git a/lib/regexp-ps.h b/lib/regexp-ps.h
\r
238 new file mode 100644
\r
239 index 0000000..a4553a7
\r
241 +++ b/lib/regexp-ps.h
\r
243 +#ifndef NOTMUCH_REGEX_PS_H
\r
244 +#define NOTMUCH_REGEX_PS_H
\r
246 +#include <sys/types.h>
\r
247 +#include <regex.h>
\r
248 +#include <xapian.h>
\r
249 +#include "notmuch-private.h"
\r
251 +/* A posting source that returns documents where a value matches a
\r
254 +class RegexpPostingSource : public Xapian::PostingSource
\r
257 +const Xapian::valueno slot_;
\r
259 +Xapian::Database db_;
\r
261 +Xapian::ValueIterator it_, end_;
\r
264 +RegexpPostingSource (const RegexpPostingSource &);
\r
265 +RegexpPostingSource &operator= (const RegexpPostingSource &);
\r
268 + RegexpPostingSource (Xapian::valueno slot, const std::string &regexp);
\r
269 +~RegexpPostingSource ();
\r
270 +void init (const Xapian::Database &db);
\r
271 +Xapian::doccount get_termfreq_min () const;
\r
272 +Xapian::doccount get_termfreq_est () const;
\r
273 +Xapian::doccount get_termfreq_max () const;
\r
274 +Xapian::docid get_docid () const;
\r
275 +bool at_end () const;
\r
276 +void next (unused (double min_wt));
\r
280 diff --git a/lib/subject-fp.cc b/lib/subject-fp.cc
\r
281 new file mode 100644
\r
282 index 0000000..1627721
\r
284 +++ b/lib/subject-fp.cc
\r
286 +/* subject-fp.cc - "subject:" field processor glue
\r
288 + * This file is part of notmuch.
\r
290 + * Copyright © 2016 David Bremner
\r
292 + * This program is free software: you can redistribute it and/or modify
\r
293 + * it under the terms of the GNU General Public License as published by
\r
294 + * the Free Software Foundation, either version 3 of the License, or
\r
295 + * (at your option) any later version.
\r
297 + * This program is distributed in the hope that it will be useful,
\r
298 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
299 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
300 + * GNU General Public License for more details.
\r
302 + * You should have received a copy of the GNU General Public License
\r
303 + * along with this program. If not, see https://www.gnu.org/licenses/ .
\r
305 + * Author: David Bremner <david@tethera.net>
\r
308 +#include "database-private.h"
\r
309 +#include "subject-fp.h"
\r
310 +#include <iostream>
\r
312 +#if HAVE_XAPIAN_FIELD_PROCESSOR
\r
315 +SubjectFieldProcessor::operator() (const std::string & str)
\r
317 + std::string prefix = "rx:";
\r
319 + if (str.compare(0,prefix.size(),prefix)==0) {
\r
320 + postings = new RegexpPostingSource(NOTMUCH_VALUE_SUBJECT, str.substr(prefix.size()));
\r
321 + return Xapian::Query(postings);
\r
323 + return parser.parse_query (str, NOTMUCH_QUERY_PARSER_FLAGS, _find_prefix ("subject"));
\r
327 diff --git a/lib/subject-fp.h b/lib/subject-fp.h
\r
328 new file mode 100644
\r
329 index 0000000..ca622ba
\r
331 +++ b/lib/subject-fp.h
\r
333 +/* subject-fp.h - subject field processor glue
\r
335 + * This file is part of notmuch.
\r
337 + * Copyright © 2016 David Bremner
\r
339 + * This program is free software: you can redistribute it and/or modify
\r
340 + * it under the terms of the GNU General Public License as published by
\r
341 + * the Free Software Foundation, either version 3 of the License, or
\r
342 + * (at your option) any later version.
\r
344 + * This program is distributed in the hope that it will be useful,
\r
345 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
346 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
347 + * GNU General Public License for more details.
\r
349 + * You should have received a copy of the GNU General Public License
\r
350 + * along with this program. If not, see https://www.gnu.org/licenses/ .
\r
352 + * Author: David Bremner <david@tethera.net>
\r
355 +#ifndef NOTMUCH_SUBJECT_FP_H
\r
356 +#define NOTMUCH_SUBJECT_FP_H
\r
358 +#include <xapian.h>
\r
359 +#include "notmuch.h"
\r
360 +#include "regexp-ps.h"
\r
362 +#if HAVE_XAPIAN_FIELD_PROCESSOR
\r
363 +class SubjectFieldProcessor : public Xapian::FieldProcessor {
\r
365 + Xapian::QueryParser &parser;
\r
366 + notmuch_database_t *notmuch;
\r
367 + RegexpPostingSource *postings = NULL;
\r
369 + SubjectFieldProcessor (Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
\r
370 + : parser(parser_), notmuch(notmuch_) { };
\r
372 + Xapian::Query operator()(const std::string & str);
\r
375 +#endif /* NOTMUCH_SUBJECT_FP_H */
\r