--- /dev/null
+Return-Path: <amthrax@drake.mit.edu>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+ by olra.theworths.org (Postfix) with ESMTP id B3E1D42D2A1\r
+ for <notmuch@notmuchmail.org>; Sun, 16 Jan 2011 00:16:24 -0800 (PST)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: 0\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none]\r
+ autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+ by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+ with ESMTP id 0UysSoUmmtej for <notmuch@notmuchmail.org>;\r
+ Sun, 16 Jan 2011 00:16:23 -0800 (PST)\r
+X-Greylist: delayed 300 seconds by postgrey-1.32 at olra;\r
+ Sun, 16 Jan 2011 00:16:23 PST\r
+Received: from dmz-mailsec-scanner-3.mit.edu (DMZ-MAILSEC-SCANNER-3.MIT.EDU\r
+ [18.9.25.14])\r
+ by olra.theworths.org (Postfix) with ESMTP id 4D1FE42D28F\r
+ for <notmuch@notmuchmail.org>; Sun, 16 Jan 2011 00:16:23 -0800 (PST)\r
+X-AuditID: 1209190e-b7b3bae000000a71-4d-4d32a8292d6d\r
+Received: from mailhub-auth-1.mit.edu ( [18.9.21.35])\r
+ by dmz-mailsec-scanner-3.mit.edu (Symantec Brightmail Gateway) with\r
+ SMTP id 9C.7B.02673.928A23D4; Sun, 16 Jan 2011 03:11:21 -0500 (EST)\r
+Received: from outgoing.mit.edu (OUTGOING-AUTH.MIT.EDU [18.7.22.103])\r
+ by mailhub-auth-1.mit.edu (8.13.8/8.9.2) with ESMTP id p0G8BL6i004717; \r
+ Sun, 16 Jan 2011 03:11:21 -0500\r
+Received: from drake.mit.edu (a074.catapulsion.net [70.36.81.74])\r
+ (authenticated bits=0)\r
+ (User authenticated as amdragon@ATHENA.MIT.EDU)\r
+ by outgoing.mit.edu (8.13.6/8.12.4) with ESMTP id p0G8BJ6K010519\r
+ (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT);\r
+ Sun, 16 Jan 2011 03:11:21 -0500 (EST)\r
+Received: from amthrax by drake.mit.edu with local (Exim 4.72)\r
+ (envelope-from <amthrax@drake.mit.edu>)\r
+ id 1PeNhn-0002XR-HT; Sun, 16 Jan 2011 03:11:19 -0500\r
+From: Austin Clements <amdragon@MIT.EDU>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH 2/8] Parse NEAR and ADJ operators.\r
+Date: Sun, 16 Jan 2011 03:10:52 -0500\r
+Message-Id: <1295165458-9573-3-git-send-email-amdragon@mit.edu>\r
+X-Mailer: git-send-email 1.7.2.3\r
+In-Reply-To: <1295165458-9573-1-git-send-email-amdragon@mit.edu>\r
+References: <1295165458-9573-1-git-send-email-amdragon@mit.edu>\r
+X-Brightmail-Tracker: AAAAAA==\r
+Cc: amdragon@mit.edu\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+ <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Sun, 16 Jan 2011 08:16:24 -0000\r
+\r
+NEAR and ADJ are treated as n-ary operators where all operands must be\r
+terms, which fits with Xapian's own restrictions on near/adj queries.\r
+This implementation is slightly more lenient than Xapian's in that it\r
+allows phrases (both quoted and implicit) as operands and folds the\r
+phrase terms in as operands to the near/adj operator.\r
+---\r
+ lib/notmuch-private.h | 10 +++++\r
+ lib/qparser.cc | 103 ++++++++++++++++++++++++++++++++++++++++++++++---\r
+ 2 files changed, 107 insertions(+), 6 deletions(-)\r
+\r
+diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h\r
+index 06239b9..a42afd6 100644\r
+--- a/lib/notmuch-private.h\r
++++ b/lib/notmuch-private.h\r
+@@ -518,6 +518,12 @@ enum _notmuch_token_type {\r
+ TOK_LOVE, TOK_HATE, TOK_BRA, TOK_KET,\r
+ /* Binary operators. These should have left and right children. */\r
+ TOK_AND, TOK_OR, TOK_XOR,\r
++ /* n-ary operators. In the AST, these are represented like lists\r
++ * of TOK_TERMS, with the left child being a TOK_TERMS and the\r
++ * right being another TOK_ADJ/TOK_NEAR. The final right must be\r
++ * NULL. Both tokens can also carry distances; the highest\r
++ * distance in the chain will be used. */\r
++ TOK_ADJ, TOK_NEAR,\r
+ /* Unary operators. These have only a left child. Xapian::Query\r
+ * has no pure NOT operator, so the generator treats NOT as the\r
+ * child of an AND specially, and otherwise represents it as\r
+@@ -555,6 +561,10 @@ typedef struct _notmuch_token {\r
+ enum _notmuch_token_type type;\r
+ const char *text;\r
+ \r
++ /* For TOK_ADJ and TOK_NEAR, this specifies the distance\r
++ * argument. */\r
++ int distance;\r
++\r
+ /* For TOK_PREFIX, the flags of this prefix. */\r
+ int prefixFlags;\r
+ \r
+diff --git a/lib/qparser.cc b/lib/qparser.cc\r
+index b86a445..5a6d39b 100644\r
+--- a/lib/qparser.cc\r
++++ b/lib/qparser.cc\r
+@@ -40,7 +40,6 @@\r
+ * _notmuch_qparser_generate to perform step 4.\r
+ *\r
+ * Still missing from this implementation:\r
+- * * NEAR/ADJ operators\r
+ * * Stemming - The stemming should probably be marked on TOK_TERMS\r
+ * tokens. Ideally, we can just pass this to the term generator.\r
+ * * Wildcard queries - This should be available in the IR so it's\r
+@@ -101,14 +100,14 @@ struct _notmuch_generate_state {\r
+ \r
+ static const char *token_types[] = {\r
+ "LOVE", "HATE", "BRA", "KET",\r
+- "AND", "OR", "XOR",\r
++ "AND", "OR", "XOR", "ADJ", "NEAR",\r
+ "NOT", "FILTER", "PREFIX",\r
+ "TERMS", "LIT", "ERROR", "END"\r
+ };\r
+ \r
+ /* The distinguished end token. This simplifies the parser since it\r
+ * never has to worry about dereferencing next. */\r
+-static _notmuch_token_t tok_end = {TOK_END, NULL, FALSE, NULL,\r
++static _notmuch_token_t tok_end = {TOK_END, NULL, -1, FALSE, NULL,\r
+ &tok_end, NULL, NULL};\r
+ \r
+ _notmuch_token_t *\r
+@@ -118,6 +117,7 @@ _notmuch_token_create_op (const void *ctx, enum _notmuch_token_type type,\r
+ _notmuch_token_t *tok = talloc (ctx, struct _notmuch_token);\r
+ memset (tok, 0, sizeof (*tok));\r
+ tok->type = type;\r
++ tok->distance = -1;\r
+ tok->left = left;\r
+ tok->right = right;\r
+ return tok;\r
+@@ -135,6 +135,7 @@ _notmuch_token_create_term (const void *ctx, enum _notmuch_token_type type,\r
+ char *\r
+ _notmuch_token_show (const void *ctx, _notmuch_token_t *tok)\r
+ {\r
++ char dist[32] = "";\r
+ int ispre = tok->type == TOK_PREFIX;\r
+ \r
+ if ((unsigned)tok->type > TOK_END)\r
+@@ -147,9 +148,11 @@ _notmuch_token_show (const void *ctx, _notmuch_token_t *tok)\r
+ else if (tok->type == TOK_ERROR)\r
+ return talloc_asprintf (ctx, "ERROR/\"%s\"", tok->text);\r
+ \r
+- return talloc_asprintf (ctx, "%s%s%s",\r
++ if (tok->distance != -1)\r
++ sprintf(dist, "/%d", tok->distance);\r
++ return talloc_asprintf (ctx, "%s%s%s%s",\r
+ token_types[tok->type],\r
+- ispre ? "/" : "",\r
++ dist, ispre ? "/" : "",\r
+ ispre ? tok->text : "");\r
+ }\r
+ \r
+@@ -308,10 +311,31 @@ static notmuch_bool_t\r
+ lex_operator (struct _notmuch_lex_state *s, char *term,\r
+ const char *op, enum _notmuch_token_type type)\r
+ {\r
++ size_t oplen = strlen (op);\r
++\r
+ if (strcasecmp (term, op) == 0) {\r
+ lex_emit (s, type, term);\r
+ return true;\r
+ }\r
++\r
++ /* Check for ADJ or NEAR with argument. Our parsing of this is\r
++ * slightly incompatible with Xapian, but I believe this to be a\r
++ * bug in Xapian. Xapian parses "x NEAR/y z" as three term\r
++ * phrases, "x", "near y", and "z", like we do. However, it\r
++ * behaves differently if the bad NEAR operator is at the end of\r
++ * the query, parsing "x NEAR/y" like "x NEAR y". */\r
++ if ((type == TOK_ADJ || type == TOK_NEAR) &&\r
++ strncasecmp (term, op, oplen) == 0 &&\r
++ term[oplen] == '/') {\r
++ /* Try to parse the distance argument */\r
++ char *end;\r
++ int distance = strtol (&term[oplen + 1], &end, 10);\r
++ if (distance && !*end) {\r
++ struct _notmuch_token *tok = lex_emit (s, type, term);\r
++ tok->distance = distance;\r
++ return true;\r
++ }\r
++ }\r
+ \r
+ return false;\r
+ }\r
+@@ -403,7 +427,9 @@ lex (const void *ctx, _notmuch_qparser_t *qparser, const char *query)\r
+ if (lex_operator (s, term, "and", TOK_AND) ||\r
+ lex_operator (s, term, "not", TOK_NOT) ||\r
+ lex_operator (s, term, "xor", TOK_XOR) ||\r
+- lex_operator (s, term, "or", TOK_OR))\r
++ lex_operator (s, term, "or", TOK_OR) ||\r
++ lex_operator (s, term, "adj", TOK_ADJ) ||\r
++ lex_operator (s, term, "near", TOK_NEAR))\r
+ continue;\r
+ \r
+ /* Must be a term */\r
+@@ -495,6 +521,8 @@ parse_prob (struct _notmuch_parse_state *s, int prec, _notmuch_token_t **tok)\r
+ case TOK_BRA:\r
+ case TOK_TERMS:\r
+ case TOK_LIT:\r
++ case TOK_ADJ:\r
++ case TOK_NEAR:\r
+ sub = parse_expr (s, prec + 1, tok);\r
+ add_to_query (s->ctx, &probs, TOK_AND, sub);\r
+ break;\r
+@@ -529,6 +557,37 @@ parse_prob (struct _notmuch_parse_state *s, int prec, _notmuch_token_t **tok)\r
+ }\r
+ \r
+ static _notmuch_token_t *\r
++parse_near (struct _notmuch_parse_state *s, int prec, _notmuch_token_t **tok)\r
++{\r
++ _notmuch_token_type first = (*tok)->type, conj = (*tok)->next->type;\r
++ _notmuch_token_t *root = parse_expr (s, prec + 1, tok);\r
++ _notmuch_token_t **tail = NULL;\r
++\r
++ /* XXX Xapian allows prefixed terms in near/adj. */\r
++ if (first != TOK_TERMS || !(conj == TOK_NEAR || conj == TOK_ADJ))\r
++ return root;\r
++\r
++ while ((*tok)->type == conj && (*tok)->next->type == TOK_TERMS) {\r
++ if (!tail) {\r
++ /* First operator. Create the list root. */\r
++ _notmuch_token_t *nroot =\r
++ _notmuch_token_create_op (s->ctx, conj, root, NULL);\r
++ root = nroot;\r
++ tail = &nroot->right;\r
++ }\r
++\r
++ /* Append the operator and term token to the list */\r
++ *tail = *tok;\r
++ *tok = (*tok)->next;\r
++ (*tail)->left = *tok;\r
++ *tok = (*tok)->next;\r
++ tail = &(*tail)->right;\r
++ }\r
++\r
++ return root;\r
++}\r
++\r
++static _notmuch_token_t *\r
+ parse_term (struct _notmuch_parse_state *s, int prec, _notmuch_token_t **tok)\r
+ {\r
+ _notmuch_token_t *sub;\r
+@@ -596,6 +655,8 @@ parse_expr (struct _notmuch_parse_state *s, int prec, _notmuch_token_t **tok)\r
+ return parse_prob (s, prec, tok);\r
+ }\r
+ if (bprec == 4)\r
++ return parse_near (s, prec, tok);\r
++ if (bprec == 5)\r
+ return parse_term (s, prec, tok);\r
+ \r
+ _notmuch_token_t *left = parse_expr (s, prec + 1, tok);\r
+@@ -756,6 +817,36 @@ generate (struct _notmuch_generate_state *s, _notmuch_token_t *root)\r
+ return Query (root->type == TOK_OR ? Query::OP_OR : Query::OP_XOR,\r
+ l, r);\r
+ \r
++ case TOK_ADJ:\r
++ case TOK_NEAR:\r
++ {\r
++ _notmuch_token_t *node;\r
++ int dist = -1;\r
++ char *terms = talloc_strdup (root, "");\r
++ /* Concatenate the operands and get the highest distance */\r
++ for (node = root; node; node = node->right) {\r
++ if (node->left->type != TOK_TERMS)\r
++ INTERNAL_ERROR ("Illegal token in NEAR/ADJ: %s",\r
++ _notmuch_token_show (s->ctx, node->left));\r
++ if (node->left->prefix)\r
++ INTERNAL_ERROR ("Prefixes not supported in NEAR/ADJ");\r
++\r
++ terms = talloc_asprintf_append (terms, "%s ", node->left->text);\r
++ if (node->distance > dist)\r
++ dist = node->distance;\r
++ }\r
++ /* The default distance is 10. */\r
++ if (dist == -1)\r
++ dist = 10;\r
++ /* Generate a PHRASE or NEAR query. If there are implicit\r
++ * phrases, they will be split out and treated like any other\r
++ * term in the operand list. */\r
++ op = root->type == TOK_ADJ ? Query::OP_PHRASE : Query::OP_NEAR;\r
++ l = generate_terms (s, terms, NULL, dist - 1, op);\r
++ talloc_free (terms);\r
++ return l;\r
++ }\r
++\r
+ case TOK_PREFIX:\r
+ return generate (s, root->left);\r
+ \r
+-- \r
+1.7.2.3\r
+\r