[PATCH 5/5] lib: Separate all phrases indexed by _notmuch_message_gen_terms
authorAustin Clements <amdragon@MIT.EDU>
Mon, 16 Jun 2014 02:40:34 +0000 (22:40 -0400)
committerW. Trevor King <wking@tremily.us>
Fri, 7 Nov 2014 18:03:14 +0000 (10:03 -0800)
6c/0eb84f39d0970e9d352d5f32291d07337ac85e [new file with mode: 0644]

diff --git a/6c/0eb84f39d0970e9d352d5f32291d07337ac85e b/6c/0eb84f39d0970e9d352d5f32291d07337ac85e
new file mode 100644 (file)
index 0000000..69de76d
--- /dev/null
@@ -0,0 +1,141 @@
+Return-Path: <amdragon@mit.edu>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+       by olra.theworths.org (Postfix) with ESMTP id 0F260431FAE\r
+       for <notmuch@notmuchmail.org>; Sun, 15 Jun 2014 19:41:23 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: -0.7\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5\r
+       tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+       by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+       with ESMTP id gUraBrmZ9Xqv for <notmuch@notmuchmail.org>;\r
+       Sun, 15 Jun 2014 19:41:14 -0700 (PDT)\r
+Received: from dmz-mailsec-scanner-6.mit.edu (dmz-mailsec-scanner-6.mit.edu\r
+       [18.7.68.35])\r
+       (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))\r
+       (No client certificate requested)\r
+       by olra.theworths.org (Postfix) with ESMTPS id D0639431FC0\r
+       for <notmuch@notmuchmail.org>; Sun, 15 Jun 2014 19:41:03 -0700 (PDT)\r
+X-AuditID: 12074423-f79916d000000c54-39-539e593f6636\r
+Received: from mailhub-auth-3.mit.edu ( [18.9.21.43])\r
+       (using TLS with cipher AES256-SHA (256/256 bits))\r
+       (Client did not present a certificate)\r
+       by dmz-mailsec-scanner-6.mit.edu (Symantec Messaging Gateway) with SMTP\r
+       id 2E.6D.03156.F395E935; Sun, 15 Jun 2014 22:41:03 -0400 (EDT)\r
+Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11])\r
+       by mailhub-auth-3.mit.edu (8.13.8/8.9.2) with ESMTP id s5G2exmY013506; \r
+       Sun, 15 Jun 2014 22:40:59 -0400\r
+Received: from drake.dyndns.org\r
+       (216-15-114-40.c3-0.arl-ubr1.sbo-arl.ma.cable.rcn.com\r
+       [216.15.114.40]) (authenticated bits=0)\r
+       (User authenticated as amdragon@ATHENA.MIT.EDU)\r
+       by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s5G2esLr003416\r
+       (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT);\r
+       Sun, 15 Jun 2014 22:40:59 -0400\r
+Received: from amthrax by drake.dyndns.org with local (Exim 4.77)\r
+       (envelope-from <amdragon@mit.edu>)\r
+       id 1WwMqs-0004HK-Ff; Sun, 15 Jun 2014 22:40:54 -0400\r
+From: Austin Clements <amdragon@MIT.EDU>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH 5/5] lib: Separate all phrases indexed by\r
+       _notmuch_message_gen_terms\r
+Date: Sun, 15 Jun 2014 22:40:34 -0400\r
+Message-Id: <1402886434-16169-6-git-send-email-amdragon@mit.edu>\r
+X-Mailer: git-send-email 2.0.0.rc2\r
+In-Reply-To: <1402886434-16169-1-git-send-email-amdragon@mit.edu>\r
+References: <1402886434-16169-1-git-send-email-amdragon@mit.edu>\r
+X-Brightmail-Tracker:\r
+ H4sIAAAAAAAAA+NgFtrLIsWRmVeSWpSXmKPExsUixCmqrWsfOS/Y4OpSUYvrN2cyOzB6PFt1\r
+       izmAMYrLJiU1J7MstUjfLoErY3fzNeaCs4IVt2cvYG1gXM/XxcjJISFgInF/Xw8bhC0mceHe\r
+       ejBbSGA2k8Sid0pdjFxA9kZGifff21kgnDtMEt9+n2WEcOYySmzvbWIEaWET0JDYtn85mC0i\r
+       IC2x8+5s1i5GDg5mATWJP10qIKawQIjE/M16IBUsAqoSbx78ZQaxeQUcJObdPsQCcYSSxL0r\r
+       c8CmcAo4SkxcfYod4iAHiX9nbjBNYORfwMiwilE2JbdKNzcxM6c4NVm3ODkxLy+1SNdMLzez\r
+       RC81pXQTIyhk2F2UdzD+Oah0iFGAg1GJh3eC47xgIdbEsuLK3EOMkhxMSqK8zCAhvqT8lMqM\r
+       xOKM+KLSnNTiQ4wSHMxKIrz2PkA53pTEyqrUonyYlDQHi5I471trq2AhgfTEktTs1NSC1CKY\r
+       rAwHh5IEL0MEUKNgUWp6akVaZk4JQpqJgxNkOA/QcEuQGt7igsTc4sx0iPwpRl2OCzNPtTEJ\r
+       seTl56VKifNyghQJgBRllObBzYHF+itGcaC3hHlNQap4gGkCbtIroCVMQEtuLp4FsqQkESEl\r
+       1cAofk8z7svPFTt9nyx4enFt7OLzp667pzyMWtAWy5n0rdNz2d5paqtnbG26uVHO6dWFHSV+\r
+       nkespvTcOuPr7ix+vWz+LL8Jy3i1Vi56c+dNb+DM1a5W5ucfbr91/sIrV8aICaclfPt6Nzj9\r
+       4kzf/UdPY4pv2JEJKe9bNLt/d7k/z5FN5L82b4rCPyWW4oxEQy3mouJEAIezBWfQAgAA\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+       <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Mon, 16 Jun 2014 02:41:23 -0000\r
+\r
+This adds a 100 termpos gap between all phrases indexed by\r
+_notmuch_message_gen_terms.  This fixes a bug where terms from the end\r
+of one header and the beginning of another header could match together\r
+in a single phrase and a separate bug where term positions of\r
+un-prefixed terms overlapped.\r
+\r
+This fix only affects newly indexed messages.  Messages that are\r
+already indexed won't benefit from this fix without re-indexing, but\r
+the fix won't make things any worse for existing messages.\r
+---\r
+ lib/message.cc      | 9 +++++++--\r
+ test/T080-search.sh | 2 --\r
+ 2 files changed, 7 insertions(+), 4 deletions(-)\r
+\r
+diff --git a/lib/message.cc b/lib/message.cc\r
+index 9243b76..d0b7351 100644\r
+--- a/lib/message.cc\r
++++ b/lib/message.cc\r
+@@ -1023,16 +1023,21 @@ _notmuch_message_gen_terms (notmuch_message_t *message,\r
+       return NOTMUCH_PRIVATE_STATUS_NULL_POINTER;\r
\r
+     term_gen->set_document (message->doc);\r
+-    term_gen->set_termpos (message->termpos);\r
\r
+     if (prefix_name) {\r
+       const char *prefix = _find_prefix (prefix_name);\r
\r
++      term_gen->set_termpos (message->termpos);\r
+       term_gen->index_text (text, 1, prefix);\r
+-      message->termpos = term_gen->get_termpos ();\r
+      /* Create a gap between this and the next terms so they don't\r
++       * appear to be a phrase. */\r
++      message->termpos = term_gen->get_termpos () + 100;\r
+     }\r
\r
++    term_gen->set_termpos (message->termpos);\r
+     term_gen->index_text (text);\r
++    /* Create a term gap, as above. */\r
++    message->termpos = term_gen->get_termpos () + 100;\r
\r
+     return NOTMUCH_PRIVATE_STATUS_SUCCESS;\r
+ }\r
+diff --git a/test/T080-search.sh b/test/T080-search.sh\r
+index 3f6b565..05027fb 100755\r
+--- a/test/T080-search.sh\r
++++ b/test/T080-search.sh\r
+@@ -170,14 +170,12 @@ EOF\r
+ notmuch new > /dev/null\r
\r
+ test_begin_subtest "headers do not have adjacent term positions"\r
+-test_subtest_known_broken\r
+ # Regression test for a bug where term positions for non-prefixed\r
+ # terms weren't updated\r
+ output=$(notmuch search id:termpos and '"com dest"')\r
+ test_expect_equal "$output" ""\r
\r
+ test_begin_subtest "parts have non-overlapping term positions"\r
+-test_subtest_known_broken\r
+ output=$(notmuch search id:termpos and '"a y c"')\r
+ test_expect_equal "$output" ""\r
\r
+-- \r
+2.0.0.rc2\r
+\r