From 9baf5c4b9181b6e064156cd4ee255ea2e58824cd Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 16 Jun 2014 22:40:34 +2000 Subject: [PATCH] [PATCH 5/5] lib: Separate all phrases indexed by _notmuch_message_gen_terms --- 6c/0eb84f39d0970e9d352d5f32291d07337ac85e | 141 ++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 6c/0eb84f39d0970e9d352d5f32291d07337ac85e diff --git a/6c/0eb84f39d0970e9d352d5f32291d07337ac85e b/6c/0eb84f39d0970e9d352d5f32291d07337ac85e new file mode 100644 index 000000000..69de76df2 --- /dev/null +++ b/6c/0eb84f39d0970e9d352d5f32291d07337ac85e @@ -0,0 +1,141 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 0F260431FAE + for ; Sun, 15 Jun 2014 19:41:23 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id gUraBrmZ9Xqv for ; + Sun, 15 Jun 2014 19:41:14 -0700 (PDT) +Received: from dmz-mailsec-scanner-6.mit.edu (dmz-mailsec-scanner-6.mit.edu + [18.7.68.35]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id D0639431FC0 + for ; Sun, 15 Jun 2014 19:41:03 -0700 (PDT) +X-AuditID: 12074423-f79916d000000c54-39-539e593f6636 +Received: from mailhub-auth-3.mit.edu ( [18.9.21.43]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-6.mit.edu (Symantec Messaging Gateway) with SMTP + id 2E.6D.03156.F395E935; Sun, 15 Jun 2014 22:41:03 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by mailhub-auth-3.mit.edu (8.13.8/8.9.2) with ESMTP id s5G2exmY013506; + Sun, 15 Jun 2014 22:40:59 -0400 +Received: from drake.dyndns.org + (216-15-114-40.c3-0.arl-ubr1.sbo-arl.ma.cable.rcn.com + [216.15.114.40]) (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s5G2esLr003416 + (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT); + Sun, 15 Jun 2014 22:40:59 -0400 +Received: from amthrax by drake.dyndns.org with local (Exim 4.77) + (envelope-from ) + id 1WwMqs-0004HK-Ff; Sun, 15 Jun 2014 22:40:54 -0400 +From: Austin Clements +To: notmuch@notmuchmail.org +Subject: [PATCH 5/5] lib: Separate all phrases indexed by + _notmuch_message_gen_terms +Date: Sun, 15 Jun 2014 22:40:34 -0400 +Message-Id: <1402886434-16169-6-git-send-email-amdragon@mit.edu> +X-Mailer: git-send-email 2.0.0.rc2 +In-Reply-To: <1402886434-16169-1-git-send-email-amdragon@mit.edu> +References: <1402886434-16169-1-git-send-email-amdragon@mit.edu> +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFtrLIsWRmVeSWpSXmKPExsUixCmqrWsfOS/Y4OpSUYvrN2cyOzB6PFt1 + izmAMYrLJiU1J7MstUjfLoErY3fzNeaCs4IVt2cvYG1gXM/XxcjJISFgInF/Xw8bhC0mceHe + ejBbSGA2k8Sid0pdjFxA9kZGifff21kgnDtMEt9+n2WEcOYySmzvbWIEaWET0JDYtn85mC0i + IC2x8+5s1i5GDg5mATWJP10qIKawQIjE/M16IBUsAqoSbx78ZQaxeQUcJObdPsQCcYSSxL0r + c8CmcAo4SkxcfYod4iAHiX9nbjBNYORfwMiwilE2JbdKNzcxM6c4NVm3ODkxLy+1SNdMLzez + RC81pXQTIyhk2F2UdzD+Oah0iFGAg1GJh3eC47xgIdbEsuLK3EOMkhxMSqK8zCAhvqT8lMqM + xOKM+KLSnNTiQ4wSHMxKIrz2PkA53pTEyqrUonyYlDQHi5I471trq2AhgfTEktTs1NSC1CKY + rAwHh5IEL0MEUKNgUWp6akVaZk4JQpqJgxNkOA/QcEuQGt7igsTc4sx0iPwpRl2OCzNPtTEJ + seTl56VKifNyghQJgBRllObBzYHF+itGcaC3hHlNQap4gGkCbtIroCVMQEtuLp4FsqQkESEl + 1cAofk8z7svPFTt9nyx4enFt7OLzp667pzyMWtAWy5n0rdNz2d5paqtnbG26uVHO6dWFHSV+ + nkespvTcOuPr7ix+vWz+LL8Jy3i1Vi56c+dNb+DM1a5W5ucfbr91/sIrV8aICaclfPt6Nzj9 + 4kzf/UdPY4pv2JEJKe9bNLt/d7k/z5FN5L82b4rCPyWW4oxEQy3mouJEAIezBWfQAgAA +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Mon, 16 Jun 2014 02:41:23 -0000 + +This adds a 100 termpos gap between all phrases indexed by +_notmuch_message_gen_terms. This fixes a bug where terms from the end +of one header and the beginning of another header could match together +in a single phrase and a separate bug where term positions of +un-prefixed terms overlapped. + +This fix only affects newly indexed messages. Messages that are +already indexed won't benefit from this fix without re-indexing, but +the fix won't make things any worse for existing messages. +--- + lib/message.cc | 9 +++++++-- + test/T080-search.sh | 2 -- + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/lib/message.cc b/lib/message.cc +index 9243b76..d0b7351 100644 +--- a/lib/message.cc ++++ b/lib/message.cc +@@ -1023,16 +1023,21 @@ _notmuch_message_gen_terms (notmuch_message_t *message, + return NOTMUCH_PRIVATE_STATUS_NULL_POINTER; + + term_gen->set_document (message->doc); +- term_gen->set_termpos (message->termpos); + + if (prefix_name) { + const char *prefix = _find_prefix (prefix_name); + ++ term_gen->set_termpos (message->termpos); + term_gen->index_text (text, 1, prefix); +- message->termpos = term_gen->get_termpos (); ++ /* Create a gap between this an the next terms so they don't ++ * appear to be a phrase. */ ++ message->termpos = term_gen->get_termpos () + 100; + } + ++ term_gen->set_termpos (message->termpos); + term_gen->index_text (text); ++ /* Create a term gap, as above. */ ++ message->termpos = term_gen->get_termpos () + 100; + + return NOTMUCH_PRIVATE_STATUS_SUCCESS; + } +diff --git a/test/T080-search.sh b/test/T080-search.sh +index 3f6b565..05027fb 100755 +--- a/test/T080-search.sh ++++ b/test/T080-search.sh +@@ -170,14 +170,12 @@ EOF + notmuch new > /dev/null + + test_begin_subtest "headers do not have adjacent term positions" +-test_subtest_known_broken + # Regression test for a bug where term positions for non-prefixed + # terms weren't updated + output=$(notmuch search id:termpos and '"com dest"') + test_expect_equal "$output" "" + + test_begin_subtest "parts have non-overlapping term positions" +-test_subtest_known_broken + output=$(notmuch search id:termpos and '"a y c"') + test_expect_equal "$output" "" + +-- +2.0.0.rc2 + -- 2.26.2