[PATCH 3/5] lib: Index name and address of from/to headers as a phrase
author Austin Clements <amdragon@MIT.EDU>
Mon, 16 Jun 2014 02:40:32 +0000 (22:40 -0400)
committer W. Trevor King <wking@tremily.us>
Fri, 7 Nov 2014 18:03:13 +0000 (10:03 -0800)
5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce [new file with mode: 0644]

diff --git a/5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce b/5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce
new file mode 100644 (file)
index 0000000..a205436
--- /dev/null
@@ -0,0 +1,162 @@
+Return-Path: <amdragon@mit.edu>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+       by olra.theworths.org (Postfix) with ESMTP id 369E3431FAE\r
+       for <notmuch@notmuchmail.org>; Sun, 15 Jun 2014 19:41:18 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: -0.7\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5\r
+       tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+       by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+       with ESMTP id tbQvUzeh4zlR for <notmuch@notmuchmail.org>;\r
+       Sun, 15 Jun 2014 19:41:12 -0700 (PDT)\r
+Received: from dmz-mailsec-scanner-6.mit.edu (dmz-mailsec-scanner-6.mit.edu\r
+       [18.7.68.35])\r
+       (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))\r
+       (No client certificate requested)\r
+       by olra.theworths.org (Postfix) with ESMTPS id AD41E431FCF\r
+       for <notmuch@notmuchmail.org>; Sun, 15 Jun 2014 19:40:58 -0700 (PDT)\r
+X-AuditID: 12074423-f79916d000000c54-2f-539e59391b2c\r
+Received: from mailhub-auth-1.mit.edu ( [18.9.21.35])\r
+       (using TLS with cipher AES256-SHA (256/256 bits))\r
+       (Client did not present a certificate)\r
+       by dmz-mailsec-scanner-6.mit.edu (Symantec Messaging Gateway) with SMTP\r
+       id 9D.6D.03156.9395E935; Sun, 15 Jun 2014 22:40:57 -0400 (EDT)\r
+Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11])\r
+       by mailhub-auth-1.mit.edu (8.13.8/8.9.2) with ESMTP id s5G2et1S005170; \r
+       Sun, 15 Jun 2014 22:40:56 -0400\r
+Received: from drake.dyndns.org\r
+       (216-15-114-40.c3-0.arl-ubr1.sbo-arl.ma.cable.rcn.com\r
+       [216.15.114.40]) (authenticated bits=0)\r
+       (User authenticated as amdragon@ATHENA.MIT.EDU)\r
+       by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s5G2es9g003414\r
+       (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT);\r
+       Sun, 15 Jun 2014 22:40:55 -0400\r
+Received: from amthrax by drake.dyndns.org with local (Exim 4.77)\r
+       (envelope-from <amdragon@mit.edu>)\r
+       id 1WwMqn-0004HF-Bb; Sun, 15 Jun 2014 22:40:49 -0400\r
+From: Austin Clements <amdragon@MIT.EDU>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH 3/5] lib: Index name and address of from/to headers as a\r
+ phrase\r
+Date: Sun, 15 Jun 2014 22:40:32 -0400\r
+Message-Id: <1402886434-16169-4-git-send-email-amdragon@mit.edu>\r
+X-Mailer: git-send-email 2.0.0.rc2\r
+In-Reply-To: <1402886434-16169-1-git-send-email-amdragon@mit.edu>\r
+References: <1402886434-16169-1-git-send-email-amdragon@mit.edu>\r
+X-Brightmail-Tracker:\r
+ H4sIAAAAAAAAA+NgFtrIIsWRmVeSWpSXmKPExsUixCmqrGsZOS/YYNltU4vrN2cyOzB6PFt1\r
+       izmAMYrLJiU1J7MstUjfLoEro7fxLEvBJJmKJfvTGhg/i3UxcnJICJhILJpygBHCFpO4cG89\r
+       WxcjF4eQwGwmiYdPtjJCOBsZJbqP3GKCcO4wSSxYtxCqbC6jxL62VWD9bAIaEtv2LwezRQSk\r
+       JXbenc3axcjBwSygJvGnSwXEFBbwl1jWVgpSwSKgKvHy7WcWEJtXwEFix+Y1LBBXKEncuzIH\r
+       bAqngKPExNWn2EFsIaCaf2duME1g5F/AyLCKUTYlt0o3NzEzpzg1Wbc4OTEvL7VI10wvN7NE\r
+       LzWldBMjKGTYXZR3MP45qHSIUYCDUYmHd4LjvGAh1sSy4srcQ4ySHExKorzMICG+pPyUyozE\r
+       4oz4otKc1OJDjBIczEoivPY+QDnelMTKqtSifJiUNAeLkjjvW2urYCGB9MSS1OzU1ILUIpis\r
+       DAeHkgTvgXCgRsGi1PTUirTMnBKENBMHJ8hwHqDh/0BqeIsLEnOLM9Mh8qcYFaXEeVeFASUE\r
+       QBIZpXlwvbCYfsUoDvSKMO93kHYeYDqA634FNJgJaPDNxbNABpckIqSkGhibHD2K0/TCz819\r
+       GL4gmnlr5ZkQgW+uVspudcFfnHsN4qyFzmeEpr5K40vad6y9Zocl77rP7w3/LP2bP8Py9es/\r
+       jDl7XMIkpFdLrdR+Y6W46LpB8f8Jfld0H3n8PcCv7K/1Yqa49fNfrmqH7koItwj9EtfmfuSg\r
+       wD5LsGbDZ8EvPUe3FF+4fliJpTgj0VCLuag4EQBN9Pk0xAIAAA==\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+       <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Mon, 16 Jun 2014 02:41:18 -0000\r
+\r
+Previously, we indexed the name and address parts of from/to headers\r
+with two calls to _notmuch_message_gen_terms.  In general, this\r
+indicates that these parts are separate phrases.  However, because of\r
+an implementation quirk, the two calls to _notmuch_message_gen_terms\r
+generated adjacent term positions for the prefixed terms, which\r
+happens to be the right thing to do in this case, but the wrong thing\r
+to do for all other calls.  Furthermore, _notmuch_message_gen_terms\r
+produced potentially overlapping term positions for the un-prefixed\r
+copies of the terms, which is simply wrong.\r
+\r
+This change indexes both the name and address in a single call to\r
+_notmuch_message_gen_terms, indicating that they should be part of a\r
+single phrase.  This masks the problem with the un-prefixed terms\r
+(fixing the two known-broken tests) and puts us in a position to fix\r
+the unintentional phrases generated by other calls to\r
+_notmuch_message_gen_terms.\r
+---\r
+ lib/index.cc        | 24 ++++++++++--------------\r
+ test/T080-search.sh |  2 --\r
+ 2 files changed, 10 insertions(+), 16 deletions(-)\r
+\r
+diff --git a/lib/index.cc b/lib/index.cc\r
+index e1e2a38..1a2e63d 100644\r
+--- a/lib/index.cc\r
++++ b/lib/index.cc\r
+@@ -231,26 +231,22 @@ _index_address_mailbox (notmuch_message_t *message,\r
+                       InternetAddress *address)\r
+ {\r
+     InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address);\r
+-    const char *name, *addr;\r
++    const char *name, *addr, *combined;\r
+     void *local = talloc_new (message);\r
\r
+     name = internet_address_get_name (address);\r
+     addr = internet_address_mailbox_get_addr (mailbox);\r
\r
+-    /* In the absence of a name, we'll strip the part before the @\r
+-     * from the address. */\r
+-    if (! name) {\r
+-      const char *at;\r
++    /* Combine the name and address and index them as a phrase. */\r
++    if (name && addr)\r
++      combined = talloc_asprintf (local, "%s %s", name, addr);\r
++    else if (name)\r
++      combined = name;\r
++    else\r
++      combined = addr;\r
\r
+-      at = strchr (addr, '@');\r
+-      if (at)\r
+-          name = talloc_strndup (local, addr, at - addr);\r
+-    }\r
+-\r
+-    if (name)\r
+-      _notmuch_message_gen_terms (message, prefix_name, name);\r
+-    if (addr)\r
+-      _notmuch_message_gen_terms (message, prefix_name, addr);\r
++    if (combined)\r
++      _notmuch_message_gen_terms (message, prefix_name, combined);\r
\r
+     talloc_free (local);\r
+ }\r
+diff --git a/test/T080-search.sh b/test/T080-search.sh\r
+index 8ed5701..b63bf02 100755\r
+--- a/test/T080-search.sh\r
++++ b/test/T080-search.sh\r
+@@ -67,7 +67,6 @@ output=$(notmuch search 'from:"Search By From Name <test@example.com>"' | notmuc\r
+ test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"\r
\r
+ test_begin_subtest "Search by from: without prefix (name and address)"\r
+-test_subtest_known_broken\r
+ output=$(notmuch search '"Search By From Name <test@example.com>"' | notmuch_search_sanitize)\r
+ test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)"\r
\r
+@@ -86,7 +85,6 @@ output=$(notmuch search 'to:"Search By To Name <test@example.com>"' | notmuch_se\r
+ test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"\r
\r
+ test_begin_subtest "Search by to: without prefix (name and adress)"\r
+-test_subtest_known_broken\r
+ output=$(notmuch search '"Search By To Name <test@example.com>"' | notmuch_search_sanitize)\r
+ test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)"\r
\r
+-- \r
+2.0.0.rc2\r
+\r