From 2ee034c6ced5514d91de7548910e6ca69dea9e69 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 16 Jun 2014 22:40:32 +2000 Subject: [PATCH] [PATCH 3/5] lib: Index name and address of from/to headers as a phrase --- 5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce | 162 ++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce diff --git a/5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce b/5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce new file mode 100644 index 000000000..a20543653 --- /dev/null +++ b/5d/85ace5ebc985ee4bd7d1a8e5bb1a53948e59ce @@ -0,0 +1,162 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 369E3431FAE + for ; Sun, 15 Jun 2014 19:41:18 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id tbQvUzeh4zlR for ; + Sun, 15 Jun 2014 19:41:12 -0700 (PDT) +Received: from dmz-mailsec-scanner-6.mit.edu (dmz-mailsec-scanner-6.mit.edu + [18.7.68.35]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id AD41E431FCF + for ; Sun, 15 Jun 2014 19:40:58 -0700 (PDT) +X-AuditID: 12074423-f79916d000000c54-2f-539e59391b2c +Received: from mailhub-auth-1.mit.edu ( [18.9.21.35]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-6.mit.edu (Symantec Messaging Gateway) with SMTP + id 9D.6D.03156.9395E935; Sun, 15 Jun 2014 22:40:57 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by mailhub-auth-1.mit.edu (8.13.8/8.9.2) with ESMTP id s5G2et1S005170; + Sun, 15 Jun 2014 22:40:56 -0400 +Received: from drake.dyndns.org + (216-15-114-40.c3-0.arl-ubr1.sbo-arl.ma.cable.rcn.com + [216.15.114.40]) (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s5G2es9g003414 + (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT); + Sun, 15 Jun 2014 22:40:55 -0400 +Received: from amthrax by drake.dyndns.org with local (Exim 4.77) + (envelope-from ) + id 1WwMqn-0004HF-Bb; Sun, 15 Jun 2014 22:40:49 -0400 +From: Austin Clements +To: notmuch@notmuchmail.org +Subject: [PATCH 3/5] lib: Index name and address of from/to headers as a + phrase +Date: Sun, 15 Jun 2014 22:40:32 -0400 +Message-Id: <1402886434-16169-4-git-send-email-amdragon@mit.edu> +X-Mailer: git-send-email 2.0.0.rc2 +In-Reply-To: <1402886434-16169-1-git-send-email-amdragon@mit.edu> +References: <1402886434-16169-1-git-send-email-amdragon@mit.edu> +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFtrIIsWRmVeSWpSXmKPExsUixCmqrGsZOS/YYNltU4vrN2cyOzB6PFt1 + izmAMYrLJiU1J7MstUjfLoEro7fxLEvBJJmKJfvTGhg/i3UxcnJICJhILJpygBHCFpO4cG89 + WxcjF4eQwGwmiYdPtjJCOBsZJbqP3GKCcO4wSSxYtxCqbC6jxL62VWD9bAIaEtv2LwezRQSk + JXbenc3axcjBwSygJvGnSwXEFBbwl1jWVgpSwSKgKvHy7WcWEJtXwEFix+Y1LBBXKEncuzIH + bAqngKPExNWn2EFsIaCaf2duME1g5F/AyLCKUTYlt0o3NzEzpzg1Wbc4OTEvL7VI10wvN7NE + LzWldBMjKGTYXZR3MP45qHSIUYCDUYmHd4LjvGAh1sSy4srcQ4ySHExKorzMICG+pPyUyozE + 4oz4otKc1OJDjBIczEoivPY+QDnelMTKqtSifJiUNAeLkjjvW2urYCGB9MSS1OzU1ILUIpis + DAeHkgTvgXCgRsGi1PTUirTMnBKENBMHJ8hwHqDh/0BqeIsLEnOLM9Mh8qcYFaXEeVeFASUE + QBIZpXlwvbCYfsUoDvSKMO93kHYeYDqA634FNJgJaPDNxbNABpckIqSkGhibHD2K0/TCz819 + GL4gmnlr5ZkQgW+uVspudcFfnHsN4qyFzmeEpr5K40vad6y9Zocl77rP7w3/LP2bP8Py9es/ + jDl7XMIkpFdLrdR+Y6W46LpB8f8Jfld0H3n8PcCv7K/1Yqa49fNfrmqH7koItwj9EtfmfuSg + wD5LsGbDZ8EvPUe3FF+4fliJpTgj0VCLuag4EQBN9Pk0xAIAAA== +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Mon, 16 Jun 2014 02:41:18 -0000 + +Previously, we indexed the name and address parts of from/to headers +with two calls to _notmuch_message_gen_terms. In general, this +indicates that these parts are separate phrases. However, because of +an implementation quirk, the two calls to _notmuch_message_gen_terms +generated adjacent term positions for the prefixed terms, which +happens to be the right thing to do in this case, but the wrong thing +to do for all other calls. Furthermore, _notmuch_message_gen_terms +produced potentially overlapping term positions for the un-prefixed +copies of the terms, which is simply wrong. + +This change indexes both the name and address in a single call to +_notmuch_message_gen_terms, indicating that they should be part of a +single phrase. This masks the problem with the un-prefixed terms +(fixing the two known-broken tests) and puts us in a position to fix +the unintentionally phrases generated by other calls to +_notmuch_message_gen_terms. +--- + lib/index.cc | 24 ++++++++++-------------- + test/T080-search.sh | 2 -- + 2 files changed, 10 insertions(+), 16 deletions(-) + +diff --git a/lib/index.cc b/lib/index.cc +index e1e2a38..1a2e63d 100644 +--- a/lib/index.cc ++++ b/lib/index.cc +@@ -231,26 +231,22 @@ _index_address_mailbox (notmuch_message_t *message, + InternetAddress *address) + { + InternetAddressMailbox *mailbox = INTERNET_ADDRESS_MAILBOX (address); +- const char *name, *addr; ++ const char *name, *addr, *combined; + void *local = talloc_new (message); + + name = internet_address_get_name (address); + addr = internet_address_mailbox_get_addr (mailbox); + +- /* In the absence of a name, we'll strip the part before the @ +- * from the address. */ +- if (! name) { +- const char *at; ++ /* Combine the name and address and index them as a phrase. */ ++ if (name && addr) ++ combined = talloc_asprintf (local, "%s %s", name, addr); ++ else if (name) ++ combined = name; ++ else ++ combined = addr; + +- at = strchr (addr, '@'); +- if (at) +- name = talloc_strndup (local, addr, at - addr); +- } +- +- if (name) +- _notmuch_message_gen_terms (message, prefix_name, name); +- if (addr) +- _notmuch_message_gen_terms (message, prefix_name, addr); ++ if (combined) ++ _notmuch_message_gen_terms (message, prefix_name, combined); + + talloc_free (local); + } +diff --git a/test/T080-search.sh b/test/T080-search.sh +index 8ed5701..b63bf02 100755 +--- a/test/T080-search.sh ++++ b/test/T080-search.sh +@@ -67,7 +67,6 @@ output=$(notmuch search 'from:"Search By From Name "' | notmuc + test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" + + test_begin_subtest "Search by from: without prefix (name and address)" +-test_subtest_known_broken + output=$(notmuch search '"Search By From Name "' | notmuch_search_sanitize) + test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Search By From Name; search by from (name) (inbox unread)" + +@@ -86,7 +85,6 @@ output=$(notmuch search 'to:"Search By To Name "' | notmuch_se + test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" + + test_begin_subtest "Search by to: without prefix (name and adress)" +-test_subtest_known_broken + output=$(notmuch search '"Search By To Name "' | notmuch_search_sanitize) + test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; search by to (name) (inbox unread)" + +-- +2.0.0.rc2 + -- 2.26.2