1 Return-Path: <hohndel@gr8dns.org>
\r
2 X-Original-To: notmuch@notmuchmail.org
\r
3 Delivered-To: notmuch@notmuchmail.org
\r
4 Received: from localhost (localhost [127.0.0.1])
\r
5 by olra.theworths.org (Postfix) with ESMTP id D3E2E418C36
\r
6 for <notmuch@notmuchmail.org>; Fri, 16 Apr 2010 13:51:57 -0700 (PDT)
\r
7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org
\r
11 X-Spam-Status: No, score=-0.5 tagged_above=-999 required=5
\r
12 tests=[BAYES_05=-0.5] autolearn=ham
\r
13 Received: from olra.theworths.org ([127.0.0.1])
\r
14 by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)
\r
15 with ESMTP id hCiLgBHZ4cHg for <notmuch@notmuchmail.org>;
\r
16 Fri, 16 Apr 2010 13:51:55 -0700 (PDT)
\r
17 Received: from mail.hohndel.org (mail.hohndel.org [65.23.157.147])
\r
18 by olra.theworths.org (Postfix) with ESMTP id 5FFF54196F0
\r
19 for <notmuch@notmuchmail.org>; Fri, 16 Apr 2010 13:51:55 -0700 (PDT)
\r
20 Received: by mail.hohndel.org (Postfix, from userid 112)
\r
21 id EA728340FD; Fri, 16 Apr 2010 16:51:54 -0400 (EDT)
\r
22 Received: from x200.gr8dns.org (unknown [65.23.157.147])
\r
23 by mail.hohndel.org (Postfix) with ESMTP id 21343340FC;
\r
24 Fri, 16 Apr 2010 16:51:53 -0400 (EDT)
\r
25 Received: by x200.gr8dns.org (Postfix, from userid 500)
\r
26 id BAC81C0163; Fri, 16 Apr 2010 13:51:52 -0700 (PDT)
\r
27 From: Dirk Hohndel <hohndel@infradead.org>
\r
28 To: <notmuch@notmuchmail.org>
\r
29 Subject: [PATCH 2/2] Improve heuristic for guessing best from address in
\r
31 Date: Fri, 16 Apr 2010 13:51:42 -0700
\r
32 Message-Id: <1271451102-11336-3-git-send-email-hohndel@infradead.org>
\r
33 X-Mailer: git-send-email 1.6.6.1
\r
34 In-Reply-To: <1271451102-11336-2-git-send-email-hohndel@infradead.org>
\r
35 References: <1271451102-11336-1-git-send-email-hohndel@infradead.org>
\r
36 <1271451102-11336-2-git-send-email-hohndel@infradead.org>
\r
37 X-BeenThere: notmuch@notmuchmail.org
\r
38 X-Mailman-Version: 2.1.13
\r
40 List-Id: "Use and development of the notmuch mail system."
\r
41 <notmuch.notmuchmail.org>
\r
42 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,
\r
43 <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>
\r
44 List-Archive: <http://notmuchmail.org/pipermail/notmuch>
\r
45 List-Post: <mailto:notmuch@notmuchmail.org>
\r
46 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>
\r
47 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,
\r
48 <mailto:notmuch-request@notmuchmail.org?subject=subscribe>
\r
49 X-List-Received-Date: Fri, 16 Apr 2010 20:51:58 -0000
\r
51 We now look at Envelope-To: and X-Original-To: headers.
\r
52 Then concat all of the Received headers and walk through them to find
\r
53 either a "for email@add.res" clause or a host in a known domain.
\r
55 This should deal with most of the fetchmail and mail hoster induced
\r
56 pain (and failure) of the old heuristic.
\r
58 Signed-off-by: Dirk Hohndel <hohndel@infradead.org>
\r
60 notmuch-reply.c | 125 +++++++++++++++++++++++++++++++++++++++++--------------
\r
61 1 files changed, 94 insertions(+), 31 deletions(-)
\r
63 diff --git a/notmuch-reply.c b/notmuch-reply.c
\r
64 index 230cacc..78d3914 100644
\r
65 --- a/notmuch-reply.c
\r
66 +++ b/notmuch-reply.c
\r
67 @@ -305,33 +305,95 @@ add_recipients_from_message (GMimeMessage *reply,
\r
69 guess_from_received_header (notmuch_config_t *config, notmuch_message_t *message)
\r
71 - const char *received,*primary;
\r
73 - char *by,*mta,*ptr,*token;
\r
74 + const char *received,*primary,*by;
\r
75 + char **other,*tohdr;
\r
76 + char *mta,*ptr,*token;
\r
79 const char *delim=". \t";
\r
82 - received = notmuch_message_get_header (message, "received");
\r
83 - by = strstr (received, " by ");
\r
84 - if (by && *(by+4)) {
\r
85 - /* sadly, the format of Received: headers is a bit inconsistent,
\r
86 - * depending on the MTA used. So we try to extract just the MTA
\r
87 - * here by removing leading whitespace and assuming that the MTA
\r
88 - * name ends at the next whitespace
\r
89 - * we test for *(by+4) to be non-'\0' to make sure there's something
\r
90 - * there at all - and then assume that the first whitespace delimited
\r
91 - * token that follows is the last receiving server
\r
92 + const char *to_headers[] = {"Envelope-to", "X-Original-To"};
\r
94 + primary = notmuch_config_get_user_primary_email (config);
\r
95 + other = notmuch_config_get_user_other_email (config, &other_len);
\r
97 + /* sadly, there is no standard way to find out to which email
\r
98 + * address a mail was delivered - what is in the headers depends
\r
99 + * on the MTAs used along the way. So we are trying a number of
\r
100 + * heuristics which hopefully will answer this question.
\r
102 + * We only got here if none of the user's email addresses are in
\r
103 + * the To: or Cc: header. From here we try the following in order:
\r
104 + * 1) check for an Envelope-to: header
\r
105 + * 2) check for an X-Original-To: header
\r
106 + * 3) check for a (for <email@add.res>) clause in Received: headers
\r
107 + * 4) check for the domain part of known email addresses in the
\r
108 + * 'by' part of Received headers
\r
109 + * If none of these work, we give up and return NULL
\r
111 + for (i = 0; i < sizeof(to_headers)/sizeof(*to_headers); i++) {
\r
112 + tohdr = xstrdup(notmuch_message_get_header (message, to_headers[i]));
\r
113 + if (tohdr && *tohdr) {
\r
114 + /* tohdr is potentially a list of email addresses, so here we
\r
115 + * check if one of the email addresses is a substring of tohdr
\r
117 + if (strcasestr(tohdr, primary)) {
\r
121 + for (i = 0; i < other_len; i++)
\r
122 + if (strcasestr (tohdr, other[i])) {
\r
130 + /* We get the concatenated Received: headers and search from the
\r
131 + * front (last Received: header added) and try to extract from
\r
132 + * them indications to which email address this message was
\r
135 + received = notmuch_message_get_concat_header (message, "received");
\r
136 + /* First we look for a " for <email@add.res>" in the received
\r
139 + ptr = strstr (received, " for ");
\r
141 + /* the text following is potentially a list of email addresses,
\r
142 + * so again we check if one of the email addresses is a
\r
143 + * substring of ptr
\r
145 - mta = strdup (by+4);
\r
148 + if (strcasestr(ptr, primary)) {
\r
151 + for (i = 0; i < other_len; i++)
\r
152 + if (strcasestr (ptr, other[i])) {
\r
156 + /* Finally, we parse all the " by MTA ..." headers to guess the
\r
157 + * email address that this was originally delivered to.
\r
158 + * We extract just the MTA here by removing leading whitespace and
\r
159 + * assuming that the MTA name ends at the next whitespace.
\r
160 + * We test for *(by+4) to be non-'\0' to make sure there's
\r
161 + * something there at all - and then assume that the first
\r
162 + * whitespace delimited token that follows is the receiving
\r
163 + * system in this step of the receive chain
\r
166 + while((by = strstr (by, " by ")) != NULL) {
\r
170 + mta = xstrdup (by);
\r
171 token = strtok(mta," \t");
\r
175 /* Now extract the last two components of the MTA host name
\r
176 - * as domain and tld
\r
177 + * as domain and tld.
\r
179 while ((ptr = strsep (&token, delim)) != NULL) {
\r
181 @@ -341,23 +403,24 @@ guess_from_received_header (notmuch_config_t *config, notmuch_message_t *message
\r
185 - /* recombine domain and tld and look for it among the configured
\r
186 - * email addresses
\r
187 + /* Recombine domain and tld and look for it among the configured
\r
188 + * email addresses.
\r
189 + * This time we have a known domain name and nothing else - so
\r
190 + * the test is the other way around: we check if this is a
\r
191 + * substring of one of the email addresses.
\r
194 - primary = notmuch_config_get_user_primary_email (config);
\r
195 - if (strcasestr (primary, domain)) {
\r
199 + if (strcasestr(primary, domain)) {
\r
203 + for (i = 0; i < other_len; i++)
\r
204 + if (strcasestr (other[i],domain)) {
\r
208 - other = notmuch_config_get_user_other_email (config, &other_len);
\r
209 - for (i = 0; i < other_len; i++)
\r
210 - if (strcasestr (other[i], domain)) {
\r