1 Return-Path: <james@jameswestby.net>
\r
2 X-Original-To: notmuch@notmuchmail.org
\r
3 Delivered-To: notmuch@notmuchmail.org
\r
4 Received: from localhost (localhost [127.0.0.1])
\r
5 by olra.theworths.org (Postfix) with ESMTP id E6D90431FC0
\r
6 for <notmuch@notmuchmail.org>; Fri, 18 Dec 2009 17:29:21 -0800 (PST)
\r
7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org
\r
8 Received: from olra.theworths.org ([127.0.0.1])
\r
9 by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)
\r
10 with ESMTP id p8p3yqJabbGu for <notmuch@notmuchmail.org>;
\r
11 Fri, 18 Dec 2009 17:29:21 -0800 (PST)
\r
12 Received: from jameswestby.net (jameswestby.net [89.145.97.141])
\r
13 by olra.theworths.org (Postfix) with ESMTP id ADF0B431FAE
\r
14 for <notmuch@notmuchmail.org>; Fri, 18 Dec 2009 17:29:20 -0800 (PST)
\r
15 Received: from cpc4-aztw22-2-0-cust59.aztw.cable.virginmedia.com
\r
16 ([94.169.116.60] helo=flash)
\r
17 by jameswestby.net with esmtpa (Exim 4.69)
\r
18 (envelope-from <james@jameswestby.net>)
\r
19 id 1NLo8E-0006Gd-RM; Sat, 19 Dec 2009 01:29:18 +0000
\r
20 Received: by flash (Postfix, from userid 1000)
\r
21 id DE6886E546A; Sat, 19 Dec 2009 01:29:12 +0000 (GMT)
\r
22 From: James Westby <jw+debian@jameswestby.net>
\r
23 To: notmuch@notmuchmail.org
\r
24 Date: Sat, 19 Dec 2009 01:29:09 +0000
\r
25 Message-Id: <1261186149-24078-1-git-send-email-jw+debian@jameswestby.net>
\r
26 X-Mailer: git-send-email 1.6.3.3
\r
27 In-Reply-To: <871virzzjy.fsf@yoom.home.cworth.org>
\r
28 References: <871virzzjy.fsf@yoom.home.cworth.org>
\r
29 Subject: [notmuch] [PATCH] Reindex larger files that duplicate ids we have
\r
30 X-BeenThere: notmuch@notmuchmail.org
\r
31 X-Mailman-Version: 2.1.12
\r
33 List-Id: "Use and development of the notmuch mail system."
\r
34 <notmuch.notmuchmail.org>
\r
35 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,
\r
36 <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>
\r
37 List-Archive: <http://notmuchmail.org/pipermail/notmuch>
\r
38 List-Post: <mailto:notmuch@notmuchmail.org>
\r
39 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>
\r
40 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,
\r
41 <mailto:notmuch-request@notmuchmail.org?subject=subscribe>
\r
42 X-List-Received-Date: Sat, 19 Dec 2009 01:29:22 -0000
\r
44 When we see a message where we already have the file
\r
45 id stored, check if the size is larger. If it is, then
\r
46 re-index and set the file size and name to be the
\r
50 Here's the (quite simple) patch to implement indexing the
\r
51 largest copy of each mail that we have.
\r
53 Does the re-indexing replace the old terms? In the case
\r
54 where you had a collision with different text, this could
\r
55 make a search return mails that don't contain that text.
\r
56 I don't think it's a big issue though, even if that is the
\r
63 lib/database.cc | 4 +++-
\r
64 lib/index.cc | 27 +++++++++++++++++++++++++++
\r
65 lib/message.cc | 31 ++++++++++++++++++++++++++-----
\r
66 lib/notmuch-private.h | 13 +++++++++++++
\r
67 lib/notmuch.h | 5 +++--
\r
68 5 files changed, 72 insertions(+), 8 deletions(-)
\r
70 diff --git a/lib/database.cc b/lib/database.cc
\r
71 index d834d94..64f29b9 100644
\r
72 --- a/lib/database.cc
\r
73 +++ b/lib/database.cc
\r
74 @@ -1000,7 +1000,9 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
\r
78 - ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
\r
79 + ret = _notmuch_message_possibly_reindex (message, filename, size);
\r
81 + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
\r
85 diff --git a/lib/index.cc b/lib/index.cc
\r
86 index 125fa6c..14c3268 100644
\r
89 @@ -312,3 +312,30 @@ _notmuch_message_index_file (notmuch_message_t *message,
\r
95 +_notmuch_message_possibly_reindex (notmuch_message_t *message,
\r
96 + const char *filename,
\r
99 + off_t realsize = size;
\r
100 + off_t stored_size;
\r
101 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
\r
103 + ret = _notmuch_message_size_on_disk (message, filename, &realsize);
\r
106 + stored_size = _notmuch_message_get_filesize (message);
\r
107 + if (realsize > stored_size) {
\r
108 + ret = _notmuch_message_index_file (message, filename);
\r
111 + ret = _notmuch_message_set_filesize (message, filename, realsize);
\r
112 + _notmuch_message_set_filename (message, filename);
\r
113 + _notmuch_message_sync (message);
\r
120 diff --git a/lib/message.cc b/lib/message.cc
\r
121 index 2bfc5ed..cc32741 100644
\r
122 --- a/lib/message.cc
\r
123 +++ b/lib/message.cc
\r
124 @@ -427,23 +427,38 @@ _notmuch_message_set_filename (notmuch_message_t *message,
\r
128 -_notmuch_message_set_filesize (notmuch_message_t *message,
\r
129 +_notmuch_message_size_on_disk (notmuch_message_t *message,
\r
130 const char *filename,
\r
131 - const off_t size)
\r
135 - off_t realsize = size;
\r
136 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
\r
138 - if (realsize < 0) {
\r
140 if (stat (filename, &st)) {
\r
141 ret = NOTMUCH_STATUS_FILE_ERROR;
\r
144 - realsize = st.st_size;
\r
145 + *size = st.st_size;
\r
154 +_notmuch_message_set_filesize (notmuch_message_t *message,
\r
155 + const char *filename,
\r
156 + const off_t size)
\r
158 + off_t realsize = size;
\r
159 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
\r
161 + ret = _notmuch_message_size_on_disk (message, filename, &realsize);
\r
165 message->doc.add_value (NOTMUCH_VALUE_FILESIZE,
\r
166 Xapian::sortable_serialise (realsize));
\r
168 @@ -451,6 +466,12 @@ _notmuch_message_set_filesize (notmuch_message_t *message,
\r
173 +_notmuch_message_get_filesize (notmuch_message_t *message)
\r
175 + return Xapian::sortable_unserialise (message->doc.get_value (NOTMUCH_VALUE_FILESIZE));
\r
179 notmuch_message_get_filename (notmuch_message_t *message)
\r
181 diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
\r
182 index 1ba3055..cf65fd9 100644
\r
183 --- a/lib/notmuch-private.h
\r
184 +++ b/lib/notmuch-private.h
\r
185 @@ -199,6 +199,14 @@ _notmuch_message_set_filesize (notmuch_message_t *message,
\r
186 const char *filename,
\r
190 +_notmuch_message_get_filesize (notmuch_message_t *message);
\r
193 +_notmuch_message_size_on_disk (notmuch_message_t *message,
\r
194 + const char *filename,
\r
198 _notmuch_message_ensure_thread_id (notmuch_message_t *message);
\r
200 @@ -218,6 +226,11 @@ notmuch_status_t
\r
201 _notmuch_message_index_file (notmuch_message_t *message,
\r
202 const char *filename);
\r
205 +_notmuch_message_possibly_reindex (notmuch_message_t *message,
\r
206 + const char *filename,
\r
207 + const off_t size);
\r
209 /* message-file.c */
\r
211 /* XXX: I haven't decided yet whether these will actually get exported
\r
212 diff --git a/lib/notmuch.h b/lib/notmuch.h
\r
213 index 5d0d224..892e420 100644
\r
214 --- a/lib/notmuch.h
\r
215 +++ b/lib/notmuch.h
\r
216 @@ -256,8 +256,9 @@ notmuch_database_get_timestamp (notmuch_database_t *database,
\r
217 * NOTMUCH_STATUS_SUCCESS: Message successfully added to database.
\r
219 * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: Message has the same message
\r
220 - * ID as another message already in the database. Nothing added
\r
221 - * to the database.
\r
222 + * ID as another message already in the database. This may have
\r
223 + * caused some further indexing to be done, but it is not an entirely
\r
226 * NOTMUCH_STATUS_FILE_ERROR: an error occurred trying to open the
\r
227 * file, (such as permission denied, or file not found,
\r