1 Return-Path: <ethan.glasser.camp@gmail.com>
\r
2 X-Original-To: notmuch@notmuchmail.org
\r
3 Delivered-To: notmuch@notmuchmail.org
\r
4 Received: from localhost (localhost [127.0.0.1])
\r
5 by olra.theworths.org (Postfix) with ESMTP id 159B5431FD4
\r
6 for <notmuch@notmuchmail.org>; Sun, 1 Jul 2012 09:47:48 -0700 (PDT)
\r
7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org
\r
11 X-Spam-Status: No, score=1.061 tagged_above=-999 required=5
\r
12 tests=[DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1,
\r
13 FREEMAIL_FROM=0.001, RCVD_IN_BL_SPAMCOP_NET=1.246,
\r
14 RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_SORBS_WEB=0.614] autolearn=disabled
\r
15 Received: from olra.theworths.org ([127.0.0.1])
\r
16 by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)
\r
17 with ESMTP id VkfYK+ryi0dV for <notmuch@notmuchmail.org>;
\r
18 Sun, 1 Jul 2012 09:47:47 -0700 (PDT)
\r
19 Received: from mail-wi0-f169.google.com (mail-wi0-f169.google.com
\r
20 [209.85.212.169]) (using TLSv1 with cipher RC4-SHA (128/128 bits))
\r
21 (No client certificate requested)
\r
22 by olra.theworths.org (Postfix) with ESMTPS id BB53A431FAF
\r
23 for <notmuch@notmuchmail.org>; Sun, 1 Jul 2012 09:47:46 -0700 (PDT)
\r
24 Received: by wibhm2 with SMTP id hm2so2252707wib.2
\r
25 for <notmuch@notmuchmail.org>; Sun, 01 Jul 2012 09:47:45 -0700 (PDT)
\r
26 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113;
\r
27 h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references;
\r
28 bh=L5VXoVxKCC9xdk9sRXrYZXjP8v7jJYaDqCn9aGHBbrg=;
\r
29 b=RxYaXLeTxFLzHZvBj+0cnX8nNZc83uwoPGIj4mUllJc/gXvjZDUZYP4coyoPVV7XqG
\r
30 tw7Hj/P+GbvVAppQBziQ+z61AQSOypyozltok1TJYHP58aT4hCwsymnkvhHbpU9nzVPc
\r
31 h2Y7wsPqKQSe4jK0uNRdXsyv5VTmGvPpXu9JyBJruNJhfaVdAiuXiwZ9qvaC64MEOJ3t
\r
32 xxxI8JmfngXokPgPdNjPlc+FNq+hiRH/iVDphCHLzI70fxH3oThgLtV5WCJDN3P6ZZiZ
\r
33 ceyFlIeLIZ1Pk0ePSd7hFsxrPWpu8phBJFr5pLrPGCEyH+6jPv37s7UlMRPMF5KBXxzN
\r
35 Received: by 10.180.105.6 with SMTP id gi6mr17978731wib.4.1341161265405;
\r
36 Sun, 01 Jul 2012 09:47:45 -0700 (PDT)
\r
37 Received: from localhost ([195.24.209.21])
\r
38 by mx.google.com with ESMTPS id m4sm19729724wie.1.2012.07.01.09.47.30
\r
39 (version=TLSv1/SSLv3 cipher=OTHER);
\r
40 Sun, 01 Jul 2012 09:47:44 -0700 (PDT)
\r
41 From: Ethan Glasser-Camp <ethan.glasser.camp@gmail.com>
\r
42 To: notmuch@notmuchmail.org
\r
43 Subject: [PATCH v2 8/8] new: Add scan support for mbox:// URIs
\r
44 Date: Sun, 1 Jul 2012 12:39:50 -0400
\r
45 Message-Id: <1341160790-14525-9-git-send-email-ethan@betacantrips.com>
\r
46 X-Mailer: git-send-email 1.7.9.5
\r
47 In-Reply-To: <1341160790-14525-1-git-send-email-ethan@betacantrips.com>
\r
49 <CAOJ+Ob0MSOez2MvD2fCgF7t32kFPk4g2+xCud88QmBLt_b5pOA@mail.gmail.com>
\r
50 <1341160790-14525-1-git-send-email-ethan@betacantrips.com>
\r
51 Cc: Ethan Glasser-Camp <ethan@betacantrips.com>
\r
52 X-BeenThere: notmuch@notmuchmail.org
\r
53 X-Mailman-Version: 2.1.13
\r
55 List-Id: "Use and development of the notmuch mail system."
\r
56 <notmuch.notmuchmail.org>
\r
57 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,
\r
58 <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>
\r
59 List-Archive: <http://notmuchmail.org/pipermail/notmuch>
\r
60 List-Post: <mailto:notmuch@notmuchmail.org>
\r
61 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>
\r
62 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,
\r
63 <mailto:notmuch-request@notmuchmail.org?subject=subscribe>
\r
64 X-List-Received-Date: Sun, 01 Jul 2012 16:47:48 -0000
\r
66 This fixes the broken tests introduced by the last commit.
\r
68 Signed-off-by: Ethan Glasser-Camp <ethan@betacantrips.com>
\r
70 More text was added to clarify how mbox scanning works.
\r
72 notmuch-config.c | 4 +
\r
73 notmuch-new.c | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
\r
74 2 files changed, 303 insertions(+), 5 deletions(-)
\r
76 diff --git a/notmuch-config.c b/notmuch-config.c
\r
77 index 387f855..e02b6a9 100644
\r
78 --- a/notmuch-config.c
\r
79 +++ b/notmuch-config.c
\r
80 @@ -56,6 +56,10 @@ static const char new_config_comment[] =
\r
81 "\t Each URL denotes a \"root\" which will be searched for mail files.\n"
\r
82 "\t How this search is performed depends on the scheme of the URL (the\n"
\r
83 "\t part before the first colon).\n"
\r
85 + "\t\tmbox:///path scans all subdirectories starting at path for mbox\n"
\r
86 + "\t\t files, and scans all mbox files for all messages.\n"
\r
88 "\t The maildir located at database.path, above, will automatically be added.\n";
\r
90 static const char user_config_comment[] =
\r
91 diff --git a/notmuch-new.c b/notmuch-new.c
\r
92 index 5250562..061a1a8 100644
\r
98 #include "notmuch-client.h"
\r
99 +#include <uriparser/Uri.h>
\r
101 #include <unistd.h>
\r
103 @@ -653,14 +654,307 @@ add_files (notmuch_database_t *notmuch,
\r
107 +/* Scan an mbox file for messages.
\r
109 + * We assume that mboxes are append only -- this function does not
\r
110 + * check to see if messages have gone missing.
\r
112 + * The mtime of the mbox file is stored in a "directory" document in
\r
115 +/* FIXME: a certain amount of this code appears in add_files_recursive,
\r
116 + * and could be refactored
\r
118 +static notmuch_status_t
\r
119 +add_messages_mbox_file (notmuch_database_t *notmuch,
\r
120 + const char *path,
\r
121 + add_files_state_t *state)
\r
123 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
\r
125 + time_t fs_mtime, db_mtime, stat_time;
\r
127 + char *line, *path_uri = NULL, *message_uri = NULL;
\r
129 + size_t offset, end_offset, line_size = 0;
\r
130 + notmuch_directory_t *directory;
\r
131 + int content_length = -1, is_headers;
\r
133 + if (stat (path, &st)) {
\r
134 + fprintf (stderr, "Error reading mbox file %s: %s\n",
\r
135 + path, strerror (errno));
\r
136 + return NOTMUCH_STATUS_FILE_ERROR;
\r
139 + stat_time = time (NULL);
\r
140 + if (! S_ISREG (st.st_mode)) {
\r
141 + fprintf (stderr, "Error: %s is not a file.\n", path);
\r
142 + return NOTMUCH_STATUS_FILE_ERROR;
\r
145 + fs_mtime = st.st_mtime;
\r
147 + path_uri = talloc_asprintf (notmuch, "mbox://%s", path);
\r
148 + status = notmuch_database_get_directory (notmuch, path_uri, &directory);
\r
153 + db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0;
\r
155 + if (directory && db_mtime == fs_mtime) {
\r
159 + mbox = fopen (path, "r");
\r
160 + if (mbox == NULL) {
\r
161 + fprintf (stderr, "Error: couldn't open %s for reading.\n",
\r
163 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
167 + line_len = getline (&line, &line_size, mbox);
\r
169 + if (line_len == -1) {
\r
170 + fprintf (stderr, "Error: reading from %s failed: %s\n",
\r
171 + path, strerror (errno));
\r
172 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
176 + if (strncmp (line, "From ", 5) != 0) {
\r
177 + fprintf (stderr, "Note: Ignoring non-mbox file: %s\n",
\r
179 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
183 + /* Loop invariant: At the beginning of the loop, we have just read
\r
184 + * a From_ line, but haven't yet read any of the headers.
\r
186 + while (! feof (mbox)) {
\r
188 + offset = ftell (mbox);
\r
189 + content_length = -1;
\r
191 + /* Read lines until we either get to the next From_ header, or
\r
192 + * we find a Content-Length header (mboxcl) and we run out of headers.
\r
195 + /* Get the offset before we read, in case we got another From_ header. */
\r
196 + end_offset = ftell (mbox);
\r
198 + line_len = getline (&line, &line_size, mbox);
\r
200 + /* Check to see if this line is a content-length header,
\r
201 + * or the end of the headers. */
\r
202 + if (is_headers && strncasecmp (line, "Content-Length: ",
\r
203 + strlen ("Content-Length: ")) == 0)
\r
204 + content_length = strtol (line + strlen ("Content-Length: "),
\r
207 + if (is_headers && strlen (line) == 1 && *line == '\n') {
\r
209 + /* If we got a content_length, skip the message body. */
\r
210 + if (content_length != -1) {
\r
211 + fseek (mbox, content_length, SEEK_CUR);
\r
212 + line_len = getline (&line, &line_size, mbox);
\r
214 + /* We should be at the end of the message. Sanity
\r
215 + * check: there should be a blank line, and then
\r
216 + * another From_ header. */
\r
217 + if (strlen (line) != 1 || *line != '\n') {
\r
218 + fprintf (stderr, "Warning: message with Content-Length not "
\r
219 + "immediately followed by blank line (%d)\n", offset);
\r
222 + end_offset = ftell (mbox);
\r
223 + line_len = getline (&line, &line_size, mbox);
\r
225 + if (line_len != -1 && strncmp (line, "From ", 5) != 0) {
\r
226 + fprintf (stderr, "Warning: message with Content-Length not "
\r
227 + "immediately followed by another message (%d)\n", offset);
\r
228 + fprintf (stderr, "Line was: %s", line);
\r
233 + } while (! feof (mbox) && strncmp (line, "From ", 5) != 0);
\r
235 + /* end_offset is now after the \n but before the From_. */
\r
236 + message_uri = talloc_asprintf (notmuch, "mbox://%s#%d+%d",
\r
237 + path, offset, (end_offset - 1) - offset);
\r
239 + _report_before_adding_file (state, message_uri);
\r
241 + status = _add_message (state, notmuch, message_uri);
\r
247 + _report_added_file (state);
\r
249 + talloc_free (message_uri);
\r
250 + message_uri = NULL;
\r
253 + /* This is the same precaution we take in maildir. */
\r
254 + if (fs_mtime != stat_time)
\r
255 + _filename_list_add (state->directory_mtimes, path_uri)->mtime = fs_mtime;
\r
261 + talloc_free (path_uri);
\r
263 + talloc_free (message_uri);
\r
265 + notmuch_directory_destroy (directory);
\r
271 + * Examine path recursively as follows:
\r
273 + * - Recurse on each subdirectory, as in add_files.
\r
275 + * - Call add_messages_mbox_file on every regular file.
\r
277 +/* FIXME: this is almost entirely bits-and-pieces from
\r
278 + * add_files_recursive and could do with a refactor */
\r
279 +static notmuch_status_t
\r
280 +add_files_mbox (notmuch_database_t *notmuch,
\r
281 + const char *path,
\r
282 + add_files_state_t *state)
\r
284 + struct dirent **fs_entries = NULL;
\r
285 + struct dirent *entry = NULL;
\r
286 + char *next = NULL;
\r
287 + int num_fs_entries = 0, i, entry_type;
\r
289 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
\r
291 + if (stat (path, &st)) {
\r
292 + fprintf (stderr, "Error reading directory %s: %s\n",
\r
293 + path, strerror (errno));
\r
294 + return NOTMUCH_STATUS_FILE_ERROR;
\r
297 + num_fs_entries = scandir (path, &fs_entries, 0,
\r
298 + dirent_sort_inode);
\r
300 + if (num_fs_entries == -1) {
\r
301 + fprintf (stderr, "Error opening directory %s: %s\n",
\r
302 + path, strerror (errno));
\r
303 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
307 + for (i = 0; i < num_fs_entries; i++) {
\r
311 + entry = fs_entries[i];
\r
313 + entry_type = dirent_type (path, entry);
\r
314 + if (entry_type == -1) {
\r
315 + /* Be pessimistic, e.g. so we don't lose lots of mail just
\r
316 + * because a user broke a symlink. */
\r
317 + fprintf (stderr, "Error reading file %s/%s: %s\n",
\r
318 + path, entry->d_name, strerror (errno));
\r
319 + return NOTMUCH_STATUS_FILE_ERROR;
\r
320 + } else if (entry_type != S_IFDIR) {
\r
324 + /* Ignore special directories to avoid infinite recursion.
\r
325 + * Also ignore the .notmuch directory, any "tmp" directory
\r
326 + * that appears within a maildir and files/directories
\r
327 + * the user has configured to be ignored.
\r
329 + if (strcmp (entry->d_name, ".") == 0 ||
\r
330 + strcmp (entry->d_name, "..") == 0 ||
\r
331 + strcmp (entry->d_name, ".notmuch") == 0 ||
\r
332 + _entry_in_ignore_list (entry->d_name, state))
\r
337 + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
\r
338 + status = add_files_mbox (notmuch, next, state);
\r
343 + talloc_free (next);
\r
347 + /* Pass 2: Scan each regular file in this directory as an mbox. */
\r
348 + for (i = 0; i < num_fs_entries; i++)
\r
353 + entry = fs_entries[i];
\r
355 + /* Ignore files & directories user has configured to be ignored */
\r
356 + if (_entry_in_ignore_list (entry->d_name, state))
\r
359 + /* Only add regular files (and symlinks to regular files). */
\r
360 + entry_type = dirent_type (path, entry);
\r
361 + if (entry_type == -1) {
\r
362 + fprintf (stderr, "Error reading file %s/%s: %s\n",
\r
363 + path, entry->d_name, strerror (errno));
\r
364 + return NOTMUCH_STATUS_FILE_ERROR;
\r
365 + } else if (entry_type != S_IFREG) {
\r
369 + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
\r
370 + status = add_messages_mbox_file (notmuch, next, state);
\r
371 + talloc_free (next);
\r
382 + talloc_free (next);
\r
386 /* Call out to the appropriate add_files function, based on the URI. */
\r
387 static notmuch_status_t
\r
388 -add_files_uri (unused(notmuch_database_t *notmuch),
\r
389 - unused(const char *uri),
\r
390 - unused(add_files_state_t *state))
\r
391 +add_files_uri (notmuch_database_t *notmuch,
\r
393 + add_files_state_t *state)
\r
395 - /* Stub for now */
\r
396 - return NOTMUCH_STATUS_SUCCESS;
\r
398 + UriParserStateA parser;
\r
399 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
\r
400 + parser.uri = &parsed;
\r
401 + if (uriParseUriA (&parser, uri) != URI_SUCCESS)
\r
404 + if (strncmp (parsed.scheme.first, "mbox",
\r
405 + parsed.scheme.afterLast - parsed.scheme.first) == 0) {
\r
406 + ret = add_files_mbox (notmuch, parsed.pathHead->text.first - 1, state);
\r
411 + uriFreeUriMembersA (&parsed);
\r