1 Return-Path: <ethan.glasser.camp@gmail.com>
\r
2 X-Original-To: notmuch@notmuchmail.org
\r
3 Delivered-To: notmuch@notmuchmail.org
\r
4 Received: from localhost (localhost [127.0.0.1])
\r
5 by olra.theworths.org (Postfix) with ESMTP id 9DD71431FB6
\r
6 for <notmuch@notmuchmail.org>; Mon, 25 Jun 2012 13:55:14 -0700 (PDT)
\r
7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org
\r
11 X-Spam-Status: No, score=1.061 tagged_above=-999 required=5
\r
12 tests=[DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1,
\r
13 FREEMAIL_FROM=0.001, RCVD_IN_BL_SPAMCOP_NET=1.246,
\r
14 RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_SORBS_WEB=0.614] autolearn=disabled
\r
15 Received: from olra.theworths.org ([127.0.0.1])
\r
16 by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)
\r
17 with ESMTP id NgwX7iUdAUdb for <notmuch@notmuchmail.org>;
\r
18 Mon, 25 Jun 2012 13:55:13 -0700 (PDT)
\r
19 Received: from mail-wg0-f45.google.com (mail-wg0-f45.google.com
\r
20 [74.125.82.45]) (using TLSv1 with cipher RC4-SHA (128/128 bits)) (No client
\r
21 certificate requested) by olra.theworths.org (Postfix) with ESMTPS id
\r
22 4949F431FAF for <notmuch@notmuchmail.org>; Mon, 25 Jun 2012 13:55:13 -0700
\r
24 Received: by wgbdt14 with SMTP id dt14so3711317wgb.2
\r
25 for <notmuch@notmuchmail.org>; Mon, 25 Jun 2012 13:55:12 -0700 (PDT)
\r
26 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113;
\r
27 h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references;
\r
28 bh=5yIyyO80WMJ6eRq9x6aRcGJCgDQstm6+2ZZ67HRJ6p4=;
\r
29 b=otFB4H3/+huSv6oaF1MpuP584b3syoWjBAc1d25osDKatgVn1Ln/UIMYN5KQdh11JI
\r
30 mzJW0moD61QMEvdeRkWjtwdXrVZ4//HEiMncVAnhZIqo0Ip7WHLwf4AOe+U1JOlywq53
\r
31 6/DNQhsxFDKGbWRAu+3GWGYPQKH01iwVxw2w5CGxIxSIB0exWWqDNp7MUXW3XpU8Iqby
\r
32 CmzyeW+uXtUmKqtBfBg546AHmpubeKzpuFmTbwIQ0iZSk/FRJFJclxoSiYsKbkt1MARi
\r
33 huH8XKsSfWqIww4EQOb7DwhdtpCokBPaQU278e8xZpSGJzHPzUh4Q+Q+IAUx8YYdMmNl
\r
35 Received: by 10.180.78.197 with SMTP id d5mr27170402wix.7.1340657712026;
\r
36 Mon, 25 Jun 2012 13:55:12 -0700 (PDT)
\r
37 Received: from localhost ([195.24.209.21])
\r
38 by mx.google.com with ESMTPS id fm1sm22630wib.10.2012.06.25.13.54.57
\r
39 (version=TLSv1/SSLv3 cipher=OTHER);
\r
40 Mon, 25 Jun 2012 13:55:11 -0700 (PDT)
\r
41 From: Ethan Glasser-Camp <ethan.glasser.camp@gmail.com>
\r
42 To: notmuch@notmuchmail.org
\r
43 Subject: [RFC PATCH 14/14] new: Add scan support for mbox:// URIs
\r
44 Date: Mon, 25 Jun 2012 16:51:57 -0400
\r
45 Message-Id: <1340657517-6539-10-git-send-email-ethan@betacantrips.com>
\r
46 X-Mailer: git-send-email 1.7.9.5
\r
47 In-Reply-To: <1340657517-6539-1-git-send-email-ethan@betacantrips.com>
\r
48 References: <1340657517-6539-1-git-send-email-ethan@betacantrips.com>
\r
49 X-Mailman-Approved-At: Tue, 26 Jun 2012 03:51:54 -0700
\r
50 X-BeenThere: notmuch@notmuchmail.org
\r
51 X-Mailman-Version: 2.1.13
\r
53 List-Id: "Use and development of the notmuch mail system."
\r
54 <notmuch.notmuchmail.org>
\r
55 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,
\r
56 <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>
\r
57 List-Archive: <http://notmuchmail.org/pipermail/notmuch>
\r
58 List-Post: <mailto:notmuch@notmuchmail.org>
\r
59 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>
\r
60 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,
\r
61 <mailto:notmuch-request@notmuchmail.org?subject=subscribe>
\r
62 X-List-Received-Date: Mon, 25 Jun 2012 20:55:14 -0000
\r
64 A lot of code is duplicated from maildir; I don't think I handled all
\r
65 those errors correctly, and I didn't report any progress.
\r
67 Signed-off-by: Ethan Glasser-Camp <ethan@betacantrips.com>
\r
69 notmuch-new.c | 299 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
\r
70 1 file changed, 289 insertions(+), 10 deletions(-)
\r
72 diff --git a/notmuch-new.c b/notmuch-new.c
\r
73 index 1bf4e25..36fee34 100644
\r
79 #include "notmuch-client.h"
\r
80 +#include <uriparser/Uri.h>
\r
84 @@ -239,16 +240,6 @@ _entry_in_ignore_list (const char *entry, add_files_state_t *state)
\r
88 -/* Call out to the appropriate add_files function, based on the URI. */
\r
89 -static notmuch_status_t
\r
90 -add_files_uri (unused(notmuch_database_t *notmuch),
\r
91 - unused(const char *uri),
\r
92 - unused(add_files_state_t *state))
\r
94 - /* Stub for now */
\r
95 - return NOTMUCH_STATUS_SUCCESS;
\r
98 /* Progress-reporting function.
\r
100 * Can be used by any mailstore-crawling function that wants to alert
\r
101 @@ -674,6 +665,294 @@ add_files (notmuch_database_t *notmuch,
\r
105 +/* Scan an mbox file for messages.
\r
107 + * We assume that mboxes only ever grow (i.e. are append-only).
\r
109 + * The mtime of the mbox file is stored in a "directory" document in
\r
112 +static notmuch_status_t
\r
113 +add_messages_mbox_file (notmuch_database_t *notmuch,
\r
114 + const char *path,
\r
115 + add_files_state_t *state)
\r
117 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
\r
119 + time_t fs_mtime, db_mtime, stat_time;
\r
121 + char *line, *path_uri = NULL, *message_uri = NULL;
\r
123 + size_t offset, end_offset, line_size = 0;
\r
124 + notmuch_directory_t *directory;
\r
125 + int content_length = -1, is_headers;
\r
127 + if (stat (path, &st)) {
\r
128 + fprintf (stderr, "Error reading mbox file %s: %s\n",
\r
129 + path, strerror (errno));
\r
130 + return NOTMUCH_STATUS_FILE_ERROR;
\r
133 + stat_time = time (NULL);
\r
134 + if (! S_ISREG (st.st_mode)) {
\r
135 + fprintf (stderr, "Error: %s is not a file.\n", path);
\r
136 + return NOTMUCH_STATUS_FILE_ERROR;
\r
139 + fs_mtime = st.st_mtime;
\r
141 + path_uri = talloc_asprintf (notmuch, "mbox://%s", path);
\r
142 + status = notmuch_database_get_directory (notmuch, path_uri, &directory);
\r
147 + db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0;
\r
149 + if (directory && db_mtime == fs_mtime) {
\r
153 + mbox = fopen (path, "r");
\r
154 + if (mbox == NULL) {
\r
155 + fprintf (stderr, "Error: couldn't open %s for reading.\n",
\r
157 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
161 + line_len = getline (&line, &line_size, mbox);
\r
163 + if (line_len == -1) {
\r
164 + fprintf (stderr, "Error: reading from %s failed: %s\n",
\r
165 + path, strerror (errno));
\r
166 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
170 + if (strncmp (line, "From ", 5) != 0) {
\r
171 + fprintf (stderr, "Note: Ignoring non-mbox file: %s\n",
\r
173 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
179 + /* Loop invariant: At the beginning of the loop, we have just read
\r
180 + * a From_ line, but haven't yet read any of the headers.
\r
182 + while (! feof (mbox)) {
\r
184 + offset = ftell (mbox);
\r
185 + content_length = -1;
\r
187 + /* Read lines until we either get to the next From_ header, or
\r
188 + * we find a Content-Length header (mboxcl) and we run out of headers.
\r
191 + /* Get the offset before we read, in case we got another From_ header. */
\r
192 + end_offset = ftell (mbox);
\r
194 + line_len = getline (&line, &line_size, mbox);
\r
196 + /* Check to see if this line is a content-length header,
\r
197 + * or the end of the headers. */
\r
198 + if (is_headers && strncasecmp (line, "Content-Length: ",
\r
199 + strlen ("Content-Length: ")) == 0)
\r
200 + content_length = strtol (line + strlen ("Content-Length: "),
\r
203 + if (is_headers && strlen (line) == 1 && *line == '\n') {
\r
205 + /* If we got a content_length, skip the message body. */
\r
206 + if (content_length != -1) {
\r
207 + fseek (mbox, content_length, SEEK_CUR);
\r
208 + line_len = getline (&line, &line_size, mbox);
\r
210 + /* We should be at the end of the message. Sanity
\r
211 + * check: there should be a blank line, and then
\r
212 + * another From_ header. */
\r
213 + if (strlen (line) != 1 || *line != '\n') {
\r
214 + fprintf (stderr, "Warning: message with Content-Length not "
\r
215 + "immediately followed by blank line (%d)\n", offset);
\r
218 + end_offset = ftell (mbox);
\r
219 + line_len = getline (&line, &line_size, mbox);
\r
221 + if (line_len != -1 && strncmp (line, "From ", 5) != 0) {
\r
222 + fprintf (stderr, "Warning: message with Content-Length not "
\r
223 + "immediately followed by another message (%d)\n", offset);
\r
224 + fprintf (stderr, "Line was: %s", line);
\r
229 + } while (! feof (mbox) && strncmp (line, "From ", 5) != 0);
\r
231 + /* end_offset is now after the \n but before the From_. */
\r
232 + message_uri = talloc_asprintf (notmuch, "mbox://%s#%d+%d",
\r
233 + path, offset, (end_offset - 1) - offset);
\r
234 + status = _add_message (state, notmuch, message_uri);
\r
235 + talloc_free (message_uri);
\r
236 + message_uri = NULL;
\r
239 + if (fs_mtime != stat_time)
\r
240 + _filename_list_add (state->directory_mtimes, path_uri)->mtime = fs_mtime;
\r
246 + talloc_free (path_uri);
\r
248 + talloc_free (message_uri);
\r
250 + notmuch_directory_destroy (directory);
\r
256 + * Examine path recursively as follows:
\r
258 + * - Recurse on each subdirectory, as in add_files.
\r
260 + * - Call add_messages_mbox_file on every non-directory.
\r
262 +static notmuch_status_t
\r
263 +add_files_mbox (notmuch_database_t *notmuch,
\r
264 + const char *path,
\r
265 + add_files_state_t *state)
\r
267 + struct dirent **fs_entries = NULL;
\r
268 + struct dirent *entry = NULL;
\r
269 + char *next = NULL;
\r
270 + int num_fs_entries = 0, i, entry_type;
\r
272 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
\r
274 + if (stat (path, &st)) {
\r
275 + fprintf (stderr, "Error reading directory %s: %s\n",
\r
276 + path, strerror (errno));
\r
277 + return NOTMUCH_STATUS_FILE_ERROR;
\r
280 + num_fs_entries = scandir (path, &fs_entries, 0,
\r
281 + dirent_sort_inode);
\r
283 + if (num_fs_entries == -1) {
\r
284 + fprintf (stderr, "Error opening directory %s: %s\n",
\r
285 + path, strerror (errno));
\r
286 + ret = NOTMUCH_STATUS_FILE_ERROR;
\r
290 + for (i = 0; i < num_fs_entries; i++) {
\r
294 + entry = fs_entries[i];
\r
296 + entry_type = dirent_type (path, entry);
\r
297 + if (entry_type == -1) {
\r
298 + /* Be pessimistic, e.g. so we don't lose lots of mail just
\r
299 + * because a user broke a symlink. */
\r
300 + fprintf (stderr, "Error reading file %s/%s: %s\n",
\r
301 + path, entry->d_name, strerror (errno));
\r
302 + return NOTMUCH_STATUS_FILE_ERROR;
\r
303 + } else if (entry_type != S_IFDIR) {
\r
307 + /* Ignore special directories to avoid infinite recursion.
\r
308 + * Also ignore the .notmuch directory and any files/directories
\r
309 + * the user has configured to be ignored. (No "tmp" directory is
\r
310 + * skipped by the check below.)
\r
312 + if (strcmp (entry->d_name, ".") == 0 ||
\r
313 + strcmp (entry->d_name, "..") == 0 ||
\r
314 + strcmp (entry->d_name, ".notmuch") == 0 ||
\r
315 + _entry_in_ignore_list (entry->d_name, state))
\r
320 + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
\r
321 + status = add_files_mbox (notmuch, next, state);
\r
326 + talloc_free (next);
\r
330 + /* Pass 2: Scan for new files, removed files, and removed directories. */
\r
331 + for (i = 0; i < num_fs_entries; i++)
\r
336 + entry = fs_entries[i];
\r
338 + /* Ignore files & directories user has configured to be ignored */
\r
339 + if (_entry_in_ignore_list (entry->d_name, state))
\r
342 + /* Only add regular files (and symlinks to regular files). */
\r
343 + entry_type = dirent_type (path, entry);
\r
344 + if (entry_type == -1) {
\r
345 + fprintf (stderr, "Error reading file %s/%s: %s\n",
\r
346 + path, entry->d_name, strerror (errno));
\r
347 + return NOTMUCH_STATUS_FILE_ERROR;
\r
348 + } else if (entry_type != S_IFREG) {
\r
352 + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
\r
353 + status = add_messages_mbox_file (notmuch, next, state);
\r
354 + talloc_free (next);
\r
365 + talloc_free (next);
\r
369 +/* Call out to the appropriate add_files function, based on the URI. */
\r
370 +static notmuch_status_t
\r
371 +add_files_uri (notmuch_database_t *notmuch,
\r
373 + add_files_state_t *state)
\r
376 + UriParserStateA parser;
\r
377 + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
\r
378 + parser.uri = &parsed;
\r
379 + if (uriParseUriA (&parser, uri) != URI_SUCCESS)
\r
382 + if (strncmp (parsed.scheme.first, "mbox",
\r
383 + parsed.scheme.afterLast - parsed.scheme.first) == 0) {
\r
384 + ret = add_files_mbox (notmuch, parsed.pathHead->text.first - 1, state);
\r
389 + uriFreeUriMembersA (&parsed);
\r
394 setup_progress_printing_timer (void)
\r