Return-Path: X-Original-To: notmuch@notmuchmail.org Delivered-To: notmuch@notmuchmail.org Received: from localhost (localhost [127.0.0.1]) by olra.theworths.org (Postfix) with ESMTP id 9DD71431FB6 for ; Mon, 25 Jun 2012 13:55:14 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at olra.theworths.org X-Spam-Flag: NO X-Spam-Score: 1.061 X-Spam-Level: * X-Spam-Status: No, score=1.061 tagged_above=-999 required=5 tests=[DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, FREEMAIL_FROM=0.001, RCVD_IN_BL_SPAMCOP_NET=1.246, RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_SORBS_WEB=0.614] autolearn=disabled Received: from olra.theworths.org ([127.0.0.1]) by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id NgwX7iUdAUdb for ; Mon, 25 Jun 2012 13:55:13 -0700 (PDT) Received: from mail-wg0-f45.google.com (mail-wg0-f45.google.com [74.125.82.45]) (using TLSv1 with cipher RC4-SHA (128/128 bits)) (No client certificate requested) by olra.theworths.org (Postfix) with ESMTPS id 4949F431FAF for ; Mon, 25 Jun 2012 13:55:13 -0700 (PDT) Received: by wgbdt14 with SMTP id dt14so3711317wgb.2 for ; Mon, 25 Jun 2012 13:55:12 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references; bh=5yIyyO80WMJ6eRq9x6aRcGJCgDQstm6+2ZZ67HRJ6p4=; b=otFB4H3/+huSv6oaF1MpuP584b3syoWjBAc1d25osDKatgVn1Ln/UIMYN5KQdh11JI mzJW0moD61QMEvdeRkWjtwdXrVZ4//HEiMncVAnhZIqo0Ip7WHLwf4AOe+U1JOlywq53 6/DNQhsxFDKGbWRAu+3GWGYPQKH01iwVxw2w5CGxIxSIB0exWWqDNp7MUXW3XpU8Iqby CmzyeW+uXtUmKqtBfBg546AHmpubeKzpuFmTbwIQ0iZSk/FRJFJclxoSiYsKbkt1MARi huH8XKsSfWqIww4EQOb7DwhdtpCokBPaQU278e8xZpSGJzHPzUh4Q+Q+IAUx8YYdMmNl Hzog== Received: by 10.180.78.197 with SMTP id d5mr27170402wix.7.1340657712026; Mon, 25 Jun 2012 13:55:12 -0700 (PDT) Received: from localhost ([195.24.209.21]) by mx.google.com with ESMTPS id fm1sm22630wib.10.2012.06.25.13.54.57 (version=TLSv1/SSLv3 cipher=OTHER); Mon, 25 Jun 2012 13:55:11 -0700 (PDT) From: Ethan Glasser-Camp To: notmuch@notmuchmail.org Subject: [RFC PATCH 14/14] new: Add scan support for mbox:// URIs Date: Mon, 25 Jun 2012 16:51:57 -0400 Message-Id: <1340657517-6539-10-git-send-email-ethan@betacantrips.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1340657517-6539-1-git-send-email-ethan@betacantrips.com> References: <1340657517-6539-1-git-send-email-ethan@betacantrips.com> X-Mailman-Approved-At: Tue, 26 Jun 2012 03:51:54 -0700 X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.13 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 25 Jun 2012 20:55:14 -0000 A lot of code is duplicated from maildir, I don't think I handled all those errors correctly, and I didn't report any progress. Signed-off-by: Ethan Glasser-Camp --- notmuch-new.c | 299 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 289 insertions(+), 10 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 1bf4e25..36fee34 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -19,6 +19,7 @@ */ #include "notmuch-client.h" +#include #include @@ -239,16 +240,6 @@ _entry_in_ignore_list (const char *entry, add_files_state_t *state) return FALSE; } -/* Call out to the appropriate add_files function, based on the URI. */ -static notmuch_status_t -add_files_uri (unused(notmuch_database_t *notmuch), - unused(const char *uri), - unused(add_files_state_t *state)) -{ - /* Stub for now */ - return NOTMUCH_STATUS_SUCCESS; -} - /* Progress-reporting function. * * Can be used by any mailstore-crawling function that wants to alert @@ -674,6 +665,294 @@ add_files (notmuch_database_t *notmuch, return ret; } +/* Scan an mbox file for messages. + * + * We assume that mboxes grow monotonically only. + * + * The mtime of the mbox file is stored in a "directory" document in + * Xapian. + */ +static notmuch_status_t +add_messages_mbox_file (notmuch_database_t *notmuch, + const char *path, + add_files_state_t *state) +{ + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status; + struct stat st; + time_t fs_mtime, db_mtime, stat_time; + FILE *mbox; + char *line, *path_uri = NULL, *message_uri = NULL; + int line_len; + size_t offset, end_offset, line_size = 0; + notmuch_directory_t *directory; + int content_length = -1, is_headers; + + if (stat (path, &st)) { + fprintf (stderr, "Error reading mbox file %s: %s\n", + path, strerror (errno)); + return NOTMUCH_STATUS_FILE_ERROR; + } + + stat_time = time (NULL); + if (! S_ISREG (st.st_mode)) { + fprintf (stderr, "Error: %s is not a file.\n", path); + return NOTMUCH_STATUS_FILE_ERROR; + } + + fs_mtime = st.st_mtime; + + path_uri = talloc_asprintf (notmuch, "mbox://%s", path); + status = notmuch_database_get_directory (notmuch, path_uri, &directory); + if (status) { + ret = status; + goto DONE; + } + db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0; + + if (directory && db_mtime == fs_mtime) { + goto DONE; + } + + mbox = fopen (path, "r"); + if (mbox == NULL) { + fprintf (stderr, "Error: couldn't open %s for reading.\n", + path); + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + line_len = getline (&line, &line_size, mbox); + + if (line_len == -1) { + fprintf (stderr, "Error: reading from %s failed: %s\n", + path, strerror (errno)); + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + if (strncmp (line, "From ", 5) != 0) { + fprintf (stderr, "Note: Ignoring non-mbox file: %s\n", + path); + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + free(line); + line = NULL; + + /* Loop invariant: At the beginning of the loop, we have just read + * a From_ line, but haven't yet read any of the headers. + */ + while (! feof (mbox)) { + is_headers = 1; + offset = ftell (mbox); + content_length = -1; + + /* Read lines until we either get to the next From_ header, or + * we find a Content-Length header (mboxcl) and we run out of headers. + */ + do { + /* Get the offset before we read, in case we got another From_ header. */ + end_offset = ftell (mbox); + + line_len = getline (&line, &line_size, mbox); + + /* Check to see if this line is a content-length header, + * or the end of the headers. */ + if (is_headers && strncasecmp (line, "Content-Length: ", + strlen ("Content-Length: ")) == 0) + content_length = strtol (line + strlen ("Content-Length: "), + NULL, 10); + + if (is_headers && strlen (line) == 1 && *line == '\n') { + is_headers = 0; + /* If we got a content_length, skip the message body. */ + if (content_length != -1) { + fseek (mbox, content_length, SEEK_CUR); + line_len = getline (&line, &line_size, mbox); + + /* We should be at the end of the message. Sanity + * check: there should be a blank line, and then + * another From_ header. */ + if (strlen (line) != 1 || *line != '\n') { + fprintf (stderr, "Warning: message with Content-Length not " + "immediately followed by blank line (%d)\n", offset); + } + + end_offset = ftell (mbox); + line_len = getline (&line, &line_size, mbox); + + if (line_len != -1 && strncmp (line, "From ", 5) != 0) { + fprintf (stderr, "Warning: message with Content-Length not " + "immediately followed by another message (%d)\n", offset); + fprintf (stderr, "Line was: %s", line); + } + } + } + + } while (! feof (mbox) && strncmp (line, "From ", 5) != 0); + + /* end_offset is now after the \n but before the From_. */ + message_uri = talloc_asprintf (notmuch, "mbox://%s#%d+%d", + path, offset, (end_offset - 1) - offset); + status = _add_message (state, notmuch, message_uri); + talloc_free (message_uri); + message_uri = NULL; + } + + if (fs_mtime != stat_time) + _filename_list_add (state->directory_mtimes, path_uri)->mtime = fs_mtime; + +DONE: + if (line) + free (line); + if (path_uri) + talloc_free (path_uri); + if (message_uri) + talloc_free (message_uri); + if (directory) + notmuch_directory_destroy (directory); + + return ret; +} + +/* + * Examine path recursively as follows: + * + * - Recurse on each subdirectory, as in add_files. + * + * - Call add_messages_mbox_file on every non-directory. + */ +static notmuch_status_t +add_files_mbox (notmuch_database_t *notmuch, + const char *path, + add_files_state_t *state) +{ + struct dirent **fs_entries = NULL; + struct dirent *entry = NULL; + char *next = NULL; + int num_fs_entries = 0, i, entry_type; + struct stat st; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status; + + if (stat (path, &st)) { + fprintf (stderr, "Error reading directory %s: %s\n", + path, strerror (errno)); + return NOTMUCH_STATUS_FILE_ERROR; + } + + num_fs_entries = scandir (path, &fs_entries, 0, + dirent_sort_inode); + + if (num_fs_entries == -1) { + fprintf (stderr, "Error opening directory %s: %s\n", + path, strerror (errno)); + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + + for (i = 0; i < num_fs_entries; i++) { + if (interrupted) + break; + + entry = fs_entries[i]; + + entry_type = dirent_type (path, entry); + if (entry_type == -1) { + /* Be pessimistic, e.g. so we don't lose lots of mail just + * because a user broke a symlink. */ + fprintf (stderr, "Error reading file %s/%s: %s\n", + path, entry->d_name, strerror (errno)); + return NOTMUCH_STATUS_FILE_ERROR; + } else if (entry_type != S_IFDIR) { + continue; + } + + /* Ignore special directories to avoid infinite recursion. + * Also ignore the .notmuch directory, any "tmp" directory + * that appears within a maildir and files/directories + * the user has configured to be ignored. + */ + if (strcmp (entry->d_name, ".") == 0 || + strcmp (entry->d_name, "..") == 0 || + strcmp (entry->d_name, ".notmuch") == 0 || + _entry_in_ignore_list (entry->d_name, state)) + { + continue; + } + + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); + status = add_files_mbox (notmuch, next, state); + if (status) { + ret = status; + goto DONE; + } + talloc_free (next); + next = NULL; + } + + /* Pass 2: Scan for new files, removed files, and removed directories. */ + for (i = 0; i < num_fs_entries; i++) + { + if (interrupted) + break; + + entry = fs_entries[i]; + + /* Ignore files & directories user has configured to be ignored */ + if (_entry_in_ignore_list (entry->d_name, state)) + continue; + + /* Only add regular files (and symlinks to regular files). */ + entry_type = dirent_type (path, entry); + if (entry_type == -1) { + fprintf (stderr, "Error reading file %s/%s: %s\n", + path, entry->d_name, strerror (errno)); + return NOTMUCH_STATUS_FILE_ERROR; + } else if (entry_type != S_IFREG) { + continue; + } + + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); + status = add_messages_mbox_file (notmuch, next, state); + talloc_free (next); + next = NULL; + + if (status) { + ret = status; + goto DONE; + } + } + +DONE: + if (next) + talloc_free (next); + return ret; +} + +/* Call out to the appropriate add_files function, based on the URI. */ +static notmuch_status_t +add_files_uri (notmuch_database_t *notmuch, + const char *uri, + add_files_state_t *state) +{ + UriUriA parsed; + UriParserStateA parser; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + parser.uri = &parsed; + if (uriParseUriA (&parser, uri) != URI_SUCCESS) + goto DONE; + + if (strncmp (parsed.scheme.first, "mbox", + parsed.scheme.afterLast - parsed.scheme.first) == 0) { + ret = add_files_mbox (notmuch, parsed.pathHead->text.first - 1, state); + goto DONE; + } + +DONE: + uriFreeUriMembersA (&parsed); + return ret; +} + static void setup_progress_printing_timer (void) { -- 1.7.9.5