[PATCH 5/6] lib: parse messages only once
author Jani Nikula <jani@nikula.org>
Wed, 16 Oct 2013 19:00:12 +0000 (22:00 +0300)
committer W. Trevor King <wking@tremily.us>
Fri, 7 Nov 2014 17:57:26 +0000 (09:57 -0800)
92/d95cbecffde030005c72ec9451f92b3e168fd1 [new file with mode: 0644]

diff --git a/92/d95cbecffde030005c72ec9451f92b3e168fd1 b/92/d95cbecffde030005c72ec9451f92b3e168fd1
new file mode 100644 (file)
index 0000000..aecb83b
--- /dev/null
@@ -0,0 +1,253 @@
+Return-Path: <jani@nikula.org>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+       by olra.theworths.org (Postfix) with ESMTP id 4723A431FC9\r
+       for <notmuch@notmuchmail.org>; Wed, 16 Oct 2013 12:00:56 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Amavis-Alert: BAD HEADER SECTION, Duplicate header field: "References"\r
+X-Spam-Flag: NO\r
+X-Spam-Score: -0.7\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5\r
+       tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+       by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+       with ESMTP id 4xnvVTdj44HF for <notmuch@notmuchmail.org>;\r
+       Wed, 16 Oct 2013 12:00:50 -0700 (PDT)\r
+Received: from mail-ee0-f47.google.com (mail-ee0-f47.google.com\r
+ [74.125.83.47])       (using TLSv1 with cipher RC4-SHA (128/128 bits))        (No client\r
+ certificate requested)        by olra.theworths.org (Postfix) with ESMTPS id\r
+ 89CF6431FCB   for <notmuch@notmuchmail.org>; Wed, 16 Oct 2013 12:00:32 -0700\r
+ (PDT)\r
+Received: by mail-ee0-f47.google.com with SMTP id d49so580916eek.34\r
+       for <notmuch@notmuchmail.org>; Wed, 16 Oct 2013 12:00:31 -0700 (PDT)\r
+X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;\r
+       d=1e100.net; s=20130820;\r
+       h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\r
+       :references:in-reply-to:references;\r
+       bh=IH2SnVvGIXirOVHbhtKFi5hslXx4SV/xHhXd74UpFvw=;\r
+       b=YT6uMPJLlKkalWG/pBb1nW+FcpqJZ+Kkmb5kWGEWuUCTtqzNt+YCvtJzN78vD818I/\r
+       d4LsM7VZjcYakb290At19+aTPBGc6POz8fjkC4Pp+yaPAT6HJoOEDaJUe4NH80hXlUYn\r
+       7smZCIP6q+81YqiS9PAuPs1pQGjZiBUPADxO5vp/tgjeFxpTwbwbf2uioihTx1hG0eti\r
+       MsMEF9EM26DtYYlbT/pm4a3/O9m/zrpDcsOpbtnNIKxk7qdFW0GoqKRf1YJOYt1Y4bhg\r
+       1K5za0pRzGuKUav45u90UpEtqicxMHI1Kt5+4bs7YHgWMuoOUpvprsrXc2qr9Ifv5fbD\r
+       pr+g==\r
+X-Gm-Message-State:\r
+ ALoCoQnce8xInrJkX9gAilVZY8VijkrB49iW7IgcEFKIuSiQgeOWIQkL9x49RomZdCklShSqp0Rv\r
+X-Received: by 10.15.53.132 with SMTP id r4mr6878687eew.5.1381950031441;\r
+       Wed, 16 Oct 2013 12:00:31 -0700 (PDT)\r
+Received: from localhost (dsl-hkibrasgw2-58c36f-91.dhcp.inet.fi.\r
+       [88.195.111.91]) by mx.google.com with ESMTPSA id\r
+       m54sm182872688eex.2.1969.12.31.16.00.00\r
+       (version=TLSv1.2 cipher=RC4-SHA bits=128/128);\r
+       Wed, 16 Oct 2013 12:00:30 -0700 (PDT)\r
+From: Jani Nikula <jani@nikula.org>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH 5/6] lib: parse messages only once\r
+Date: Wed, 16 Oct 2013 22:00:12 +0300\r
+Message-Id:\r
+ <f499e217d35c69b3d02b9d455a6adbccf241d4f8.1381948853.git.jani@nikula.org>\r
+X-Mailer: git-send-email 1.8.4.rc3\r
+In-Reply-To: <cover.1381948853.git.jani@nikula.org>\r
+References: <cover.1381948853.git.jani@nikula.org>\r
+In-Reply-To: <cover.1381948853.git.jani@nikula.org>\r
+References: <cover.1381948853.git.jani@nikula.org>\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+       <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Wed, 16 Oct 2013 19:00:56 -0000\r
+\r
+Make the necessary changes to only do one gmime parse pass during\r
+indexing.\r
+---\r
+ lib/database.cc       |  2 +-\r
+ lib/index.cc          | 70 +++++----------------------------------------------\r
+ lib/message-file.c    |  9 +++++++\r
+ lib/notmuch-private.h | 15 +++++++++--\r
+ 4 files changed, 29 insertions(+), 67 deletions(-)\r
+\r
+diff --git a/lib/database.cc b/lib/database.cc\r
+index 45a3987..d097dda 100644\r
+--- a/lib/database.cc\r
++++ b/lib/database.cc\r
+@@ -1996,7 +1996,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,\r
+           date = notmuch_message_file_get_header (message_file, "date");\r
+           _notmuch_message_set_header_values (message, date, from, subject);\r
\r
+-          ret = _notmuch_message_index_file (message, filename);\r
++          ret = _notmuch_message_index_file (message, message_file);\r
+           if (ret)\r
+               goto DONE;\r
+       } else {\r
+diff --git a/lib/index.cc b/lib/index.cc\r
+index 78c18cf..71397da 100644\r
+--- a/lib/index.cc\r
++++ b/lib/index.cc\r
+@@ -425,63 +425,15 @@ _index_mime_part (notmuch_message_t *message,\r
\r
+ notmuch_status_t\r
+ _notmuch_message_index_file (notmuch_message_t *message,\r
+-                           const char *filename)\r
++                           notmuch_message_file_t *message_file)\r
+ {\r
+-    GMimeStream *stream = NULL;\r
+-    GMimeParser *parser = NULL;\r
+-    GMimeMessage *mime_message = NULL;\r
++    GMimeMessage *mime_message;\r
+     InternetAddressList *addresses;\r
+-    FILE *file = NULL;\r
+     const char *from, *subject;\r
+-    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;\r
+-    static int initialized = 0;\r
+-    char from_buf[5];\r
+-    bool is_mbox = false;\r
+-    static bool mbox_warning = false;\r
+-\r
+-    if (! initialized) {\r
+-      g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);\r
+-      initialized = 1;\r
+-    }\r
+-\r
+-    file = fopen (filename, "r");\r
+-    if (! file) {\r
+-      fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));\r
+-      ret = NOTMUCH_STATUS_FILE_ERROR;\r
+-      goto DONE;\r
+-    }\r
+-\r
+-    /* Is this mbox? */\r
+-    if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&\r
+-      strncmp (from_buf, "From ", 5) == 0)\r
+-      is_mbox = true;\r
+-    rewind (file);\r
\r
+-    /* Evil GMime steals my FILE* here so I won't fclose it. */\r
+-    stream = g_mime_stream_file_new (file);\r
+-\r
+-    parser = g_mime_parser_new_with_stream (stream);\r
+-    g_mime_parser_set_scan_from (parser, is_mbox);\r
+-\r
+-    mime_message = g_mime_parser_construct_message (parser);\r
+-\r
+-    if (is_mbox) {\r
+-      if (!g_mime_parser_eos (parser)) {\r
+-          /* This is a multi-message mbox. */\r
+-          ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;\r
+-          goto DONE;\r
+-      }\r
+-      /* For historical reasons, we support single-message mboxes,\r
+-       * but this behavior is likely to change in the future, so\r
+-       * warn. */\r
+-      if (!mbox_warning) {\r
+-          mbox_warning = true;\r
+-          fprintf (stderr, "\\r
+-Warning: %s is an mbox containing a single message,\n\\r
+-likely caused by misconfigured mail delivery.  Support for single-message\n\\r
+-mboxes is deprecated and may be removed in the future.\n", filename);\r
+-      }\r
+-    }\r
++    mime_message = notmuch_message_file_get_mime_message (message_file);\r
++    if (! mime_message)\r
++      return NOTMUCH_STATUS_FILE_NOT_EMAIL; /* more like internal error */\r
\r
+     from = g_mime_message_get_sender (mime_message);\r
\r
+@@ -502,15 +454,5 @@ mboxes is deprecated and may be removed in the future.\n", filename);\r
\r
+     _index_mime_part (message, g_mime_message_get_mime_part (mime_message));\r
\r
+-  DONE:\r
+-    if (mime_message)\r
+-      g_object_unref (mime_message);\r
+-\r
+-    if (parser)\r
+-      g_object_unref (parser);\r
+-\r
+-    if (stream)\r
+-      g_object_unref (stream);\r
+-\r
+-    return ret;\r
++    return NOTMUCH_STATUS_SUCCESS;\r
+ }\r
+diff --git a/lib/message-file.c b/lib/message-file.c\r
+index 9d5a3b9..7ab9e9d 100644\r
+--- a/lib/message-file.c\r
++++ b/lib/message-file.c\r
+@@ -247,6 +247,15 @@ mboxes is deprecated and may be removed in the future.\n", message->filename);\r
+     return NOTMUCH_STATUS_SUCCESS;\r
+ }\r
\r
++GMimeMessage *\r
++notmuch_message_file_get_mime_message (notmuch_message_file_t *message)\r
++{\r
++    if (! message->parsed)\r
++      return NULL;\r
++\r
++    return message->message;\r
++}\r
++\r
+ /* return NULL on errors, empty string for non-existing headers */\r
+ const char *\r
+ notmuch_message_file_get_header (notmuch_message_file_t *message,\r
+diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h\r
+index 7277df1..048dd6c 100644\r
+--- a/lib/notmuch-private.h\r
++++ b/lib/notmuch-private.h\r
+@@ -46,6 +46,8 @@ NOTMUCH_BEGIN_DECLS\r
\r
+ #include <talloc.h>\r
\r
++#include <gmime/gmime.h>\r
++\r
+ #include "xutil.h"\r
+ #include "error_util.h"\r
\r
+@@ -320,9 +322,11 @@ notmuch_message_get_author (notmuch_message_t *message);\r
\r
+ /* index.cc */\r
\r
++typedef struct _notmuch_message_file notmuch_message_file_t;\r
++\r
+ notmuch_status_t\r
+ _notmuch_message_index_file (notmuch_message_t *message,\r
+-                           const char *filename);\r
++                           notmuch_message_file_t *message_file);\r
\r
+ /* message-file.c */\r
\r
+@@ -330,7 +334,6 @@ _notmuch_message_index_file (notmuch_message_t *message,\r
+  * into the public interface in notmuch.h\r
+  */\r
\r
+-typedef struct _notmuch_message_file notmuch_message_file_t;\r
\r
+ /* Open a file containing a single email message.\r
+  *\r
+@@ -377,6 +380,14 @@ void\r
+ notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,\r
+                                       va_list va_headers);\r
\r
++/*\r
++ * get mime message. this is an ugly interface; maybe join index.cc\r
++ * and message-file.c, or move the top level indexing call to\r
++ * message-file.c with helpers in index.cc\r
++ */\r
++GMimeMessage *\r
++notmuch_message_file_get_mime_message (notmuch_message_file_t *message);\r
++\r
+ /* Get the value of the specified header from the message as a UTF-8 string.\r
+  *\r
+  * The header name is case insensitive.\r
+-- \r
+1.8.4.rc3\r
+\r