From: Austin Clements Date: Mon, 3 Feb 2014 21:40:03 +0000 (+1900) Subject: Re: [PATCH v3 6/6] lib: parse messages only once X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=1343267c8f43fe9790c0ed448d5d1a328b658672;p=notmuch-archives.git Re: [PATCH v3 6/6] lib: parse messages only once --- diff --git a/1b/a8755bcb92fbba185c5888815b91b7956bdec8 b/1b/a8755bcb92fbba185c5888815b91b7956bdec8 new file mode 100644 index 000000000..5173408ea --- /dev/null +++ b/1b/a8755bcb92fbba185c5888815b91b7956bdec8 @@ -0,0 +1,275 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 1B257431FBC + for ; Mon, 3 Feb 2014 13:40:16 -0800 (PST) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id KlM0d8QK2WRE for ; + Mon, 3 Feb 2014 13:40:08 -0800 (PST) +Received: from dmz-mailsec-scanner-6.mit.edu (dmz-mailsec-scanner-6.mit.edu + [18.7.68.35]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 30F17431FAF + for ; Mon, 3 Feb 2014 13:40:08 -0800 (PST) +X-AuditID: 12074423-f79726d000000cc9-1c-52f00cb7a523 +Received: from mailhub-auth-3.mit.edu ( [18.9.21.43]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-6.mit.edu (Symantec Messaging Gateway) with SMTP + id 62.FD.03273.7BC00F25; Mon, 3 Feb 2014 16:40:07 -0500 (EST) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by mailhub-auth-3.mit.edu (8.13.8/8.9.2) with ESMTP id s13Le62l031338; + Mon, 3 Feb 2014 16:40:07 -0500 +Received: from awakening.csail.mit.edu (awakening.csail.mit.edu [18.26.4.91]) + (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s13Le4qh015564 + (version=TLSv1/SSLv3 cipher=DHE-RSA-AES128-SHA bits=128 verify=NOT); + Mon, 3 Feb 2014 16:40:06 -0500 +Received: from amthrax by awakening.csail.mit.edu with local (Exim 4.80) + (envelope-from ) + id 1WARFM-0002hk-66; Mon, 03 Feb 2014 16:40:04 -0500 +Date: Mon, 3 Feb 2014 16:40:03 -0500 +From: Austin Clements +To: Jani Nikula +Subject: Re: [PATCH v3 6/6] lib: parse messages only once +Message-ID: <20140203214003.GN4375@mit.edu> +References: + <31d785c4a3e4b90862a0fdc545d4e900a4c898e2.1391456555.git.jani@nikula.org> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline +In-Reply-To: + <31d785c4a3e4b90862a0fdc545d4e900a4c898e2.1391456555.git.jani@nikula.org> +User-Agent: Mutt/1.5.21 (2010-09-15) +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFmpkleLIzCtJLcpLzFFi42IR4hTV1t3O8yHIYOZCcYum6c4W12/OZHZg + 8rh1/zW7x7NVt5gDmKK4bFJSczLLUov07RK4MnY93MlccMOoonnTa7YGxifqXYycHBICJhJ7 + FnxhhrDFJC7cW8/WxcjFISQwm0li85rlYAkhgQ2MEn8PZkEkTjFJHGq+xA7hLGGUWDTxFytI + FYuAisSW3xvAOtgENCS27V/OCGKLCChKbD65H8xmFpCW+Pa7mamLkYNDWMBS4kCTPUiYV0Bb + 4sqmJiaIZXUSR+bMYoOIC0qcnPmEBaJVS+LGv5dgrSBjlv/jAAlzCoRJzJwOsVUU6IIpJ7ex + TWAUmoWkexaS7lkI3QsYmVcxyqbkVunmJmbmFKcm6xYnJ+blpRbpmunlZpbopaaUbmIEhTS7 + i/IOxj8HlQ4xCnAwKvHwdux9FyTEmlhWXJl7iFGSg0lJlFef4UOQEF9SfkplRmJxRnxRaU5q + 8SFGCQ5mJRFev0/vg4R4UxIrq1KL8mFS0hwsSuK8iTPeBAkJpCeWpGanphakFsFkZTg4lCR4 + OYGxKyRYlJqeWpGWmVOCkGbi4AQZzgM0XAqkhre4IDG3ODMdIn+KUVFKnPcnN1BCACSRUZoH + 1wtLOa8YxYFeEeZlBWnnAaYruO5XQIOZgAavcwW5urgkESEl1cC4x/DzAf9ji2fHJX9XTu2R + DX5+/+Ehu5VajAz+OkVipxv1Fr9KMLQ26VTKtokrmVl8NqzYZI/swXtrvhfGrp/iY/ZEU0Pm + 4PrH8+KfXck1kLodO093BodEXmTJhA6HmMWV294Xr5ZepPBx5vWFabd7il0/6BVNvR0r1hrM + tVrlcnifYPP08wJKLMUZiYZazEXFiQA8LkdPFAMAAA== +Cc: notmuch@notmuchmail.org +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Mon, 03 Feb 2014 21:40:16 -0000 + +Quoth Jani Nikula on Feb 03 at 9:51 pm: +> Use the previously parsed gmime message for indexing instead of +> running an extra parsing pass. +> +> After this change, we'll only do unnecessary parsing of the message +> body for duplicates and non-messages. For regular non-duplicate +> messages, we have now shaved off an extra header parsing round during +> indexing. +> --- +> lib/database.cc | 2 +- +> lib/index.cc | 59 ++++++--------------------------------------------- +> lib/message-file.c | 9 ++++++++ +> lib/notmuch-private.h | 16 ++++++++++++-- +> 4 files changed, 30 insertions(+), 56 deletions(-) +> +> diff --git a/lib/database.cc b/lib/database.cc +> index d1bea88..3a29fe7 100644 +> --- a/lib/database.cc +> +++ b/lib/database.cc +> @@ -2029,7 +2029,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, +> date = notmuch_message_file_get_header (message_file, "date"); +> _notmuch_message_set_header_values (message, date, from, subject); +> +> - ret = _notmuch_message_index_file (message, filename); +> + ret = _notmuch_message_index_file (message, message_file); +> if (ret) +> goto DONE; +> } else { +> diff --git a/lib/index.cc b/lib/index.cc +> index 976e49f..71397da 100644 +> --- a/lib/index.cc +> +++ b/lib/index.cc +> @@ -425,52 +425,15 @@ _index_mime_part (notmuch_message_t *message, +> +> notmuch_status_t +> _notmuch_message_index_file (notmuch_message_t *message, +> - const char *filename) +> + notmuch_message_file_t *message_file) +> { +> - GMimeStream *stream = NULL; +> - GMimeParser *parser = NULL; +> - GMimeMessage *mime_message = NULL; +> + GMimeMessage *mime_message; +> InternetAddressList *addresses; +> - FILE *file = NULL; +> const char *from, *subject; +> - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; +> - static int initialized = 0; +> - char from_buf[5]; +> - bool is_mbox = false; +> - +> - if (! initialized) { +> - g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS); +> - initialized = 1; +> - } +> - +> - file = fopen (filename, "r"); +> - if (! file) { +> - fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno)); +> - ret = NOTMUCH_STATUS_FILE_ERROR; +> - goto DONE; +> - } +> - +> - /* Is this mbox? */ +> - if (fread (from_buf, sizeof (from_buf), 1, file) == 1 && +> - strncmp (from_buf, "From ", 5) == 0) +> - is_mbox = true; +> - rewind (file); +> - +> - /* Evil GMime steals my FILE* here so I won't fclose it. */ +> - stream = g_mime_stream_file_new (file); +> - +> - parser = g_mime_parser_new_with_stream (stream); +> - g_mime_parser_set_scan_from (parser, is_mbox); +> +> - mime_message = g_mime_parser_construct_message (parser); +> - +> - if (is_mbox) { +> - if (!g_mime_parser_eos (parser)) { +> - /* This is a multi-message mbox. */ +> - ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; +> - goto DONE; +> - } +> - } +> + mime_message = notmuch_message_file_get_mime_message (message_file); +> + if (! mime_message) +> + return NOTMUCH_STATUS_FILE_NOT_EMAIL; /* more like internal error */ + +Are there situations other than forgetting to call +notmuch_message_file_parse that could cause this? (Speaking of which, +where is notmuch_message_file_parse called?) + +> +> from = g_mime_message_get_sender (mime_message); +> +> @@ -491,15 +454,5 @@ _notmuch_message_index_file (notmuch_message_t *message, +> +> _index_mime_part (message, g_mime_message_get_mime_part (mime_message)); +> +> - DONE: +> - if (mime_message) +> - g_object_unref (mime_message); +> - +> - if (parser) +> - g_object_unref (parser); +> - +> - if (stream) +> - g_object_unref (stream); +> - +> - return ret; +> + return NOTMUCH_STATUS_SUCCESS; +> } +> diff --git a/lib/message-file.c b/lib/message-file.c +> index 33f6468..99e1dc8 100644 +> --- a/lib/message-file.c +> +++ b/lib/message-file.c +> @@ -250,6 +250,15 @@ mboxes is deprecated and may be removed in the future.\n", message->filename); +> return NOTMUCH_STATUS_SUCCESS; +> } +> +> +GMimeMessage * +> +notmuch_message_file_get_mime_message (notmuch_message_file_t *message) +> +{ +> + if (! message->parsed) +> + return NULL; + +This seems like another good opportunity to call the parser lazily and +hide notmuch_message_file_parse from the caller, rather than requiring +the caller to implement a particular call sequence (which I wasn't +even able to find above). This might also clean up the error handling +in the call to notmuch_message_file_get_mime_message above. + +> + +> + return message->message; +> +} +> + +> /* return NULL on errors, empty string for non-existing headers */ +> const char * +> notmuch_message_file_get_header (notmuch_message_file_t *message, +> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h +> index 7277df1..7559521 100644 +> --- a/lib/notmuch-private.h +> +++ b/lib/notmuch-private.h +> @@ -46,6 +46,8 @@ NOTMUCH_BEGIN_DECLS +> +> #include +> +> +#include +> + +> #include "xutil.h" +> #include "error_util.h" +> +> @@ -320,9 +322,11 @@ notmuch_message_get_author (notmuch_message_t *message); +> +> /* index.cc */ +> +> +typedef struct _notmuch_message_file notmuch_message_file_t; +> + +> notmuch_status_t +> _notmuch_message_index_file (notmuch_message_t *message, +> - const char *filename); +> + notmuch_message_file_t *message_file); +> +> /* message-file.c */ +> +> @@ -330,7 +334,6 @@ _notmuch_message_index_file (notmuch_message_t *message, +> * into the public interface in notmuch.h +> */ +> +> -typedef struct _notmuch_message_file notmuch_message_file_t; +> +> /* Open a file containing a single email message. +> * +> @@ -377,6 +380,15 @@ void +> notmuch_message_file_restrict_headersv (notmuch_message_file_t *message, +> va_list va_headers); +> +> +/* Get the gmime message of a parsed message file. +> + * +> + * Returns NULL if the message file has not been parsed. +> + * +> + * XXX: Would be nice to not have to expose GMimeMessage here. + +Maybe just forward-declare struct GMimeMessage? Then you also +wouldn't need to add the gmime #include. + +> + */ +> +GMimeMessage * +> +notmuch_message_file_get_mime_message (notmuch_message_file_t *message); +> + +> /* Get the value of the specified header from the message as a UTF-8 string. +> * +> * The header name is case insensitive.