From: Jani Nikula Date: Mon, 3 Feb 2014 19:51:46 +0000 (+0200) Subject: [PATCH v3 6/6] lib: parse messages only once X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=a0d06c41dd0c71efb9acf53ed1be54fea69ac2f1;p=notmuch-archives.git [PATCH v3 6/6] lib: parse messages only once --- diff --git a/5a/8d1c5897ae25dc0a001cb041285081464687f5 b/5a/8d1c5897ae25dc0a001cb041285081464687f5 new file mode 100644 index 000000000..2dbd8b1f8 --- /dev/null +++ b/5a/8d1c5897ae25dc0a001cb041285081464687f5 @@ -0,0 +1,249 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 8C793431FAF + for ; Mon, 3 Feb 2014 11:52:59 -0800 (PST) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Amavis-Alert: BAD HEADER SECTION, Duplicate header field: "References" +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 5e1cKsBHvdZj for ; + Mon, 3 Feb 2014 11:52:52 -0800 (PST) +Received: from mail-ee0-f46.google.com (mail-ee0-f46.google.com + [74.125.83.46]) (using TLSv1 with cipher RC4-SHA (128/128 bits)) (No client + certificate requested) by olra.theworths.org (Postfix) with ESMTPS id + 1B7B1429E51 for ; Mon, 3 Feb 2014 11:52:19 -0800 + (PST) +Received: by mail-ee0-f46.google.com with SMTP id c13so3867562eek.33 + for ; Mon, 03 Feb 2014 11:52:19 -0800 (PST) +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20130820; + h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to + :references:in-reply-to:references; + bh=RmAJ5gFXk/Jopawe+UrzcinIxcV7L25580CaQCkmSzk=; + b=JEqZ9VOGWcjCVoPEnpXwV8Zh9xhnMZVQ5khlVcTfG4OxpfaypPhBhnt7LQXAmxyHr+ + FlCB2ulwWbnkH4SmCHAbrTgUBejP1aGdG5m+Dnf4LUrVknqkyabjeloEnqx1G5Wfp00s + svyhUA9dFUZiqlgXdCfwIuJzEGNAX7E+PHzO9svW6wjC+3jU9H2MjVdr0A9r1nfNXAk3 + 3oJdbcR8pat/KEHuMrfPnwRy/HvecNnHCUt53KaavLtewgbe4zPArXLr0WvOIq8I/Yfh + wQ5bo1g59mL6ScmxEje2n7XL6r3WpNhM9zxiz79vhXJaPxM4Fm9IzU9x7nakqrEFqCqz + KBvw== +X-Gm-Message-State: + ALoCoQmg1Gh4ewsWQKByEZN/mO3d3twMR06eufDJfjycekidSc1BqGjx1cVzGmxaEhrxcbp7W56g +X-Received: by 10.15.54.72 with SMTP id s48mr45078878eew.3.1391457138946; + Mon, 03 Feb 2014 11:52:18 -0800 (PST) +Received: from localhost (dsl-hkibrasgw2-58c36f-91.dhcp.inet.fi. + [88.195.111.91]) + by mx.google.com with ESMTPSA id m1sm78834700een.7.2014.02.03.11.52.16 + for + (version=TLSv1.2 cipher=RC4-SHA bits=128/128); + Mon, 03 Feb 2014 11:52:18 -0800 (PST) +From: Jani Nikula +To: notmuch@notmuchmail.org +Subject: [PATCH v3 6/6] lib: parse messages only once +Date: Mon, 3 Feb 2014 21:51:46 +0200 +Message-Id: + <31d785c4a3e4b90862a0fdc545d4e900a4c898e2.1391456555.git.jani@nikula.org> +X-Mailer: git-send-email 1.8.5.2 +In-Reply-To: +References: +In-Reply-To: +References: +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Mon, 03 Feb 2014 19:52:59 -0000 + +Use the previously parsed gmime message for indexing instead of +running an extra parsing pass. + +After this change, we'll only do unnecessary parsing of the message +body for duplicates and non-messages. For regular non-duplicate +messages, we have now shaved off an extra header parsing round during +indexing. +--- + lib/database.cc | 2 +- + lib/index.cc | 59 ++++++--------------------------------------------- + lib/message-file.c | 9 ++++++++ + lib/notmuch-private.h | 16 ++++++++++++-- + 4 files changed, 30 insertions(+), 56 deletions(-) + +diff --git a/lib/database.cc b/lib/database.cc +index d1bea88..3a29fe7 100644 +--- a/lib/database.cc ++++ b/lib/database.cc +@@ -2029,7 +2029,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, + date = notmuch_message_file_get_header (message_file, "date"); + _notmuch_message_set_header_values (message, date, from, subject); + +- ret = _notmuch_message_index_file (message, filename); ++ ret = _notmuch_message_index_file (message, message_file); + if (ret) + goto DONE; + } else { +diff --git a/lib/index.cc b/lib/index.cc +index 976e49f..71397da 100644 +--- a/lib/index.cc ++++ b/lib/index.cc +@@ -425,52 +425,15 @@ _index_mime_part (notmuch_message_t *message, + + notmuch_status_t + _notmuch_message_index_file (notmuch_message_t *message, +- const char *filename) ++ notmuch_message_file_t *message_file) + { +- GMimeStream *stream = NULL; +- GMimeParser *parser = NULL; +- GMimeMessage *mime_message = NULL; ++ GMimeMessage *mime_message; + InternetAddressList *addresses; +- FILE *file = NULL; + const char *from, *subject; +- notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; +- static int initialized = 0; +- char from_buf[5]; +- bool is_mbox = false; +- +- if (! initialized) { +- g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS); +- initialized = 1; +- } +- +- file = fopen (filename, "r"); +- if (! file) { +- fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno)); +- ret = NOTMUCH_STATUS_FILE_ERROR; +- goto DONE; +- } +- +- /* Is this mbox? */ +- if (fread (from_buf, sizeof (from_buf), 1, file) == 1 && +- strncmp (from_buf, "From ", 5) == 0) +- is_mbox = true; +- rewind (file); +- +- /* Evil GMime steals my FILE* here so I won't fclose it. */ +- stream = g_mime_stream_file_new (file); +- +- parser = g_mime_parser_new_with_stream (stream); +- g_mime_parser_set_scan_from (parser, is_mbox); + +- mime_message = g_mime_parser_construct_message (parser); +- +- if (is_mbox) { +- if (!g_mime_parser_eos (parser)) { +- /* This is a multi-message mbox. */ +- ret = NOTMUCH_STATUS_FILE_NOT_EMAIL; +- goto DONE; +- } +- } ++ mime_message = notmuch_message_file_get_mime_message (message_file); ++ if (! mime_message) ++ return NOTMUCH_STATUS_FILE_NOT_EMAIL; /* more like internal error */ + + from = g_mime_message_get_sender (mime_message); + +@@ -491,15 +454,5 @@ _notmuch_message_index_file (notmuch_message_t *message, + + _index_mime_part (message, g_mime_message_get_mime_part (mime_message)); + +- DONE: +- if (mime_message) +- g_object_unref (mime_message); +- +- if (parser) +- g_object_unref (parser); +- +- if (stream) +- g_object_unref (stream); +- +- return ret; ++ return NOTMUCH_STATUS_SUCCESS; + } +diff --git a/lib/message-file.c b/lib/message-file.c +index 33f6468..99e1dc8 100644 +--- a/lib/message-file.c ++++ b/lib/message-file.c +@@ -250,6 +250,15 @@ mboxes is deprecated and may be removed in the future.\n", message->filename); + return NOTMUCH_STATUS_SUCCESS; + } + ++GMimeMessage * ++notmuch_message_file_get_mime_message (notmuch_message_file_t *message) ++{ ++ if (! message->parsed) ++ return NULL; ++ ++ return message->message; ++} ++ + /* return NULL on errors, empty string for non-existing headers */ + const char * + notmuch_message_file_get_header (notmuch_message_file_t *message, +diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h +index 7277df1..7559521 100644 +--- a/lib/notmuch-private.h ++++ b/lib/notmuch-private.h +@@ -46,6 +46,8 @@ NOTMUCH_BEGIN_DECLS + + #include + ++#include ++ + #include "xutil.h" + #include "error_util.h" + +@@ -320,9 +322,11 @@ notmuch_message_get_author (notmuch_message_t *message); + + /* index.cc */ + ++typedef struct _notmuch_message_file notmuch_message_file_t; ++ + notmuch_status_t + _notmuch_message_index_file (notmuch_message_t *message, +- const char *filename); ++ notmuch_message_file_t *message_file); + + /* message-file.c */ + +@@ -330,7 +334,6 @@ _notmuch_message_index_file (notmuch_message_t *message, + * into the public interface in notmuch.h + */ + +-typedef struct _notmuch_message_file notmuch_message_file_t; + + /* Open a file containing a single email message. + * +@@ -377,6 +380,15 @@ void + notmuch_message_file_restrict_headersv (notmuch_message_file_t *message, + va_list va_headers); + ++/* Get the gmime message of a parsed message file. ++ * ++ * Returns NULL if the message file has not been parsed. ++ * ++ * XXX: Would be nice to not have to expose GMimeMessage here. ++ */ ++GMimeMessage * ++notmuch_message_file_get_mime_message (notmuch_message_file_t *message); ++ + /* Get the value of the specified header from the message as a UTF-8 string. + * + * The header name is case insensitive. +-- +1.8.5.2 +