--- /dev/null
+Return-Path: <amthrax@awakening.csail.mit.edu>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+ by olra.theworths.org (Postfix) with ESMTP id 4603A431FB6\r
+ for <notmuch@notmuchmail.org>; Thu, 9 Dec 2010 13:00:17 -0800 (PST)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: 0\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none]\r
+ autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+ by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+ with ESMTP id gQfeb00aapUE for <notmuch@notmuchmail.org>;\r
+ Thu, 9 Dec 2010 13:00:16 -0800 (PST)\r
+Received: from dmz-mailsec-scanner-6.mit.edu (DMZ-MAILSEC-SCANNER-6.MIT.EDU\r
+ [18.7.68.35])\r
+ by olra.theworths.org (Postfix) with ESMTP id 59FC7431FB5\r
+ for <notmuch@notmuchmail.org>; Thu, 9 Dec 2010 13:00:16 -0800 (PST)\r
+X-AuditID: 12074423-b7bd0ae000000a00-66-4d01435e3c07\r
+Received: from mailhub-auth-1.mit.edu ( [18.9.21.35])\r
+ by dmz-mailsec-scanner-6.mit.edu (Symantec Brightmail Gateway) with\r
+ SMTP id 02.2A.02560.F53410D4; Thu, 9 Dec 2010 16:00:15 -0500 (EST)\r
+Received: from outgoing.mit.edu (OUTGOING-AUTH.MIT.EDU [18.7.22.103])\r
+ by mailhub-auth-1.mit.edu (8.13.8/8.9.2) with ESMTP id oB9L0EEE011330; \r
+ Thu, 9 Dec 2010 16:00:14 -0500\r
+Received: from awakening.csail.mit.edu (awakening.csail.mit.edu [18.26.4.91])\r
+ (authenticated bits=0)\r
+ (User authenticated as amdragon@ATHENA.MIT.EDU)\r
+ by outgoing.mit.edu (8.13.6/8.12.4) with ESMTP id oB9L0CZh010008\r
+ (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT);\r
+ Thu, 9 Dec 2010 16:00:13 -0500 (EST)\r
+Received: from amthrax by awakening.csail.mit.edu with local (Exim 4.72)\r
+ (envelope-from <amthrax@awakening.csail.mit.edu>)\r
+ id 1PQnb2-0007Hk-SH; Thu, 09 Dec 2010 16:00:12 -0500\r
+From: Austin Clements <amdragon@MIT.EDU>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH 1/5] Use a single unified pass to fetch scalar message\r
+ metadata.\r
+Date: Thu, 9 Dec 2010 15:59:52 -0500\r
+Message-Id: <1291928396-27937-2-git-send-email-amdragon@mit.edu>\r
+X-Mailer: git-send-email 1.7.2.3\r
+In-Reply-To: <1291928396-27937-1-git-send-email-amdragon@mit.edu>\r
+References: <1291928396-27937-1-git-send-email-amdragon@mit.edu>\r
+X-Brightmail-Tracker: AAAAARbjX5o=\r
+Cc: Austin Clements <amdragon@mit.edu>\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+ <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Thu, 09 Dec 2010 21:00:17 -0000\r
+\r
+This performs a single pass over a message's term list to fetch the\r
+thread ID, message ID, and reply-to, rather than requiring a pass for\r
+each. Xapian decompresses the term list anew for each iteration, so\r
+this reduces the amount of time spent decompressing message metadata.\r
+\r
+This reduces my inbox search from 3.102 seconds to 2.555 seconds (1.2X\r
+faster).\r
+---\r
+ lib/message.cc | 197 ++++++++++++++++++++++++++++----------------------------\r
+ 1 files changed, 98 insertions(+), 99 deletions(-)\r
+\r
+diff --git a/lib/message.cc b/lib/message.cc\r
+index adcd07d..d6ab636 100644\r
+--- a/lib/message.cc\r
++++ b/lib/message.cc\r
+@@ -254,41 +254,106 @@ _notmuch_message_create_for_message_id (notmuch_database_t *notmuch,\r
+ return message;\r
+ }\r
+ \r
+-unsigned int\r
+-_notmuch_message_get_doc_id (notmuch_message_t *message)\r
+-{\r
+- return message->doc_id;\r
+-}\r
+-\r
+-const char *\r
+-notmuch_message_get_message_id (notmuch_message_t *message)\r
++static char *\r
++_notmuch_message_get_term (notmuch_message_t *message,\r
++ Xapian::TermIterator &i, Xapian::TermIterator &end,\r
++ const char *prefix)\r
+ {\r
+- Xapian::TermIterator i;\r
++ int prefix_len = strlen (prefix);\r
++ const char *term = NULL;\r
++ char *value;\r
+ \r
+- if (message->message_id)\r
+- return message->message_id;\r
++ i.skip_to (prefix);\r
+ \r
+- i = message->doc.termlist_begin ();\r
+- i.skip_to (_find_prefix ("id"));\r
++ if (i != end)\r
++ term = (*i).c_str ();\r
+ \r
+- if (i == message->doc.termlist_end ())\r
+- INTERNAL_ERROR ("Message with document ID of %d has no message ID.\n",\r
+- message->doc_id);\r
++ if (!term || strncmp (term, prefix, prefix_len))\r
++ return NULL;\r
+ \r
+- message->message_id = talloc_strdup (message, (*i).c_str () + 1);\r
++ value = talloc_strdup (message, term + prefix_len);\r
+ \r
+ #if DEBUG_DATABASE_SANITY\r
+ i++;\r
+ \r
+- if (i != message->doc.termlist_end () &&\r
+- strncmp ((*i).c_str (), _find_prefix ("id"),\r
+- strlen (_find_prefix ("id"))) == 0)\r
+- {\r
+- INTERNAL_ERROR ("Mail (doc_id: %d) has duplicate message IDs",\r
+- message->doc_id);\r
++ if (i != end && strncmp ((*i).c_str (), prefix, prefix_len) == 0) {\r
++ INTERNAL_ERROR ("Mail (doc_id: %d) has duplicate %s terms: %s and %s\n",\r
++ message->doc_id, prefix, value,\r
++ (*i).c_str () + prefix_len);\r
+ }\r
+ #endif\r
+ \r
++ return value;\r
++}\r
++\r
++void\r
++_notmuch_message_ensure_metadata (notmuch_message_t *message)\r
++{\r
++ Xapian::TermIterator i, end;\r
++ const char *thread_prefix = _find_prefix ("thread"),\r
++ *id_prefix = _find_prefix ("id"),\r
++ *replyto_prefix = _find_prefix ("replyto");\r
++\r
++ /* We do this all in a single pass because Xapian decompresses the\r
++ * term list every time you iterate over it. Thus, while this is\r
++ * slightly more costly than looking up individual fields if only\r
++ * one field of the message object is actually used, it's a huge\r
++ * win as more fields are used. */\r
++\r
++ i = message->doc.termlist_begin ();\r
++ end = message->doc.termlist_end ();\r
++\r
++ /* Get thread */\r
++ if (!message->thread_id)\r
++ message->thread_id =\r
++ _notmuch_message_get_term (message, i, end, thread_prefix);\r
++\r
++ /* Get id */\r
++ assert (strcmp (thread_prefix, id_prefix) < 0);\r
++ if (!message->message_id)\r
++ message->message_id =\r
++ _notmuch_message_get_term (message, i, end, id_prefix);\r
++\r
++ /* Get reply to */\r
++ assert (strcmp (id_prefix, replyto_prefix) < 0);\r
++ if (!message->in_reply_to)\r
++ message->in_reply_to =\r
++ _notmuch_message_get_term (message, i, end, replyto_prefix);\r
++ /* It's perfectly valid for a message to have no In-Reply-To\r
++ * header. For these cases, we return an empty string. */\r
++ if (!message->in_reply_to)\r
++ message->in_reply_to = talloc_strdup (message, "");\r
++}\r
++\r
++static void\r
++_notmuch_message_invalidate_metadata (notmuch_message_t *message,\r
++ const char *prefix_name)\r
++{\r
++ if (strcmp ("thread", prefix_name) == 0) {\r
++ talloc_free (message->thread_id);\r
++ message->thread_id = NULL;\r
++ }\r
++\r
++ if (strcmp ("replyto", prefix_name) == 0) {\r
++ talloc_free (message->in_reply_to);\r
++ message->in_reply_to = NULL;\r
++ }\r
++}\r
++\r
++unsigned int\r
++_notmuch_message_get_doc_id (notmuch_message_t *message)\r
++{\r
++ return message->doc_id;\r
++}\r
++\r
++const char *\r
++notmuch_message_get_message_id (notmuch_message_t *message)\r
++{\r
++ if (!message->message_id)\r
++ _notmuch_message_ensure_metadata (message);\r
++ if (!message->message_id)\r
++ INTERNAL_ERROR ("Message with document ID of %u has no message ID.\n",\r
++ message->doc_id);\r
+ return message->message_id;\r
+ }\r
+ \r
+@@ -327,89 +392,19 @@ notmuch_message_get_header (notmuch_message_t *message, const char *header)\r
+ const char *\r
+ _notmuch_message_get_in_reply_to (notmuch_message_t *message)\r
+ {\r
+- const char *prefix = _find_prefix ("replyto");\r
+- int prefix_len = strlen (prefix);\r
+- Xapian::TermIterator i;\r
+- std::string in_reply_to;\r
+-\r
+- if (message->in_reply_to)\r
+- return message->in_reply_to;\r
+-\r
+- i = message->doc.termlist_begin ();\r
+- i.skip_to (prefix);\r
+-\r
+- if (i != message->doc.termlist_end ())\r
+- in_reply_to = *i;\r
+-\r
+- /* It's perfectly valid for a message to have no In-Reply-To\r
+- * header. For these cases, we return an empty string. */\r
+- if (i == message->doc.termlist_end () ||\r
+- strncmp (in_reply_to.c_str (), prefix, prefix_len))\r
+- {\r
+- message->in_reply_to = talloc_strdup (message, "");\r
+- return message->in_reply_to;\r
+- }\r
+-\r
+- message->in_reply_to = talloc_strdup (message,\r
+- in_reply_to.c_str () + prefix_len);\r
+-\r
+-#if DEBUG_DATABASE_SANITY\r
+- i++;\r
+-\r
+- in_reply_to = *i;\r
+-\r
+- if (i != message->doc.termlist_end () &&\r
+- strncmp ((*i).c_str (), prefix, prefix_len) == 0)\r
+- {\r
+- INTERNAL_ERROR ("Message %s has duplicate In-Reply-To IDs: %s and %s\n",\r
+- notmuch_message_get_message_id (message),\r
+- message->in_reply_to,\r
+- (*i).c_str () + prefix_len);\r
+- }\r
+-#endif\r
+-\r
++ if (!message->in_reply_to)\r
++ _notmuch_message_ensure_metadata (message);\r
+ return message->in_reply_to;\r
+ }\r
+ \r
+ const char *\r
+ notmuch_message_get_thread_id (notmuch_message_t *message)\r
+ {\r
+- const char *prefix = _find_prefix ("thread");\r
+- Xapian::TermIterator i;\r
+- std::string id;\r
+-\r
+- /* This code is written with the assumption that "thread" has a\r
+- * single-character prefix. */\r
+- assert (strlen (prefix) == 1);\r
+-\r
+- if (message->thread_id)\r
+- return message->thread_id;\r
+-\r
+- i = message->doc.termlist_begin ();\r
+- i.skip_to (prefix);\r
+-\r
+- if (i != message->doc.termlist_end ())\r
+- id = *i;\r
+-\r
+- if (i == message->doc.termlist_end () || id[0] != *prefix)\r
+- INTERNAL_ERROR ("Message with document ID of %d has no thread ID.\n",\r
++ if (!message->thread_id)\r
++ _notmuch_message_ensure_metadata (message);\r
++ if (!message->thread_id)\r
++ INTERNAL_ERROR ("Message with document ID of %u has no thread ID.\n",\r
+ message->doc_id);\r
+-\r
+- message->thread_id = talloc_strdup (message, id.c_str () + 1);\r
+-\r
+-#if DEBUG_DATABASE_SANITY\r
+- i++;\r
+- id = *i;\r
+-\r
+- if (i != message->doc.termlist_end () && id[0] == *prefix)\r
+- {\r
+- INTERNAL_ERROR ("Message %s has duplicate thread IDs: %s and %s\n",\r
+- notmuch_message_get_message_id (message),\r
+- message->thread_id,\r
+- id.c_str () + 1);\r
+- }\r
+-#endif\r
+-\r
+ return message->thread_id;\r
+ }\r
+ \r
+@@ -738,6 +733,8 @@ _notmuch_message_add_term (notmuch_message_t *message,\r
+ \r
+ talloc_free (term);\r
+ \r
++ _notmuch_message_invalidate_metadata (message, prefix_name);\r
++\r
+ return NOTMUCH_PRIVATE_STATUS_SUCCESS;\r
+ }\r
+ \r
+@@ -801,6 +798,8 @@ _notmuch_message_remove_term (notmuch_message_t *message,\r
+ \r
+ talloc_free (term);\r
+ \r
++ _notmuch_message_invalidate_metadata (message, prefix_name);\r
++\r
+ return NOTMUCH_PRIVATE_STATUS_SUCCESS;\r
+ }\r
+ \r
+-- \r
+1.7.2.3\r
+\r