From 32f10c9dec1b39481700c4a021afb9441c660a97 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 24 Oct 2014 08:30:37 +2000 Subject: [PATCH] [PATCH v3 5/9] lib: Implement ghost-based thread linking --- 4e/bd122a2256f168fda6d291d9851d5a7794d29a | 276 ++++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 4e/bd122a2256f168fda6d291d9851d5a7794d29a diff --git a/4e/bd122a2256f168fda6d291d9851d5a7794d29a b/4e/bd122a2256f168fda6d291d9851d5a7794d29a new file mode 100644 index 000000000..c7ed1c095 --- /dev/null +++ b/4e/bd122a2256f168fda6d291d9851d5a7794d29a @@ -0,0 +1,276 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 5F483431FD4 + for ; Thu, 23 Oct 2014 05:31:37 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -2.3 +X-Spam-Level: +X-Spam-Status: No, score=-2.3 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id ihZNxk20MBw3 for ; + Thu, 23 Oct 2014 05:31:30 -0700 (PDT) +Received: from dmz-mailsec-scanner-4.mit.edu (dmz-mailsec-scanner-4.mit.edu + [18.9.25.15]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id B6A07429E29 + for ; Thu, 23 Oct 2014 05:31:00 -0700 (PDT) +X-AuditID: 1209190f-f79aa6d000005b45-39-5448f504ab28 +Received: from mailhub-auth-2.mit.edu ( [18.7.62.36]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-4.mit.edu (Symantec Messaging Gateway) with SMTP + id 4B.6B.23365.405F8445; Thu, 23 Oct 2014 08:31:00 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by mailhub-auth-2.mit.edu (8.13.8/8.9.2) with ESMTP id s9NCUn1V027182; + Thu, 23 Oct 2014 08:30:50 -0400 +Received: from drake.dyndns.org + (216-15-114-40.c3-0.arl-ubr1.sbo-arl.ma.cable.rcn.com + [216.15.114.40]) (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s9NCUhCx008932 + (version=TLSv1/SSLv3 cipher=AES128-SHA bits=128 verify=NOT); + Thu, 23 Oct 2014 08:30:49 -0400 +Received: from amthrax by drake.dyndns.org with local (Exim 4.84) + (envelope-from ) + id 1XhHXP-0007bv-BX; Thu, 23 Oct 2014 08:30:43 -0400 +From: Austin Clements +To: notmuch@notmuchmail.org +Subject: [PATCH v3 5/9] lib: Implement ghost-based thread linking +Date: Thu, 23 Oct 2014 08:30:37 -0400 +Message-Id: <1414067441-29054-6-git-send-email-aclements@csail.mit.edu> +X-Mailer: git-send-email 2.1.0 +In-Reply-To: <1414067441-29054-1-git-send-email-aclements@csail.mit.edu> +References: <1414067441-29054-1-git-send-email-aclements@csail.mit.edu> +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFtrNIsWRmVeSWpSXmKPExsUixG6nosvy1SPEYMYraYvVc3ksrt+cyezA + 5LFz1l12j2erbjEHMEVx2aSk5mSWpRbp2yVwZbydeJS5oMux4u7Fc2wNjD+Nuxg5OSQETCQe + nj3PCGGLSVy4t54NxBYSmM0kMe+lfBcjF5C9kVHiwcxNTBDOHSaJGSs6WCGcJYwSq772soO0 + sAnoS6xYO4kVxBYRkJbYeXc2mM0s4Cjxef8isLHCQPbv7Y9Zuhg5OFgEVCW+vs4ECfMKuEss + m3CRDeIKOYkNu/+DXcQp4CGxZ8cdqIvcJbY/P8g6gZF/ASPDKkbZlNwq3dzEzJzi1GTd4uTE + vLzUIl0TvdzMEr3UlNJNjKAw4pTk38H47aDSIUYBDkYlHt6KZI8QIdbEsuLK3EOMkhxMSqK8 + MQ+AQnxJ+SmVGYnFGfFFpTmpxYcYJTiYlUR44y8A5XhTEiurUovyYVLSHCxK4rybfvCFCAmk + J5akZqemFqQWwWRlODiUJHgLvgA1ChalpqdWpGXmlCCkmTg4QYbzAA2fAFLDW1yQmFucmQ6R + P8WoKCXOm/wZKCEAksgozYPrhcX5K0ZxoFeEedVA2nmAKQKu+xXQYCaQqzeADS5JREhJNTAu + qDfqiKx/WPxtjmmwnpPBTZkpk5IlCz/3B07mjnr24UW+ZboKZ6vRzifJ+mo7/l1wbZkSyf0y + 2MbnxpLzvzzkPr7mf706l/UU+0/36lnetyIUv25Ynbr3i7SJ14eAh1UJjQ++1fca/rUJP2X2 + yuj+tLZSs/ikWRNsov5kr+3bf54lpkRa00aJpTgj0VCLuag4EQDhv+X7zgIAAA== +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Thu, 23 Oct 2014 12:31:37 -0000 + +From: Austin Clements + +This updates the thread linking code to use ghost messages instead of +user metadata to link messages into threads. + +In contrast with the old approach, this is actually correct. +Previously, thread merging updated only the thread IDs of message +documents, not thread IDs stored in user metadata. As originally +diagnosed by Mark Walters [1] and as demonstrated by the broken +T260-thread-order test, this can cause notmuch to fail to link +messages even though they're in the same thread. In principle the old +approach could have been fixed by updating the user metadata thread +IDs as well, but these are not indexed and hence this would have +required a full scan of all stored thread IDs. Ghost messages solve +this problem naturally by reusing the exact same thread ID and message +ID representation and indexing as regular messages. + +Furthermore, thanks to this greater symmetry, ghost messages are also +algorithmically simpler. We continue to support the old user metadata +format, so this patch can't delete any code, but when we do remove +support for the old format, several functions can simply be deleted. + +[1] id:8738h7kv2q.fsf@qmul.ac.uk +--- + lib/database.cc | 99 +++++++++++++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 83 insertions(+), 16 deletions(-) + +diff --git a/lib/database.cc b/lib/database.cc +index c641bcd..92a92d9 100644 +--- a/lib/database.cc ++++ b/lib/database.cc +@@ -1752,6 +1752,12 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id) + message_id); + } + ++static notmuch_status_t ++_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, ++ void *ctx, ++ const char *message_id, ++ const char **thread_id_ret); ++ + /* Find the thread ID to which the message with 'message_id' belongs. + * + * Note: 'thread_id_ret' must not be NULL! +@@ -1760,9 +1766,9 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id) + * + * Note: If there is no message in the database with the given + * 'message_id' then a new thread_id will be allocated for this +- * message and stored in the database metadata, (where this same ++ * message ID and stored in the database metadata so that the + * thread ID can be looked up if the message is added to the database +- * later). ++ * later. + */ + static notmuch_status_t + _resolve_message_id_to_thread_id (notmuch_database_t *notmuch, +@@ -1770,6 +1776,49 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch, + const char *message_id, + const char **thread_id_ret) + { ++ notmuch_private_status_t status; ++ notmuch_message_t *message; ++ ++ if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) ++ return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id, ++ thread_id_ret); ++ ++ /* Look for this message (regular or ghost) */ ++ message = _notmuch_message_create_for_message_id ( ++ notmuch, message_id, &status); ++ if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) { ++ /* Message exists */ ++ *thread_id_ret = talloc_steal ( ++ ctx, notmuch_message_get_thread_id (message)); ++ } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { ++ /* Message did not exist. Give it a fresh thread ID and ++ * populate this message as a ghost message. */ ++ *thread_id_ret = talloc_strdup ( ++ ctx, _notmuch_database_generate_thread_id (notmuch)); ++ if (! *thread_id_ret) { ++ status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY; ++ } else { ++ status = _notmuch_message_initialize_ghost (message, *thread_id_ret); ++ if (status == 0) ++ /* Commit the new ghost message */ ++ _notmuch_message_sync (message); ++ } ++ } else { ++ /* Create failed. Fall through. */ ++ } ++ ++ notmuch_message_destroy (message); ++ ++ return COERCE_STATUS (status, "Error creating ghost message"); ++} ++ ++/* Pre-ghost messages _resolve_message_id_to_thread_id */ ++static notmuch_status_t ++_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, ++ void *ctx, ++ const char *message_id, ++ const char **thread_id_ret) ++{ + notmuch_status_t status; + notmuch_message_t *message; + string thread_id_string; +@@ -2007,13 +2056,16 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + } + } + +-/* Given a (mostly empty) 'message' and its corresponding ++/* Given a blank or ghost 'message' and its corresponding + * 'message_file' link it to existing threads in the database. + * +- * The first check is in the metadata of the database to see if we +- * have pre-allocated a thread_id in advance for this message, (which +- * would have happened if a message was previously added that +- * referenced this one). ++ * First, if is_ghost, this retrieves the thread ID already stored in ++ * the message (which will be the case if a message was previously ++ * added that referenced this one). If the message is blank ++ * (!is_ghost), it doesn't have a thread ID yet (we'll generate one ++ * later in this function). If the database does not support ghost ++ * messages, this checks for a thread ID stored in database metadata ++ * for this message ID. + * + * Second, we look at 'message_file' and its link-relevant headers + * (References and In-Reply-To) for message IDs. +@@ -2021,7 +2073,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + * Finally, we look in the database for existing message that + * reference 'message'. + * +- * In all cases, we assign to the current message the first thread_id ++ * In all cases, we assign to the current message the first thread ID + * found (through either parent or child). We will also merge any + * existing, distinct threads where this message belongs to both, + * (which is not uncommon when messages are processed out of order). +@@ -2035,16 +2087,22 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + static notmuch_status_t + _notmuch_database_link_message (notmuch_database_t *notmuch, + notmuch_message_t *message, +- notmuch_message_file_t *message_file) ++ notmuch_message_file_t *message_file, ++ notmuch_bool_t is_ghost) + { + void *local = talloc_new (NULL); + notmuch_status_t status; +- const char *thread_id; ++ const char *thread_id = NULL; + + /* Check if the message already had a thread ID */ +- thread_id = _consume_metadata_thread_id (local, notmuch, message); +- if (thread_id) +- _notmuch_message_add_term (message, "thread", thread_id); ++ if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) { ++ if (is_ghost) ++ thread_id = notmuch_message_get_thread_id (message); ++ } else { ++ thread_id = _consume_metadata_thread_id (local, notmuch, message); ++ if (thread_id) ++ _notmuch_message_add_term (message, "thread", thread_id); ++ } + + status = _notmuch_database_link_message_to_parents (notmuch, message, + message_file, +@@ -2079,6 +2137,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, + notmuch_message_t *message = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2; + notmuch_private_status_t private_status; ++ notmuch_bool_t is_ghost = false; + + const char *date, *header; + const char *from, *to, *subject; +@@ -2171,12 +2230,20 @@ notmuch_database_add_message (notmuch_database_t *notmuch, + + _notmuch_message_add_filename (message, filename); + +- /* Is this a newly created message object? */ +- if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { ++ /* Is this a newly created message object or a ghost ++ * message? We have to be slightly careful: if this is a ++ * blank message, it's not safe to call ++ * notmuch_message_get_flag yet. */ ++ if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND || ++ (is_ghost = notmuch_message_get_flag ( ++ message, NOTMUCH_MESSAGE_FLAG_GHOST))) { + _notmuch_message_add_term (message, "type", "mail"); ++ if (is_ghost) ++ /* Convert ghost message to a regular message */ ++ _notmuch_message_remove_term (message, "type", "ghost"); + + ret = _notmuch_database_link_message (notmuch, message, +- message_file); ++ message_file, is_ghost); + if (ret) + goto DONE; + +-- +2.1.0 + -- 2.26.2