From 32ba475b298ec59ef84dc978e707f9dfafb9f4c3 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 22 Oct 2014 21:49:13 +2000 Subject: [PATCH] Re: [PATCH v2 08/12] lib: Implement ghost-based thread linking --- 01/bdb16594f6b01c74c999198b7edc00af539c78 | 292 ++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 01/bdb16594f6b01c74c999198b7edc00af539c78 diff --git a/01/bdb16594f6b01c74c999198b7edc00af539c78 b/01/bdb16594f6b01c74c999198b7edc00af539c78 new file mode 100644 index 000000000..961b78fb8 --- /dev/null +++ b/01/bdb16594f6b01c74c999198b7edc00af539c78 @@ -0,0 +1,292 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 20DEF431FB6 + for ; Tue, 21 Oct 2014 18:49:22 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -2.3 +X-Spam-Level: +X-Spam-Status: No, score=-2.3 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id pjKGcSE-sYOv for ; + Tue, 21 Oct 2014 18:49:14 -0700 (PDT) +Received: from outgoing.csail.mit.edu (outgoing.csail.mit.edu [128.30.2.149]) + by olra.theworths.org (Postfix) with ESMTP id 6733A431FAE + for ; Tue, 21 Oct 2014 18:49:14 -0700 (PDT) +Received: from [104.131.20.129] (helo=awakeningjr) + by outgoing.csail.mit.edu with esmtpsa (TLS1.0:RSA_AES_128_CBC_SHA1:16) + (Exim 4.72) (envelope-from ) + id 1Xgl33-0005Gj-S9; Tue, 21 Oct 2014 21:49:13 -0400 +Received: from amthrax by awakeningjr with local (Exim 4.84) + (envelope-from ) + id 1Xgl33-0000Ge-JH; Tue, 21 Oct 2014 21:49:13 -0400 +Date: Tue, 21 Oct 2014 21:49:13 -0400 +From: Austin Clements +To: Mark Walters +Subject: Re: [PATCH v2 08/12] lib: Implement ghost-based thread linking +Message-ID: <20141022014913.GE7970@csail.mit.edu> +References: <1412637438-4821-1-git-send-email-aclements@csail.mit.edu> + <1412637438-4821-9-git-send-email-aclements@csail.mit.edu> + <87zjcphvdu.fsf@qmul.ac.uk> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline +In-Reply-To: <87zjcphvdu.fsf@qmul.ac.uk> +User-Agent: Mutt/1.5.23 (2014-03-12) +Cc: notmuch@notmuchmail.org +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Wed, 22 Oct 2014 01:49:22 -0000 + +Quoth Mark Walters on Oct 22 at 12:10 am: +> On Tue, 07 Oct 2014, Austin Clements wrote: +> > From: Austin Clements +> > +> > This updates the thread linking code to use ghost messages instead of +> > user metadata to link messages into threads. +> > +> > In contrast with the old approach, this is actually correct. +> > Previously, thread merging updated only the thread IDs of message +> > documents, not thread IDs stored in user metadata. As originally +> > diagnosed by Mark Walters [1] and as demonstrated by the broken +> > T260-thread-order test, this can cause notmuch to fail to link +> > messages even though they're in the same thread. In principle the old +> > approach could have been fixed by updating the user metadata thread +> > IDs as well, but these are not indexed and hence this would have +> > required a full scan of all stored thread IDs. Ghost messages solve +> > this problem naturally by reusing the exact same thread ID and message +> > ID representation and indexing as regular messages. +> > +> > Furthermore, thanks to this greater symmetry, ghost messages are also +> > algorithmically simpler. We continue to support the old user metadata +> > format, so this patch can't delete any code, but when we do remove +> > support for the old format, several functions can simply be deleted. +> > +> > [1] id:8738h7kv2q.fsf@qmul.ac.uk +> > --- +> > lib/database.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++-------- +> > 1 file changed, 75 insertions(+), 11 deletions(-) +> > +> > diff --git a/lib/database.cc b/lib/database.cc +> > index c641bcd..fdcc526 100644 +> > --- a/lib/database.cc +> > +++ b/lib/database.cc +> > @@ -1752,6 +1752,12 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id) +> > message_id); +> > } +> > +> > +static notmuch_status_t +> > +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, +> > + void *ctx, +> > + const char *message_id, +> > + const char **thread_id_ret); +> > + +> > /* Find the thread ID to which the message with 'message_id' belongs. +> > * +> > * Note: 'thread_id_ret' must not be NULL! +> > @@ -1760,9 +1766,9 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id) +> > * +> > * Note: If there is no message in the database with the given +> > * 'message_id' then a new thread_id will be allocated for this +> > - * message and stored in the database metadata, (where this same +> > + * message ID and stored in the database metadata so that the +> > * thread ID can be looked up if the message is added to the database +> > - * later). +> > + * later. +> > */ +> > static notmuch_status_t +> > _resolve_message_id_to_thread_id (notmuch_database_t *notmuch, +> > @@ -1770,6 +1776,49 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch, +> > const char *message_id, +> > const char **thread_id_ret) +> > { +> > + notmuch_private_status_t status; +> > + notmuch_message_t *message; +> > + +> > + if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) +> > + return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id, +> > + thread_id_ret); +> > + +> > + /* Look for this message (regular or ghost) */ +> > + message = _notmuch_message_create_for_message_id ( +> > + notmuch, message_id, &status); +> > + if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) { +> > + /* Message exists */ +> > + *thread_id_ret = talloc_steal ( +> > + ctx, notmuch_message_get_thread_id (message)); +> > + } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { +> > + /* Message did not exist. Give it a fresh thread ID and +> > + * populate this message as a ghost message. */ +> > + *thread_id_ret = talloc_strdup ( +> > + ctx, _notmuch_database_generate_thread_id (notmuch)); +> > + if (! *thread_id_ret) { +> > + status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY; +> > + } else { +> > + status = _notmuch_message_initialize_ghost (message, *thread_id_ret); +> > + if (status == 0) +> > + /* Commit the new ghost message */ +> > + _notmuch_message_sync (message); +> > + } +> > + } else { +> > + /* Create failed. Fall through. */ +> > + } +> > + +> > + notmuch_message_destroy (message); +> > + +> > + return COERCE_STATUS (status, "Error creating ghost message"); +> > +} +> > + +> > +/* Pre-ghost messages _resolve_message_id_to_thread_id */ +> > +static notmuch_status_t +> > +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, +> > + void *ctx, +> > + const char *message_id, +> > + const char **thread_id_ret) +> > +{ +> > notmuch_status_t status; +> > notmuch_message_t *message; +> > string thread_id_string; +> > @@ -2007,7 +2056,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, +> > } +> > } +> > +> > -/* Given a (mostly empty) 'message' and its corresponding +> > +/* Given a blank or ghost 'message' and its corresponding +> > * 'message_file' link it to existing threads in the database. +> > * +> > * The first check is in the metadata of the database to see if we +> +> There is quite a lot of comment below this and it is not clear to me how +> much of it applies to the is_ghost case. In particular does the +> +> * Finally, we look in the database for existing message that +> * reference 'message'. +> +> still apply? I would have thought that we would already have done that +> in the is_ghost case. + +Good point. The comment isn't really wrong, but it isn't right +either. How about + +diff --git a/lib/database.cc b/lib/database.cc +index 6e51a72..d063ec8 100644 +--- a/lib/database.cc ++++ b/lib/database.cc +@@ -2121,10 +2121,13 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + /* Given a blank or ghost 'message' and its corresponding + * 'message_file' link it to existing threads in the database. + * +- * The first check is in the metadata of the database to see if we +- * have pre-allocated a thread_id in advance for this message, (which +- * would have happened if a message was previously added that +- * referenced this one). ++ * First, if is_ghost, this retrieves the thread ID already stored in ++ * the message (which will be the case if a message was previously ++ * added that referenced this one). If the message is blank ++ * (!is_ghost), it doesn't have a thread ID yet (we'll generate one ++ * later in this function). If the database does not support ghost ++ * messages, this checks for a thread ID stored in database metadata ++ * for this message ID. + * + * Second, we look at 'message_file' and its link-relevant headers + * (References and In-Reply-To) for message IDs. +@@ -2132,7 +2135,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + * Finally, we look in the database for existing message that + * reference 'message'. + * +- * In all cases, we assign to the current message the first thread_id ++ * In all cases, we assign to the current message the first thread ID + * found (through either parent or child). We will also merge any + * existing, distinct threads where this message belongs to both, + * (which is not uncommon when messages are processed out of order). + +? + +> Of course this also applies to the actual code which seems to call +> _notmuch_database_link_message_to_children unconditionally. It might be +> worth a comment in any case. +> +> Best wishes +> +> Mark +> +> +> > @@ -2035,16 +2084,22 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, +> > static notmuch_status_t +> > _notmuch_database_link_message (notmuch_database_t *notmuch, +> > notmuch_message_t *message, +> > - notmuch_message_file_t *message_file) +> > + notmuch_message_file_t *message_file, +> > + notmuch_bool_t is_ghost) +> > { +> > void *local = talloc_new (NULL); +> > notmuch_status_t status; +> > - const char *thread_id; +> > + const char *thread_id = NULL; +> > +> > /* Check if the message already had a thread ID */ +> > - thread_id = _consume_metadata_thread_id (local, notmuch, message); +> > - if (thread_id) +> > - _notmuch_message_add_term (message, "thread", thread_id); +> > + if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) { +> > + if (is_ghost) +> > + thread_id = notmuch_message_get_thread_id (message); +> > + } else { +> > + thread_id = _consume_metadata_thread_id (local, notmuch, message); +> > + if (thread_id) +> > + _notmuch_message_add_term (message, "thread", thread_id); +> > + } +> > +> > status = _notmuch_database_link_message_to_parents (notmuch, message, +> > message_file, +> > @@ -2079,6 +2134,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, +> > notmuch_message_t *message = NULL; +> > notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2; +> > notmuch_private_status_t private_status; +> > + notmuch_bool_t is_ghost = false; +> > +> > const char *date, *header; +> > const char *from, *to, *subject; +> > @@ -2171,12 +2227,20 @@ notmuch_database_add_message (notmuch_database_t *notmuch, +> > +> > _notmuch_message_add_filename (message, filename); +> > +> > - /* Is this a newly created message object? */ +> > - if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { +> > + /* Is this a newly created message object or a ghost +> > + * message? We have to be slightly careful: if this is a +> > + * blank message, it's not safe to call +> > + * notmuch_message_get_flag yet. */ +> > + if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND || +> > + (is_ghost = notmuch_message_get_flag ( +> > + message, NOTMUCH_MESSAGE_FLAG_GHOST))) { +> > _notmuch_message_add_term (message, "type", "mail"); +> > + if (is_ghost) +> > + /* Convert ghost message to a regular message */ +> > + _notmuch_message_remove_term (message, "type", "ghost"); +> > +> > ret = _notmuch_database_link_message (notmuch, message, +> > - message_file); +> > + message_file, is_ghost); +> > if (ret) +> > goto DONE; +> > +> > +> > _______________________________________________ +> > notmuch mailing list +> > notmuch@notmuchmail.org +> > http://notmuchmail.org/mailman/listinfo/notmuch -- 2.26.2