From bbaa0c455ae89a0bcd601757ac780f03cf94ecb2 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 7 Oct 2014 17:17:14 +1800 Subject: [PATCH] [PATCH v2 08/12] lib: Implement ghost-based thread linking --- af/5db08f553703d823c69957f7631085e7d505f6 | 252 ++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 af/5db08f553703d823c69957f7631085e7d505f6 diff --git a/af/5db08f553703d823c69957f7631085e7d505f6 b/af/5db08f553703d823c69957f7631085e7d505f6 new file mode 100644 index 000000000..f7369968d --- /dev/null +++ b/af/5db08f553703d823c69957f7631085e7d505f6 @@ -0,0 +1,252 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id D2907431FD6 + for ; Mon, 6 Oct 2014 16:17:37 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -2.3 +X-Spam-Level: +X-Spam-Status: No, score=-2.3 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id NoJv7c5qr-eN for ; + Mon, 6 Oct 2014 16:17:34 -0700 (PDT) +Received: from dmz-mailsec-scanner-7.mit.edu (dmz-mailsec-scanner-7.mit.edu + [18.7.68.36]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 1F8E8431FBD + for ; Mon, 6 Oct 2014 16:17:29 -0700 (PDT) +X-AuditID: 12074424-f79346d000004923-3d-543323088ef7 +Received: from mailhub-auth-4.mit.edu ( [18.7.62.39]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-7.mit.edu (Symantec Messaging Gateway) with SMTP + id 76.B5.18723.80323345; Mon, 6 Oct 2014 19:17:28 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by mailhub-auth-4.mit.edu (8.13.8/8.9.2) with ESMTP id s96NHRgh032097; + Mon, 6 Oct 2014 19:17:28 -0400 +Received: from drake.dyndns.org ([12.185.136.2]) (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s96NHPDN014532 + (version=TLSv1/SSLv3 cipher=AES128-SHA bits=128 verify=NOT); + Mon, 6 Oct 2014 19:17:27 -0400 +Received: from amthrax by drake.dyndns.org with local (Exim 4.84) + (envelope-from ) + id 1XbHWs-0001Jm-4i; Mon, 06 Oct 2014 17:17:22 -0600 +From: Austin Clements +To: notmuch@notmuchmail.org +Subject: [PATCH v2 08/12] lib: Implement ghost-based thread linking +Date: Mon, 6 Oct 2014 17:17:14 -0600 +Message-Id: <1412637438-4821-9-git-send-email-aclements@csail.mit.edu> +X-Mailer: git-send-email 2.1.0 +In-Reply-To: <1412637438-4821-1-git-send-email-aclements@csail.mit.edu> +References: <1412637438-4821-1-git-send-email-aclements@csail.mit.edu> +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFtrDIsWRmVeSWpSXmKPExsUixG6nrsuhbBxi0DhL1uJGazejxfWbM5kd + mDyerbrF7LHl0HvmAKYoLpuU1JzMstQifbsErozFWxcwF0w1r2idk9PA2K7TxcjJISFgIrH7 + dRMjhC0mceHeerYuRi4OIYHZTBI7Z19mhXA2MEosnd4HlVnKJHHw/QRmCGcJo8TF3UfZQfrZ + BPQlVqydxApiiwhIS+y8OxvMZhawlLj6tZ2pi5GDQ1jAWWLHbk+QMIuAqsSj8wvAVvMKuEl8 + erqNGeIMOYkNu/+DxTkF3CXWXJkMNkYIqOb5z6csExj5FzAyrGKUTcmt0s1NzMwpTk3WLU5O + zMtLLdI118vNLNFLTSndxAgOJBeVHYzNh5QOMQpwMCrx8EbsMAwRYk0sK67MPcQoycGkJMo7 + h904RIgvKT+lMiOxOCO+qDQntfgQowQHs5IIL9dvoxAh3pTEyqrUonyYlDQHi5I476YffCFC + AumJJanZqakFqUUwWRkODiUJ3g5FoKGCRanpqRVpmTklCGkmDk6Q4TxAww+B1PAWFyTmFmem + Q+RPMSpKifMGgyQEQBIZpXlwvbBIf8UoDvSKMO98kCoeYJKA634FNJgJaLDpHH2QwSWJCCmp + BsZZm0R3tTK6zWYvZ80odn19sD/mu1xD1NNk39D4J9+PuvC/deNrqTosPyvNpkK5Z2WWXF7W + G4UgmTvfhGRXbj59i71MeeVJBt0HT8QSlArad64+nm9m9OnZl4WmC++2nZ5su4ct4sRzXeY6 + k/RKwc7o+We0D8bHzz+5b+YPWWlltgmKYbvSxZVYijMSDbWYi4oTAS37XWjPAgAA +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Mon, 06 Oct 2014 23:17:38 -0000 + +From: Austin Clements + +This updates the thread linking code to use ghost messages instead of +user metadata to link messages into threads. + +In contrast with the old approach, this is actually correct. +Previously, thread merging updated only the thread IDs of message +documents, not thread IDs stored in user metadata. As originally +diagnosed by Mark Walters [1] and as demonstrated by the broken +T260-thread-order test, this can cause notmuch to fail to link +messages even though they're in the same thread. In principle the old +approach could have been fixed by updating the user metadata thread +IDs as well, but these are not indexed and hence this would have +required a full scan of all stored thread IDs. Ghost messages solve +this problem naturally by reusing the exact same thread ID and message +ID representation and indexing as regular messages. + +Furthermore, thanks to this greater symmetry, ghost messages are also +algorithmically simpler. We continue to support the old user metadata +format, so this patch can't delete any code, but when we do remove +support for the old format, several functions can simply be deleted. + +[1] id:8738h7kv2q.fsf@qmul.ac.uk +--- + lib/database.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 75 insertions(+), 11 deletions(-) + +diff --git a/lib/database.cc b/lib/database.cc +index c641bcd..fdcc526 100644 +--- a/lib/database.cc ++++ b/lib/database.cc +@@ -1752,6 +1752,12 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id) + message_id); + } + ++static notmuch_status_t ++_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, ++ void *ctx, ++ const char *message_id, ++ const char **thread_id_ret); ++ + /* Find the thread ID to which the message with 'message_id' belongs. + * + * Note: 'thread_id_ret' must not be NULL! +@@ -1760,9 +1766,9 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id) + * + * Note: If there is no message in the database with the given + * 'message_id' then a new thread_id will be allocated for this +- * message and stored in the database metadata, (where this same ++ * message ID and stored in the database metadata so that the + * thread ID can be looked up if the message is added to the database +- * later). ++ * later. + */ + static notmuch_status_t + _resolve_message_id_to_thread_id (notmuch_database_t *notmuch, +@@ -1770,6 +1776,49 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch, + const char *message_id, + const char **thread_id_ret) + { ++ notmuch_private_status_t status; ++ notmuch_message_t *message; ++ ++ if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) ++ return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id, ++ thread_id_ret); ++ ++ /* Look for this message (regular or ghost) */ ++ message = _notmuch_message_create_for_message_id ( ++ notmuch, message_id, &status); ++ if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) { ++ /* Message exists */ ++ *thread_id_ret = talloc_steal ( ++ ctx, notmuch_message_get_thread_id (message)); ++ } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { ++ /* Message did not exist. Give it a fresh thread ID and ++ * populate this message as a ghost message. */ ++ *thread_id_ret = talloc_strdup ( ++ ctx, _notmuch_database_generate_thread_id (notmuch)); ++ if (! *thread_id_ret) { ++ status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY; ++ } else { ++ status = _notmuch_message_initialize_ghost (message, *thread_id_ret); ++ if (status == 0) ++ /* Commit the new ghost message */ ++ _notmuch_message_sync (message); ++ } ++ } else { ++ /* Create failed. Fall through. */ ++ } ++ ++ notmuch_message_destroy (message); ++ ++ return COERCE_STATUS (status, "Error creating ghost message"); ++} ++ ++/* Pre-ghost messages _resolve_message_id_to_thread_id */ ++static notmuch_status_t ++_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch, ++ void *ctx, ++ const char *message_id, ++ const char **thread_id_ret) ++{ + notmuch_status_t status; + notmuch_message_t *message; + string thread_id_string; +@@ -2007,7 +2056,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + } + } + +-/* Given a (mostly empty) 'message' and its corresponding ++/* Given a blank or ghost 'message' and its corresponding + * 'message_file' link it to existing threads in the database. + * + * The first check is in the metadata of the database to see if we +@@ -2035,16 +2084,22 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch, + static notmuch_status_t + _notmuch_database_link_message (notmuch_database_t *notmuch, + notmuch_message_t *message, +- notmuch_message_file_t *message_file) ++ notmuch_message_file_t *message_file, ++ notmuch_bool_t is_ghost) + { + void *local = talloc_new (NULL); + notmuch_status_t status; +- const char *thread_id; ++ const char *thread_id = NULL; + + /* Check if the message already had a thread ID */ +- thread_id = _consume_metadata_thread_id (local, notmuch, message); +- if (thread_id) +- _notmuch_message_add_term (message, "thread", thread_id); ++ if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) { ++ if (is_ghost) ++ thread_id = notmuch_message_get_thread_id (message); ++ } else { ++ thread_id = _consume_metadata_thread_id (local, notmuch, message); ++ if (thread_id) ++ _notmuch_message_add_term (message, "thread", thread_id); ++ } + + status = _notmuch_database_link_message_to_parents (notmuch, message, + message_file, +@@ -2079,6 +2134,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, + notmuch_message_t *message = NULL; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2; + notmuch_private_status_t private_status; ++ notmuch_bool_t is_ghost = false; + + const char *date, *header; + const char *from, *to, *subject; +@@ -2171,12 +2227,20 @@ notmuch_database_add_message (notmuch_database_t *notmuch, + + _notmuch_message_add_filename (message, filename); + +- /* Is this a newly created message object? */ +- if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) { ++ /* Is this a newly created message object or a ghost ++ * message? We have to be slightly careful: if this is a ++ * blank message, it's not safe to call ++ * notmuch_message_get_flag yet. */ ++ if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND || ++ (is_ghost = notmuch_message_get_flag ( ++ message, NOTMUCH_MESSAGE_FLAG_GHOST))) { + _notmuch_message_add_term (message, "type", "mail"); ++ if (is_ghost) ++ /* Convert ghost message to a regular message */ ++ _notmuch_message_remove_term (message, "type", "ghost"); + + ret = _notmuch_database_link_message (notmuch, message, +- message_file); ++ message_file, is_ghost); + if (ret) + goto DONE; + +-- +2.1.0 + -- 2.26.2