From: Daniel Kahn Gillmor Date: Fri, 8 Jul 2016 09:27:26 +0000 (+0200) Subject: [PATCH v4 15/16] added notmuch_message_reindex X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=055b7bca7eea8d376654a58591d0493696b5850c;p=notmuch-archives.git [PATCH v4 15/16] added notmuch_message_reindex --- diff --git a/c3/8e58d9424f9c5d39b514b49a6ab9327dc9f334 b/c3/8e58d9424f9c5d39b514b49a6ab9327dc9f334 new file mode 100644 index 000000000..4865b8901 --- /dev/null +++ b/c3/8e58d9424f9c5d39b514b49a6ab9327dc9f334 @@ -0,0 +1,218 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by arlo.cworth.org (Postfix) with ESMTP id BF9286DE091F + for ; Fri, 8 Jul 2016 03:13:16 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at cworth.org +X-Spam-Flag: NO +X-Spam-Score: 0 +X-Spam-Level: +X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none] + autolearn=disabled +Received: from arlo.cworth.org ([127.0.0.1]) + by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id RsaQ-QlS0tUS for ; + Fri, 8 Jul 2016 03:13:08 -0700 (PDT) +Received: from che.mayfirst.org (che.mayfirst.org [162.247.75.118]) + by arlo.cworth.org (Postfix) with ESMTP id 121A16DE01BA + for ; Fri, 8 Jul 2016 03:13:07 -0700 (PDT) +Received: from fifthhorseman.net (unknown [88.128.80.54]) + by che.mayfirst.org (Postfix) with ESMTPSA id 8414CF99A + for ; Fri, 8 Jul 2016 06:13:06 -0400 (EDT) +Received: by fifthhorseman.net (Postfix, from userid 1000) + id C9C402174C; Fri, 8 Jul 2016 11:27:34 +0200 (CEST) +From: Daniel Kahn Gillmor +To: Notmuch Mail +Subject: [PATCH v4 15/16] added notmuch_message_reindex +Date: Fri, 8 Jul 2016 11:27:26 +0200 +Message-Id: <1467970047-8013-16-git-send-email-dkg@fifthhorseman.net> +X-Mailer: git-send-email 2.8.1 +In-Reply-To: <1467970047-8013-1-git-send-email-dkg@fifthhorseman.net> +References: <1467970047-8013-1-git-send-email-dkg@fifthhorseman.net> +X-BeenThere: 
notmuch@notmuchmail.org +X-Mailman-Version: 2.1.20 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Fri, 08 Jul 2016 10:13:16 -0000 + +This new function asks the database to reindex a given message, using +the supplied indexopts. + +This can be used, for example, to index the cleartext of an encrypted +message. + +My initial inclination for this implementation was to remove all the +indexed terms for a given message's body, and then to add them back +in. + +Unfortunately, that doesn't appear to be possible due to the way we're +using Xapian. I could find no way to distinguish terms which were +added during indexing of the message body from other terms associated +with the document. As a result, we just save the tags and properties, +remove the message from the database entirely, and add it back into +the database in full, re-adding tags and properties as needed. 
+--- + lib/message.cc | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- + lib/notmuch.h | 14 ++++++++ + 2 files changed, 121 insertions(+), 1 deletion(-) + +diff --git a/lib/message.cc b/lib/message.cc +index 9d3e807..ab807b7 100644 +--- a/lib/message.cc ++++ b/lib/message.cc +@@ -557,7 +557,9 @@ void + _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix) + { + Xapian::TermIterator i; +- size_t prefix_len = strlen (prefix); ++ size_t prefix_len = 0; ++ ++ prefix_len = strlen (prefix); + + while (1) { + i = message->doc.termlist_begin (); +@@ -1847,3 +1849,107 @@ _notmuch_message_frozen (notmuch_message_t *message) + { + return message->frozen; + } ++ ++notmuch_status_t ++notmuch_message_reindex (notmuch_message_t *message, ++ notmuch_indexopts_t *indexopts) ++{ ++ notmuch_database_t *notmuch = NULL; ++ notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status; ++ notmuch_tags_t *tags = NULL; ++ notmuch_message_properties_t *properties = NULL; ++ notmuch_filenames_t *filenames, *orig_filenames = NULL; ++ const char *filename = NULL, *tag = NULL, *propkey = NULL; ++ notmuch_message_t *newmsg = NULL; ++ notmuch_bool_t readded = FALSE, skip; ++ const char *autotags[] = { ++ "attachment", ++ "encrypted", ++ "signed" }; ++ const char *autoproperties[] = { "index-decryption" }; ++ ++ if (message == NULL) ++ return NOTMUCH_STATUS_NULL_POINTER; ++ ++ notmuch = _notmuch_message_database (message); ++ ++ /* cache tags, properties, and filenames */ ++ tags = notmuch_message_get_tags (message); ++ properties = notmuch_message_get_properties (message, "", FALSE); ++ filenames = notmuch_message_get_filenames (message); ++ orig_filenames = notmuch_message_get_filenames (message); ++ ++ /* walk through filenames, removing them until the message is gone */ ++ for ( ; notmuch_filenames_valid (filenames); ++ notmuch_filenames_move_to_next (filenames)) { ++ filename = notmuch_filenames_get (filenames); ++ ++ ret = notmuch_database_remove_message 
(notmuch, filename); ++ if (ret != NOTMUCH_STATUS_SUCCESS && ++ ret != NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) ++ return ret; ++ } ++ if (ret != NOTMUCH_STATUS_SUCCESS) ++ return ret; ++ ++ /* re-add the filenames with the associated indexopts */ ++ for (; notmuch_filenames_valid (orig_filenames); ++ notmuch_filenames_move_to_next (orig_filenames)) { ++ filename = notmuch_filenames_get (orig_filenames); ++ ++ status = notmuch_database_add_message_with_indexopts(notmuch, ++ filename, ++ indexopts, ++ readded ? NULL : &newmsg); ++ if (status == NOTMUCH_STATUS_SUCCESS || ++ status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) { ++ if (!readded) { ++ /* re-add tags */ ++ for (; notmuch_tags_valid (tags); ++ notmuch_tags_move_to_next (tags)) { ++ tag = notmuch_tags_get (tags); ++ skip = FALSE; ++ ++ for (size_t i = 0; i < ARRAY_SIZE (autotags); i++) ++ if (strcmp (tag, autotags[i]) == 0) ++ skip = TRUE; ++ ++ if (!skip) { ++ status = notmuch_message_add_tag (newmsg, tag); ++ if (status != NOTMUCH_STATUS_SUCCESS) ++ ret = status; ++ } ++ } ++ /* re-add properties */ ++ for (; notmuch_message_properties_valid (properties); ++ notmuch_message_properties_move_to_next (properties)) { ++ propkey = notmuch_message_properties_key (properties); ++ skip = FALSE; ++ ++ for (size_t i = 0; i < ARRAY_SIZE (autoproperties); i++) ++ if (strcmp (propkey, autoproperties[i]) == 0) ++ skip = TRUE; ++ ++ if (!skip) { ++ status = notmuch_message_add_property (newmsg, propkey, ++ notmuch_message_properties_value (properties)); ++ if (status != NOTMUCH_STATUS_SUCCESS) ++ ret = status; ++ } ++ } ++ readded = TRUE; ++ } ++ } else { ++ /* if we failed to add this filename, go ahead and try the ++ * next one as though it were first, but report the ++ * error... */ ++ ret = status; ++ } ++ } ++ if (newmsg) ++ notmuch_message_destroy (newmsg); ++ ++ /* should we also destroy the incoming message object? 
at the ++ * moment, we leave that to the caller */ ++ return ret; ++} +diff --git a/lib/notmuch.h b/lib/notmuch.h +index 66b3503..9076a9b 100644 +--- a/lib/notmuch.h ++++ b/lib/notmuch.h +@@ -1394,6 +1394,20 @@ notmuch_filenames_t * + notmuch_message_get_filenames (notmuch_message_t *message); + + /** ++ * Re-index the e-mail corresponding to 'message' using the supplied index options ++ * ++ * Returns the status of the re-index operation. (see the return ++ * codes documented in notmuch_database_add_message) ++ * ++ * After reindexing, the user should discard the message object passed ++ * in here by calling notmuch_message_destroy, since it refers to the ++ * original message, not to the reindexed message. ++ */ ++notmuch_status_t ++notmuch_message_reindex (notmuch_message_t *message, ++ notmuch_indexopts_t *indexopts); ++ ++/** + * Message flags. + */ + typedef enum _notmuch_message_flag { +-- +2.8.1 +