add "notmuch reindex" subcommand
author Daniel Kahn Gillmor <dkg@fifthhorseman.net>
Sun, 4 Jun 2017 12:32:35 +0000 (09:32 -0300)
committer David Bremner <david@tethera.net>
Wed, 2 Aug 2017 01:17:47 +0000 (21:17 -0400)
This new subcommand takes a set of search terms, and re-indexes the
list of matching messages.

13 files changed:
Makefile.local
doc/conf.py
doc/index.rst
doc/man1/notmuch-reindex.rst [new file with mode: 0644]
doc/man1/notmuch.rst
doc/man7/notmuch-search-terms.rst
notmuch-client.h
notmuch-reindex.c [new file with mode: 0644]
notmuch.c
performance-test/M04-reindex.sh [new file with mode: 0755]
performance-test/T03-reindex.sh [new file with mode: 0755]
test/T670-duplicate-mid.sh
test/T700-reindex.sh [new file with mode: 0755]

index 6bc78ef8e969b2d8649d68de6b590e8ded886cec..af12ca7f2ef88353648fbbf584422cd613bdb830 100644 (file)
@@ -225,6 +225,7 @@ notmuch_client_srcs =               \
        notmuch-dump.c          \
        notmuch-insert.c        \
        notmuch-new.c           \
+       notmuch-reindex.c       \
        notmuch-reply.c         \
        notmuch-restore.c       \
        notmuch-search.c        \
index a3d8269696a366dca12b1caa4a8d67e6005d993c..aa864b3c0eb635268b46d5a326c3a7ebca864a73 100644 (file)
@@ -95,6 +95,10 @@ man_pages = [
      u'incorporate new mail into the notmuch database',
      [notmuch_authors], 1),
 
+    ('man1/notmuch-reindex', 'notmuch-reindex',
+     u're-index matching messages',
+     [notmuch_authors], 1),
+
     ('man1/notmuch-reply', 'notmuch-reply',
      u'constructs a reply template for a set of messages',
      [notmuch_authors], 1),
index 344606d9e1814434dac78a722a138bb204adf19f..aa6c9f40462c5c89afae747041ddbf9075a4e157 100644 (file)
@@ -18,6 +18,7 @@ Contents:
    man5/notmuch-hooks
    man1/notmuch-insert
    man1/notmuch-new
+   man1/notmuch-reindex
    man1/notmuch-reply
    man1/notmuch-restore
    man1/notmuch-search
diff --git a/doc/man1/notmuch-reindex.rst b/doc/man1/notmuch-reindex.rst
new file mode 100644 (file)
index 0000000..e39cc4e
--- /dev/null
@@ -0,0 +1,29 @@
+===============
+notmuch-reindex
+===============
+
+SYNOPSIS
+========
+
+**notmuch** **reindex** [*option* ...] <*search-term*> ...
+
+DESCRIPTION
+===========
+
+Re-index all messages matching the search terms.
+
+See **notmuch-search-terms(7)** for details of the supported syntax for
+<*search-term*\ >.
+
+The **reindex** command searches for all messages matching the
+supplied search terms, and re-creates the full-text index on these
+messages using the supplied options.
+
+SEE ALSO
+========
+
+**notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**,
+**notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**,
+**notmuch-new(1)**,
+**notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**,
+**notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)**
index cb350d1a19767e4ac354925b525f8fc2c0aa25f9..40fd335bd8240c1eeb3b7c96a267fa19e6f624b8 100644 (file)
@@ -163,8 +163,8 @@ SEE ALSO
 
 **notmuch-address(1)**, **notmuch-compact(1)**, **notmuch-config(1)**,
 **notmuch-count(1)**, **notmuch-dump(1)**, **notmuch-hooks(5)**,
-**notmuch-insert(1)**, **notmuch-new(1)**, **notmuch-reply(1)**,
-**notmuch-restore(1)**, **notmuch-search(1)**,
+**notmuch-insert(1)**, **notmuch-new(1)**, **notmuch-reindex(1)**,
+**notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**,
 **notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)**
 
 The notmuch website: **https://notmuchmail.org**
index 47cab48d3ee97df0d9db80028a586c40fd1a3b2f..dd76972ee3893fe369d508fd4f7e92ec5ad2622a 100644 (file)
@@ -9,6 +9,8 @@ SYNOPSIS
 
 **notmuch** **dump** [--format=(batch-tag|sup)] [--] [--output=<*file*>] [--] [<*search-term*> ...]
 
+**notmuch** **reindex** [option ...] <*search-term*> ...
+
 **notmuch** **search** [option ...] <*search-term*> ...
 
 **notmuch** **show** [option ...] <*search-term*> ...
@@ -421,5 +423,6 @@ SEE ALSO
 
 **notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**,
 **notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**,
-**notmuch-new(1)**, **notmuch-reply(1)**, **notmuch-restore(1)**,
-**notmuch-search(1)**, **notmuch-show(1)**, **notmuch-tag(1)**
+**notmuch-new(1)**, **notmuch-reindex(1)**, **notmuch-reply(1)**,
+**notmuch-restore(1)**, **notmuch-search(1)**, **notmuch-show(1)**,
+**notmuch-tag(1)**
index ae37360b83600bf6a2bac76d47caf397cc53901c..1d3c0829530bff529528fbd3dd29f5d6f43533e5 100644 (file)
@@ -200,6 +200,9 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]);
 int
 notmuch_insert_command (notmuch_config_t *config, int argc, char *argv[]);
 
+int
+notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[]);
+
 int
 notmuch_reply_command (notmuch_config_t *config, int argc, char *argv[]);
 
diff --git a/notmuch-reindex.c b/notmuch-reindex.c
new file mode 100644 (file)
index 0000000..4422304
--- /dev/null
@@ -0,0 +1,134 @@
+/* notmuch - Not much of an email program, (just index and search)
+ *
+ * Copyright © 2016 Daniel Kahn Gillmor
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: Daniel Kahn Gillmor <dkg@fifthhorseman.net>
+ */
+
+#include "notmuch-client.h"
+#include "string-util.h"
+
+static volatile sig_atomic_t interrupted;
+
+/* SIGINT handler: print a short notice on stderr and raise the
+ * 'interrupted' flag that the re-index loop checks between messages. */
+static void
+handle_sigint (unused (int sig))
+{
+    static char notice[] = "Stopping...         \n";
+    const size_t notice_len = sizeof (notice) - 1;
+
+    /* Best-effort output only: a failed or short write is deliberately
+     * ignored, because leaving the handler quickly matters more than
+     * getting the notice out, and correctness does not depend on it. */
+    IGNORE_RESULT (write (2, notice, notice_len));
+
+    interrupted = 1;
+}
+
+/* Re-index every message matching 'query_string'.
+ *
+ * 'indexopts' is handed unchanged to notmuch_message_reindex() for
+ * each matching message.  The whole run happens inside one atomic
+ * section of the database.  The loop stops early when SIGINT has been
+ * received (see 'interrupted') or as soon as re-indexing one message
+ * fails; in the failure case the atomic section is not ended.
+ *
+ * Returns 0 on success and non-zero if any step failed or the run was
+ * interrupted.
+ */
+static int
+reindex_query (notmuch_database_t *notmuch, const char *query_string,
+              notmuch_param_t *indexopts)
+{
+    notmuch_query_t *query;
+    notmuch_messages_t *messages;
+    notmuch_message_t *message;
+    notmuch_status_t status;
+
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+
+    query = notmuch_query_create (notmuch, query_string);
+    if (query == NULL) {
+       fprintf (stderr, "Out of memory.\n");
+       return 1;
+    }
+
+    /* reindexing is not interested in any special sort order */
+    notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED);
+
+    status = notmuch_query_search_messages (query, &messages);
+    if (print_status_query ("notmuch reindex", query, status)) {
+       /* Do not leak the query when the search itself fails. */
+       notmuch_query_destroy (query);
+       return status;
+    }
+
+    /* Refuse to touch any message if the atomic section could not be
+     * opened; previously this status was silently overwritten. */
+    ret = notmuch_database_begin_atomic (notmuch);
+    if (ret != NOTMUCH_STATUS_SUCCESS)
+       goto DONE;
+
+    for (;
+        notmuch_messages_valid (messages) && ! interrupted;
+        notmuch_messages_move_to_next (messages)) {
+       message = notmuch_messages_get (messages);
+
+       ret = notmuch_message_reindex (message, indexopts);
+       if (ret != NOTMUCH_STATUS_SUCCESS)
+           break;
+    }
+
+    /* Only close the atomic section when every message re-indexed
+     * cleanly (an interrupt alone still ends it normally). */
+    if (! ret)
+       ret = notmuch_database_end_atomic (notmuch);
+
+  DONE:
+    notmuch_query_destroy (query);
+
+    return ret || interrupted;
+}
+
+/* Entry point for "notmuch reindex": re-index all messages matching
+ * the search terms given on the command line.
+ *
+ * Installs the SIGINT handler, parses the shared command-line options,
+ * opens the database read-write, builds the query string from the
+ * remaining arguments and passes it to reindex_query().  An empty
+ * query is rejected with an error message.
+ *
+ * Returns EXIT_SUCCESS when every step succeeded and no interrupt was
+ * seen, EXIT_FAILURE otherwise.
+ */
+int
+notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[])
+{
+    char *query_string = NULL;
+    notmuch_database_t *notmuch;
+    struct sigaction action;
+    int opt_index;
+    /* Assume failure until reindex_query() has actually run. */
+    int ret = 1;
+    notmuch_param_t *indexopts = NULL;
+
+    /* Set up our handler for SIGINT */
+    memset (&action, 0, sizeof (struct sigaction));
+    action.sa_handler = handle_sigint;
+    sigemptyset (&action.sa_mask);
+    action.sa_flags = SA_RESTART;
+    sigaction (SIGINT, &action, NULL);
+
+    notmuch_opt_desc_t options[] = {
+       { NOTMUCH_OPT_INHERIT, (void *) &notmuch_shared_options, NULL, 0, 0 },
+       { 0, 0, 0, 0, 0 }
+    };
+
+    opt_index = parse_arguments (argc, argv, options, 1);
+    if (opt_index < 0)
+       return EXIT_FAILURE;
+
+    notmuch_process_shared_options (argv[0]);
+
+    if (notmuch_database_open (notmuch_config_get_database_path (config),
+                              NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
+       return EXIT_FAILURE;
+
+    notmuch_exit_if_unmatched_db_uuid (notmuch);
+
+    query_string = query_string_from_args (config, argc-opt_index, argv+opt_index);
+    if (query_string == NULL) {
+       fprintf (stderr, "Out of memory\n");
+       goto DONE;
+    }
+
+    if (*query_string == '\0') {
+       fprintf (stderr, "Error: notmuch reindex requires at least one search term.\n");
+       goto DONE;
+    }
+
+    ret = reindex_query (notmuch, query_string, indexopts);
+
+  DONE:
+    /* Reached from the error paths above too, so the opened database
+     * is always released before returning. */
+    notmuch_database_destroy (notmuch);
+
+    return (ret || interrupted) ? EXIT_FAILURE : EXIT_SUCCESS;
+}
index 8e332ce644101addf99304952334b71838817f11..201c7454ee7377a33127a4f5a9e9b8921539dbd9 100644 (file)
--- a/notmuch.c
+++ b/notmuch.c
@@ -123,6 +123,8 @@ static command_t commands[] = {
       "Restore the tags from the given dump file (see 'dump')." },
     { "compact", notmuch_compact_command, NOTMUCH_CONFIG_OPEN,
       "Compact the notmuch database." },
+    { "reindex", notmuch_reindex_command, NOTMUCH_CONFIG_OPEN,
+      "Re-index all messages matching the search terms." },
     { "config", notmuch_config_command, NOTMUCH_CONFIG_OPEN,
       "Get or set settings in the notmuch configuration file." },
     { "help", notmuch_help_command, NOTMUCH_CONFIG_CREATE, /* create but don't save config */
diff --git a/performance-test/M04-reindex.sh b/performance-test/M04-reindex.sh
new file mode 100755 (executable)
index 0000000..d36e061
--- /dev/null
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+test_description='reindex'
+
+. ./perf-test-lib.sh || exit 1
+
+memory_start
+
+memory_run 'reindex *' "notmuch reindex '*'"
+
+memory_done
diff --git a/performance-test/T03-reindex.sh b/performance-test/T03-reindex.sh
new file mode 100755 (executable)
index 0000000..7af2d22
--- /dev/null
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+test_description='reindex'
+
+. ./perf-test-lib.sh || exit 1
+
+time_start
+# Time three back-to-back full re-index runs.
+time_run 'reindex *' "notmuch reindex '*'"
+time_run 'reindex *' "notmuch reindex '*'"
+time_run 'reindex *' "notmuch reindex '*'"
+
+time_done
index 2013c6c7032568a8de8e66986c2132e9709bb70d..ea5e1d6a5ba21899cd9b57c657f015ce2bc29814 100755 (executable)
@@ -30,4 +30,11 @@ EOF
 notmuch search --output=files "sekrit" | notmuch_dir_sanitize > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
+rm ${MAIL_DIR}/copy3
+test_begin_subtest 'reindex drops terms in duplicate file'
+cp /dev/null EXPECTED
+notmuch reindex '*'
+notmuch search --output=files "sekrit" | notmuch_dir_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
diff --git a/test/T700-reindex.sh b/test/T700-reindex.sh
new file mode 100755 (executable)
index 0000000..051fbb3
--- /dev/null
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+test_description='reindexing messages'
+. ./test-lib.sh || exit 1
+
+add_email_corpus
+
+notmuch tag +usertag1 '*'
+
+notmuch search '*' | notmuch_search_sanitize > initial-threads
+notmuch search --output=messages '*' > initial-message-ids
+notmuch dump > initial-dump
+
+test_begin_subtest 'reindex preserves threads'
+notmuch reindex '*'
+notmuch search '*' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file initial-threads OUTPUT
+
+test_begin_subtest 'reindex after removing duplicate file preserves threads'
+# remove one copy
+sed 's,3/3(4),3/3,' < initial-threads > EXPECTED
+mv $MAIL_DIR/bar/18:2, duplicate-msg-1.eml
+notmuch reindex '*'
+notmuch search '*' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'reindex preserves message-ids'
+notmuch reindex '*'
+notmuch search --output=messages '*' > OUTPUT
+test_expect_equal_file initial-message-ids OUTPUT
+
+test_begin_subtest 'reindex preserves tags'
+notmuch reindex '*'
+notmuch dump > OUTPUT
+test_expect_equal_file initial-dump OUTPUT
+
+test_begin_subtest 'reindex moves a message between threads'
+notmuch search --output=threads id:87iqd9rn3l.fsf@vertex.dottedmag > EXPECTED
+# re-parent
+sed -i 's/1258471718-6781-1-git-send-email-dottedmag@dottedmag.net/87iqd9rn3l.fsf@vertex.dottedmag/' $MAIL_DIR/02:2,*
+notmuch reindex id:1258471718-6781-2-git-send-email-dottedmag@dottedmag.net
+notmuch search --output=threads id:1258471718-6781-2-git-send-email-dottedmag@dottedmag.net > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'reindex detects removal of all files'
+notmuch search --output=messages not id:20091117232137.GA7669@griffis1.net> EXPECTED
+# remove both copies
+mv $MAIL_DIR/cur/51:2,* duplicate-message-2.eml
+notmuch reindex id:20091117232137.GA7669@griffis1.net
+notmuch search --output=messages '*' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "reindex preserves properties"
+cat <<EOF > prop-dump
+#= 1258471718-6781-1-git-send-email-dottedmag@dottedmag.net userprop=userval
+#= 1258471718-6781-2-git-send-email-dottedmag@dottedmag.net userprop=userval
+#= 1258491078-29658-1-git-send-email-dottedmag@dottedmag.net userprop=userval1
+#= 20091117190054.GU3165@dottiness.seas.harvard.edu userprop=userval
+#= 20091117203301.GV3165@dottiness.seas.harvard.edu userprop=userval3
+#= 87fx8can9z.fsf@vertex.dottedmag userprop=userval2
+#= 87iqd9rn3l.fsf@vertex.dottedmag userprop=userval
+#= 87lji4lx9v.fsf@yoom.home.cworth.org userprop=userval3
+#= 87lji5cbwo.fsf@yoom.home.cworth.org userprop=userval
+#= cf0c4d610911171136h1713aa59w9cf9aa31f052ad0a@mail.gmail.com userprop=userval
+EOF
+notmuch restore < prop-dump
+notmuch reindex '*'
+notmuch dump | grep '^#=' | sort > OUTPUT
+test_expect_equal_file prop-dump OUTPUT
+
+# Load the lkml corpus so the remaining subtest runs before the final test_done.
+add_email_corpus lkml
+
+test_begin_subtest "reindex of lkml corpus preserves threads"
+notmuch search '*' | notmuch_search_sanitize > EXPECTED
+notmuch reindex '*'
+notmuch search '*' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done