--- /dev/null
+Return-Path: <bremner@tesseract.cs.unb.ca>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+ by arlo.cworth.org (Postfix) with ESMTP id EF72F6DE1654\r
+ for <notmuch@notmuchmail.org>; Sun, 9 Aug 2015 02:27:43 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at cworth.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: 0.139\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=0.139 tagged_above=-999 required=5 tests=[AWL=0.129, \r
+ T_HEADER_FROM_DIFFERENT_DOMAINS=0.01] autolearn=disabled\r
+Received: from arlo.cworth.org ([127.0.0.1])\r
+ by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024)\r
+ with ESMTP id a6v1vDbKtd-S for <notmuch@notmuchmail.org>;\r
+ Sun, 9 Aug 2015 02:27:41 -0700 (PDT)\r
+Received: from gitolite.debian.net (gitolite.debian.net [87.98.215.224])\r
+ by arlo.cworth.org (Postfix) with ESMTPS id 9DB506DE100F\r
+ for <notmuch@notmuchmail.org>; Sun, 9 Aug 2015 02:27:41 -0700 (PDT)\r
+Received: from remotemail by gitolite.debian.net with local (Exim 4.80)\r
+ (envelope-from <bremner@tesseract.cs.unb.ca>)\r
+ id 1ZOMrf-0005Wc-L9; Sun, 09 Aug 2015 09:25:59 +0000\r
+Received: (nullmailer pid 7061 invoked by uid 1000); Sun, 09 Aug 2015\r
+ 09:24:47 -0000\r
+From: David Bremner <david@tethera.net>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH 1/5] lib: Add per-message last modification tracking\r
+Date: Sun, 9 Aug 2015 11:24:41 +0200\r
+Message-Id: <1439112285-6681-2-git-send-email-david@tethera.net>\r
+X-Mailer: git-send-email 2.1.4\r
+In-Reply-To: <1439112285-6681-1-git-send-email-david@tethera.net>\r
+References: <1439112285-6681-1-git-send-email-david@tethera.net>\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.18\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+ <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch/>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Sun, 09 Aug 2015 09:27:44 -0000\r
+\r
+From: Austin Clements <amdragon@mit.edu>\r
+\r
+This adds a new document value that stores the revision of the last\r
+modification to message metadata, where the revision number increases\r
+monotonically with each database commit.\r
+\r
+An alternative would be to store the wall-clock time of the last\r
+modification of each message. In principle this is simpler and has\r
+the advantage that any process can determine the current timestamp\r
+without support from libnotmuch. However, even assuming a computer's\r
+clock never goes backward and ignoring clock skew in networked\r
+environments, this has a fatal flaw. Xapian uses (optimistic)\r
+snapshot isolation, which means reads can be concurrent with writes.\r
+Given this, consider the following time line with a write and two read\r
+transactions:\r
+\r
+ write |-X-A--------------|\r
+ read 1 |---B---|\r
+ read 2 |---|\r
+\r
+The write transaction modifies message X and records the wall-clock\r
+time of the modification at A. The writer hangs around for a while\r
+and later commits its change. Read 1 is concurrent with the write, so\r
+it doesn't see the change to X. It does some query and records the\r
+wall-clock time of its results at B. Transaction read 2 later starts\r
+after the write commits and queries for changes since wall-clock time\r
+B (say the reads are performing an incremental backup). Even though\r
+read 1 could not see the change to X, read 2 is told (correctly, by\r
+timestamp) that X has not changed since B, the time of the last read.\r
+In fact, X changed before wall-clock time A, but the change was not\r
+visible until *after* wall-clock time B, so read 2 misses the change to X.\r
+\r
+This is tricky to solve in full-blown snapshot isolation, but because\r
+Xapian serializes writes, we can use a simple, monotonically\r
+increasing database revision number. Furthermore, maintaining this\r
+revision number requires no more IO than a wall-clock time solution\r
+because Xapian already maintains statistics on the upper (and lower)\r
+bound of each value stream.\r
+---\r
+ lib/database-private.h | 16 +++++++++++++++-\r
+ lib/database.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--\r
+ lib/message.cc | 22 ++++++++++++++++++++++\r
+ lib/notmuch-private.h | 10 +++++++++-\r
+ 4 files changed, 94 insertions(+), 4 deletions(-)\r
+\r
+diff --git a/lib/database-private.h b/lib/database-private.h\r
+index 24243db..5c5a2bb 100644\r
+--- a/lib/database-private.h\r
++++ b/lib/database-private.h\r
+@@ -100,6 +100,12 @@ enum _notmuch_features {\r
+ *\r
+ * Introduced: version 3. */\r
+ NOTMUCH_FEATURE_INDEXED_MIMETYPES = 1 << 5,\r
++\r
++ /* If set, messages store the revision number of the last\r
++ * modification in NOTMUCH_VALUE_LAST_MOD.\r
++ *\r
++ * Introduced: version 3. */\r
++ NOTMUCH_FEATURE_LAST_MOD = 1 << 6,\r
+ };\r
+ \r
+ /* In C++, a named enum is its own type, so define bitwise operators\r
+@@ -145,6 +151,8 @@ struct _notmuch_database {\r
+ \r
+ notmuch_database_mode_t mode;\r
+ int atomic_nesting;\r
++ /* TRUE if changes have been made in this atomic section */\r
++ notmuch_bool_t atomic_dirty;\r
+ Xapian::Database *xapian_db;\r
+ \r
+ /* Bit mask of features used by this database. This is a\r
+@@ -158,6 +166,11 @@ struct _notmuch_database {\r
+ * next library call. May be NULL */\r
+ char *status_string;\r
+ \r
++ /* Highest committed revision number. Modifications are recorded\r
++ * under a higher revision number, which can be generated with\r
++ * _notmuch_database_new_revision. */\r
++ unsigned long revision;\r
++\r
+ Xapian::QueryParser *query_parser;\r
+ Xapian::TermGenerator *term_gen;\r
+ Xapian::ValueRangeProcessor *value_range_processor;\r
+@@ -179,7 +192,8 @@ struct _notmuch_database {\r
+ * will have it). */\r
+ #define NOTMUCH_FEATURES_CURRENT \\r
+ (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \\r
+- NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS)\r
++ NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS | \\r
++ NOTMUCH_FEATURE_LAST_MOD)\r
+ \r
+ /* Return the list of terms from the given iterator matching a prefix.\r
+ * The prefix will be stripped from the strings in the returned list.\r
+diff --git a/lib/database.cc b/lib/database.cc\r
+index 6a15174..52e2e8f 100644\r
+--- a/lib/database.cc\r
++++ b/lib/database.cc\r
+@@ -101,6 +101,9 @@ typedef struct {\r
+ *\r
+ * SUBJECT: The value of the "Subject" header\r
+ *\r
++ * LAST_MOD: The revision number as of the last tag or\r
++ * filename change.\r
++ *\r
+ * In addition, terms from the content of the message are added with\r
+ * "from", "to", "attachment", and "subject" prefixes for use by the\r
+ * user in searching. Similarly, terms from the path of the mail\r
+@@ -310,6 +313,8 @@ static const struct {\r
+ * them. */\r
+ { NOTMUCH_FEATURE_INDEXED_MIMETYPES,\r
+ "indexed MIME types", "w"},\r
++ { NOTMUCH_FEATURE_LAST_MOD,\r
++ "modification tracking", "w"},\r
+ };\r
+ \r
+ const char *\r
+@@ -737,6 +742,23 @@ _notmuch_database_ensure_writable (notmuch_database_t *notmuch)\r
+ return NOTMUCH_STATUS_SUCCESS;\r
+ }\r
+ \r
++/* Allocate a revision number for the next change. */\r
++unsigned long\r
++_notmuch_database_new_revision (notmuch_database_t *notmuch)\r
++{\r
++ unsigned long new_revision = notmuch->revision + 1;\r
++\r
++ /* If we're in an atomic section, hold off on updating the\r
++ * committed revision number until we commit the atomic section.\r
++ */\r
++ if (notmuch->atomic_nesting)\r
++ notmuch->atomic_dirty = TRUE;\r
++ else\r
++ notmuch->revision = new_revision;\r
++\r
++ return new_revision;\r
++}\r
++\r
+ /* Parse a database features string from the given database version.\r
+ * Returns the feature bit set.\r
+ *\r
+@@ -904,6 +926,7 @@ notmuch_database_open_verbose (const char *path,\r
+ notmuch->atomic_nesting = 0;\r
+ try {\r
+ string last_thread_id;\r
++ string last_mod;\r
+ \r
+ if (mode == NOTMUCH_DATABASE_MODE_READ_WRITE) {\r
+ notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,\r
+@@ -962,6 +985,14 @@ notmuch_database_open_verbose (const char *path,\r
+ INTERNAL_ERROR ("Malformed database last_thread_id: %s", str);\r
+ }\r
+ \r
++ /* Get current highest revision number. */\r
++ last_mod = notmuch->xapian_db->get_value_upper_bound (\r
++ NOTMUCH_VALUE_LAST_MOD);\r
++ if (last_mod.empty ())\r
++ notmuch->revision = 0;\r
++ else\r
++ notmuch->revision = Xapian::sortable_unserialise (last_mod);\r
++\r
+ notmuch->query_parser = new Xapian::QueryParser;\r
+ notmuch->term_gen = new Xapian::TermGenerator;\r
+ notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));\r
+@@ -1369,7 +1400,8 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,\r
+ \r
+ /* Figure out how much total work we need to do. */\r
+ if (new_features &\r
+- (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_BOOL_FOLDER)) {\r
++ (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_BOOL_FOLDER |\r
++ NOTMUCH_FEATURE_LAST_MOD)) {\r
+ notmuch_query_t *query = notmuch_query_create (notmuch, "");\r
+ total += notmuch_query_count_messages (query);\r
+ notmuch_query_destroy (query);\r
+@@ -1396,7 +1428,8 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,\r
+ \r
+ /* Perform per-message upgrades. */\r
+ if (new_features &\r
+- (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_BOOL_FOLDER)) {\r
++ (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_BOOL_FOLDER |\r
++ NOTMUCH_FEATURE_LAST_MOD)) {\r
+ notmuch_query_t *query = notmuch_query_create (notmuch, "");\r
+ notmuch_messages_t *messages;\r
+ notmuch_message_t *message;\r
+@@ -1433,6 +1466,14 @@ notmuch_database_upgrade (notmuch_database_t *notmuch,\r
+ if (new_features & NOTMUCH_FEATURE_BOOL_FOLDER)\r
+ _notmuch_message_upgrade_folder (message);\r
+ \r
++ /* Prior to NOTMUCH_FEATURE_LAST_MOD, messages did not\r
++ * track modification revisions. Give all messages the\r
++ * next available revision; since we just started tracking\r
++ * revisions for this database, that will be 1.\r
++ */\r
++ if (new_features & NOTMUCH_FEATURE_LAST_MOD)\r
++ _notmuch_message_upgrade_last_mod (message);\r
++\r
+ _notmuch_message_sync (message);\r
+ \r
+ notmuch_message_destroy (message);\r
+@@ -1615,6 +1656,11 @@ notmuch_database_end_atomic (notmuch_database_t *notmuch)\r
+ return NOTMUCH_STATUS_XAPIAN_EXCEPTION;\r
+ }\r
+ \r
++ if (notmuch->atomic_dirty) {\r
++ ++notmuch->revision;\r
++ notmuch->atomic_dirty = FALSE;\r
++ }\r
++\r
+ DONE:\r
+ notmuch->atomic_nesting--;\r
+ return NOTMUCH_STATUS_SUCCESS;\r
+diff --git a/lib/message.cc b/lib/message.cc\r
+index 1ddce3c..26b5e76 100644\r
+--- a/lib/message.cc\r
++++ b/lib/message.cc\r
+@@ -998,6 +998,16 @@ _notmuch_message_set_header_values (notmuch_message_t *message,\r
+ message->modified = TRUE;\r
+ }\r
+ \r
++/* Upgrade a message to support NOTMUCH_FEATURE_LAST_MOD. The caller\r
++ * must call _notmuch_message_sync. */\r
++void\r
++_notmuch_message_upgrade_last_mod (notmuch_message_t *message)\r
++{\r
++ /* _notmuch_message_sync will update the last modification\r
++ * revision; we just have to ask it to. */\r
++ message->modified = TRUE;\r
++}\r
++\r
+ /* Synchronize changes made to message->doc out into the database. */\r
+ void\r
+ _notmuch_message_sync (notmuch_message_t *message)\r
+@@ -1010,6 +1020,18 @@ _notmuch_message_sync (notmuch_message_t *message)\r
+ if (! message->modified)\r
+ return;\r
+ \r
++ /* Update the last modification of this message. */\r
++ if (message->notmuch->features & NOTMUCH_FEATURE_LAST_MOD)\r
++ /* sortable_serialise gives a reasonably compact encoding,\r
++ * which directly translates to reduced IO when scanning the\r
++ * value stream. Since it's built for doubles, we only get 53\r
++ * effective bits, but that's still enough for the database to\r
++ * last a few centuries at 1 million revisions per second. */\r
++ message->doc.add_value (NOTMUCH_VALUE_LAST_MOD,\r
++ Xapian::sortable_serialise (\r
++ _notmuch_database_new_revision (\r
++ message->notmuch)));\r
++\r
+ db = static_cast <Xapian::WritableDatabase *> (message->notmuch->xapian_db);\r
+ db->replace_document (message->doc_id, message->doc);\r
+ message->modified = FALSE;\r
+diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h\r
+index cc9ce12..f52b4e4 100644\r
+--- a/lib/notmuch-private.h\r
++++ b/lib/notmuch-private.h\r
+@@ -107,7 +107,8 @@ typedef enum {\r
+ NOTMUCH_VALUE_TIMESTAMP = 0,\r
+ NOTMUCH_VALUE_MESSAGE_ID,\r
+ NOTMUCH_VALUE_FROM,\r
+- NOTMUCH_VALUE_SUBJECT\r
++ NOTMUCH_VALUE_SUBJECT,\r
++ NOTMUCH_VALUE_LAST_MOD,\r
+ } notmuch_value_t;\r
+ \r
+ /* Xapian (with flint backend) complains if we provide a term longer\r
+@@ -194,6 +195,9 @@ void\r
+ _notmuch_database_log (notmuch_database_t *notmuch,\r
+ const char *format, ...);\r
+ \r
++unsigned long\r
++_notmuch_database_new_revision (notmuch_database_t *notmuch);\r
++\r
+ const char *\r
+ _notmuch_database_relative_path (notmuch_database_t *notmuch,\r
+ const char *path);\r
+@@ -305,6 +309,10 @@ _notmuch_message_set_header_values (notmuch_message_t *message,\r
+ const char *date,\r
+ const char *from,\r
+ const char *subject);\r
++\r
++void\r
++_notmuch_message_upgrade_last_mod (notmuch_message_t *message);\r
++\r
+ void\r
+ _notmuch_message_sync (notmuch_message_t *message);\r
+ \r
+-- \r
+2.1.4\r
+\r