From 598557644da839ea37aadc2319d86b65f0ef7929 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 26 Aug 2014 13:26:00 +2000 Subject: [PATCH] [PATCH v4 02/11] lib: Database version 3: Introduce fine-grained "features" --- e4/567c28326981e9150d97a9a7224bd4db3233ef | 553 ++++++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 e4/567c28326981e9150d97a9a7224bd4db3233ef diff --git a/e4/567c28326981e9150d97a9a7224bd4db3233ef b/e4/567c28326981e9150d97a9a7224bd4db3233ef new file mode 100644 index 000000000..8e4235760 --- /dev/null +++ b/e4/567c28326981e9150d97a9a7224bd4db3233ef @@ -0,0 +1,553 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id B4619431FBD + for ; Mon, 25 Aug 2014 10:28:56 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -2.3 +X-Spam-Level: +X-Spam-Status: No, score=-2.3 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 2rkgTrINkSl2 for ; + Mon, 25 Aug 2014 10:28:49 -0700 (PDT) +Received: from dmz-mailsec-scanner-4.mit.edu (dmz-mailsec-scanner-4.mit.edu + [18.9.25.15]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 7901A431FC2 + for ; Mon, 25 Aug 2014 10:28:21 -0700 (PDT) +X-AuditID: 1209190f-f79aa6d000005b45-b5-53fb7234b52e +Received: from mailhub-auth-4.mit.edu ( [18.7.62.39]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-4.mit.edu (Symantec Messaging Gateway) with SMTP + id 16.D9.23365.4327BF35; Mon, 25 Aug 2014 13:28:20 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu 
[18.9.28.11]) + by mailhub-auth-4.mit.edu (8.13.8/8.9.2) with ESMTP id s7PHQEx3001075; + Mon, 25 Aug 2014 13:26:15 -0400 +Received: from drake.dyndns.org (31-35-14.wireless.csail.mit.edu + [128.31.35.14]) (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s7PHQBNZ029607 + (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT); + Mon, 25 Aug 2014 13:26:13 -0400 +Received: from amthrax by drake.dyndns.org with local (Exim 4.77) + (envelope-from ) + id 1XLy1z-0003jM-TA; Mon, 25 Aug 2014 13:26:11 -0400 +From: Austin Clements +To: notmuch@notmuchmail.org +Subject: [PATCH v4 02/11] lib: Database version 3: Introduce fine-grained + "features" +Date: Mon, 25 Aug 2014 13:26:00 -0400 +Message-Id: <1408987569-14146-3-git-send-email-amdragon@mit.edu> +X-Mailer: git-send-email 2.0.0 +In-Reply-To: <1408987569-14146-1-git-send-email-amdragon@mit.edu> +References: <1408987569-14146-1-git-send-email-amdragon@mit.edu> +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFjrBIsWRmVeSWpSXmKPExsUixG6nrmtS9DvY4NY5fYsbrd2MFk3TnS2u + 35zJ7MDscev+a3aPZ6tuMXtsOfSeOYA5issmJTUnsyy1SN8ugSuj6chutoLH3YwVC/9dZW5g + 7MjtYuTkkBAwkWjq/skMYYtJXLi3nq2LkYtDSGA2k8Tk4zdZIZyNjBLLNtxkgXCOMUnMevGU + HcKZyyixvHcRI0g/m4CGxO9bi5lAbBEBaYmdd2ezgtjMAl4Syy70g9nCAiESt9pmsIHYLAKq + EnvuXgPr5RVwkLj5o5kF4g45iYYbn8BqOAUcJaYvnA02Uwio5vvl88wTGPkXMDKsYpRNya3S + zU3MzClOTdYtTk7My0st0jXRy80s0UtNKd3ECAowTkn+HYzfDiodYhTgYFTi4b0R/ztYiDWx + rLgy9xCjJAeTkijvtHygEF9SfkplRmJxRnxRaU5q8SFGCQ5mJRHeZpAcb0piZVVqUT5MSpqD + RUmc9621VbCQQHpiSWp2ampBahFMVoaDQ0mC934BUKNgUWp6akVaZk4JQpqJgxNkOA/QcP9C + kOHFBYm5xZnpEPlTjIpS4ryGIAkBkERGaR5cLywBvGIUB3pFmNcGpIoHmDzgul8BDWYCGmza + 8xNkcEkiQkqqgbGqMGdlybb8BTcSb8xR/rtqn7ORgfDUKR3nNn9b83T71iOxs9h3/Ar99cl6 + 8q+FOtafXp95qphRa/0m0F+5n2GPCEPkv6j6nfOPl+6yvSXRFz89eWsY1+VfduJt/r8mikjN + tV7rvSPuk8qH5JUM3bt7pbbvUhOp+s8rUfypruVOS8/W9r4NLNuUWIozEg21mIuKEwGTgOK5 + 2wIAAA== +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 
+Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Mon, 25 Aug 2014 17:28:57 -0000 + +Previously, our database schema was versioned by a single number. +Each database schema change had to occur "atomically" in Notmuch's +development history: before some commit, Notmuch used version N, after +that commit, it used version N+1. Hence, each new schema version +could introduce only one change, the task of developing a schema +change fell on a single person, and it all had to happen and be +perfect in a single commit series. This made introducing a new schema +version hard. We've seen only two schema changes in the history of +Notmuch. + +This commit introduces database schema version 3; hopefully the last +schema version we'll need for a while. With this version, we switch +from a single version number to "features": a set of named, +independent aspects of the database schema. + +Features should make backwards compatibility easier. For many things, +it should be easy to support databases both with and without a +feature, which will allow us to make upgrades optional and will enable +"unstable" features that can be developed and tested over time. + +Features also make forwards compatibility easier. The features +recorded in a database include "compatibility flags," which can +indicate to an older version of Notmuch when it must support a given +feature to open the database for read or for write. This lets us +replace the old vague "I don't recognize this version, so something +might go wrong, but I promise to try my best" warnings upon opening a +database with an unknown version with precise errors. If a database +is safe to open for read/write despite unknown features, an older +version will know that and issue no message at all. 
If the database +is not safe to open for read/write because of unknown features, an +older version will know that, too, and can tell the user exactly which +required features it lacks support for. +--- + lib/database-private.h | 108 ++++++++++++++++++++++++++- + lib/database.cc | 197 ++++++++++++++++++++++++++++++++++++++++--------- + 2 files changed, 271 insertions(+), 34 deletions(-) + +diff --git a/lib/database-private.h b/lib/database-private.h +index d3e65fd..ca0751c 100644 +--- a/lib/database-private.h ++++ b/lib/database-private.h +@@ -36,16 +36,106 @@ + + #pragma GCC visibility push(hidden) + ++/* Bit masks for _notmuch_database::features. Features are named, ++ * independent aspects of the database schema. ++ * ++ * A database stores the set of features that it "uses" (implicitly ++ * before database version 3 and explicitly as of version 3). ++ * ++ * A given library version will "recognize" a particular set of ++ * features; if a database uses a feature that the library does not ++ * recognize, the library will refuse to open it. It is assumed the ++ * set of recognized features grows monotonically over time. A ++ * library version will "implement" some subset of the recognized ++ * features: some operations may require that the database use (or not ++ * use) some feature, while other operations may support both ++ * databases that use and that don't use some feature. ++ * ++ * On disk, the database stores string names for these features (see ++ * the feature_names array). These enum bit values are never ++ * persisted to disk and may change freely. ++ */ ++enum _notmuch_features { ++ /* If set, file names are stored in "file-direntry" terms. If ++ * unset, file names are stored in document data. ++ * ++ * Introduced: version 1. */ ++ NOTMUCH_FEATURE_FILE_TERMS = 1 << 0, ++ ++ /* If set, directory timestamps are stored in documents with ++ * XDIRECTORY terms and relative paths. 
If unset, directory ++ * timestamps are stored in documents with XTIMESTAMP terms and ++ * absolute paths. ++ * ++ * Introduced: version 1. */ ++ NOTMUCH_FEATURE_DIRECTORY_DOCS = 1 << 1, ++ ++ /* If set, the from, subject, and message-id headers are stored in ++ * message document values. If unset, message documents *may* ++ * have these values, but if the value is empty, it must be ++ * retrieved from the message file. ++ * ++ * Introduced: optional in version 1, required as of version 3. ++ */ ++ NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES = 1 << 2, ++ ++ /* If set, folder terms are boolean and path terms exist. If ++ * unset, folder terms are probabilistic and stemmed and path ++ * terms do not exist. ++ * ++ * Introduced: version 2. */ ++ NOTMUCH_FEATURE_BOOL_FOLDER = 1 << 3, ++}; ++ ++/* In C++, a named enum is its own type, so define bitwise operators ++ * on _notmuch_features. */ ++inline _notmuch_features ++operator|(_notmuch_features a, _notmuch_features b) ++{ ++ return static_cast<_notmuch_features>( ++ static_cast<unsigned>(a) | static_cast<unsigned>(b)); ++} ++ ++inline _notmuch_features ++operator&(_notmuch_features a, _notmuch_features b) ++{ ++ return static_cast<_notmuch_features>( ++ static_cast<unsigned>(a) & static_cast<unsigned>(b)); ++} ++ ++inline _notmuch_features ++operator~(_notmuch_features a) ++{ ++ return static_cast<_notmuch_features>(~static_cast<unsigned>(a)); ++} ++ ++inline _notmuch_features& ++operator|=(_notmuch_features &a, _notmuch_features b) ++{ ++ a = a | b; ++ return a; ++} ++ ++inline _notmuch_features& ++operator&=(_notmuch_features &a, _notmuch_features b) ++{ ++ a = a & b; ++ return a; ++} ++ + struct _notmuch_database { + notmuch_bool_t exception_reported; + + char *path; + +- notmuch_bool_t needs_upgrade; + notmuch_database_mode_t mode; + int atomic_nesting; + Xapian::Database *xapian_db; + ++ /* Bit mask of features used by this database. This is a ++ * bitwise-OR of NOTMUCH_FEATURE_* values (above). 
*/ ++ enum _notmuch_features features; ++ + unsigned int last_doc_id; + uint64_t last_thread_id; + +@@ -55,6 +145,22 @@ struct _notmuch_database { + Xapian::ValueRangeProcessor *date_range_processor; + }; + ++/* Prior to database version 3, features were implied by the database ++ * version number, so hard-code them for earlier versions. */ ++#define NOTMUCH_FEATURES_V0 ((enum _notmuch_features)0) ++#define NOTMUCH_FEATURES_V1 (NOTMUCH_FEATURES_V0 | NOTMUCH_FEATURE_FILE_TERMS | \ ++ NOTMUCH_FEATURE_DIRECTORY_DOCS) ++#define NOTMUCH_FEATURES_V2 (NOTMUCH_FEATURES_V1 | NOTMUCH_FEATURE_BOOL_FOLDER) ++ ++/* Current database features. If any of these are missing from a ++ * database, request an upgrade. ++ * NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES is not included because ++ * upgrade doesn't currently introduce the feature (though brand new ++ * databases will have it). */ ++#define NOTMUCH_FEATURES_CURRENT \ ++ (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \ ++ NOTMUCH_FEATURE_BOOL_FOLDER) ++ + /* Return the list of terms from the given iterator matching a prefix. + * The prefix will be stripped from the strings in the returned list. + * The list will be allocated using ctx as the talloc context. +diff --git a/lib/database.cc b/lib/database.cc +index 9c0952a..2b566f7 100644 +--- a/lib/database.cc ++++ b/lib/database.cc +@@ -20,6 +20,7 @@ + + #include "database-private.h" + #include "parse-time-vrp.h" ++#include "string-util.h" + + #include <iostream> + +@@ -42,7 +43,7 @@ typedef struct { + const char *prefix; + } prefix_t; + +-#define NOTMUCH_DATABASE_VERSION 2 ++#define NOTMUCH_DATABASE_VERSION 3 + + #define STRINGIFY(s) _SUB_STRINGIFY(s) + #define _SUB_STRINGIFY(s) #s +@@ -154,6 +155,17 @@ typedef struct { + * changes are made to the database (such as by + * indexing new fields). + * ++ * features The set of features supported by this ++ * database. 
This consists of a set of ++ * '\n'-separated lines, where each is a feature ++ * name, a '\t', and compatibility flags. If the ++ * compatibility flags contain 'w', then the ++ * opener must support this feature to safely ++ * write this database. If the compatibility ++ * flags contain 'r', then the opener must ++ * support this feature to read this database. ++ * Introduced in database version 3. ++ * + * last_thread_id The last thread ID generated. This is stored + * as a 16-byte hexadecimal ASCII representation + * of a 64-bit unsigned integer. The first ID +@@ -254,6 +266,28 @@ _find_prefix (const char *name) + return ""; + } + ++static const struct { ++ /* NOTMUCH_FEATURE_* value. */ ++ _notmuch_features value; ++ /* Feature name as it appears in the database. This name should ++ * be appropriate for displaying to the user if an older version ++ * of notmuch doesn't support this feature. */ ++ const char *name; ++ /* Compatibility flags when this feature is declared. */ ++ const char *flags; ++} feature_names[] = { ++ { NOTMUCH_FEATURE_FILE_TERMS, ++ "multiple paths per message", "rw" }, ++ { NOTMUCH_FEATURE_DIRECTORY_DOCS, ++ "relative directory paths", "rw" }, ++ /* Header values are not required for reading a database because a ++ * reader can just refer to the message file. */ ++ { NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES, ++ "from/subject/message-ID in database", "w" }, ++ { NOTMUCH_FEATURE_BOOL_FOLDER, ++ "exact folder:/path: search", "rw" }, ++}; ++ + const char * + notmuch_status_to_string (notmuch_status_t status) + { +@@ -591,6 +625,11 @@ notmuch_database_create (const char *path, notmuch_database_t **database) + &notmuch); + if (status) + goto DONE; ++ ++ /* Upgrade doesn't add this feature to existing databases, but new ++ * databases have it. 
*/ ++ notmuch->features |= NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES; ++ + status = notmuch_database_upgrade (notmuch, NULL, NULL); + if (status) { + notmuch_database_close(notmuch); +@@ -619,6 +658,83 @@ _notmuch_database_ensure_writable (notmuch_database_t *notmuch) + return NOTMUCH_STATUS_SUCCESS; + } + ++/* Parse a database features string from the given database version. ++ * Returns the feature bit set. ++ * ++ * For version < 3, this ignores the features string and returns a ++ * hard-coded set of features. ++ * ++ * If there are unrecognized features that are required to open the ++ * database in mode (which should be 'r' or 'w'), return a ++ * comma-separated list of unrecognized but required features in ++ * *incompat_out suitable for presenting to the user. *incompat_out ++ * will be allocated from ctx. ++ */ ++static _notmuch_features ++_parse_features (const void *ctx, const char *features, unsigned int version, ++ char mode, char **incompat_out) ++{ ++ _notmuch_features res = static_cast<_notmuch_features>(0); ++ unsigned int namelen, i; ++ size_t llen = 0; ++ const char *flags; ++ ++ /* Prior to database version 3, features were implied by the ++ * version number. */ ++ if (version == 0) ++ return NOTMUCH_FEATURES_V0; ++ else if (version == 1) ++ return NOTMUCH_FEATURES_V1; ++ else if (version == 2) ++ return NOTMUCH_FEATURES_V2; ++ ++ /* Parse the features string */ ++ while ((features = strtok_len_c (features + llen, "\n", &llen)) != NULL) { ++ flags = strchr (features, '\t'); ++ if (! 
flags || flags > features + llen) ++ continue; ++ namelen = flags - features; ++ ++ for (i = 0; i < ARRAY_SIZE (feature_names); ++i) { ++ if (strlen (feature_names[i].name) == namelen && ++ strncmp (feature_names[i].name, features, namelen) == 0) { ++ res |= feature_names[i].value; ++ break; ++ } ++ } ++ ++ if (i == ARRAY_SIZE (feature_names) && incompat_out) { ++ /* Unrecognized feature */ ++ const char *have = strchr (flags, mode); ++ if (have && have < features + llen) { ++ /* This feature is required to access this database in ++ * 'mode', but we don't understand it. */ ++ if (! *incompat_out) ++ *incompat_out = talloc_strdup (ctx, ""); ++ *incompat_out = talloc_asprintf_append_buffer ( ++ *incompat_out, "%s%.*s", **incompat_out ? ", " : "", ++ namelen, features); ++ } ++ } ++ } ++ ++ return res; ++} ++ ++static char * ++_print_features (const void *ctx, unsigned int features) ++{ ++ unsigned int i; ++ char *res = talloc_strdup (ctx, ""); ++ ++ for (i = 0; i < ARRAY_SIZE (feature_names); ++i) ++ if (features & feature_names[i].value) ++ res = talloc_asprintf_append_buffer ( ++ res, "%s\t%s\n", feature_names[i].name, feature_names[i].flags); ++ ++ return res; ++} ++ + notmuch_status_t + notmuch_database_open (const char *path, + notmuch_database_mode_t mode, +@@ -627,7 +743,7 @@ notmuch_database_open (const char *path, + notmuch_status_t status = NOTMUCH_STATUS_SUCCESS; + void *local = talloc_new (NULL); + notmuch_database_t *notmuch = NULL; +- char *notmuch_path, *xapian_path; ++ char *notmuch_path, *xapian_path, *incompat_features; + struct stat st; + int err; + unsigned int i, version; +@@ -677,7 +793,6 @@ notmuch_database_open (const char *path, + if (notmuch->path[strlen (notmuch->path) - 1] == '/') + notmuch->path[strlen (notmuch->path) - 1] = '\0'; + +- notmuch->needs_upgrade = FALSE; + notmuch->mode = mode; + notmuch->atomic_nesting = 0; + try { +@@ -686,37 +801,44 @@ notmuch_database_open (const char *path, + if (mode == 
NOTMUCH_DATABASE_MODE_READ_WRITE) { + notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path, + Xapian::DB_CREATE_OR_OPEN); +- version = notmuch_database_get_version (notmuch); +- +- if (version > NOTMUCH_DATABASE_VERSION) { +- fprintf (stderr, +- "Error: Notmuch database at %s\n" +- " has a newer database format version (%u) than supported by this\n" +- " version of notmuch (%u). Refusing to open this database in\n" +- " read-write mode.\n", +- notmuch_path, version, NOTMUCH_DATABASE_VERSION); +- notmuch->mode = NOTMUCH_DATABASE_MODE_READ_ONLY; +- notmuch_database_destroy (notmuch); +- notmuch = NULL; +- status = NOTMUCH_STATUS_FILE_ERROR; +- goto DONE; +- } +- +- if (version < NOTMUCH_DATABASE_VERSION) +- notmuch->needs_upgrade = TRUE; + } else { + notmuch->xapian_db = new Xapian::Database (xapian_path); +- version = notmuch_database_get_version (notmuch); +- if (version > NOTMUCH_DATABASE_VERSION) +- { +- fprintf (stderr, +- "Warning: Notmuch database at %s\n" +- " has a newer database format version (%u) than supported by this\n" +- " version of notmuch (%u). Some operations may behave incorrectly,\n" +- " (but the database will not be harmed since it is being opened\n" +- " in read-only mode).\n", +- notmuch_path, version, NOTMUCH_DATABASE_VERSION); +- } ++ } ++ ++ /* Check version. As of database version 3, we represent ++ * changes in terms of features, so assume a version bump ++ * means a dramatically incompatible change. */ ++ version = notmuch_database_get_version (notmuch); ++ if (version > NOTMUCH_DATABASE_VERSION) { ++ fprintf (stderr, ++ "Error: Notmuch database at %s\n" ++ " has a newer database format version (%u) than supported by this\n" ++ " version of notmuch (%u).\n", ++ notmuch_path, version, NOTMUCH_DATABASE_VERSION); ++ notmuch->mode = NOTMUCH_DATABASE_MODE_READ_ONLY; ++ notmuch_database_destroy (notmuch); ++ notmuch = NULL; ++ status = NOTMUCH_STATUS_FILE_ERROR; ++ goto DONE; ++ } ++ ++ /* Check features. 
*/ ++ incompat_features = NULL; ++ notmuch->features = _parse_features ( ++ local, notmuch->xapian_db->get_metadata ("features").c_str (), ++ version, mode == NOTMUCH_DATABASE_MODE_READ_WRITE ? 'w' : 'r', ++ &incompat_features); ++ if (incompat_features) { ++ fprintf (stderr, ++ "Error: Notmuch database at %s\n" ++ " requires features (%s)\n" ++ " not supported by this version of notmuch.\n", ++ notmuch_path, incompat_features); ++ notmuch->mode = NOTMUCH_DATABASE_MODE_READ_ONLY; ++ notmuch_database_destroy (notmuch); ++ notmuch = NULL; ++ status = NOTMUCH_STATUS_FILE_ERROR; ++ goto DONE; + } + + notmuch->last_doc_id = notmuch->xapian_db->get_lastdocid (); +@@ -1048,7 +1170,9 @@ notmuch_database_get_version (notmuch_database_t *notmuch) + notmuch_bool_t + notmuch_database_needs_upgrade (notmuch_database_t *notmuch) + { +- return notmuch->needs_upgrade; ++ return notmuch->mode == NOTMUCH_DATABASE_MODE_READ_WRITE && ++ ((NOTMUCH_FEATURES_CURRENT & ~notmuch->features) || ++ (notmuch_database_get_version (notmuch) < NOTMUCH_DATABASE_VERSION)); + } + + static volatile sig_atomic_t do_progress_notify = 0; +@@ -1077,6 +1201,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, + double progress), + void *closure) + { ++ void *local = talloc_new (NULL); + Xapian::WritableDatabase *db; + struct sigaction action; + struct itimerval timerval; +@@ -1114,6 +1239,10 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, + timer_is_active = TRUE; + } + ++ /* Set the target features so we write out changes in the desired ++ * format. */ ++ notmuch->features |= NOTMUCH_FEATURES_CURRENT; ++ + /* Before version 1, each message document had its filename in the + * data field. Copy that into the new format by calling + * notmuch_message_add_filename. 
+@@ -1226,6 +1355,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, + notmuch_query_destroy (query); + } + ++ db->set_metadata ("features", _print_features (local, notmuch->features)); + db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION)); + db->flush (); + +@@ -1302,6 +1432,7 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, + sigaction (SIGALRM, &action, NULL); + } + ++ talloc_free (local); + return NOTMUCH_STATUS_SUCCESS; + } + +-- +2.0.0 + -- 2.26.2