Return-Path: X-Original-To: notmuch@notmuchmail.org Delivered-To: notmuch@notmuchmail.org Received: from localhost (localhost [127.0.0.1]) by olra.theworths.org (Postfix) with ESMTP id B327C431FBD for ; Mon, 8 Feb 2010 11:42:05 -0800 (PST) X-Virus-Scanned: Debian amavisd-new at olra.theworths.org X-Spam-Flag: NO X-Spam-Score: -3.186 X-Spam-Level: X-Spam-Status: No, score=-3.186 tagged_above=-999 required=5 tests=[ALL_TRUSTED=-1.8, AWL=1.213, BAYES_00=-2.599] autolearn=ham Received: from olra.theworths.org ([127.0.0.1]) by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id Jhn3Z9BWJoFr for ; Mon, 8 Feb 2010 11:42:04 -0800 (PST) Received: from yoom.home.cworth.org (localhost [127.0.0.1]) by olra.theworths.org (Postfix) with ESMTP id 89866431FBC for ; Mon, 8 Feb 2010 11:42:04 -0800 (PST) Received: by yoom.home.cworth.org (Postfix, from userid 1000) id 1BF6155011E; Mon, 8 Feb 2010 11:42:04 -0800 (PST) From: Carl Worth To: notmuch@notmuchmail.org Date: Mon, 08 Feb 2010 11:42:04 -0800 Message-ID: <87mxzjpl9v.fsf@yoom.home.cworth.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Subject: [notmuch] [PATCH] Switch from random to sequential thread identifiers. X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.13 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 08 Feb 2010 19:42:05 -0000 The sequential identifiers have the advantage of being guaranteed to be unique (until we overflow a 64-bit unsigned integer), and also take up slightly less space in the "notmuch search" output (20 columns rather than 32). This change also has the side effect of fixing a bug where notmuch could block on /dev/random at startup (waiting for some entropy to appear). 
This bug was hit hard by the test suite, (which could easily exhaust the available entropy on common systems---resulting in large delays of the test suite). --- I'm sending this patch to the mailing-list rather than pushing it directly so that any authors of user interfaces can ensure that they are ready for the length of thread identifiers in "notmuch search" output to change. I tested that the emacs client doesn't need any change in this regard. And the change doesn't introduce any incompatibility with an existing database, so no rebuild required (hurrah!). -Carl lib/database-private.h | 7 +++++- lib/database.cc | 52 ++++++++++++++++++++++++++++++++++++++++++++--- lib/message.cc | 46 ------------------------------------------ test/notmuch-test | 2 +- 4 files changed, 55 insertions(+), 52 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 5891584..5bb6e86 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -27,14 +27,19 @@ struct _notmuch_database { notmuch_bool_t exception_reported; + char *path; + + notmuch_bool_t needs_upgrade; notmuch_database_mode_t mode; Xapian::Database *xapian_db; + + uint64_t last_thread_id; + Xapian::QueryParser *query_parser; Xapian::TermGenerator *term_gen; Xapian::ValueRangeProcessor *value_range_processor; - notmuch_bool_t needs_upgrade; }; /* Convert tags from Xapian internal format to notmuch format. 
diff --git a/lib/database.cc b/lib/database.cc index cce7847..65ff12e 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -533,6 +533,8 @@ notmuch_database_open (const char *path, notmuch->needs_upgrade = FALSE; notmuch->mode = mode; try { + string last_thread_id; + if (mode == NOTMUCH_DATABASE_MODE_READ_WRITE) { notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path, Xapian::DB_CREATE_OR_OPEN); @@ -567,6 +569,20 @@ notmuch_database_open (const char *path, notmuch_path, version, NOTMUCH_DATABASE_VERSION); } } + + last_thread_id = notmuch->xapian_db->get_metadata ("last_thread_id"); + if (last_thread_id.empty ()) { + notmuch->last_thread_id = 0; + } else { + const char *str; + char *end; + + str = last_thread_id.c_str (); + notmuch->last_thread_id = strtoull (str, &end, 10); + if (*end != '\0') + INTERNAL_ERROR ("Malformed database last_thread_id: %s", str); + } + notmuch->query_parser = new Xapian::QueryParser; notmuch->term_gen = new Xapian::TermGenerator; notmuch->term_gen->set_stemmer (Xapian::Stem ("english")); @@ -1278,14 +1294,38 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch, return ret; } +static const char * +_notmuch_database_generate_thread_id (notmuch_database_t *notmuch) +{ + /* 20 bytes (+ terminator) for ASCII decimal representation of + * a 64-bit integer. */ + static char thread_id[21]; + Xapian::WritableDatabase *db; + + db = static_cast (notmuch->xapian_db); + + notmuch->last_thread_id++; + + sprintf (thread_id, "%020llu", notmuch->last_thread_id); + + db->set_metadata ("last_thread_id", thread_id); + + return thread_id; +} + /* Given a (mostly empty) 'message' and its corresponding * 'message_file' link it to existing threads in the database. * * We first look at 'message_file' and its link-relevant headers * (References and In-Reply-To) for message IDs. We also look in the - * database for existing message that reference 'message'. + * database for existing message that reference 'message'. 
In either + * case, we will assign to the current message the first thread_id + * found (through either parent or child). We will also merge any + * existing, distinct threads where this message belongs to both, + * (which is not uncommon when mesages are processed out of order). * - * The end result is to call _notmuch_message_ensure_thread_id which + * Finally, if not thread ID has been found through parent or child, + * we call _notmuch_message_generate_thread_id to generate a new * generates a new thread ID if the message doesn't connect to any * existing threads. */ @@ -1308,8 +1348,12 @@ _notmuch_database_link_message (notmuch_database_t *notmuch, if (status) return status; - if (thread_id == NULL) - _notmuch_message_ensure_thread_id (message); + /* If not part of any existing thread, generate a new thread ID. */ + if (thread_id == NULL) { + thread_id = _notmuch_database_generate_thread_id (notmuch); + + _notmuch_message_add_term (message, "thread", thread_id); + } return NOTMUCH_STATUS_SUCCESS; } diff --git a/lib/message.cc b/lib/message.cc index f0e905b..0195050 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -42,13 +42,6 @@ struct _notmuch_message { Xapian::Document doc; }; -/* "128 bits of thread-id ought to be enough for anybody" */ -#define NOTMUCH_THREAD_ID_BITS 128 -#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4) -typedef struct _thread_id { - char str[NOTMUCH_THREAD_ID_DIGITS + 1]; -} thread_id_t; - /* We end up having to call the destructor explicitly because we had * to use "placement new" in order to initialize C++ objects within a * block that we allocated with talloc. So C++ is making talloc @@ -557,45 +550,6 @@ _notmuch_message_set_date (notmuch_message_t *message, Xapian::sortable_serialise (time_value)); } -static void -thread_id_generate (thread_id_t *thread_id) -{ - static int seeded = 0; - FILE *dev_random; - uint32_t value; - char *s; - int i; - - if (! 
seeded) { - dev_random = fopen ("/dev/random", "r"); - if (dev_random == NULL) { - srand (time (NULL)); - } else { - fread ((void *) &value, sizeof (value), 1, dev_random); - srand (value); - fclose (dev_random); - } - seeded = 1; - } - - s = thread_id->str; - for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) { - value = rand (); - sprintf (s, "%08x", value); - s += 8; - } -} - -void -_notmuch_message_ensure_thread_id (notmuch_message_t *message) -{ - /* If not part of any existing thread, generate a new thread_id. */ - thread_id_t thread_id; - - thread_id_generate (&thread_id); - _notmuch_message_add_term (message, "thread", thread_id.str); -} - /* Synchronize changes made to message->doc out into the database. */ void _notmuch_message_sync (notmuch_message_t *message) diff --git a/test/notmuch-test b/test/notmuch-test index 2e5eb24..cac5705 100755 --- a/test/notmuch-test +++ b/test/notmuch-test @@ -146,7 +146,7 @@ add_message () } NOTMUCH_IGNORED_OUTPUT_REGEXP='^Processed [0-9]*( total)? file|Found [0-9]* total file' -NOTMUCH_THREAD_ID_SQUELCH='s/thread:................................/thread:XXX/' +NOTMUCH_THREAD_ID_SQUELCH='s/thread:..................../thread:XXX/' execute_expecting () { args=$1 -- 1.6.5.7