From 30df6bba3193e47c7faa15a2c74740a098e1355a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 10 Mar 2014 12:15:48 +2000 Subject: [PATCH] Re: [Patch v3 04/15] lib: make folder: prefix literal --- 80/fde1b8dcf09aece38169ae19bd236a4b870d8e | 355 ++++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 80/fde1b8dcf09aece38169ae19bd236a4b870d8e diff --git a/80/fde1b8dcf09aece38169ae19bd236a4b870d8e b/80/fde1b8dcf09aece38169ae19bd236a4b870d8e new file mode 100644 index 000000000..1173d0223 --- /dev/null +++ b/80/fde1b8dcf09aece38169ae19bd236a4b870d8e @@ -0,0 +1,355 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 1DBC4431FBD + for ; Sun, 9 Mar 2014 09:16:04 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 4oYsUBcpegRz for ; + Sun, 9 Mar 2014 09:15:56 -0700 (PDT) +Received: from dmz-mailsec-scanner-1.mit.edu (dmz-mailsec-scanner-1.mit.edu + [18.9.25.12]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 026E2431FBC + for ; Sun, 9 Mar 2014 09:15:55 -0700 (PDT) +X-AuditID: 1209190c-f794a6d000000c27-86-531c93bae22b +Received: from mailhub-auth-4.mit.edu ( [18.7.62.39]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-1.mit.edu (Symantec Messaging Gateway) with SMTP + id 12.35.03111.AB39C135; Sun, 9 Mar 2014 12:15:54 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by 
mailhub-auth-4.mit.edu (8.13.8/8.9.2) with ESMTP id s29GFqNb007502; + Sun, 9 Mar 2014 12:15:53 -0400 +Received: from awakening.csail.mit.edu (awakening.csail.mit.edu [18.26.4.91]) + (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s29GFntX022420 + (version=TLSv1/SSLv3 cipher=DHE-RSA-AES128-SHA bits=128 verify=NOT); + Sun, 9 Mar 2014 12:15:51 -0400 +Received: from amthrax by awakening.csail.mit.edu with local (Exim 4.80) + (envelope-from ) + id 1WMgOD-0006mz-GU; Sun, 09 Mar 2014 12:15:49 -0400 +Date: Sun, 9 Mar 2014 12:15:48 -0400 +From: Austin Clements +To: Jani Nikula +Subject: Re: [Patch v3 04/15] lib: make folder: prefix literal +Message-ID: <20140309161548.GO4709@mit.edu> +References: <1394313585-28422-1-git-send-email-david@tethera.net> + <1394313585-28422-5-git-send-email-david@tethera.net> + <20140308235127.GL4709@mit.edu> <87pplv69q8.fsf@nikula.org> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline +In-Reply-To: <87pplv69q8.fsf@nikula.org> +User-Agent: Mutt/1.5.21 (2010-09-15) +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFupjleLIzCtJLcpLzFFi42IRYrdT1901WSbY4NBFbosbrd2MFk3TnS2u + 35zJ7MDscev+a3aPZ6tuMXtsOfSeOYA5issmJTUnsyy1SN8ugSvjzdaH7AXnQisur3rA0sD4 + 0KGLkZNDQsBEYt7eY8wQtpjEhXvr2boYuTiEBGYzSXy70w/lbGCUeHL9LCOEc4pJ4vX8o6wQ + zhJGiYtr5rGB9LMIqEjcvD8XbBabgIbEtv3LGUFsEQFFic0n94PZzAJ2Eke+d4HZwkB238sD + LCA2r4C2xPWFm6A27GSUuHRoIRNEQlDi5MwnLBDNWhI3/r0EinMA2dISy/9xgIQ5gXa9X3qF + HcQWBbphysltbBMYhWYh6Z6FpHsWQvcCRuZVjLIpuVW6uYmZOcWpybrFyYl5ealFuoZ6uZkl + eqkppZsYQcHOKcmzg/HNQaVDjAIcjEo8vB0TpIOFWBPLiitzDzFKcjApifLqA2NFiC8pP6Uy + I7E4I76oNCe1+BCjBAezkgjv7ElAOd6UxMqq1KJ8mJQ0B4uSOG/fWYlgIYH0xJLU7NTUgtQi + mKwMB4eSBC8HyFDBotT01Iq0zJwShDQTByfIcB6g4W4gNbzFBYm5xZnpEPlTjIpS4rySIFsF + QBIZpXlwvbBk9IpRHOgVYV5ZkHYeYCKD634FNJgJaHDzcSmQwSWJCCmpBkYFDz/fGbout62O + 1Gz+WnTq58oZ94Ue/SmdOJEr9mHpgo/u3M0Ben36ZxkTRB/fzLic//pwv62oc65nhHQ2Q5fC + 
OnHv+W8SNvQUcfvMLa3L4bs2d0XD/dnVN+ftlNh+68yMF/EyFWG+xz6cT5R133HYgC/mndGq + 67mM1trxj/OqBa8ZH1u6pEqJpTgj0VCLuag4EQB8bswzIQMAAA== +Cc: notmuch@notmuchmail.org +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Sun, 09 Mar 2014 16:16:04 -0000 + +Quoth Jani Nikula on Mar 09 at 10:45 am: +> On Sun, 09 Mar 2014, Austin Clements wrote: +> > Quoth David Bremner on Mar 08 at 5:19 pm: +> >> From: Jani Nikula +> >> +> >> In xapian terms, convert folder: prefix from probabilistic to boolean +> >> prefix, matching the paths, relative form the maildir root, of the +> > +> > s/form/from/ +> > +> >> message files, ignoring the maildir new and cur leaf directories. +> >> +> >> folder:foo matches all message files in foo, foo/new, and foo/cur. +> >> +> >> folder:foo/new does *not* match message files in foo/new. +> >> +> >> folder:"" matches all message files in the top level maildir and its +> >> new and cur subdirectories. +> >> +> >> This change constitutes a database change: bump the database version +> >> and add database upgrade support for folder: terms. The upgrade also +> >> adds path: terms. 
+> >> --- +> >> lib/database.cc | 38 ++++++++++++++++++++++-- +> >> lib/message.cc | 80 ++++++++++++++++++++++++++++++++++++++++++++------- +> >> lib/notmuch-private.h | 3 ++ +> >> 3 files changed, 108 insertions(+), 13 deletions(-) +> >> +> >> diff --git a/lib/database.cc b/lib/database.cc +> >> index 93cc7f5..186e3a7 100644 +> >> --- a/lib/database.cc +> >> +++ b/lib/database.cc +> >> @@ -42,7 +42,7 @@ typedef struct { +> >> const char *prefix; +> >> } prefix_t; +> >> +> >> -#define NOTMUCH_DATABASE_VERSION 1 +> >> +#define NOTMUCH_DATABASE_VERSION 2 +> >> +> >> #define STRINGIFY(s) _SUB_STRINGIFY(s) +> >> #define _SUB_STRINGIFY(s) #s +> >> @@ -210,6 +210,7 @@ static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { +> >> { "is", "K" }, +> >> { "id", "Q" }, +> >> { "path", "P" }, +> >> + { "folder", "XFOLDER:" }, +> > +> > It took me a while to figure out that the ":" here means that Xapian +> > will unconditionally use a ":" after the prefix, instead of only using +> > a ":" when the first letter following the prefix is upper-case ASCII. +> > Maybe I was only confused by this because I simultaneously knew too +> > much and not enough about Xapian, but it might be worth a comment. +> > Something like, +> > +> > /* Without the ":", since this is a multi-letter prefix, Xapian +> > * will add a colon itself if the first letter of the path is +> > * upper-case ASCII. Including the ":" forces there to always be +> > * a colon, which keeps our own logic simpler. */ +> +> Do you mean "... first letter of the _prefix_ is ..."? + +I did mean the path. If the folder prefix were just "XFOLDER", then +Xapian::QueryParser would translate the query folder:foo into the term +XFOLDERfoo like you'd expect, but it would translate the query +folder:Foo into the term XFOLDER:Foo. We'd have to account for this +when constructing terms and (arguably) when removing terms. But +"XFOLDER:" suppresses the colon-adding logic, so these two queries +simply map to XFOLDER:foo and XFOLDER:Foo. + +> Jani. 
+> +> > +> >> }; +> >> +> >> static prefix_t PROBABILISTIC_PREFIX[]= { +> >> @@ -217,7 +218,6 @@ static prefix_t PROBABILISTIC_PREFIX[]= { +> >> { "to", "XTO" }, +> >> { "attachment", "XATTACHMENT" }, +> >> { "subject", "XSUBJECT"}, +> >> - { "folder", "XFOLDER"} +> >> }; +> >> +> >> const char * +> >> @@ -1168,6 +1168,40 @@ notmuch_database_upgrade (notmuch_database_t *notmuch, +> >> } +> >> } +> >> +> >> + /* +> >> + * Prior to version 2, the "folder:" prefix was probabilistic and +> >> + * stemmed. Change it to the current boolean prefix. Add "path:" +> >> + * prefixes while at it. +> >> + */ +> >> + if (version < 2) { +> >> + notmuch_query_t *query = notmuch_query_create (notmuch, ""); +> > +> > Three space indentation and no tabs? (It looks like this was in +> > Jani's v2, also. I'm guessing at some point there was a copy-paste +> > from a diff with tabs converted to spaces?) +> > +> >> + notmuch_messages_t *messages; +> >> + notmuch_message_t *message; +> >> + +> >> + count = 0; +> >> + total = notmuch_query_count_messages (query); +> >> + +> >> + for (messages = notmuch_query_search_messages (query); +> >> + notmuch_messages_valid (messages); +> >> + notmuch_messages_move_to_next (messages)) { +> >> + if (do_progress_notify) { +> >> + progress_notify (closure, (double) count / total); +> >> + do_progress_notify = 0; +> >> + } +> >> + +> >> + message = notmuch_messages_get (messages); +> >> + +> >> + _notmuch_message_upgrade_folder (message); +> >> + _notmuch_message_sync (message); +> >> + +> >> + notmuch_message_destroy (message); +> >> + +> >> + count++; +> >> + } +> >> + +> >> + notmuch_query_destroy (query); +> >> + } +> >> + +> >> db->set_metadata ("version", STRINGIFY (NOTMUCH_DATABASE_VERSION)); +> >> db->flush (); +> >> +> >> diff --git a/lib/message.cc b/lib/message.cc +> >> index 21abe8e..31cb9f1 100644 +> >> --- a/lib/message.cc +> >> +++ b/lib/message.cc +> >> @@ -504,6 +504,56 @@ _notmuch_message_remove_terms (notmuch_message_t *message, const 
char *prefix) +> >> } +> >> } +> >> +> >> +/* Return true if p points at "new" or "cur". */ +> >> +static bool is_maildir (const char *p) +> >> +{ +> >> + return strcmp (p, "cur") == 0 || strcmp (p, "new") == 0; +> >> +} +> >> + +> >> +/* Add "folder:" term for directory. */ +> >> +static notmuch_status_t +> >> +_notmuch_message_add_folder_terms (notmuch_message_t *message, +> >> + const char *directory) +> >> +{ +> >> + char *folder, *last; +> >> + +> >> + folder = talloc_strdup (NULL, directory); +> >> + if (! folder) +> >> + return NOTMUCH_STATUS_OUT_OF_MEMORY; +> > +> > Same formatting problem in this chunk. +> > +> >> + +> >> + /* +> >> + * If the message file is in a leaf directory named "new" or +> >> + * "cur", presume maildir and index the parent directory. Thus a +> >> + * "folder:" prefix search matches messages in the specified +> >> + * maildir folder, i.e. in the specified directory and its "new" +> >> + * and "cur" subdirectories. +> >> + * +> >> + * Note that this means the "folder:" prefix can't be used for +> >> + * distinguishing between message files in "new" or "cur". The +> >> + * "path:" prefix needs to be used for that. +> >> + * +> >> + * Note the deliberate difference to _filename_is_in_maildir(). We +> >> + * don't want to index different things depending on the existence +> >> + * or non-existence of all maildir sibling directories "new", +> >> + * "cur", and "tmp". Doing so would be surprising, and difficult +> >> + * for the user to fix in case all subdirectories were not in +> >> + * place during indexing. 
+> >> + */ +> >> + last = strrchr (folder, '/'); +> >> + if (last) { +> >> + if (is_maildir (last + 1)) +> >> + *last = '\0'; +> >> + } else if (is_maildir (folder)) { +> >> + *folder = '\0'; +> >> + } +> >> + +> >> + _notmuch_message_add_term (message, "folder", folder); +> >> + +> >> + talloc_free (folder); +> >> + +> >> + return NOTMUCH_STATUS_SUCCESS; +> >> +} +> >> + +> >> #define RECURSIVE_SUFFIX "/**" +> >> +> >> /* Add "path:" terms for directory. */ +> >> @@ -570,9 +620,8 @@ _notmuch_message_add_directory_terms (void *ctx, notmuch_message_t *message) +> >> directory = _notmuch_database_get_directory_path (ctx, +> >> message->notmuch, +> >> directory_id); +> >> - if (strlen (directory)) +> >> - _notmuch_message_gen_terms (message, "folder", directory); +> >> +> >> + _notmuch_message_add_folder_terms (message, directory); +> >> _notmuch_message_add_path_terms (message, directory); +> >> } +> >> +> >> @@ -610,9 +659,7 @@ _notmuch_message_add_filename (notmuch_message_t *message, +> >> * notmuch_directory_get_child_files() . */ +> >> _notmuch_message_add_term (message, "file-direntry", direntry); +> >> +> >> - /* New terms allow user to search with folder: specification. */ +> >> - _notmuch_message_gen_terms (message, "folder", directory); +> >> - +> >> + _notmuch_message_add_folder_terms (message, directory); +> >> _notmuch_message_add_path_terms (message, directory); +> >> +> >> talloc_free (local); +> >> @@ -637,8 +684,6 @@ _notmuch_message_remove_filename (notmuch_message_t *message, +> >> const char *filename) +> >> { +> >> void *local = talloc_new (message); +> >> - const char *folder_prefix = _find_prefix ("folder"); +> >> - char *zfolder_prefix = talloc_asprintf(local, "Z%s", folder_prefix); +> >> char *direntry; +> >> notmuch_private_status_t private_status; +> >> notmuch_status_t status; +> >> @@ -659,10 +704,7 @@ _notmuch_message_remove_filename (notmuch_message_t *message, +> >> /* Re-synchronize "folder:" and "path:" terms for this message. 
*/ +> >> +> >> /* Remove all "folder:" terms. */ +> >> - _notmuch_message_remove_terms (message, folder_prefix); +> >> - +> >> - /* Remove all "folder:" stemmed terms. */ +> >> - _notmuch_message_remove_terms (message, zfolder_prefix); +> >> + _notmuch_message_remove_terms (message, _find_prefix ("folder")); +> >> +> >> /* Remove all "path:" terms. */ +> >> _notmuch_message_remove_terms (message, _find_prefix ("path")); +> >> @@ -675,6 +717,22 @@ _notmuch_message_remove_filename (notmuch_message_t *message, +> >> return status; +> >> } +> >> +> >> +/* Upgrade the "folder:" prefix from V1 to V2. */ +> >> +#define FOLDER_PREFIX_V1 "XFOLDER" +> >> +#define ZFOLDER_PREFIX_V1 "Z" FOLDER_PREFIX_V1 +> >> +void +> >> +_notmuch_message_upgrade_folder (notmuch_message_t *message) +> >> +{ +> >> + /* Remove all old "folder:" terms. */ +> >> + _notmuch_message_remove_terms (message, FOLDER_PREFIX_V1); +> >> + +> >> + /* Remove all old "folder:" stemmed terms. */ +> >> + _notmuch_message_remove_terms (message, ZFOLDER_PREFIX_V1); +> >> + +> >> + /* Add new boolean "folder:" and "path:" terms. */ +> >> + _notmuch_message_add_directory_terms (message, message); +> >> +} +> >> + +> >> char * +> >> _notmuch_message_talloc_copy_data (notmuch_message_t *message) +> >> { +> >> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h +> >> index af185c7..59eb2bc 100644 +> >> --- a/lib/notmuch-private.h +> >> +++ b/lib/notmuch-private.h +> >> @@ -263,6 +263,9 @@ _notmuch_message_gen_terms (notmuch_message_t *message, +> >> void +> >> _notmuch_message_upgrade_filename_storage (notmuch_message_t *message); +> >> +> >> +void +> >> +_notmuch_message_upgrade_folder (notmuch_message_t *message); +> >> + +> >> notmuch_status_t +> >> _notmuch_message_add_filename (notmuch_message_t *message, +> >> const char *filename); -- 2.26.2