From: Austin Clements
Date: Wed, 9 Nov 2011 13:44:35 +0000 (-0500)
Subject: tag: Automatically limit to messages whose tags will actually change.
X-Git-Tag: 0.11_rc1~84
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=da67bf12ce122759f72d1d510fb8996df3c9f946;p=notmuch.git

tag: Automatically limit to messages whose tags will actually change.

This optimizes the user's tagging query to exclude messages that won't
be affected by the tagging operation, saving computation and IO for
redundant tagging operations.

For example,
  notmuch tag +notmuch to:notmuch@notmuchmail.org
will now use the query
  ( to:notmuch@notmuchmail.org ) and (not tag:"notmuch")

In the past, we've often suggested that people do this exact
transformation by hand for slow tagging operations.  This makes that
unnecessary.
---

diff --git a/NEWS b/NEWS
index 384f5a02..2b2f08a6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,16 @@
+Notmuch 0.11 (201x-xx-xx)
+=========================
+
+Performance
+-----------
+
+Automatic tag query optimization
+
+  "notmuch tag" now automatically optimizes the user's query to
+  exclude messages whose tags won't change.  In the past, we've
+  suggested that people do this by hand; this is no longer necessary.
+
+
 Notmuch 0.10.1 (2011-11-25)
 ===========================
 
diff --git a/notmuch-tag.c b/notmuch-tag.c
index dded39ea..537d5a4d 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -30,6 +30,81 @@ handle_sigint (unused (int sig))
     interrupted = 1;
 }
 
+static char *
+_escape_tag (char *buf, const char *tag)
+{
+    const char *in = tag;
+    char *out = buf;
+    /* Quoted boolean terms may contain any character; embedded double
+     * quotes are doubled.  buf needs room for 2 * strlen (tag) + 3 bytes. */
+    *out++ = '"';
+    while (*in) {
+        if (*in == '"')
+            *out++ = '"';
+        *out++ = *in++;
+    }
+    *out++ = '"';
+    *out = 0;
+    return buf;
+}
+
+static char *
+_optimize_tag_query (void *ctx, const char *orig_query_string, char *argv[],
+                     int *add_tags, int add_tags_count,
+                     int *remove_tags, int remove_tags_count)
+{
+    /* This is subtler than it looks.  Xapian ignores the '-' operator
+     * at the beginning of both queries and parenthesized groups and,
+     * furthermore, the presence of a '-' operator at the beginning of
+     * a group can inhibit parsing of the previous operator.  Hence,
+     * the user-provided query MUST appear first, but it is safe to
+     * parenthesize and the exclusion part of the query must not use
+     * the '-' operator (though the NOT operator is fine). */
+
+    char *escaped, *query_string;
+    const char *join = "";
+    int i;
+    unsigned int max_tag_len = 0;
+
+    /* Allocate a buffer for escaping tags.  This is large enough to
+     * hold a fully escaped tag with every character doubled plus
+     * enclosing quotes and a NUL. */
+    for (i = 0; i < add_tags_count; i++)
+        if (strlen (argv[add_tags[i]] + 1) > max_tag_len)
+            max_tag_len = strlen (argv[add_tags[i]] + 1);
+    for (i = 0; i < remove_tags_count; i++)
+        if (strlen (argv[remove_tags[i]] + 1) > max_tag_len)
+            max_tag_len = strlen (argv[remove_tags[i]] + 1);
+    escaped = talloc_array(ctx, char, max_tag_len * 2 + 3);
+    if (!escaped)
+        return NULL;
+
+    /* Build the new query string */
+    if (strcmp (orig_query_string, "*") == 0)
+        query_string = talloc_strdup (ctx, "(");
+    else
+        query_string = talloc_asprintf (ctx, "( %s ) and (", orig_query_string);
+
+    for (i = 0; i < add_tags_count && query_string; i++) {
+        query_string = talloc_asprintf_append_buffer (
+            query_string, "%snot tag:%s", join,
+            _escape_tag (escaped, argv[add_tags[i]] + 1));
+        join = " or ";
+    }
+    for (i = 0; i < remove_tags_count && query_string; i++) {
+        query_string = talloc_asprintf_append_buffer (
+            query_string, "%stag:%s", join,
+            _escape_tag (escaped, argv[remove_tags[i]] + 1));
+        join = " or ";
+    }
+
+    if (query_string)
+        query_string = talloc_strdup_append_buffer (query_string, ")");
+
+    talloc_free (escaped);
+    return query_string;
+}
+
 int
 notmuch_tag_command (void *ctx, unused (int argc), unused (char *argv[]))
 {
@@ -93,6 +168,16 @@ notmuch_tag_command (void *ctx, unused (int argc), unused (char *argv[]))
         return 1;
     }
 
+    /* Optimize the query so it excludes messages that already have
+     * every tag being added and none of the tags being removed. */
+    query_string = _optimize_tag_query (ctx, query_string, argv,
+                                        add_tags, add_tags_count,
+                                        remove_tags, remove_tags_count);
+    if (query_string == NULL) {
+        fprintf (stderr, "Out of memory.\n");
+        return 1;
+    }
+
     config = notmuch_config_open (ctx, NULL, NULL);
     if (config == NULL)
         return 1;