From 7867a8eb31f4c8fbe5e8eb06477b52e9b9143dc7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 17 Dec 2013 20:03:22 +0200 Subject: [PATCH] Re: [PATCH] lib: Add a new prefix "list" to the search-terms syntax --- ab/e2bf8a62094f768f5094cde6d0205968a7b916 | 310 ++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 ab/e2bf8a62094f768f5094cde6d0205968a7b916 diff --git a/ab/e2bf8a62094f768f5094cde6d0205968a7b916 b/ab/e2bf8a62094f768f5094cde6d0205968a7b916 new file mode 100644 index 000000000..701ff5d6d --- /dev/null +++ b/ab/e2bf8a62094f768f5094cde6d0205968a7b916 @@ -0,0 +1,310 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 1AE76431FC3 + for ; Tue, 17 Dec 2013 10:10:07 -0800 (PST) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: 0 +X-Spam-Level: +X-Spam-Status: No, score=0 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_NONE=-0.0001] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id IuP8oTvzApcf for ; + Tue, 17 Dec 2013 10:09:59 -0800 (PST) +X-Greylist: delayed 383 seconds by postgrey-1.32 at olra; + Tue, 17 Dec 2013 10:09:58 PST +Received: from jenni2.inet.fi (mta-out.inet.fi [195.156.147.13]) + by olra.theworths.org (Postfix) with ESMTP id E6CB3431FBF + for ; Tue, 17 Dec 2013 10:09:58 -0800 (PST) +Received: from node.shutemov.name (80.220.224.16) by jenni2.inet.fi + (8.5.140.03) id 52775C9903BE6DDE; Tue, 17 Dec 2013 20:03:26 +0200 +Received: by node.shutemov.name (Postfix, from userid 1000) + id 29749417EE; Tue, 17 Dec 2013 20:03:23 +0200 (EET) +Date: Tue, 17 Dec 2013 20:03:22 +0200 +From: "Kirill A. 
Shutemov" +To: Jani Nikula +Subject: Re: [PATCH] lib: Add a new prefix "list" to the search-terms syntax +Message-ID: <20131217180322.GA9272@node.dhcp.inet.fi> +References: <20130409083010.GA27675@raorn.name> + <1365549369-12776-1-git-send-email-raorn@raorn.name> + <87bo2ougmb.fsf@nikula.org> +MIME-Version: 1.0 +Content-Type: text/plain; charset=iso-8859-1 +Content-Disposition: inline +Content-Transfer-Encoding: 8bit +In-Reply-To: <87bo2ougmb.fsf@nikula.org> +User-Agent: Mutt/1.5.22.1-rc1 (2013-10-16) +Cc: notmuch@notmuchmail.org, "Alexey I. Froloff" +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Tue, 17 Dec 2013 18:10:07 -0000 + +On Thu, Oct 17, 2013 at 05:17:00PM +0300, Jani Nikula wrote: +> On Wed, 10 Apr 2013, "Alexey I. Froloff" wrote: +> > From: "Alexey I. Froloff" +> > +> > Add support for indexing and searching the message's List-Id header. +> > This is useful when matching all the messages belonging to a particular +> > mailing list. +> +> There's an issue with our duplicate message-id handling that is likely +> to cause confusion with List-Id: searches. If you receive several +> duplicates of the same message (judged by the message-id), only the +> first one of them gets indexed, and the rest are ignored. This means +> that for messages you receive both directly and through a list, it will +> be arbitrary whether the List-Id: gets indexed or not. Therefore a list: +> search might not return all the messages you'd expect. + +I've tried to address this. The patch also adds a few tests for the feature. + +There's still missing functionality: re-indexing existing messages for +list-id, handling message removal, etc. + +Any comments? 
+ +diff --git a/lib/database.cc b/lib/database.cc +index f395061e3a73..196243e15d1a 100644 +--- a/lib/database.cc ++++ b/lib/database.cc +@@ -205,6 +205,7 @@ static prefix_t BOOLEAN_PREFIX_INTERNAL[] = { + }; + + static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = { ++ { "list", "XLIST"}, + { "thread", "G" }, + { "tag", "K" }, + { "is", "K" }, +@@ -2025,10 +2026,13 @@ notmuch_database_add_message (notmuch_database_t *notmuch, + date = notmuch_message_file_get_header (message_file, "date"); + _notmuch_message_set_header_values (message, date, from, subject); + +- ret = _notmuch_message_index_file (message, filename); ++ ret = _notmuch_message_index_file (message, filename, false); + if (ret) + goto DONE; + } else { ++ ret = _notmuch_message_index_file (message, filename, true); ++ if (ret) ++ goto DONE; + ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID; + } + +diff --git a/lib/index.cc b/lib/index.cc +index 78c18cf36d10..9fe1ad6502ed 100644 +--- a/lib/index.cc ++++ b/lib/index.cc +@@ -304,6 +304,47 @@ _index_address_list (notmuch_message_t *message, + } + } + ++static void ++_index_list_id (notmuch_message_t *message, ++ const char *list_id_header) ++{ ++ const char *begin_list_id, *end_list_id, *list_id; ++ void *local; ++ ++ if (list_id_header == NULL) ++ return; ++ ++ /* RFC2919 says that the list-id is found at the end of the header ++ * and enclosed between angle brackets. If we cannot find a ++ * matching pair of brackets containing at least one character, ++ * we ignore the list id header. 
*/ ++ begin_list_id = strrchr (list_id_header, '<'); ++ if (!begin_list_id) { ++ fprintf (stderr, "Warning: Not indexing mailformed List-Id tag.\n"); ++ return; ++ } ++ ++ end_list_id = strrchr(begin_list_id, '>'); ++ if (!end_list_id || (end_list_id - begin_list_id < 2)) { ++ fprintf (stderr, "Warning: Not indexing mailformed List-Id tag.\n"); ++ return; ++ } ++ ++ local = talloc_new (message); ++ ++ /* We extract the list id between the angle brackets */ ++ list_id = talloc_strndup (local, begin_list_id + 1, ++ end_list_id - begin_list_id - 1); ++ ++ /* _notmuch_message_add_term() may return ++ * NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG here. We can't fix it, but ++ * this is not a reason to exit with error... */ ++ if (_notmuch_message_add_term (message, "list", list_id)) ++ fprintf (stderr, "Warning: Not indexing List-Id: <%s>\n", list_id); ++ ++ talloc_free (local); ++} ++ + /* Callback to generate terms for each mime part of a message. */ + static void + _index_mime_part (notmuch_message_t *message, +@@ -425,14 +466,15 @@ _index_mime_part (notmuch_message_t *message, + + notmuch_status_t + _notmuch_message_index_file (notmuch_message_t *message, +- const char *filename) ++ const char *filename, ++ notmuch_bool_t duplicate) + { + GMimeStream *stream = NULL; + GMimeParser *parser = NULL; + GMimeMessage *mime_message = NULL; + InternetAddressList *addresses; + FILE *file = NULL; +- const char *from, *subject; ++ const char *from, *subject, *list_id; + notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS; + static int initialized = 0; + char from_buf[5]; +@@ -485,6 +527,9 @@ mboxes is deprecated and may be removed in the future.\n", filename); + + from = g_mime_message_get_sender (mime_message); + ++ if (duplicate) ++ goto DUP; ++ + addresses = internet_address_list_parse_string (from); + if (addresses) { + _index_address_list (message, "from", addresses); +@@ -502,6 +547,10 @@ mboxes is deprecated and may be removed in the future.\n", filename); + + _index_mime_part 
(message, g_mime_message_get_mime_part (mime_message)); + ++ DUP: ++ list_id = g_mime_object_get_header (GMIME_OBJECT (mime_message), "List-Id"); ++ _index_list_id (message, list_id); ++ + DONE: + if (mime_message) + g_object_unref (mime_message); +diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h +index af185c7c5ba8..138dfa58efc8 100644 +--- a/lib/notmuch-private.h ++++ b/lib/notmuch-private.h +@@ -322,7 +322,8 @@ notmuch_message_get_author (notmuch_message_t *message); + + notmuch_status_t + _notmuch_message_index_file (notmuch_message_t *message, +- const char *filename); ++ const char *filename, ++ notmuch_bool_t duplicate); + + /* message-file.c */ + +diff --git a/man/man7/notmuch-search-terms.7 b/man/man7/notmuch-search-terms.7 +index f1627b3488f8..29b30b7b0b00 100644 +--- a/man/man7/notmuch-search-terms.7 ++++ b/man/man7/notmuch-search-terms.7 +@@ -52,6 +52,8 @@ terms to match against specific portions of an email, (where + + thread: + ++ list: ++ + folder: + + date:.. +@@ -109,6 +111,12 @@ within a matching directory. Only the directory components below the + top-level mail database path are available to be searched. + + The ++.BR list: , ++is used to match mailing list ID of an email message \- contents of the ++List\-Id: header without the '<', '>' delimiters or decoded list ++description. ++ ++The + .B date: + prefix can be used to restrict the results to only messages within a + particular time range (based on the Date: header) with a range syntax +diff --git a/test/corpus/cur/18:2, b/test/corpus/cur/18:2, +index f522f69eb933..2b54925bd5d1 100644 +--- a/test/corpus/cur/18:2, ++++ b/test/corpus/cur/18:2, +@@ -3,6 +3,7 @@ To: notmuch@notmuchmail.org + Date: Tue, 17 Nov 2009 18:21:38 -0500 + Subject: [notmuch] archive + Message-ID: <20091117232137.GA7669@griffis1.net> ++List-Id: + + Just subscribed, I'd like to catch up on the previous postings, + but the archive link seems to be bogus? 
+diff --git a/test/corpus/cur/51:2, b/test/corpus/cur/51:2, +index f522f69eb933..b155e6ee64a5 100644 +--- a/test/corpus/cur/51:2, ++++ b/test/corpus/cur/51:2, +@@ -3,6 +3,7 @@ To: notmuch@notmuchmail.org + Date: Tue, 17 Nov 2009 18:21:38 -0500 + Subject: [notmuch] archive + Message-ID: <20091117232137.GA7669@griffis1.net> ++List-Id: + + Just subscribed, I'd like to catch up on the previous postings, + but the archive link seems to be bogus? +diff --git a/test/search b/test/search +index a7a0b18d2e48..bef42971226c 100755 +--- a/test/search ++++ b/test/search +@@ -129,4 +129,28 @@ add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12 + output=$(notmuch search "bödý" | notmuch_search_sanitize) + test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)" + ++test_begin_subtest "Search by List-Id" ++notmuch search list:notmuch.notmuchmail.org | notmuch_search_sanitize > OUTPUT ++cat <EXPECTED ++thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread) ++thread:XXX 2009-11-18 [4/7] Lars Kellogg-Stedman, Mikhail Gusarov| Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? 
(inbox signed unread) ++thread:XXX 2009-11-18 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread) ++thread:XXX 2009-11-17 [1/3] Adrian Perez de Castro| Keith Packard, Carl Worth; [notmuch] Introducing myself (inbox signed unread) ++thread:XXX 2009-11-17 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread) ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "Search by List-Id, duplicated messages, step 1" ++notmuch search list:test1.example.com | notmuch_search_sanitize > OUTPUT ++cat <EXPECTED ++thread:XXX 2009-11-17 [1/3] Aron Griffis| Keith Packard, Carl Worth; [notmuch] archive (inbox unread) ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "Search by List-Id, duplicated messages, step 2" ++notmuch search list:test2.example.com | notmuch_search_sanitize > OUTPUT ++cat <EXPECTED ++thread:XXX 2009-11-17 [1/3] Aron Griffis| Keith Packard, Carl Worth; [notmuch] archive (inbox unread) ++EOF ++test_expect_equal_file OUTPUT EXPECTED + test_done +diff --git a/test/test-lib.sh b/test/test-lib.sh +index d8e0d9115a69..981bde4a4004 100644 +--- a/test/test-lib.sh ++++ b/test/test-lib.sh +@@ -576,9 +576,9 @@ test_expect_equal_json () { + # The test suite forces LC_ALL=C, but this causes Python 3 to + # decode stdin as ASCII. We need to read JSON in UTF-8, so + # override Python's stdio encoding defaults. +- output=$(echo "$1" | PYTHONIOENCODING=utf-8 python -mjson.tool \ ++ output=$(echo "$1" | PYTHONIOENCODING=utf-8 python2 -mjson.tool \ + || echo "$1") +- expected=$(echo "$2" | PYTHONIOENCODING=utf-8 python -mjson.tool \ ++ expected=$(echo "$2" | PYTHONIOENCODING=utf-8 python2 -mjson.tool \ + || echo "$2") + shift 2 + test_expect_equal "$output" "$expected" "$@" +-- + Kirill A. Shutemov -- 2.26.2