From 2806c510f40d576799f7daf9a82ce093cfb7a02b Mon Sep 17 00:00:00 2001 From: Mark Walters Date: Thu, 23 Oct 2014 10:41:43 +0100 Subject: [PATCH] Re: [PATCH v3 3/4] cli: Extend the search command for --output={sender, recipients} --- 3c/536f290dfc01e568ace6d3a58686c764c98ef9 | 470 ++++++++++++++++++++++ 1 file changed, 470 insertions(+) create mode 100644 3c/536f290dfc01e568ace6d3a58686c764c98ef9 diff --git a/3c/536f290dfc01e568ace6d3a58686c764c98ef9 b/3c/536f290dfc01e568ace6d3a58686c764c98ef9 new file mode 100644 index 000000000..212906cc8 --- /dev/null +++ b/3c/536f290dfc01e568ace6d3a58686c764c98ef9 @@ -0,0 +1,470 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 2F15E431FD4 + for ; Thu, 23 Oct 2014 02:42:17 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -1.098 +X-Spam-Level: +X-Spam-Status: No, score=-1.098 tagged_above=-999 required=5 + tests=[DKIM_ADSP_CUSTOM_MED=0.001, FREEMAIL_FROM=0.001, + NML_ADSP_CUSTOM_MED=1.2, RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 7KeBc8Jb473c for ; + Thu, 23 Oct 2014 02:42:08 -0700 (PDT) +Received: from mail2.qmul.ac.uk (mail2.qmul.ac.uk [138.37.6.6]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 10684431FBD + for ; Thu, 23 Oct 2014 02:42:08 -0700 (PDT) +Received: from smtp.qmul.ac.uk ([138.37.6.40]) + by mail2.qmul.ac.uk with esmtp (Exim 4.71) + (envelope-from ) + id 1XhEtt-0006Yv-CS; Thu, 23 Oct 2014 10:42:05 +0100 +Received: from [176.12.107.132] (helo=localhost) + by smtp.qmul.ac.uk with esmtpsa (TLSv1:AES128-SHA:128) (Exim 4.71) + (envelope-from ) + id 1XhEts-0002EQ-Kw; Thu, 23 Oct 2014 10:41:45 
+0100 +From: Mark Walters +To: Michal Sojka , notmuch@notmuchmail.org +Subject: Re: [PATCH v3 3/4] cli: Extend the search command for + --output={sender, recipients} +In-Reply-To: <1413150093-8383-4-git-send-email-sojkam1@fel.cvut.cz> +References: <87zjd51phx.fsf@steelpick.2x.cz> + <1413150093-8383-1-git-send-email-sojkam1@fel.cvut.cz> + <1413150093-8383-4-git-send-email-sojkam1@fel.cvut.cz> +User-Agent: Notmuch/0.18.1+86~gef5e66a (http://notmuchmail.org) Emacs/23.4.1 + (x86_64-pc-linux-gnu) +Date: Thu, 23 Oct 2014 10:41:43 +0100 +Message-ID: <87egtzazs8.fsf@qmul.ac.uk> +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: quoted-printable +X-QM-SPAM-Info: 176.12.107.132 is in Janet mirror of Spamhaus XBL; + see http://www.spamhaus.org/xbl/ +X-Sender-Host-Address: 176.12.107.132 +X-QM-Geographic: According to ripencc, + this message was delivered by a machine in Britain (UK) (GB). +X-QM-SPAM-Info: Sender has good ham record. :) +X-QM-Body-MD5: c1ab82be0d0946d1fd7d9ee277360059 (of first 20000 bytes) +X-SpamAssassin-Score: 0.0 +X-SpamAssassin-SpamBar: / +X-SpamAssassin-Report: The QM spam filters have analysed this message to + determine if it is + spam. We require at least 5.0 points to mark a message as spam. + This message scored 0.0 points. Summary of the scoring: + * 0.0 FREEMAIL_FROM Sender email is commonly abused enduser mail + provider * (markwalters1009[at]gmail.com) +X-QM-Scan-Virus: ClamAV says the message is clean +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Thu, 23 Oct 2014 09:42:17 -0000 + +On Sun, 12 Oct 2014, Michal Sojka wrote: +> The new outputs allow printing senders, recipients or both of matching +> messages. 
The --output option is converted from "keyword" argument to +> "flags" argument, which means that the user can use --output=3Dsender and +> --output=3Drecipients simultaneously, to print both. Other combinations +> produce an error. +> +> This code based on a patch from Jani Nikula. +> --- +> completion/notmuch-completion.bash | 2 +- +> completion/notmuch-completion.zsh | 3 +- +> doc/man1/notmuch-search.rst | 22 +++++++- +> notmuch-search.c | 110 +++++++++++++++++++++++++++++++= ++++--- +> test/T090-search-output.sh | 64 +++++++++++++++++++++ +> 5 files changed, 189 insertions(+), 12 deletions(-) +> +> diff --git a/completion/notmuch-completion.bash b/completion/notmuch-comp= +letion.bash +> index 0571dc9..cfbd389 100644 +> --- a/completion/notmuch-completion.bash +> +++ b/completion/notmuch-completion.bash +> @@ -294,7 +294,7 @@ _notmuch_search() +> return +> ;; +> --output) +> - COMPREPLY=3D( $( compgen -W "summary threads messages files tags" -= +- "${cur}" ) ) +> + COMPREPLY=3D( $( compgen -W "summary threads messages files tags se= +nder recipients" -- "${cur}" ) ) +> return +> ;; +> --sort) +> diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-compl= +etion.zsh +> index 67a9aba..3e52a00 100644 +> --- a/completion/notmuch-completion.zsh +> +++ b/completion/notmuch-completion.zsh +> @@ -52,7 +52,8 @@ _notmuch_search() +> _arguments -s : \ +> '--max-threads=3D[display only the first x threads from the search r= +esults]:number of threads to show: ' \ +> '--first=3D[omit the first x threads from the search results]:number= + of threads to omit: ' \ +> - '--sort=3D[sort results]:sorting:((newest-first\:"reverse chronologi= +cal order" oldest-first\:"chronological order"))' +> + '--sort=3D[sort results]:sorting:((newest-first\:"reverse chronologi= +cal order" oldest-first\:"chronological order"))' \ +> + '--output=3D[select what to output]:output:((summary threads message= +s files tags sender recipients))' +> } +>=20=20 +> _notmuch() +> diff --git 
a/doc/man1/notmuch-search.rst b/doc/man1/notmuch-search.rst +> index 90160f2..c9d38b1 100644 +> --- a/doc/man1/notmuch-search.rst +> +++ b/doc/man1/notmuch-search.rst +> @@ -35,7 +35,7 @@ Supported options for **search** include +> intended for programs that invoke **notmuch(1)** internally. If +> omitted, the latest supported version will be used. +>=20=20 +> - ``--output=3D(summary|threads|messages|files|tags)`` +> + ``--output=3D(summary|threads|messages|files|tags|sender|recipients)= +`` +>=20=20 +> **summary** +> Output a summary of each thread with any message matching +> @@ -78,6 +78,26 @@ Supported options for **search** include +> by null characters (--format=3Dtext0), as a JSON array +> (--format=3Djson), or as an S-Expression list (--format=3Dse= +xp). +>=20=20 +> + **sender** +> + Output all addresses from the *From* header that appear on +> + any message matching the search terms, either one per line +> + (--format=3Dtext), separated by null characters +> + (--format=3Dtext0), as a JSON array (--format=3Djson), or as +> + an S-Expression list (--format=3Dsexp). +> + +> + Note: Searching for **sender** should be much faster than +> + searching for **recipients**, because sender addresses are +> + cached directly in the database whereas other addresses +> + need to be fetched from message files. +> + +> + **recipients** +> + Like **sender** but for addresses from *To*, *Cc* and +> + *Bcc* headers. +> + +> + This option can be given multiple times to combine different +> + outputs. Curently, this is only supported for **sender** and +> + **recipients** outputs. 
+> + +> ``--sort=3D``\ (**newest-first**\ \|\ **oldest-first**) +> This option can be used to present results in either +> chronological order (**oldest-first**) or reverse chronological +> diff --git a/notmuch-search.c b/notmuch-search.c +> index 5ac2a26..74588f8 100644 +> --- a/notmuch-search.c +> +++ b/notmuch-search.c +> @@ -23,11 +23,14 @@ +> #include "string-util.h" +>=20=20 +> typedef enum { +> - OUTPUT_SUMMARY, +> - OUTPUT_THREADS, +> - OUTPUT_MESSAGES, +> - OUTPUT_FILES, +> - OUTPUT_TAGS +> + OUTPUT_SUMMARY =3D 1 << 0, +> + OUTPUT_THREADS =3D 1 << 1, +> + OUTPUT_MESSAGES =3D 1 << 2, +> + OUTPUT_FILES =3D 1 << 3, +> + OUTPUT_TAGS =3D 1 << 4, +> + OUTPUT_SENDER =3D 1 << 5, +> + OUTPUT_RECIPIENTS =3D 1 << 6, +> + OUTPUT_ADDRESSES =3D OUTPUT_SENDER | OUTPUT_RECIPIENTS, +> } output_t; +>=20=20 +> typedef struct { +> @@ -220,6 +223,67 @@ do_search_threads (search_options_t *o) +> return 0; +> } +>=20=20 +> +static void +> +print_address_list (const search_options_t *o, InternetAddressList *list) +> +{ +> + InternetAddress *address; +> + int i; +> + +> + for (i =3D 0; i < internet_address_list_length (list); i++) { +> + address =3D internet_address_list_get_address (list, i); +> + if (INTERNET_ADDRESS_IS_GROUP (address)) { +> + InternetAddressGroup *group; +> + InternetAddressList *group_list; +> + +> + group =3D INTERNET_ADDRESS_GROUP (address); +> + group_list =3D internet_address_group_get_members (group); +> + if (group_list =3D=3D NULL) +> + continue; +> + +> + print_address_list (o, group_list); +> + } else { +> + InternetAddressMailbox *mailbox; +> + const char *name; +> + const char *addr; +> + char *full_address; +> + +> + mailbox =3D INTERNET_ADDRESS_MAILBOX (address); +> + +> + name =3D internet_address_get_name (address); +> + addr =3D internet_address_mailbox_get_addr (mailbox); +> + +> + if (name && *name) +> + full_address =3D talloc_asprintf (o->format, "%s <%s>", name, addr); +> + else +> + full_address =3D talloc_strdup (o->format, addr); +> + 
+> + if (!full_address) { +> + fprintf (stderr, "Error: out of memory\n"); +> + break; +> + } +> + o->format->string (o->format, full_address); +> + o->format->separator (o->format); +> + +> + talloc_free (full_address); + +Thinking about this some more, how about printing the name and address as +a structured pair/map (at least for all cases except text/text0 output): +something like (in JSON) +[name: "John Doe" address: "john.doe@example.com"] + +It seems wrong to me to go to the effort of separating them in the C and +then combining them in the output. + +This could also help with the questions about uniqueness. If the client +can get the data ready parsed into name/address then it can deal with +much of the uniqueness itself. + +My preference would be for the default to print one line for each +distinct full_address, and then any filter-by options to refine from +there. + +One other advantage of structuring the output is that it is extensible: +for example, at some later stage, we could include a "count" in the map +allowing the client to pick the most popular variant. 
+ +Best wishes + +Mark + + + + +> + } +> + } +> +} +> + +> +static void +> +print_address_string (const search_options_t *o, const char *recipients) +> +{ +> + InternetAddressList *list; +> + +> + if (recipients =3D=3D NULL) +> + return; +> + +> + list =3D internet_address_list_parse_string (recipients); +> + if (list =3D=3D NULL) +> + return; +> + +> + print_address_list (o, list); +> +} +> + +> static int +> do_search_messages (search_options_t *o) +> { +> @@ -266,11 +330,29 @@ do_search_messages (search_options_t *o) +>=20=20=09=20=20=20=20 +> notmuch_filenames_destroy( filenames ); +>=20=20 +> - } else { /* output =3D=3D OUTPUT_MESSAGES */ +> + } else if (o->output =3D=3D OUTPUT_MESSAGES) { +> format->set_prefix (format, "id"); +> format->string (format, +> notmuch_message_get_message_id (message)); +> format->separator (format); +> + } else { +> + if (o->output & OUTPUT_SENDER) { +> + const char *addrs; +> + +> + addrs =3D notmuch_message_get_header (message, "from"); +> + print_address_string (o, addrs); +> + } +> + +> + if (o->output & OUTPUT_RECIPIENTS) { +> + const char *hdrs[] =3D { "to", "cc", "bcc" }; +> + const char *addrs; +> + size_t j; +> + +> + for (j =3D 0; j < ARRAY_SIZE (hdrs); j++) { +> + addrs =3D notmuch_message_get_header (message, hdrs[j]); +> + print_address_string (o, addrs); +> + } +> + } +> } +>=20=20 +> notmuch_message_destroy (message); +> @@ -337,7 +419,7 @@ notmuch_search_command (notmuch_config_t *config, int= + argc, char *argv[]) +> notmuch_database_t *notmuch; +> search_options_t o =3D { +> .sort =3D NOTMUCH_SORT_NEWEST_FIRST, +> - .output =3D OUTPUT_SUMMARY, +> + .output =3D 0, +> .offset =3D 0, +> .limit =3D -1, /* unlimited */ +> .dupe =3D -1, +> @@ -366,10 +448,12 @@ notmuch_search_command (notmuch_config_t *config, i= +nt argc, char *argv[]) +> { "text0", NOTMUCH_FORMAT_TEXT0 }, +> { 0, 0 } } }, +> { NOTMUCH_OPT_INT, ¬much_format_version, "format-version", 0, 0 }, +> - { NOTMUCH_OPT_KEYWORD, &o.output, "output", 'o', +> + { 
NOTMUCH_OPT_KEYWORD_FLAGS, &o.output, "output", 'o', +> (notmuch_keyword_t []){ { "summary", OUTPUT_SUMMARY }, +> { "threads", OUTPUT_THREADS }, +> { "messages", OUTPUT_MESSAGES }, +> + { "sender", OUTPUT_SENDER }, +> + { "recipients", OUTPUT_RECIPIENTS }, +> { "files", OUTPUT_FILES }, +> { "tags", OUTPUT_TAGS }, +> { 0, 0 } } }, +> @@ -389,6 +473,9 @@ notmuch_search_command (notmuch_config_t *config, int= + argc, char *argv[]) +> if (opt_index < 0) +> return EXIT_FAILURE; +>=20=20 +> + if (! o.output) +> + o.output =3D OUTPUT_SUMMARY; +> + +> switch (format_sel) { +> case NOTMUCH_FORMAT_TEXT: +> o.format =3D sprinter_text_create (config, stdout); +> @@ -455,18 +542,23 @@ notmuch_search_command (notmuch_config_t *config, i= +nt argc, char *argv[]) +> } +>=20=20 +> switch (o.output) { +> - default: +> case OUTPUT_SUMMARY: +> case OUTPUT_THREADS: +> ret =3D do_search_threads (&o); +> break; +> case OUTPUT_MESSAGES: +> + case OUTPUT_SENDER: +> + case OUTPUT_RECIPIENTS: +> + case OUTPUT_ADDRESSES: +> case OUTPUT_FILES: +> ret =3D do_search_messages (&o); +> break; +> case OUTPUT_TAGS: +> ret =3D do_search_tags (notmuch, o.format, o.query); +> break; +> + default: +> + fprintf (stderr, "Error: the combination of outputs is not supported.\n= +"); +> + ret =3D 1; +> } +>=20=20 +> notmuch_query_destroy (o.query); +> diff --git a/test/T090-search-output.sh b/test/T090-search-output.sh +> index 947d572..e696c01 100755 +> --- a/test/T090-search-output.sh +> +++ b/test/T090-search-output.sh +> @@ -387,6 +387,70 @@ cat <EXPECTED +> EOF +> test_expect_equal_file OUTPUT EXPECTED +>=20=20 +> +test_begin_subtest "--output=3Dsender" +> +notmuch search --output=3Dsender '*' | sort | uniq --count >OUTPUT +> +cat <EXPECTED +> + 1 Adrian Perez de Castro +> + 2 Alex Botero-Lowry +> + 4 Alexander Botero-Lowry +> + 1 Aron Griffis +> + 12 Carl Worth +> + 1 Chris Wilson +> + 1 Fran=C3=A7ois Boulogne +> + 1 Ingmar Vanhassel +> + 1 Israel Herraiz +> + 4 Jan Janak +> + 2 Jjgod Jiang +> + 7 
Keith Packard +> + 5 Lars Kellogg-Stedman +> + 5 Mikhail Gusarov +> + 1 Olivier Berger +> + 1 Rolland Santimano +> + 3 Stewart Smith +> +EOF +> +test_expect_equal_file OUTPUT EXPECTED +> + +> +test_begin_subtest "--output=3Drecipients" +> +notmuch search --output=3Drecipients '*' | sort | uniq --count >OUTPUT +> +cat <EXPECTED +> + 1 Allan McRae +> + 1 Discussion about the Arch User Repository (AUR) +> + 1 Keith Packard +> + 1 Mikhail Gusarov +> + 2 notmuch +> + 48 notmuch@notmuchmail.org +> + 1 olivier.berger@it-sudparis.eu +> +EOF +> +test_expect_equal_file OUTPUT EXPECTED +> + +> +test_begin_subtest "--output=3Dsender --output=3Drecipients" +> +notmuch search --output=3Dsender --output=3Drecipients '*' | sort | uniq= + --count >OUTPUT +> +cat <EXPECTED +> + 1 Adrian Perez de Castro +> + 2 Alex Botero-Lowry +> + 4 Alexander Botero-Lowry +> + 1 Allan McRae +> + 1 Aron Griffis +> + 12 Carl Worth +> + 1 Chris Wilson +> + 1 Discussion about the Arch User Repository (AUR) +> + 1 Fran=C3=A7ois Boulogne +> + 1 Ingmar Vanhassel +> + 1 Israel Herraiz +> + 4 Jan Janak +> + 2 Jjgod Jiang +> + 8 Keith Packard +> + 5 Lars Kellogg-Stedman +> + 6 Mikhail Gusarov +> + 1 Olivier Berger +> + 1 Rolland Santimano +> + 3 Stewart Smith +> + 2 notmuch +> + 48 notmuch@notmuchmail.org +> + 1 olivier.berger@it-sudparis.eu +> +EOF +> +test_expect_equal_file OUTPUT EXPECTED +> + +> test_begin_subtest "sanitize output for quoted-printable line-breaks in = +author and subject" +> add_message "[subject]=3D'two =3D?ISO-8859-1?Q?line=3D0A_subject?=3D +> headers'" +> --=20 +> 2.1.1 +> +> _______________________________________________ +> notmuch mailing list +> notmuch@notmuchmail.org +> http://notmuchmail.org/mailman/listinfo/notmuch -- 2.26.2