From: Michal Sojka Date: Wed, 5 Nov 2014 00:25:59 +0000 (+0100) Subject: [PATCH v3 10/10] cli: address: Add --filter-by option to configure address filtering X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=1512bd31b934bcc9162d488ca7b931710c9bad73;p=notmuch-archives.git [PATCH v3 10/10] cli: address: Add --filter-by option to configure address filtering --- diff --git a/66/5f3129d38f7b66cf294944101d9d3e5984dadc b/66/5f3129d38f7b66cf294944101d9d3e5984dadc new file mode 100644 index 000000000..cb66a5e78 --- /dev/null +++ b/66/5f3129d38f7b66cf294944101d9d3e5984dadc @@ -0,0 +1,335 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 6055B431FBC + for ; Tue, 4 Nov 2014 16:26:40 -0800 (PST) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -2.3 +X-Spam-Level: +X-Spam-Status: No, score=-2.3 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id Vnjyyros3Y62 for ; + Tue, 4 Nov 2014 16:26:35 -0800 (PST) +Received: from max.feld.cvut.cz (max.feld.cvut.cz [147.32.192.36]) + by olra.theworths.org (Postfix) with ESMTP id 3379D431FC0 + for ; Tue, 4 Nov 2014 16:26:22 -0800 (PST) +Received: from localhost (unknown [192.168.200.7]) + by max.feld.cvut.cz (Postfix) with ESMTP id 9E5E35CD1A8 + for ; Wed, 5 Nov 2014 01:26:21 +0100 (CET) +X-Virus-Scanned: IMAP STYX AMAVIS +Received: from max.feld.cvut.cz ([192.168.200.1]) + by localhost (styx.feld.cvut.cz [192.168.200.7]) (amavisd-new, + port 10044) with ESMTP id h9udu-_Ic597 for ; + Wed, 5 Nov 2014 01:26:17 +0100 (CET) +Received: from imap.feld.cvut.cz (imap.feld.cvut.cz [147.32.192.34]) + by max.feld.cvut.cz (Postfix) with ESMTP id C6F6F5CD1AF + for ; Wed, 5 Nov 2014 01:26:14 +0100 (CET) 
+Received: from wsh by steelpick.2x.cz with local (Exim 4.84) + (envelope-from ) + id 1XloQJ-0005DN-1d; Wed, 05 Nov 2014 01:26:07 +0100 +From: Michal Sojka +To: notmuch@notmuchmail.org +Subject: [PATCH v3 10/10] cli: address: Add --filter-by option to configure + address filtering +Date: Wed, 5 Nov 2014 01:25:59 +0100 +Message-Id: <1415147159-19946-11-git-send-email-sojkam1@fel.cvut.cz> +X-Mailer: git-send-email 2.1.1 +In-Reply-To: <1415147159-19946-1-git-send-email-sojkam1@fel.cvut.cz> +References: <1415147159-19946-1-git-send-email-sojkam1@fel.cvut.cz> +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Wed, 05 Nov 2014 00:26:40 -0000 + +This option allows to configure the criterion for duplicate address +filtering. Without this option, all unique combinations of name and +address parts are printed. This option allows to filter the output +more, for example to only contain unique address parts. +--- + completion/notmuch-completion.bash | 6 +++- + completion/notmuch-completion.zsh | 1 + + doc/man1/notmuch-address.rst | 36 ++++++++++++++++++- + notmuch-search.c | 51 ++++++++++++++++++++++++-- + test/T097-address-filter-by.sh | 73 ++++++++++++++++++++++++++++++++++++++ + 5 files changed, 162 insertions(+), 5 deletions(-) + create mode 100755 test/T097-address-filter-by.sh + +diff --git a/completion/notmuch-completion.bash b/completion/notmuch-completion.bash +index db152f3..2cb1586 100644 +--- a/completion/notmuch-completion.bash ++++ b/completion/notmuch-completion.bash +@@ -310,7 +310,7 @@ _notmuch_search() + ! 
$split && + case "${cur}" in + -*) +- local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate=" ++ local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate= --filter-by=" + compopt -o nospace + COMPREPLY=( $(compgen -W "$options" -- ${cur}) ) + ;; +@@ -343,6 +343,10 @@ _notmuch_address() + COMPREPLY=( $( compgen -W "true false flag all" -- "${cur}" ) ) + return + ;; ++ --filter-by) ++ COMPREPLY=( $( compgen -W "nameaddr name addr addrfold nameaddrfold" -- "${cur}" ) ) ++ return ++ ;; + esac + + ! $split && +diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-completion.zsh +index 8968562..3758f1a 100644 +--- a/completion/notmuch-completion.zsh ++++ b/completion/notmuch-completion.zsh +@@ -62,6 +62,7 @@ _notmuch_address() + _arguments -s : \ + '--sort=[sort results]:sorting:((newest-first\:"reverse chronological order" oldest-first\:"chronological order"))' \ + '--output=[select what to output]:output:((sender recipients count))' ++ '--filter-by=[filter out duplicate addresses]:filter-by:((nameaddr\:"both name and address part" name\:"name part" addr\:"address part" addrfold\:"case-insensitive address part" nameaddrfold\:"name and case-insensitive address part"))' + } + + _notmuch() +diff --git a/doc/man1/notmuch-address.rst b/doc/man1/notmuch-address.rst +index 359616e..00582c3 100644 +--- a/doc/man1/notmuch-address.rst ++++ b/doc/man1/notmuch-address.rst +@@ -11,7 +11,8 @@ DESCRIPTION + =========== + + Search for messages matching the given search terms, and display the +-addresses from them. Duplicate addresses are filtered out. ++addresses from them. Duplicate addresses are filtered out. Filtering ++can be configured with the --filter-by option. + + See **notmuch-search-terms(7)** for details of the supported syntax for + . +@@ -77,6 +78,39 @@ Supported options for **address** include + **false** allows excluded messages to match search terms and + appear in displayed results. 
+ ++ ``--filter-by=``\ (**nameaddr**\ \|\ **name** \|\ **addr**\ \|\ **addrfold**\ \|\ **nameaddrfold**\) ++ ++ Controls how to filter out duplicate addresses. The filtering ++ algorithm receives a sequence of email addresses and outputs ++ the same sequence without the addresses that are considered a ++ duplicate of a previously output address. What is considered a ++ duplicate depends on how the two addresses are compared: ++ ++ **nameaddr** means that both name and address parts are ++ compared in case-sensitive manner. Therefore, all same looking ++ address strings are considered duplicate. This is the ++ default. ++ ++ **name** means that only the name part is compared (in ++ case-sensitive manner). For example, the addresses "John Doe ++ " and "John Doe " will be ++ considered duplicate. ++ ++ **addr** means that only the address part is compared (in ++ case-sensitive manner). For example, the addresses "John Doe ++ " and "Dr. John Doe " will ++ be considered duplicate. ++ ++ **addrfold** is like **addr**, but comparison is done in ++ case-insensitive manner. For example, the addresses "John Doe ++ " and "Dr. John Doe " will ++ be considered duplicate. ++ ++ **nameaddrfold** is like **nameaddr**, but address comparison ++ is done in case-insensitive manner. For example, the ++ addresses "John Doe " and "John Doe ++ " will be considered duplicate. 
++ + EXIT STATUS + =========== + +diff --git a/notmuch-search.c b/notmuch-search.c +index 5036d8e..246ec0a 100644 +--- a/notmuch-search.c ++++ b/notmuch-search.c +@@ -43,6 +43,14 @@ typedef enum { + NOTMUCH_FORMAT_SEXP + } format_sel_t; + ++typedef enum { ++ FILTER_BY_NAMEADDR = 0, ++ FILTER_BY_NAME, ++ FILTER_BY_ADDR, ++ FILTER_BY_ADDRFOLD, ++ FILTER_BY_NAMEADDRFOLD, ++} filter_by_t; ++ + typedef struct { + notmuch_database_t *notmuch; + format_sel_t format_sel; +@@ -55,6 +63,7 @@ typedef struct { + int limit; + int dupe; + GHashTable *addresses; ++ filter_by_t filter_by; + } search_context_t; + + typedef struct { +@@ -243,16 +252,44 @@ do_search_threads (search_context_t *ctx) + return 0; + } + +-/* Returns TRUE iff name and addr is duplicate. If not, stores the +- * name/addr pair in order to detect subsequent duplicates. */ ++/* Returns TRUE iff name and/or addr is considered duplicate. If not, ++ * stores the name/addr pair in order to detect subsequent ++ * duplicates. */ + static notmuch_bool_t + is_duplicate (const search_context_t *ctx, const char *name, const char *addr) + { + notmuch_bool_t duplicate; + char *key; ++ gchar *addrfold = NULL; + mailbox_t *mailbox; + +- key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); ++ if (ctx->filter_by == FILTER_BY_ADDRFOLD || ++ ctx->filter_by == FILTER_BY_NAMEADDRFOLD) ++ addrfold = g_utf8_casefold (addr, -1); ++ ++ switch (ctx->filter_by) { ++ case FILTER_BY_NAMEADDR: ++ key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); ++ break; ++ case FILTER_BY_NAMEADDRFOLD: ++ key = talloc_asprintf (ctx->format, "%s <%s>", name, addrfold); ++ break; ++ case FILTER_BY_NAME: ++ key = talloc_strdup (ctx->format, name); /* !name results in !key */ ++ break; ++ case FILTER_BY_ADDR: ++ key = talloc_strdup (ctx->format, addr); ++ break; ++ case FILTER_BY_ADDRFOLD: ++ key = talloc_strdup (ctx->format, addrfold); ++ break; ++ default: ++ INTERNAL_ERROR("invalid --filter-by flags"); ++ } ++ ++ if (addrfold) ++ g_free 
(addrfold); ++ + if (! key) + return FALSE; + +@@ -727,10 +764,18 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[]) + (notmuch_keyword_t []){ { "true", NOTMUCH_EXCLUDE_TRUE }, + { "false", NOTMUCH_EXCLUDE_FALSE }, + { 0, 0 } } }, ++ { NOTMUCH_OPT_KEYWORD, &ctx->filter_by, "filter-by", 'b', ++ (notmuch_keyword_t []){ { "nameaddr", FILTER_BY_NAMEADDR }, ++ { "name", FILTER_BY_NAME }, ++ { "addr", FILTER_BY_ADDR }, ++ { "addrfold", FILTER_BY_ADDRFOLD }, ++ { "nameaddrfold", FILTER_BY_NAMEADDRFOLD }, ++ { 0, 0 } } }, + { NOTMUCH_OPT_INHERIT, &common_options, NULL, 0, 0 }, + { 0, 0, 0, 0, 0 } + }; + ++ ctx->filter_by = FILTER_BY_NAMEADDR, + opt_index = parse_arguments (argc, argv, options, 1); + if (opt_index < 0) + return EXIT_FAILURE; +diff --git a/test/T097-address-filter-by.sh b/test/T097-address-filter-by.sh +new file mode 100755 +index 0000000..544d8e8 +--- /dev/null ++++ b/test/T097-address-filter-by.sh +@@ -0,0 +1,73 @@ ++#!/usr/bin/env bash ++test_description='duplicite address filtering in "notmuch address"' ++. 
./test-lib.sh ++ ++add_message '[to]="John Doe , John Doe "' ++add_message '[to]="\"Doe, John\" "' '[cc]="John Doe "' ++add_message '[to]="\"Doe, John\" "' '[bcc]="John Doe "' ++ ++test_begin_subtest "--output=recipients" ++notmuch address --output=recipients "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++"Doe, John" ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=nameaddr" ++notmuch address --output=recipients --filter-by=nameaddr "*" >OUTPUT ++# The same as above ++cat <EXPECTED ++John Doe ++John Doe ++"Doe, John" ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=name" ++notmuch address --output=recipients --filter-by=name "*" >OUTPUT ++cat <EXPECTED ++John Doe ++"Doe, John" ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=addr" ++notmuch address --output=recipients --filter-by=addr "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=addrfold" ++notmuch address --output=recipients --filter-by=addrfold "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=nameaddrfold" ++notmuch address --output=recipients --filter-by=nameaddrfold "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++"Doe, John" ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=nameaddrfold --output=count" ++notmuch address --output=recipients --filter-by=nameaddrfold --output=count "*" | sort -n >OUTPUT ++cat <EXPECTED ++1 John Doe ++2 "Doe, John" ++3 John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_done +-- +2.1.1 +