From d514c51880d672dd43698930e258028dbb0e5d06 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Sun, 2 Nov 2014 01:50:00 +0100 Subject: [PATCH] [PATCH 10/10] cli: address: Add --filter-by option to configure address filtering --- 88/61239712ce04ba07721d3e33ee7a735d9da6d4 | 332 ++++++++++++++++++++++ 1 file changed, 332 insertions(+) create mode 100644 88/61239712ce04ba07721d3e33ee7a735d9da6d4 diff --git a/88/61239712ce04ba07721d3e33ee7a735d9da6d4 b/88/61239712ce04ba07721d3e33ee7a735d9da6d4 new file mode 100644 index 000000000..d5f8652ec --- /dev/null +++ b/88/61239712ce04ba07721d3e33ee7a735d9da6d4 @@ -0,0 +1,332 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 1B6E0429E47 + for ; Sat, 1 Nov 2014 17:50:36 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -2.3 +X-Spam-Level: +X-Spam-Status: No, score=-2.3 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 3JC3jKPqnZ2W for ; + Sat, 1 Nov 2014 17:50:30 -0700 (PDT) +Received: from max.feld.cvut.cz (max.feld.cvut.cz [147.32.192.36]) + by olra.theworths.org (Postfix) with ESMTP id 86768431E62 + for ; Sat, 1 Nov 2014 17:50:15 -0700 (PDT) +Received: from localhost (unknown [192.168.200.7]) + by max.feld.cvut.cz (Postfix) with ESMTP id E28A85CD28D + for ; Sun, 2 Nov 2014 01:50:14 +0100 (CET) +X-Virus-Scanned: IMAP STYX AMAVIS +Received: from max.feld.cvut.cz ([192.168.200.1]) + by localhost (styx.feld.cvut.cz [192.168.200.7]) (amavisd-new, + port 10044) with ESMTP id xPLK3xamo-9l for ; + Sun, 2 Nov 2014 01:50:10 +0100 (CET) +Received: from imap.feld.cvut.cz (imap.feld.cvut.cz [147.32.192.34]) + by max.feld.cvut.cz (Postfix) with ESMTP id 3181B5CD287 + for ; Sun, 2 
Nov 2014 01:50:10 +0100 (CET) +Received: from wsh by steelpick.2x.cz with local (Exim 4.84) + (envelope-from ) + id 1XkjMq-00085V-LX; Sun, 02 Nov 2014 01:50:04 +0100 +From: Michal Sojka +To: notmuch@notmuchmail.org +Subject: [PATCH 10/10] cli: address: Add --filter-by option to configure + address filtering +Date: Sun, 2 Nov 2014 01:50:00 +0100 +Message-Id: <1414889400-30977-11-git-send-email-sojkam1@fel.cvut.cz> +X-Mailer: git-send-email 2.1.1 +In-Reply-To: <1414889400-30977-1-git-send-email-sojkam1@fel.cvut.cz> +References: <1414889400-30977-1-git-send-email-sojkam1@fel.cvut.cz> +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Sun, 02 Nov 2014 00:50:36 -0000 + +This option allows to configure the criterion for duplicate address +filtering. Without this option, all unique combinations of name and +address parts are printed. This option allows to filter the output +more, for example to only contain unique address parts. +--- + completion/notmuch-completion.bash | 6 +++- + completion/notmuch-completion.zsh | 1 + + doc/man1/notmuch-address.rst | 36 ++++++++++++++++++- + notmuch-search.c | 48 +++++++++++++++++++++++-- + test/T097-address-filter-by.sh | 73 ++++++++++++++++++++++++++++++++++++++ + 5 files changed, 160 insertions(+), 4 deletions(-) + create mode 100755 test/T097-address-filter-by.sh + +diff --git a/completion/notmuch-completion.bash b/completion/notmuch-completion.bash +index db152f3..2cb1586 100644 +--- a/completion/notmuch-completion.bash ++++ b/completion/notmuch-completion.bash +@@ -310,7 +310,7 @@ _notmuch_search() + ! 
$split && + case "${cur}" in + -*) +- local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate=" ++ local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate= --filter-by=" + compopt -o nospace + COMPREPLY=( $(compgen -W "$options" -- ${cur}) ) + ;; +@@ -343,6 +343,10 @@ _notmuch_address() + COMPREPLY=( $( compgen -W "true false flag all" -- "${cur}" ) ) + return + ;; ++ --filter-by) ++ COMPREPLY=( $( compgen -W "nameaddr name addr addrfold nameaddrfold" -- "${cur}" ) ) ++ return ++ ;; + esac + + ! $split && +diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-completion.zsh +index 8968562..3758f1a 100644 +--- a/completion/notmuch-completion.zsh ++++ b/completion/notmuch-completion.zsh +@@ -62,6 +62,7 @@ _notmuch_address() + _arguments -s : \ + '--sort=[sort results]:sorting:((newest-first\:"reverse chronological order" oldest-first\:"chronological order"))' \ + '--output=[select what to output]:output:((sender recipients count))' ++ '--filter-by=[filter out duplicate addresses]:filter-by:((nameaddr\:"both name and address part" name\:"name part" addr\:"address part" addrfold\:"case-insensitive address part" nameaddrfold\:"name and case-insensitive address part"))' + } + + _notmuch() +diff --git a/doc/man1/notmuch-address.rst b/doc/man1/notmuch-address.rst +index 18473a7..524ab91 100644 +--- a/doc/man1/notmuch-address.rst ++++ b/doc/man1/notmuch-address.rst +@@ -11,7 +11,8 @@ DESCRIPTION + =========== + + Search for messages matching the given search terms, and display the +-addresses from them. Duplicate addresses are filtered out. ++addresses from them. Duplicate addresses are filtered out. Filtering ++can be configured with the --filter-by option. + + See **notmuch-search-terms(7)** for details of the supported syntax for + . 
+@@ -85,6 +86,39 @@ Supported options for **address** include + is the number of matching non-excluded messages in the thread, + rather than the number of matching messages. + ++ ``--filter-by=``\ (**nameaddr**\ \|\ **name** \|\ **addr**\ \|\ **addrfold**\ \|\ **nameaddrfold**\) ++ ++ Controls how to filter out duplicate addresses. The filtering ++ algorithm receives a sequence of email addresses and outputs ++ the same sequence without the addresses that are considered a ++ duplicate of a previously output address. What is considered a ++ duplicate depends on how the two addresses are compared: ++ ++ **nameaddr** means that both name and address parts are ++ compared in case-sensitive manner. Therefore, all same looking ++ address strings are considered duplicate. This is the ++ default. ++ ++ **name** means that only the name part is compared (in ++ case-sensitive manner). For example, the addresses "John Doe ++ " and "John Doe " will be ++ considered duplicate. ++ ++ **addr** means that only the address part is compared (in ++ case-sensitive manner). For example, the addresses "John Doe ++ " and "Dr. John Doe " will ++ be considered duplicate. ++ ++ **addrfold** is like **addr**, but comparison is done in ++ case-insensitive manner. For example, the addresses "John Doe ++ " and "Dr. John Doe " will ++ be considered duplicate. ++ ++ **nameaddrfold** is like **nameaddr**, but address comparison ++ is done in case-insensitive manner. For example, the ++ addresses "John Doe " and "John Doe ++ " will be considered duplicate.
++ + EXIT STATUS + =========== + +diff --git a/notmuch-search.c b/notmuch-search.c +index b45d480..ee1aead 100644 +--- a/notmuch-search.c ++++ b/notmuch-search.c +@@ -43,6 +43,14 @@ typedef enum { + NOTMUCH_FORMAT_SEXP + } format_sel_t; + ++typedef enum { ++ FILTER_BY_NAMEADDR = 0, ++ FILTER_BY_NAME, ++ FILTER_BY_ADDR, ++ FILTER_BY_ADDRFOLD, ++ FILTER_BY_NAMEADDRFOLD, ++} filter_by_t; ++ + typedef struct { + notmuch_database_t *notmuch; + format_sel_t format_sel; +@@ -55,6 +63,7 @@ typedef struct { + int limit; + int dupe; + GHashTable *addresses; ++ filter_by_t filter_by; + } search_context_t; + + typedef struct { +@@ -243,15 +252,42 @@ do_search_threads (search_context_t *ctx) + return 0; + } + +-/* Returns TRUE iff name and addr is duplicate. */ ++/* Returns TRUE iff name and/or addr is considered duplicate. */ + static notmuch_bool_t + is_duplicate (const search_context_t *ctx, const char *name, const char *addr) + { + notmuch_bool_t duplicate; + char *key; ++ gchar *addrfold = NULL; + mailbox_t *mailbox; + +- key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); ++ if (ctx->filter_by == FILTER_BY_ADDRFOLD || ++ ctx->filter_by == FILTER_BY_NAMEADDRFOLD) ++ addrfold = g_utf8_casefold (addr, -1); ++ ++ switch (ctx->filter_by) { ++ case FILTER_BY_NAMEADDR: ++ key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); ++ break; ++ case FILTER_BY_NAMEADDRFOLD: ++ key = talloc_asprintf (ctx->format, "%s <%s>", name, addrfold); ++ break; ++ case FILTER_BY_NAME: ++ key = talloc_strdup (ctx->format, name); /* !name results in !key */ ++ break; ++ case FILTER_BY_ADDR: ++ key = talloc_strdup (ctx->format, addr); ++ break; ++ case FILTER_BY_ADDRFOLD: ++ key = talloc_strdup (ctx->format, addrfold); ++ break; ++ default: ++ INTERNAL_ERROR("invalid --filter-by flags"); ++ } ++ ++ if (addrfold) ++ g_free (addrfold); ++ + if (! 
key) + return FALSE; + +@@ -694,10 +730,18 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[]) + { "recipients", OUTPUT_RECIPIENTS }, + { "count", OUTPUT_COUNT }, + { 0, 0 } } }, ++ { NOTMUCH_OPT_KEYWORD, &ctx->filter_by, "filter-by", 'b', ++ (notmuch_keyword_t []){ { "nameaddr", FILTER_BY_NAMEADDR }, ++ { "name", FILTER_BY_NAME }, ++ { "addr", FILTER_BY_ADDR }, ++ { "addrfold", FILTER_BY_ADDRFOLD }, ++ { "nameaddrfold", FILTER_BY_NAMEADDRFOLD }, ++ { 0, 0 } } }, + { NOTMUCH_OPT_INHERIT, &common_options, NULL, 0, 0 }, + { 0, 0, 0, 0, 0 } + }; + ++ ctx->filter_by = FILTER_BY_NAMEADDR, + opt_index = parse_arguments (argc, argv, options, 1); + if (opt_index < 0) + return EXIT_FAILURE; +diff --git a/test/T097-address-filter-by.sh b/test/T097-address-filter-by.sh +new file mode 100755 +index 0000000..544d8e8 +--- /dev/null ++++ b/test/T097-address-filter-by.sh +@@ -0,0 +1,73 @@ ++#!/usr/bin/env bash ++test_description='duplicite address filtering in "notmuch address"' ++. 
./test-lib.sh ++ ++add_message '[to]="John Doe , John Doe "' ++add_message '[to]="\"Doe, John\" "' '[cc]="John Doe "' ++add_message '[to]="\"Doe, John\" "' '[bcc]="John Doe "' ++ ++test_begin_subtest "--output=recipients" ++notmuch address --output=recipients "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++"Doe, John" ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=nameaddr" ++notmuch address --output=recipients --filter-by=nameaddr "*" >OUTPUT ++# The same as above ++cat <EXPECTED ++John Doe ++John Doe ++"Doe, John" ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=name" ++notmuch address --output=recipients --filter-by=name "*" >OUTPUT ++cat <EXPECTED ++John Doe ++"Doe, John" ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=addr" ++notmuch address --output=recipients --filter-by=addr "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=addrfold" ++notmuch address --output=recipients --filter-by=addrfold "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=nameaddrfold" ++notmuch address --output=recipients --filter-by=nameaddrfold "*" >OUTPUT ++cat <EXPECTED ++John Doe ++John Doe ++"Doe, John" ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_begin_subtest "--output=recipients --filter-by=nameaddrfold --output=count" ++notmuch address --output=recipients --filter-by=nameaddrfold --output=count "*" | sort -n >OUTPUT ++cat <EXPECTED ++1 John Doe ++2 "Doe, John" ++3 John Doe ++EOF ++test_expect_equal_file OUTPUT EXPECTED ++ ++test_done +-- +2.1.1 + -- 2.26.2