From 5eb6abf47f1bae8444836399db9ebaaec41c523b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Sep 2015 22:40:03 +0300 Subject: [PATCH] [PATCH v2 7/9] cli: add support for deduplicating based on case insensitive address --- bb/4497f877437683e967527f112cf56a9b5ebb0f | 194 ++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 bb/4497f877437683e967527f112cf56a9b5ebb0f diff --git a/bb/4497f877437683e967527f112cf56a9b5ebb0f b/bb/4497f877437683e967527f112cf56a9b5ebb0f new file mode 100644 index 000000000..5fa068c56 --- /dev/null +++ b/bb/4497f877437683e967527f112cf56a9b5ebb0f @@ -0,0 +1,194 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by arlo.cworth.org (Postfix) with ESMTP id DBE016DE15D4 + for ; Thu, 3 Sep 2015 12:40:36 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at cworth.org +X-Amavis-Alert: BAD HEADER SECTION, Duplicate header field: "References" +X-Spam-Flag: NO +X-Spam-Score: -0.426 +X-Spam-Level: +X-Spam-Status: No, score=-0.426 tagged_above=-999 required=5 tests=[AWL=0.294, + RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01] + autolearn=disabled +Received: from arlo.cworth.org ([127.0.0.1]) + by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 0ixXvFvmogwZ for ; + Thu, 3 Sep 2015 12:40:35 -0700 (PDT) +Received: from mail-wi0-f174.google.com (mail-wi0-f174.google.com + [209.85.212.174]) + by arlo.cworth.org (Postfix) with ESMTPS id 7E6076DE1534 + for ; Thu, 3 Sep 2015 12:40:28 -0700 (PDT) +Received: by wiclk2 with SMTP id lk2so1207528wic.0 + for ; Thu, 03 Sep 2015 12:40:27 -0700 (PDT) +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20130820; + h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to + :references:in-reply-to:references; + bh=wj+tnZTwmrqNPh73iKZVy64DGNf+PYHfGPjHpeWz3Cs=; + 
b=RvqXDkwKlZg0JcuQOXwyfv2hJAFWgxfO69Z3gOTgorrEVwvoF8nDJrsgQzxixMI55v + v0avq/Tu/gFsq9E0avlwIeOp+iARZRmNFxmIsZ3Vc1TdYYi/hjv6niGpvqBEkKpdQHSU + sZaF9DWrPQV529LybAB79pp0r330Ft6mbRilgnoKCmEQrANzXePbmNWgf4YndYibvOz8 + CifBrZzNQGUN23sdAx9h94jzkDxUz5N8pAVrb4tKajhTGX7yR4dM/9mHGQGcRDdv48vY + xjiRb9t4tDM+P5OArdJFEWnko9kaeoUVCeXaqZJ4BCibe7LEnU4hHMZtvU65qsh2FpGO + RPcg== +X-Gm-Message-State: + ALoCoQmkdY8vuX2kaXvbnJVtNC/w/1HSklbfImyg9/RRvPnS3oT7FvNL4QjlTx295ce319ZqcEwf +X-Received: by 10.180.74.52 with SMTP id q20mr18151766wiv.94.1441309226937; + Thu, 03 Sep 2015 12:40:26 -0700 (PDT) +Received: from localhost (mobile-access-bcee4f-131.dhcp.inet.fi. + [188.238.79.131]) + by smtp.gmail.com with ESMTPSA id i7sm625597wib.15.2015.09.03.12.40.26 + (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); + Thu, 03 Sep 2015 12:40:26 -0700 (PDT) +From: Jani Nikula +To: notmuch@notmuchmail.org +Subject: [PATCH v2 7/9] cli: add support for deduplicating based on case + insensitive address +Date: Thu, 3 Sep 2015 22:40:03 +0300 +Message-Id: + +X-Mailer: git-send-email 2.1.4 +In-Reply-To: +References: +In-Reply-To: +References: +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.18 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Thu, 03 Sep 2015 19:40:36 -0000 + +Consider all variants of an email address as one, and print the most +common variant. 
+--- + notmuch-search.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 42 insertions(+), 6 deletions(-) + +diff --git a/notmuch-search.c b/notmuch-search.c +index 7c51d5df6bd4..deb9e58a747c 100644 +--- a/notmuch-search.c ++++ b/notmuch-search.c +@@ -39,6 +39,7 @@ typedef enum { + typedef enum { + DEDUP_NONE, + DEDUP_MAILBOX, ++ DEDUP_ADDRESS, + } dedup_t; + + typedef enum { +@@ -352,7 +353,7 @@ print_mailbox (const search_context_t *ctx, const mailbox_t *mailbox) + name_addr = internet_address_to_string (ia, FALSE); + + if (format->is_text_printer) { +- if (count > 0) { ++ if (ctx->output & OUTPUT_COUNT) { + format->integer (format, count); + format->string (format, "\t"); + } +@@ -366,7 +367,7 @@ print_mailbox (const search_context_t *ctx, const mailbox_t *mailbox) + format->string (format, addr); + format->map_key (format, "name-addr"); + format->string (format, name_addr); +- if (count > 0) { ++ if (ctx->output & OUTPUT_COUNT) { + format->map_key (format, "count"); + format->integer (format, count); + } +@@ -403,7 +404,6 @@ process_address_list (const search_context_t *ctx, + mailbox_t mbx = { + .name = internet_address_get_name (address), + .addr = internet_address_mailbox_get_addr (mailbox), +- .count = 0, + }; + + /* OUTPUT_COUNT only works with deduplication */ +@@ -411,7 +411,8 @@ process_address_list (const search_context_t *ctx, + is_duplicate (ctx, mbx.name, mbx.addr)) + continue; + +- if (ctx->output & OUTPUT_COUNT) ++ /* OUTPUT_COUNT and DEDUP_ADDRESS require a full pass. */ ++ if (ctx->output & OUTPUT_COUNT || ctx->dedup == DEDUP_ADDRESS) + continue; + + print_mailbox (ctx, &mbx); +@@ -450,6 +451,34 @@ _list_free_for_g_hash (void *ptr) + g_list_free_full (ptr, _talloc_free_for_g_hash); + } + ++/* Print the most common variant of a list of unique mailboxes, and ++ * conflate the counts. 
*/ ++static void ++print_popular (const search_context_t *ctx, GList *list) ++{ ++ GList *l; ++ mailbox_t *mailbox = NULL, *m; ++ int max = 0; ++ int total = 0; ++ ++ for (l = list; l; l = l->next) { ++ m = l->data; ++ total += m->count; ++ if (m->count > max) { ++ mailbox = m; ++ max = m->count; ++ } ++ } ++ ++ if (! mailbox) ++ INTERNAL_ERROR("Empty list in address hash table\n"); ++ ++ /* The original count is no longer needed, so overwrite. */ ++ mailbox->count = total; ++ ++ print_mailbox (ctx, mailbox); ++} ++ + static void + print_list_value (void *mailbox, void *context) + { +@@ -459,7 +488,12 @@ print_list_value (void *mailbox, void *context) + static void + print_hash_value (unused (void *key), void *list, void *context) + { +- g_list_foreach (list, print_list_value, context); ++ const search_context_t *ctx = context; ++ ++ if (ctx->dedup == DEDUP_ADDRESS) ++ print_popular (ctx, list); ++ else ++ g_list_foreach (list, print_list_value, context); + } + + static int +@@ -557,7 +591,8 @@ do_search_messages (search_context_t *ctx) + notmuch_message_destroy (message); + } + +- if (ctx->addresses && ctx->output & OUTPUT_COUNT) ++ if (ctx->addresses && ++ (ctx->output & OUTPUT_COUNT || ctx->dedup == DEDUP_ADDRESS)) + g_hash_table_foreach (ctx->addresses, print_hash_value, ctx); + + notmuch_messages_destroy (messages); +@@ -821,6 +856,7 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[]) + { NOTMUCH_OPT_KEYWORD, &ctx->dedup, "deduplicate", 'D', + (notmuch_keyword_t []){ { "no", DEDUP_NONE }, + { "mailbox", DEDUP_MAILBOX }, ++ { "address", DEDUP_ADDRESS }, + { 0, 0 } } }, + { NOTMUCH_OPT_INHERIT, (void *) &common_options, NULL, 0, 0 }, + { NOTMUCH_OPT_INHERIT, (void *) &notmuch_shared_options, NULL, 0, 0 }, +-- +2.1.4 + -- 2.26.2