From dd4c8d6f419c61037efa550131fd4732e949c8e4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 25 Sep 2015 19:48:20 +0300 Subject: [PATCH] [PATCH 6/9 v3 part 2/2] cli: change the data structure for notmuch address deduplication --- 9a/8160360ff1573c1e17eaf5bb2471d642701d9e | 230 ++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 9a/8160360ff1573c1e17eaf5bb2471d642701d9e diff --git a/9a/8160360ff1573c1e17eaf5bb2471d642701d9e b/9a/8160360ff1573c1e17eaf5bb2471d642701d9e new file mode 100644 index 000000000..9e752465b --- /dev/null +++ b/9a/8160360ff1573c1e17eaf5bb2471d642701d9e @@ -0,0 +1,230 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by arlo.cworth.org (Postfix) with ESMTP id 2D7626DE1271 + for ; Fri, 25 Sep 2015 09:48:45 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at cworth.org +X-Spam-Flag: NO +X-Spam-Score: -0.513 +X-Spam-Level: +X-Spam-Status: No, score=-0.513 tagged_above=-999 required=5 tests=[AWL=0.207, + RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01] + autolearn=disabled +Received: from arlo.cworth.org ([127.0.0.1]) + by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id fAQjkAqo8-uS for ; + Fri, 25 Sep 2015 09:48:43 -0700 (PDT) +Received: from mail-wi0-f173.google.com (mail-wi0-f173.google.com + [209.85.212.173]) + by arlo.cworth.org (Postfix) with ESMTPS id 29B626DE0B64 + for ; Fri, 25 Sep 2015 09:48:43 -0700 (PDT) +Received: by wiclk2 with SMTP id lk2so29886671wic.0 + for ; Fri, 25 Sep 2015 09:48:41 -0700 (PDT) +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20130820; + h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to + :references; + bh=bTtx3JWHo+A/OFwZ8eU8V8k7Kk1T6eVxK75jkReLZK8=; + b=Or9VdLXF6hCHQ8DxmNXTGOisUBOkZArkyJbwDWA3BrZ5mVYBsPTupILF7Y9H+dZaYo + 
4L/mtwXdO/cpl5jIa5c7UU8lUpRRDPrKZlZmYcMTc+W9ZyXyzY/HUJGT1T7WuBeaaFNq + rk7FnTsaDBMydPOgsYabHjaenZAhwSpbuHqaAC+kwMB2mIpBfTdxGISMMSJbnGwj1x5l + AWflv6POtfYTXPld9HTlCEIV8dZRRQ13VmRJ3mhlRAU3FPRJOIloIn0C84uVFUd2s+J/ + /CpT3aaF7PMKPGfU4GYBoo9Sipdn78AbjF3xzuKlWgiw/czsn0t1H8GV4Qc75RgXnqKe + Faig== +X-Gm-Message-State: + ALoCoQlIxvFmFLHeDfEAA/5pGeOtg2U5F2N7WKc9GmEpTYWbvJWaSgVzGtF6V0RW+JFbcxmx4ueE +X-Received: by 10.180.107.164 with SMTP id hd4mr4754539wib.94.1443199721667; + Fri, 25 Sep 2015 09:48:41 -0700 (PDT) +Received: from localhost (mobile-access-bcee63-221.dhcp.inet.fi. + [188.238.99.221]) + by smtp.gmail.com with ESMTPSA id hk5sm4205985wjb.6.2015.09.25.09.48.40 + (version=TLSv1/SSLv3 cipher=OTHER); + Fri, 25 Sep 2015 09:48:40 -0700 (PDT) +From: Jani Nikula +To: David Bremner , Jani Nikula , + notmuch@notmuchmail.org +Subject: [PATCH 6/9 v3 part 2/2] cli: change the data structure for notmuch + address deduplication +Date: Fri, 25 Sep 2015 19:48:20 +0300 +Message-Id: <1443199700-16654-2-git-send-email-jani@nikula.org> +X-Mailer: git-send-email 2.1.4 +In-Reply-To: <1443199700-16654-1-git-send-email-jani@nikula.org> +References: <878u7v2y3x.fsf@zancas.localnet> + <1443199700-16654-1-git-send-email-jani@nikula.org> +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.18 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Fri, 25 Sep 2015 16:48:45 -0000 + +Currently we key the address hash table with the case sensitive "name +
<address>". Switch to case insensitive keying with just address, and +store the case sensitive name and address in linked lists. This will +be helpful in adding support for different deduplication schemes in +the future. + +There will be a slight performance penalty for the current full case +sensitive name + address deduplication, but this is simpler as a whole +when other deduplication schemes are added, and I expect the schemes +to be added to become more popular than the current default. + +Apart from the possible performance penalty, the only user visible +change should be the change in the output ordering for +--output=count. The order is not guaranteed (and is based on hash +table traversal) currently anyway, so this should be of no +consequence. + +--- + +v3: abstract strcmp_null +--- + notmuch-client.h | 1 + + notmuch-search.c | 80 +++++++++++++++++++++++++++++++++++++++++++++----------- + 2 files changed, 66 insertions(+), 15 deletions(-) + +diff --git a/notmuch-client.h b/notmuch-client.h +index de8a3b15f865..3bd2903ec54a 100644 +--- a/notmuch-client.h ++++ b/notmuch-client.h +@@ -48,6 +48,7 @@ typedef GMimeCryptoContext notmuch_crypto_context_t; + #include + #include + #include ++#include + + #include "talloc-extra.h" + +diff --git a/notmuch-search.c b/notmuch-search.c +index 966c310f8f18..6cac0fcdc1df 100644 +--- a/notmuch-search.c ++++ b/notmuch-search.c +@@ -265,30 +265,70 @@ static mailbox_t *new_mailbox (void *ctx, const char *name, const char *addr) + return mailbox; + } + ++static int mailbox_compare (const void *v1, const void *v2) ++{ ++ const mailbox_t *m1 = v1, *m2 = v2; ++ int ret; ++ ++ ret = strcmp_null (m1->name, m2->name); ++ if (! ret) ++ ret = strcmp (m1->addr, m2->addr); ++ ++ return ret; ++} ++ + /* Returns TRUE iff name and addr is duplicate. If not, stores the + * name/addr pair in order to detect subsequent duplicates. 
*/ + static notmuch_bool_t + is_duplicate (const search_context_t *ctx, const char *name, const char *addr) + { + char *key; ++ GList *list, *l; + mailbox_t *mailbox; + +- key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); +- if (! key) +- return FALSE; ++ list = g_hash_table_lookup (ctx->addresses, addr); ++ if (list) { ++ mailbox_t find = { ++ .name = name, ++ .addr = addr, ++ }; ++ ++ l = g_list_find_custom (list, &find, mailbox_compare); ++ if (l) { ++ mailbox = l->data; ++ mailbox->count++; ++ return TRUE; ++ } ++ ++ mailbox = new_mailbox (ctx->format, name, addr); ++ if (! mailbox) ++ return FALSE; + +- mailbox = g_hash_table_lookup (ctx->addresses, key); +- if (mailbox) { +- mailbox->count++; +- talloc_free (key); +- return TRUE; ++ /* ++ * XXX: It would be more efficient to prepend to the list, but ++ * then we'd have to store the changed list head back to the ++ * hash table. This check is here just to avoid the compiler ++ * warning for unused result. ++ */ ++ if (list != g_list_append (list, mailbox)) ++ INTERNAL_ERROR ("appending to list changed list head\n"); ++ ++ return FALSE; + } + ++ key = talloc_strdup (ctx->format, addr); ++ if (! key) ++ return FALSE; ++ + mailbox = new_mailbox (ctx->format, name, addr); + if (! mailbox) + return FALSE; + +- g_hash_table_insert (ctx->addresses, key, mailbox); ++ list = g_list_append (NULL, mailbox); ++ if (! 
list) ++ return FALSE; ++ ++ g_hash_table_insert (ctx->addresses, key, list); + + return FALSE; + } +@@ -401,12 +441,21 @@ _talloc_free_for_g_hash (void *ptr) + } + + static void +-print_hash_value (unused (gpointer key), gpointer value, gpointer user_data) ++_list_free_for_g_hash (void *ptr) + { +- const mailbox_t *mailbox = value; +- search_context_t *ctx = user_data; ++ g_list_free_full (ptr, _talloc_free_for_g_hash); ++} + +- print_mailbox (ctx, mailbox); ++static void ++print_list_value (void *mailbox, void *context) ++{ ++ print_mailbox (context, mailbox); ++} ++ ++static void ++print_hash_value (unused (void *key), void *list, void *context) ++{ ++ g_list_foreach (list, print_list_value, context); + } + + static int +@@ -794,8 +843,9 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[]) + argc - opt_index, argv + opt_index)) + return EXIT_FAILURE; + +- ctx->addresses = g_hash_table_new_full (g_str_hash, g_str_equal, +- _talloc_free_for_g_hash, _talloc_free_for_g_hash); ++ ctx->addresses = g_hash_table_new_full (strcase_hash, strcase_equal, ++ _talloc_free_for_g_hash, ++ _list_free_for_g_hash); + + ret = do_search_messages (ctx); + +-- +2.1.4 + -- 2.26.2