From 26ce6f1fe08d77f4e52c411c35247fcc7c2e5f1b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sat, 29 Aug 2015 17:56:35 +0300 Subject: [PATCH] [RFC PATCH 4/5] cli: change the data structure for notmuch address deduplication --- 26/5e5bf3dc2e3452124de3f00821816a34b684a5 | 231 ++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 26/5e5bf3dc2e3452124de3f00821816a34b684a5 diff --git a/26/5e5bf3dc2e3452124de3f00821816a34b684a5 b/26/5e5bf3dc2e3452124de3f00821816a34b684a5 new file mode 100644 index 000000000..a36601f11 --- /dev/null +++ b/26/5e5bf3dc2e3452124de3f00821816a34b684a5 @@ -0,0 +1,231 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by arlo.cworth.org (Postfix) with ESMTP id 306FC6DE1642 + for ; Sat, 29 Aug 2015 07:56:56 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at cworth.org +X-Amavis-Alert: BAD HEADER SECTION, Duplicate header field: "References" +X-Spam-Flag: NO +X-Spam-Score: -0.342 +X-Spam-Level: +X-Spam-Status: No, score=-0.342 tagged_above=-999 required=5 tests=[AWL=0.378, + RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01] + autolearn=disabled +Received: from arlo.cworth.org ([127.0.0.1]) + by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 9QYBvn1_ocJo for ; + Sat, 29 Aug 2015 07:56:54 -0700 (PDT) +Received: from mail-wi0-f178.google.com (mail-wi0-f178.google.com + [209.85.212.178]) + by arlo.cworth.org (Postfix) with ESMTPS id 32FA66DE00CB + for ; Sat, 29 Aug 2015 07:56:48 -0700 (PDT) +Received: by wicne3 with SMTP id ne3so9345992wic.0 + for ; Sat, 29 Aug 2015 07:56:46 -0700 (PDT) +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20130820; + h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to + :references:in-reply-to:references; + bh=pxnad6MtpP8ZeVFYco5HnK3jBxG6v2lyls84zu9uWEY=; + 
b=JrRjM1e/RPMwlOEPdOsgOBXLrfVqC6I0YNjd+oc9Ft9bI9pc8ltjoaqpODvliZtRXJ + aeDYuuH/tznMY2Yw+Y+tqZg/1BIazqg+w4ww5I9rBoNMWtzhuZRTg9kFsFFOfoFsd5ZI + e/j+PxFfYYrzPXj9d895Dwr2rKFLg0IoBwp5+nAwNu9t5/uyz6TwjXfvL+0pkYn1/eIv + mpMxcQF4mwe/w9B6wyakGLGCbZtt/xOdgv2vJvzws+LvzID98hz5tYa4aCeQyQlmFhWF + UnYzLmV6tmgKDDwO/JbZYGlSeSOqqMWwDOUFCs6W+sH7nWk5Cf17tCfOtNHm5xr+XojH + sRaA== +X-Gm-Message-State: + ALoCoQmr1kS8RyUARePM0Lvr5mBNgyZCzKOdEATIztdcTDqi4Uwfe5R2tr+KFohTvJTXGwR0ayah +X-Received: by 10.195.11.202 with SMTP id ek10mr17938463wjd.12.1440860206781; + Sat, 29 Aug 2015 07:56:46 -0700 (PDT) +Received: from localhost (mobile-access-bcee4f-131.dhcp.inet.fi. + [188.238.79.131]) + by smtp.gmail.com with ESMTPSA id gt10sm8918092wib.20.2015.08.29.07.56.45 + (version=TLSv1/SSLv3 cipher=OTHER); + Sat, 29 Aug 2015 07:56:46 -0700 (PDT) +From: Jani Nikula +To: notmuch@notmuchmail.org +Subject: [RFC PATCH 4/5] cli: change the data structure for notmuch address + deduplication +Date: Sat, 29 Aug 2015 17:56:35 +0300 +Message-Id: + +X-Mailer: git-send-email 2.1.4 +In-Reply-To: +References: +In-Reply-To: +References: +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.18 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Sat, 29 Aug 2015 14:56:56 -0000 + +Currently we key the address hash table with the case sensitive "name +
<address>". Switch to case insensitive keying with just address, and +store the case sensitive name and address in linked lists. This will +be helpful in adding support for different deduplication schemes in the +future. There will be a slight performance penalty for the current +full case sensitive name + address deduplication, but this is simpler +as a whole when other deduplication schemes are added, and I expect +the schemes to be added to become more popular than the current +default. +--- + notmuch-client.h | 1 + + notmuch-search.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++---------- + 2 files changed, 76 insertions(+), 16 deletions(-) + +diff --git a/notmuch-client.h b/notmuch-client.h +index 882aa30563df..97d68d1158ac 100644 +--- a/notmuch-client.h ++++ b/notmuch-client.h +@@ -48,6 +48,7 @@ typedef GMimeCryptoContext notmuch_crypto_context_t; + #include + #include + #include ++#include + + #include "talloc-extra.h" + +diff --git a/notmuch-search.c b/notmuch-search.c +index be8afcc0187b..60311393198d 100644 +--- a/notmuch-search.c ++++ b/notmuch-search.c +@@ -258,30 +258,79 @@ static mailbox_t *new_mailbox (void *ctx, const char *name, const char *addr) + return mailbox; + } + ++static int ++strcase_equal (const void *a, const void *b) ++{ ++ return strcasecmp (a, b) == 0; ++} ++ ++static unsigned int ++strcase_hash (const void *ptr) ++{ ++ const char *s = ptr; ++ ++ /* This is the djb2 hash. */ ++ unsigned int hash = 5381; ++ while (s && *s) { ++ hash = ((hash << 5) + hash) + tolower (*s); ++ s++; ++ } ++ ++ return hash; ++} ++ ++static int mailbox_compare (const void *v1, const void *v2) ++{ ++ const mailbox_t *m1 = v1, *m2 = v2; ++ int v; ++ ++ if (m1->name && m2->name) ++ v = strcmp (m1->name, m2->name); ++ else ++ v = !!m1->name - !!m2->name; ++ ++ if (! v) ++ v = strcmp (m1->addr, m2->addr); ++ ++ return v; ++} ++ + /* Returns TRUE iff name and addr is duplicate. If not, stores the + * name/addr pair in order to detect subsequent duplicates. 
*/ + static notmuch_bool_t + is_duplicate (const search_context_t *ctx, const char *name, const char *addr) + { + char *key; ++ GList *list, *l; + mailbox_t *mailbox; + +- key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); +- if (! key) ++ mailbox = new_mailbox (ctx->format, name, addr); ++ if (! mailbox) + return FALSE; + +- mailbox = g_hash_table_lookup (ctx->addresses, key); +- if (mailbox) { +- mailbox->count++; +- talloc_free (key); +- return TRUE; ++ list = g_hash_table_lookup (ctx->addresses, addr); ++ if (list) { ++ l = g_list_find_custom (list, mailbox, mailbox_compare); ++ if (l) { ++ talloc_free (mailbox); ++ mailbox = l->data; ++ mailbox->count++; ++ return TRUE; ++ } ++ ++ g_list_append (list, mailbox); ++ return FALSE; + } + +- mailbox = new_mailbox (ctx->format, name, addr); +- if (! mailbox) ++ key = talloc_strdup (ctx->format, addr); ++ if (! key) + return FALSE; + +- g_hash_table_insert (ctx->addresses, key, mailbox); ++ list = g_list_append (NULL, mailbox); ++ if (! 
list) ++ return FALSE; ++ ++ g_hash_table_insert (ctx->addresses, key, list); + + return FALSE; + } +@@ -393,12 +442,21 @@ _talloc_free_for_g_hash (void *ptr) + } + + static void +-print_hash_value (unused (gpointer key), gpointer value, gpointer user_data) ++_list_free_for_g_hash (void *ptr) ++{ ++ g_list_free_full (ptr, _talloc_free_for_g_hash); ++} ++ ++static void ++print_list_value (void *mailbox, void *context) + { +- const mailbox_t *mailbox = value; +- search_context_t *ctx = user_data; ++ print_mailbox (context, mailbox); ++} + +- print_mailbox (ctx, mailbox); ++static void ++print_hash_value (unused (void *key), void *list, void *context) ++{ ++ g_list_foreach (list, print_list_value, context); + } + + static int +@@ -778,8 +836,9 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[]) + argc - opt_index, argv + opt_index)) + return EXIT_FAILURE; + +- ctx->addresses = g_hash_table_new_full (g_str_hash, g_str_equal, +- _talloc_free_for_g_hash, _talloc_free_for_g_hash); ++ ctx->addresses = g_hash_table_new_full (strcase_hash, strcase_equal, ++ _talloc_free_for_g_hash, ++ _list_free_for_g_hash); + + ret = do_search_messages (ctx); + +-- +2.1.4 + -- 2.26.2