Return-Path: X-Original-To: notmuch@notmuchmail.org Delivered-To: notmuch@notmuchmail.org Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 306FC6DE1642 for ; Sat, 29 Aug 2015 07:56:56 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Amavis-Alert: BAD HEADER SECTION, Duplicate header field: "References" X-Spam-Flag: NO X-Spam-Score: -0.342 X-Spam-Level: X-Spam-Status: No, score=-0.342 tagged_above=-999 required=5 tests=[AWL=0.378, RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 9QYBvn1_ocJo for ; Sat, 29 Aug 2015 07:56:54 -0700 (PDT) Received: from mail-wi0-f178.google.com (mail-wi0-f178.google.com [209.85.212.178]) by arlo.cworth.org (Postfix) with ESMTPS id 32FA66DE00CB for ; Sat, 29 Aug 2015 07:56:48 -0700 (PDT) Received: by wicne3 with SMTP id ne3so9345992wic.0 for ; Sat, 29 Aug 2015 07:56:46 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:in-reply-to:references; bh=pxnad6MtpP8ZeVFYco5HnK3jBxG6v2lyls84zu9uWEY=; b=JrRjM1e/RPMwlOEPdOsgOBXLrfVqC6I0YNjd+oc9Ft9bI9pc8ltjoaqpODvliZtRXJ aeDYuuH/tznMY2Yw+Y+tqZg/1BIazqg+w4ww5I9rBoNMWtzhuZRTg9kFsFFOfoFsd5ZI e/j+PxFfYYrzPXj9d895Dwr2rKFLg0IoBwp5+nAwNu9t5/uyz6TwjXfvL+0pkYn1/eIv mpMxcQF4mwe/w9B6wyakGLGCbZtt/xOdgv2vJvzws+LvzID98hz5tYa4aCeQyQlmFhWF UnYzLmV6tmgKDDwO/JbZYGlSeSOqqMWwDOUFCs6W+sH7nWk5Cf17tCfOtNHm5xr+XojH sRaA== X-Gm-Message-State: ALoCoQmr1kS8RyUARePM0Lvr5mBNgyZCzKOdEATIztdcTDqi4Uwfe5R2tr+KFohTvJTXGwR0ayah X-Received: by 10.195.11.202 with SMTP id ek10mr17938463wjd.12.1440860206781; Sat, 29 Aug 2015 07:56:46 -0700 (PDT) Received: from localhost (mobile-access-bcee4f-131.dhcp.inet.fi. 
[188.238.79.131]) by smtp.gmail.com with ESMTPSA id gt10sm8918092wib.20.2015.08.29.07.56.45 (version=TLSv1/SSLv3 cipher=OTHER); Sat, 29 Aug 2015 07:56:46 -0700 (PDT) From: Jani Nikula To: notmuch@notmuchmail.org Subject: [RFC PATCH 4/5] cli: change the data structure for notmuch address deduplication Date: Sat, 29 Aug 2015 17:56:35 +0300 Message-Id: X-Mailer: git-send-email 2.1.4 In-Reply-To: References: In-Reply-To: References: X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 29 Aug 2015 14:56:56 -0000 Currently we key the address hash table with the case sensitive "name
<address>". Switch to case insensitive keying with just address, and store the case sensitive name and address in linked lists. This will be helpful in adding support for different deduplication schemes in the future. There will be a slight performance penalty for the current full case sensitive name + address deduplication, but this is simpler as a whole when other deduplication schemes are added, and I expect the schemes to be added to become more popular than the current default. --- notmuch-client.h | 1 + notmuch-search.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index 882aa30563df..97d68d1158ac 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -48,6 +48,7 @@ typedef GMimeCryptoContext notmuch_crypto_context_t; #include #include #include +#include #include "talloc-extra.h" diff --git a/notmuch-search.c b/notmuch-search.c index be8afcc0187b..60311393198d 100644 --- a/notmuch-search.c +++ b/notmuch-search.c @@ -258,30 +258,79 @@ static mailbox_t *new_mailbox (void *ctx, const char *name, const char *addr) return mailbox; } +static int +strcase_equal (const void *a, const void *b) +{ + return strcasecmp (a, b) == 0; +} + +static unsigned int +strcase_hash (const void *ptr) +{ + const char *s = ptr; + + /* This is the djb2 hash. */ + unsigned int hash = 5381; + while (s && *s) { + hash = ((hash << 5) + hash) + tolower (*s); + s++; + } + + return hash; +} + +static int mailbox_compare (const void *v1, const void *v2) +{ + const mailbox_t *m1 = v1, *m2 = v2; + int v; + + if (m1->name && m2->name) + v = strcmp (m1->name, m2->name); + else + v = !!m1->name - !!m2->name; + + if (! v) + v = strcmp (m1->addr, m2->addr); + + return v; +} + /* Returns TRUE iff name and addr is duplicate. If not, stores the * name/addr pair in order to detect subsequent duplicates. 
*/ static notmuch_bool_t is_duplicate (const search_context_t *ctx, const char *name, const char *addr) { char *key; + GList *list, *l; mailbox_t *mailbox; - key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); - if (! key) + mailbox = new_mailbox (ctx->format, name, addr); + if (! mailbox) return FALSE; - mailbox = g_hash_table_lookup (ctx->addresses, key); - if (mailbox) { - mailbox->count++; - talloc_free (key); - return TRUE; + list = g_hash_table_lookup (ctx->addresses, addr); + if (list) { + l = g_list_find_custom (list, mailbox, mailbox_compare); + if (l) { + talloc_free (mailbox); + mailbox = l->data; + mailbox->count++; + return TRUE; + } + + g_list_append (list, mailbox); + return FALSE; } - mailbox = new_mailbox (ctx->format, name, addr); - if (! mailbox) + key = talloc_strdup (ctx->format, addr); + if (! key) return FALSE; - g_hash_table_insert (ctx->addresses, key, mailbox); + list = g_list_append (NULL, mailbox); + if (! list) + return FALSE; + + g_hash_table_insert (ctx->addresses, key, list); return FALSE; } @@ -393,12 +442,21 @@ _talloc_free_for_g_hash (void *ptr) } static void -print_hash_value (unused (gpointer key), gpointer value, gpointer user_data) +_list_free_for_g_hash (void *ptr) +{ + g_list_free_full (ptr, _talloc_free_for_g_hash); +} + +static void +print_list_value (void *mailbox, void *context) { - const mailbox_t *mailbox = value; - search_context_t *ctx = user_data; + print_mailbox (context, mailbox); +} - print_mailbox (ctx, mailbox); +static void +print_hash_value (unused (void *key), void *list, void *context) +{ + g_list_foreach (list, print_list_value, context); } static int @@ -778,8 +836,9 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[]) argc - opt_index, argv + opt_index)) return EXIT_FAILURE; - ctx->addresses = g_hash_table_new_full (g_str_hash, g_str_equal, - _talloc_free_for_g_hash, _talloc_free_for_g_hash); + ctx->addresses = g_hash_table_new_full (strcase_hash, strcase_equal, + 
_talloc_free_for_g_hash, + _list_free_for_g_hash); ret = do_search_messages (ctx); -- 2.1.4