--- /dev/null
+Return-Path: <zack@upsilon.cc>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+ by olra.theworths.org (Postfix) with ESMTP id 58603431FBD\r
+ for <notmuch@notmuchmail.org>; Wed, 1 Aug 2012 01:10:06 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: 0\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none]\r
+ autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+ by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+ with ESMTP id aKIf-ipc8p2b for <notmuch@notmuchmail.org>;\r
+ Wed, 1 Aug 2012 01:10:05 -0700 (PDT)\r
+Received: from upsilon.hackadomia.org (upsilon.hackadomia.org\r
+ [91.121.245.170]) by olra.theworths.org (Postfix) with ESMTP id 1A80B431FBC\r
+ for <notmuch@notmuchmail.org>; Wed, 1 Aug 2012 01:10:05 -0700 (PDT)\r
+Received: from usha.takhisis.invalid (unknown [151.59.176.148])\r
+ by upsilon.hackadomia.org (Postfix) with ESMTPSA id 151B496024;\r
+ Wed, 1 Aug 2012 10:10:04 +0200 (CEST)\r
+Received: by usha.takhisis.invalid (Postfix, from userid 1000)\r
+ id 0BF9E683154; Wed, 1 Aug 2012 10:10:03 +0200 (CEST)\r
+From: Stefano Zacchiroli <zack@upsilon.cc>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH 1/2] Add duplicate message removal for notmuch-mutt.\r
+Date: Wed, 1 Aug 2012 10:09:41 +0200\r
+Message-Id: <1343808582-9519-2-git-send-email-zack@upsilon.cc>\r
+X-Mailer: git-send-email 1.7.10.4\r
+In-Reply-To: <1343808582-9519-1-git-send-email-zack@upsilon.cc>\r
+References: <1343808582-9519-1-git-send-email-zack@upsilon.cc>\r
+Cc: "Kevin J. McCarthy" <kevin@8t8.us>, Stefano Zacchiroli <zack@upsilon.cc>\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+ <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Wed, 01 Aug 2012 08:10:06 -0000\r
+\r
+From: Kevin McCarthy <kevin@8t8.us>\r
+\r
+Add a --remove-dups flag which removes duplicate files from search and\r
+thread results. Uses fdupes if installed. Otherwise it runs a size and\r
+Digest::SHA scan on each file to detect duplicates.\r
+\r
+Signed-off-by: Stefano Zacchiroli <zack@upsilon.cc>\r
+---\r
+ contrib/notmuch-mutt/notmuch-mutt | 89 ++++++++++++++++++++++++++++------\r
+ contrib/notmuch-mutt/notmuch-mutt.rc | 4 +-\r
+ 2 files changed, 76 insertions(+), 17 deletions(-)\r
+\r
+diff --git a/contrib/notmuch-mutt/notmuch-mutt b/contrib/notmuch-mutt/notmuch-mutt\r
+index 7c125e6..d14709d 100755\r
+--- a/contrib/notmuch-mutt/notmuch-mutt\r
++++ b/contrib/notmuch-mutt/notmuch-mutt\r
+@@ -18,6 +18,8 @@ use Mail::Box::Maildir;\r
+ use Pod::Usage;\r
+ use String::ShellQuote;\r
+ use Term::ReadLine;\r
++use Digest::SHA;\r
++use File::Which;\r
+ \r
+ \r
+ my $xdg_cache_dir = "$ENV{HOME}/.cache";\r
+@@ -34,16 +36,65 @@ sub empty_maildir($) {\r
+ $folder->close();\r
+ }\r
+ \r
+-# search($maildir, $query)\r
++# Match files by size and SHA-256; then delete duplicates\r
++sub builtin_remove_dups($) {\r
++ my ($maildir) = @_;\r
++ my (%size_to_files, %sha_to_files);\r
++\r
++ # Group files by matching sizes\r
++ foreach my $file (glob("$maildir/cur/*")) {\r
++ my $size = -s $file;\r
++ push(@{$size_to_files{$size}}, $file) if $size;\r
++ }\r
++\r
++ foreach my $same_size_files (values %size_to_files) {\r
++ # Don't run sha unless there is another file of the same size\r
++ next if scalar(@$same_size_files) < 2;\r
++ %sha_to_files = ();\r
++\r
++ # Group files with matching sizes by SHA-256\r
++ foreach my $file (@$same_size_files) {\r
++ open(my $fh, '<', $file) or next;\r
++ binmode($fh);\r
++ my $sha256hash = Digest::SHA->new(256)->addfile($fh)->hexdigest;\r
++ close($fh);\r
++\r
++ push(@{$sha_to_files{$sha256hash}}, $file);\r
++ }\r
++\r
++ # Remove duplicates\r
++ foreach my $same_sha_files (values %sha_to_files) {\r
++ next if scalar(@$same_sha_files) < 2;\r
++ unlink(@{$same_sha_files}[1..$#$same_sha_files]);\r
++ }\r
++ }\r
++}\r
++\r
++# Use either fdupes or the built-in scanner to detect and remove duplicate\r
++# search results in the maildir\r
++sub remove_duplicates($) {\r
++ my ($maildir) = @_;\r
++\r
++ my $fdupes = which("fdupes");\r
++ if ($fdupes) {\r
++ system("$fdupes --hardlinks --symlinks --delete --noprompt"\r
++ . " --quiet $maildir/cur/ > /dev/null");\r
++ } else {\r
++ builtin_remove_dups($maildir);\r
++ }\r
++}\r
++\r
++# search($maildir, $remove_dups, $query)\r
+ # search mails according to $query with notmuch; store results in $maildir\r
+-sub search($$) {\r
+- my ($maildir, $query) = @_;\r
++sub search($$$) {\r
++ my ($maildir, $remove_dups, $query) = @_;\r
+ $query = shell_quote($query);\r
+ \r
+ empty_maildir($maildir);\r
+ system("notmuch search --output=files $query"\r
+ . " | sed -e 's: :\\\\ :g'"\r
+ . " | xargs --no-run-if-empty ln -s -t $maildir/cur/");\r
++ remove_duplicates($maildir) if ($remove_dups);\r
+ }\r
+ \r
+ sub prompt($$) {\r
+@@ -74,28 +125,28 @@ sub get_message_id() {\r
+ return $1;\r
+ }\r
+ \r
+-sub search_action($$@) {\r
+- my ($interactive, $results_dir, @params) = @_;\r
++sub search_action($$$@) {\r
++ my ($interactive, $results_dir, $remove_dups, @params) = @_;\r
+ \r
+ if (! $interactive) {\r
+- search($results_dir, join(' ', @params));\r
++ search($results_dir, $remove_dups, join(' ', @params));\r
+ } else {\r
+ my $query = prompt("search ('?' for man): ", join(' ', @params));\r
+ if ($query ne "") {\r
+- search($results_dir,$query);\r
++ search($results_dir, $remove_dups, $query);\r
+ }\r
+ }\r
+ }\r
+ \r
+-sub thread_action(@) {\r
+- my ($results_dir, @params) = @_;\r
++sub thread_action($$@) {\r
++ my ($results_dir, $remove_dups, @params) = @_;\r
+ \r
+ my $mid = get_message_id();\r
+ my $search_cmd = 'notmuch search --output=threads ' . shell_quote("id:$mid");\r
+ my $tid = `$search_cmd`; # get thread id\r
+ chomp($tid);\r
+ \r
+- search($results_dir, $tid);\r
++ search($results_dir, $remove_dups, $tid);\r
+ }\r
+ \r
+ sub tag_action(@) {\r
+@@ -118,11 +169,13 @@ sub main() {\r
+ my $results_dir = "$cache_dir/results";\r
+ my $interactive = 0;\r
+ my $help_needed = 0;\r
++ my $remove_dups = 0;\r
+ \r
+ my $getopt = GetOptions(\r
+ "h|help" => \$help_needed,\r
+ "o|output-dir=s" => \$results_dir,\r
+- "p|prompt" => \$interactive);\r
++ "p|prompt" => \$interactive,\r
++ "r|remove-dups" => \$remove_dups);\r
+ if (! $getopt || $#ARGV < 0) { die_usage() };\r
+ my ($action, @params) = ($ARGV[0], @ARGV[1..$#ARGV]);\r
+ \r
+@@ -136,9 +189,9 @@ sub main() {\r
+ print STDERR "Error: no search term provided\n\n";\r
+ die_usage();\r
+ } elsif ($action eq "search") {\r
+- search_action($interactive, $results_dir, @params);\r
++ search_action($interactive, $results_dir, $remove_dups, @params);\r
+ } elsif ($action eq "thread") {\r
+- thread_action($results_dir, @params);\r
++ thread_action($results_dir, $remove_dups, @params);\r
+ } elsif ($action eq "tag") {\r
+ tag_action(@params);\r
+ } else {\r
+@@ -189,6 +242,12 @@ be overwritten. (Default: F<~/.cache/notmuch/mutt/results/>)\r
+ Instead of using command line search terms, prompt the user for them (only for\r
+ "search").\r
+ \r
++=item -r\r
++\r
++=item --remove-dups\r
++\r
++Remove duplicates from search results.\r
++\r
+ =item -h\r
+ \r
+ =item --help\r
+@@ -205,10 +264,10 @@ the following in your Mutt configuration (usually one of: F<~/.muttrc>,\r
+ F</etc/Muttrc>, or a configuration snippet under F</etc/Muttrc.d/>):\r
+ \r
+ macro index <F8> \\r
+- "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt --prompt search<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter>" \\r
++ "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt -r --prompt search<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter>" \\r
+ "notmuch: search mail"\r
+ macro index <F9> \\r
+- "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt thread<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter><enter-command>set wait_key<enter>" \\r
++ "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt -r thread<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter><enter-command>set wait_key<enter>" \\r
+ "notmuch: reconstruct thread"\r
+ macro index <F6> \\r
+ "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt tag -- -inbox<enter>" \\r
+diff --git a/contrib/notmuch-mutt/notmuch-mutt.rc b/contrib/notmuch-mutt/notmuch-mutt.rc\r
+index b0a38d1..ddc4b48 100644\r
+--- a/contrib/notmuch-mutt/notmuch-mutt.rc\r
++++ b/contrib/notmuch-mutt/notmuch-mutt.rc\r
+@@ -1,8 +1,8 @@\r
+ macro index <F8> \\r
+- "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt --prompt search<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter>" \\r
++ "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt -r --prompt search<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter>" \\r
+ "notmuch: search mail"\r
+ macro index <F9> \\r
+- "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt thread<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter><enter-command>set wait_key<enter>" \\r
++ "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt -r thread<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter><enter-command>set wait_key<enter>" \\r
+ "notmuch: reconstruct thread"\r
+ macro index <F6> \\r
+ "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt tag -- -inbox<enter>" \\r
+-- \r
+1.7.10.4\r
+\r