1 Return-Path: <zack@upsilon.cc>
\r
2 X-Original-To: notmuch@notmuchmail.org
\r
3 Delivered-To: notmuch@notmuchmail.org
\r
4 Received: from localhost (localhost [127.0.0.1])
\r
5 by olra.theworths.org (Postfix) with ESMTP id 58603431FBD
\r
6 for <notmuch@notmuchmail.org>; Wed, 1 Aug 2012 01:10:06 -0700 (PDT)
\r
7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org
\r
11 X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none]
\r
13 Received: from olra.theworths.org ([127.0.0.1])
\r
14 by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)
\r
15 with ESMTP id aKIf-ipc8p2b for <notmuch@notmuchmail.org>;
\r
16 Wed, 1 Aug 2012 01:10:05 -0700 (PDT)
\r
17 Received: from upsilon.hackadomia.org (upsilon.hackadomia.org
\r
18 [91.121.245.170]) by olra.theworths.org (Postfix) with ESMTP id 1A80B431FBC
\r
19 for <notmuch@notmuchmail.org>; Wed, 1 Aug 2012 01:10:05 -0700 (PDT)
\r
20 Received: from usha.takhisis.invalid (unknown [151.59.176.148])
\r
21 by upsilon.hackadomia.org (Postfix) with ESMTPSA id 151B496024;
\r
22 Wed, 1 Aug 2012 10:10:04 +0200 (CEST)
\r
23 Received: by usha.takhisis.invalid (Postfix, from userid 1000)
\r
24 id 0BF9E683154; Wed, 1 Aug 2012 10:10:03 +0200 (CEST)
\r
25 From: Stefano Zacchiroli <zack@upsilon.cc>
\r
26 To: notmuch@notmuchmail.org
\r
27 Subject: [PATCH 1/2] Add duplicate message removal for notmuch-mutt.
\r
28 Date: Wed, 1 Aug 2012 10:09:41 +0200
\r
29 Message-Id: <1343808582-9519-2-git-send-email-zack@upsilon.cc>
\r
30 X-Mailer: git-send-email 1.7.10.4
\r
31 In-Reply-To: <1343808582-9519-1-git-send-email-zack@upsilon.cc>
\r
32 References: <1343808582-9519-1-git-send-email-zack@upsilon.cc>
\r
33 Cc: "Kevin J. McCarthy" <kevin@8t8.us>, Stefano Zacchiroli <zack@upsilon.cc>
\r
34 X-BeenThere: notmuch@notmuchmail.org
\r
35 X-Mailman-Version: 2.1.13
\r
37 List-Id: "Use and development of the notmuch mail system."
\r
38 <notmuch.notmuchmail.org>
\r
39 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,
\r
40 <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>
\r
41 List-Archive: <http://notmuchmail.org/pipermail/notmuch>
\r
42 List-Post: <mailto:notmuch@notmuchmail.org>
\r
43 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>
\r
44 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,
\r
45 <mailto:notmuch-request@notmuchmail.org?subject=subscribe>
\r
46 X-List-Received-Date: Wed, 01 Aug 2012 08:10:06 -0000
\r
48 From: Kevin McCarthy <kevin@8t8.us>
\r
50 Add a --remove-dups flag which removes duplicate files from search and
\r
51 thread results. Uses fdupes if installed. Otherwise it groups files by size
\r
52 and compares Digest::SHA (SHA-256) digests of same-sized files to detect duplicates.
\r
54 Signed-off-by: Stefano Zacchiroli <zack@upsilon.cc>
\r
56 contrib/notmuch-mutt/notmuch-mutt | 89 ++++++++++++++++++++++++++++------
\r
57 contrib/notmuch-mutt/notmuch-mutt.rc | 4 +-
\r
58 2 files changed, 76 insertions(+), 17 deletions(-)
\r
60 diff --git a/contrib/notmuch-mutt/notmuch-mutt b/contrib/notmuch-mutt/notmuch-mutt
\r
61 index 7c125e6..d14709d 100755
\r
62 --- a/contrib/notmuch-mutt/notmuch-mutt
\r
63 +++ b/contrib/notmuch-mutt/notmuch-mutt
\r
64 @@ -18,6 +18,8 @@ use Mail::Box::Maildir;
\r
66 use String::ShellQuote;
\r
72 my $xdg_cache_dir = "$ENV{HOME}/.cache";
\r
73 @@ -34,16 +36,65 @@ sub empty_maildir($) {
\r
77 -# search($maildir, $query)
\r
78 +# Match files by size and SHA-256; then delete duplicates
\r
79 +sub builtin_remove_dups($) {
\r
80 + my ($maildir) = @_;
\r
81 + my (%size_to_files, %sha_to_files);
\r
83 + # Group files by matching sizes
\r
84 + foreach my $file (glob("$maildir/cur/*")) {
\r
85 + my $size = -s $file;
\r
86 + push(@{$size_to_files{$size}}, $file) if $size;
\r
89 + foreach my $same_size_files (values %size_to_files) {
\r
90 + # Don't run sha unless there is another file of the same size
\r
91 + next if scalar(@$same_size_files) < 2;
\r
92 + %sha_to_files = ();
\r
94 + # Group files with matching sizes by SHA-256
\r
95 + foreach my $file (@$same_size_files) {
\r
96 + open(my $fh, '<', $file) or next;
\r
98 + my $sha256hash = Digest::SHA->new(256)->addfile($fh)->hexdigest;
\r
101 + push(@{$sha_to_files{$sha256hash}}, $file);
\r
104 + # Remove duplicates
\r
105 + foreach my $same_sha_files (values %sha_to_files) {
\r
106 + next if scalar(@$same_sha_files) < 2;
\r
107 + unlink(@{$same_sha_files}[1..$#$same_sha_files]);
\r
112 +# Use either fdupes or the built-in scanner to detect and remove duplicate
\r
113 +# search results in the maildir
\r
114 +sub remove_duplicates($) {
\r
115 + my ($maildir) = @_;
\r
117 + my $fdupes = which("fdupes");
\r
119 + system("$fdupes --hardlinks --symlinks --delete --noprompt"
\r
120 + . " --quiet $maildir/cur/ > /dev/null");
\r
122 + builtin_remove_dups($maildir);
\r
126 +# search($maildir, $remove_dups, $query)
\r
127 # search mails according to $query with notmuch; store results in $maildir
\r
129 - my ($maildir, $query) = @_;
\r
131 + my ($maildir, $remove_dups, $query) = @_;
\r
132 $query = shell_quote($query);
\r
134 empty_maildir($maildir);
\r
135 system("notmuch search --output=files $query"
\r
136 . " | sed -e 's: :\\\\ :g'"
\r
137 . " | xargs --no-run-if-empty ln -s -t $maildir/cur/");
\r
138 + remove_duplicates($maildir) if ($remove_dups);
\r
142 @@ -74,28 +125,28 @@ sub get_message_id() {
\r
146 -sub search_action($$@) {
\r
147 - my ($interactive, $results_dir, @params) = @_;
\r
148 +sub search_action($$$@) {
\r
149 + my ($interactive, $results_dir, $remove_dups, @params) = @_;
\r
151 if (! $interactive) {
\r
152 - search($results_dir, join(' ', @params));
\r
153 + search($results_dir, $remove_dups, join(' ', @params));
\r
155 my $query = prompt("search ('?' for man): ", join(' ', @params));
\r
156 if ($query ne "") {
\r
157 - search($results_dir,$query);
\r
158 + search($results_dir, $remove_dups, $query);
\r
163 -sub thread_action(@) {
\r
164 - my ($results_dir, @params) = @_;
\r
165 +sub thread_action($$@) {
\r
166 + my ($results_dir, $remove_dups, @params) = @_;
\r
168 my $mid = get_message_id();
\r
169 my $search_cmd = 'notmuch search --output=threads ' . shell_quote("id:$mid");
\r
170 my $tid = `$search_cmd`; # get thread id
\r
173 - search($results_dir, $tid);
\r
174 + search($results_dir, $remove_dups, $tid);
\r
177 sub tag_action(@) {
\r
178 @@ -118,11 +169,13 @@ sub main() {
\r
179 my $results_dir = "$cache_dir/results";
\r
180 my $interactive = 0;
\r
181 my $help_needed = 0;
\r
182 + my $remove_dups = 0;
\r
184 my $getopt = GetOptions(
\r
185 "h|help" => \$help_needed,
\r
186 "o|output-dir=s" => \$results_dir,
\r
187 - "p|prompt" => \$interactive);
\r
188 + "p|prompt" => \$interactive,
\r
189 + "r|remove-dups" => \$remove_dups);
\r
190 if (! $getopt || $#ARGV < 0) { die_usage() };
\r
191 my ($action, @params) = ($ARGV[0], @ARGV[1..$#ARGV]);
\r
193 @@ -136,9 +189,9 @@ sub main() {
\r
194 print STDERR "Error: no search term provided\n\n";
\r
196 } elsif ($action eq "search") {
\r
197 - search_action($interactive, $results_dir, @params);
\r
198 + search_action($interactive, $results_dir, $remove_dups, @params);
\r
199 } elsif ($action eq "thread") {
\r
200 - thread_action($results_dir, @params);
\r
201 + thread_action($results_dir, $remove_dups, @params);
\r
202 } elsif ($action eq "tag") {
\r
203 tag_action(@params);
\r
205 @@ -189,6 +242,12 @@ be overwritten. (Default: F<~/.cache/notmuch/mutt/results/>)
\r
206 Instead of using command line search terms, prompt the user for them (only for
\r
211 +=item --remove-dups
\r
213 +Remove duplicates from search results.
\r
218 @@ -205,10 +264,10 @@ the following in your Mutt configuration (usually one of: F<~/.muttrc>,
\r
219 F</etc/Muttrc>, or a configuration snippet under F</etc/Muttrc.d/>):
\r
222 - "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt --prompt search<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter>" \
\r
223 + "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt -r --prompt search<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter>" \
\r
224 "notmuch: search mail"
\r
226 - "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt thread<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter><enter-command>set wait_key<enter>" \
\r
227 + "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt -r thread<enter><change-folder-readonly>~/.cache/notmuch/mutt/results<enter><enter-command>set wait_key<enter>" \
\r
228 "notmuch: reconstruct thread"
\r
230 "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt tag -- -inbox<enter>" \
\r
231 diff --git a/contrib/notmuch-mutt/notmuch-mutt.rc b/contrib/notmuch-mutt/notmuch-mutt.rc
\r
232 index b0a38d1..ddc4b48 100644
\r
233 --- a/contrib/notmuch-mutt/notmuch-mutt.rc
\r
234 +++ b/contrib/notmuch-mutt/notmuch-mutt.rc
\r
237 - "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt --prompt search<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter>" \
\r
238 + "<enter-command>unset wait_key<enter><shell-escape>notmuch-mutt -r --prompt search<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter>" \
\r
239 "notmuch: search mail"
\r
241 - "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt thread<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter><enter-command>set wait_key<enter>" \
\r
242 + "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt -r thread<enter><change-folder-readonly>`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`<enter><enter-command>set wait_key<enter>" \
\r
243 "notmuch: reconstruct thread"
\r
245 "<enter-command>unset wait_key<enter><pipe-message>notmuch-mutt tag -- -inbox<enter>" \
\r