From fffb92da8e09bb2fed63dc565e6ab7fa9b44d2f9 Mon Sep 17 00:00:00 2001
From: Kevin McCarthy
Date: Wed, 1 Aug 2012 10:09:41 +0200
Subject: [PATCH] Add duplicate message removal for notmuch-mutt.

Add a --remove-dups flag which removes duplicate files from search and
thread results. Uses fdupes if installed. Otherwise it runs a size and
Digest::SHA scan on each file to detect duplicates.

Signed-off-by: Stefano Zacchiroli
---
 contrib/notmuch-mutt/notmuch-mutt    | 89 +++++++++++++++++++++++-----
 contrib/notmuch-mutt/notmuch-mutt.rc |  4 +-
 2 files changed, 76 insertions(+), 17 deletions(-)

diff --git a/contrib/notmuch-mutt/notmuch-mutt b/contrib/notmuch-mutt/notmuch-mutt
index 7c125e6e..d14709df 100755
--- a/contrib/notmuch-mutt/notmuch-mutt
+++ b/contrib/notmuch-mutt/notmuch-mutt
@@ -18,6 +18,8 @@ use Mail::Box::Maildir;
 use Pod::Usage;
 use String::ShellQuote;
 use Term::ReadLine;
+use Digest::SHA;
+use File::Which;
 
 
 my $xdg_cache_dir = "$ENV{HOME}/.cache";
@@ -34,16 +36,65 @@ sub empty_maildir($) {
     $folder->close();
 }
 
-# search($maildir, $query)
+# Match files by size and SHA-256; then delete duplicates
+sub builtin_remove_dups($) {
+    my ($maildir) = @_;
+    my (%size_to_files, %sha_to_files);
+
+    # Group files by matching sizes
+    foreach my $file (glob("$maildir/cur/*")) {
+        my $size = -s $file;
+        push(@{$size_to_files{$size}}, $file) if $size;
+    }
+
+    foreach my $same_size_files (values %size_to_files) {
+        # Don't run sha unless there is another file of the same size
+        next if scalar(@$same_size_files) < 2;
+        %sha_to_files = ();
+
+        # Group files with matching sizes by SHA-256
+        foreach my $file (@$same_size_files) {
+            open(my $fh, '<', $file) or next;
+            binmode($fh);
+            my $sha256hash = Digest::SHA->new(256)->addfile($fh)->hexdigest;
+            close($fh);
+
+            push(@{$sha_to_files{$sha256hash}}, $file);
+        }
+
+        # Remove duplicates
+        foreach my $same_sha_files (values %sha_to_files) {
+            next if scalar(@$same_sha_files) < 2;
+            unlink(@{$same_sha_files}[1..$#$same_sha_files]);
+        }
+    }
+}
+
+# Use either fdupes or the built-in scanner to detect and remove duplicate
+# search results in the maildir
+sub remove_duplicates($) {
+    my ($maildir) = @_;
+
+    my $fdupes = which("fdupes");
+    if ($fdupes) {
+        system("$fdupes --hardlinks --symlinks --delete --noprompt"
+               . " --quiet $maildir/cur/ > /dev/null");
+    } else {
+        builtin_remove_dups($maildir);
+    }
+}
+
+# search($maildir, $remove_dups, $query)
 # search mails according to $query with notmuch; store results in $maildir
-sub search($$) {
-    my ($maildir, $query) = @_;
+sub search($$$) {
+    my ($maildir, $remove_dups, $query) = @_;
     $query = shell_quote($query);
 
     empty_maildir($maildir);
     system("notmuch search --output=files $query"
            . " | sed -e 's: :\\\\ :g'"
            . " | xargs --no-run-if-empty ln -s -t $maildir/cur/");
+    remove_duplicates($maildir) if ($remove_dups);
 }
 
 sub prompt($$) {
@@ -74,28 +125,28 @@ sub get_message_id() {
     return $1;
 }
 
-sub search_action($$@) {
-    my ($interactive, $results_dir, @params) = @_;
+sub search_action($$$@) {
+    my ($interactive, $results_dir, $remove_dups, @params) = @_;
 
     if (! $interactive) {
-        search($results_dir, join(' ', @params));
+        search($results_dir, $remove_dups, join(' ', @params));
     } else {
         my $query = prompt("search ('?' for man): ", join(' ', @params));
         if ($query ne "") {
-            search($results_dir,$query);
+            search($results_dir, $remove_dups, $query);
         }
     }
 }
 
-sub thread_action(@) {
-    my ($results_dir, @params) = @_;
+sub thread_action($$@) {
+    my ($results_dir, $remove_dups, @params) = @_;
 
     my $mid = get_message_id();
     my $search_cmd = 'notmuch search --output=threads ' . shell_quote("id:$mid");
     my $tid = `$search_cmd`; # get thread id
     chomp($tid);
 
-    search($results_dir, $tid);
+    search($results_dir, $remove_dups, $tid);
 }
 
 sub tag_action(@) {
@@ -118,11 +169,13 @@ sub main() {
     my $results_dir = "$cache_dir/results";
     my $interactive = 0;
     my $help_needed = 0;
+    my $remove_dups = 0;
 
     my $getopt = GetOptions(
         "h|help" => \$help_needed,
         "o|output-dir=s" => \$results_dir,
-        "p|prompt" => \$interactive);
+        "p|prompt" => \$interactive,
+        "r|remove-dups" => \$remove_dups);
     if (! $getopt || $#ARGV < 0) { die_usage() };
     my ($action, @params) = ($ARGV[0], @ARGV[1..$#ARGV]);
 
@@ -136,9 +189,9 @@ sub main() {
         print STDERR "Error: no search term provided\n\n";
         die_usage();
     } elsif ($action eq "search") {
-        search_action($interactive, $results_dir, @params);
+        search_action($interactive, $results_dir, $remove_dups, @params);
     } elsif ($action eq "thread") {
-        thread_action($results_dir, @params);
+        thread_action($results_dir, $remove_dups, @params);
     } elsif ($action eq "tag") {
         tag_action(@params);
     } else {
@@ -189,6 +242,12 @@ be overwritten. (Default: F<~/.cache/notmuch/mutt/results/>)
 Instead of using command line search terms, prompt the user for them (only for
 "search").
 
+=item -r
+
+=item --remove-dups
+
+Remove duplicates from search results.
+
 =item -h
 
 =item --help
@@ -205,10 +264,10 @@ the following in your Mutt configuration (usually one of: F<~/.muttrc>,
 F, or a configuration snippet under F):
 
     macro index \
-          "unset wait_keynotmuch-mutt --prompt search~/.cache/notmuch/mutt/results" \
+          "unset wait_keynotmuch-mutt -r --prompt search~/.cache/notmuch/mutt/results" \
           "notmuch: search mail"
     macro index \
-          "unset wait_keynotmuch-mutt thread~/.cache/notmuch/mutt/resultsset wait_key" \
+          "unset wait_keynotmuch-mutt -r thread~/.cache/notmuch/mutt/resultsset wait_key" \
           "notmuch: reconstruct thread"
     macro index \
           "unset wait_keynotmuch-mutt tag -- -inbox" \
diff --git a/contrib/notmuch-mutt/notmuch-mutt.rc b/contrib/notmuch-mutt/notmuch-mutt.rc
index b0a38d1a..ddc4b480 100644
--- a/contrib/notmuch-mutt/notmuch-mutt.rc
+++ b/contrib/notmuch-mutt/notmuch-mutt.rc
@@ -1,8 +1,8 @@
 macro index \
-      "unset wait_keynotmuch-mutt --prompt search`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`" \
+      "unset wait_keynotmuch-mutt -r --prompt search`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`" \
       "notmuch: search mail"
 macro index \
-      "unset wait_keynotmuch-mutt thread`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`set wait_key" \
+      "unset wait_keynotmuch-mutt -r thread`echo ${XDG_CACHE_HOME:-$HOME/.cache}/notmuch/mutt/results`set wait_key" \
       "notmuch: reconstruct thread"
 macro index \
       "unset wait_keynotmuch-mutt tag -- -inbox" \
-- 
2.26.2