1 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
2 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
4 # Extracts mail to an Mbox or Maildir
5 package Ssoma::Extractor;
9 use Email::LocalDelivery;
12 my ($class, $git) = @_;
13 bless { git => $git, ref => "refs/heads/master" }, $class;
16 # runs a command which returns a list of files, no file name sanitization
17 # here needed since all of the path names stored in git trees are controlled
18 # by us (and based on SHA-1 hexdigest)
22 $? == 0 or die "$cmd failed: $?\n";
28 my ($self, $target, $since) = @_;
29 my $git = $self->{git};
31 # read all of the state file
32 my $state = "$git->{git_dir}/ssoma.state";
33 my $cfg = $git->config_list($state);
35 my $pkey = "target.$target.path";
36 my $path = $cfg->{$pkey};
38 my $ckey = "target.$target.command";
39 my $command = $cfg->{$ckey};
41 my $ikey = "target.$target.imap";
42 my $imap = $cfg->{$ikey};
44 my $lkey = "target.$target.last-imported";
45 my $last = $cfg->{$lkey};
47 my $ref = $self->{ref};
48 my $tip = $git->qx_sha1("git rev-parse $ref^0");
50 my $new; # arrayref of new file pathnames in a git tree
53 my @cmd = (qw(git rev-list), "--since=$since", $tip);
56 # get the commit last in the list, unfortunately --reverse
57 # is not usable with --since
58 open my $rl, '-|', @cmd or die "failed to open rev-list: $!\n";
59 foreach my $cmt (<$rl>) {
62 # do not re-import even if --since is specified
63 if (defined $last && ($last eq $cmt)) {
69 close $rl; # we may break the pipe here
70 $last = $tmp if defined $tmp;
73 # only inject newly-added
74 $last =~ /\A[a-f0-9]{40}\z/ or die "$lkey invalid in $state\n";
76 # we don't want blob->tree conflict resolution in MDA
77 # tricking us into extracting the same message twice;
78 # MDA will keep the original in sufficiently-identical messages
79 my $cmd = "git diff-tree -r --name-only -M100% --diff-filter=A";
80 $new = _flist("$cmd $last $tip");
82 # new maildir or mbox (to us), import everything in the
84 $new = _flist("git ls-tree -r --name-only $tip");
88 $i++ if defined $command;
89 $i++ if defined $path;
90 $i++ if defined $imap;
92 "only one of $pkey, $ckey, or $ikey may be defined in $state\n";
94 if (defined $command) {
95 $self->_run_for_each_msg($command, $tip, $new)
96 } elsif (defined $path) {
97 $self->_deliver_each_msg($path, $tip, $new);
98 } elsif (defined $imap) {
99 $self->_imap_deliver_each_msg($tip, $new);
101 die "neither $pkey, $ckey, nor $ikey are defined in $state\n";
104 # update the last-imported var
106 local $ENV{GIT_CONFIG} = $state;
107 my $rv = system(qw/git config/, $lkey, $tip);
108 $rv == 0 or die "git config $lkey $tip failed: $? ($rv)\n";
112 # deliver to mbox or maildir, Email::LocalDelivery determines the type of
113 # folder (via Email::FolderType) via trailing trailing slash for maildir
114 # (and lack of trailing slash for mbox). Ezmlm and MH formats are not
115 # currently supported by Email::LocalDelivery.
116 sub _deliver_each_msg {
117 my ($self, $dest, $tip, $new) = @_;
118 my $git = $self->{git};
119 my $git_pm = $git->try_git_pm;
120 foreach my $path (@$new) {
121 _deliver_die($git->cat_blob("$tip:$path", $git_pm), $dest);
125 # just pipe the blob message to $command, bypassing Perl,
126 # so there's no validation at all
127 sub _run_for_each_msg {
128 my ($self, $command, $tip, $new) = @_;
129 my $git = $self->{git};
130 foreach my $path (@$new) {
131 my $cmd = "git cat-file blob $tip:$path | $command";
132 my $rv = system($cmd);
133 $rv == 0 or die "delivery command: $cmd failed: $? ($rv)\n";
137 sub _imap_deliver_each_msg {
138 my ($self, $tip, $new) = @_;
139 my $git = $self->{git};
141 my $imap = Ssoma::IMAP->new($git);
142 my $git_pm = $git->try_git_pm;
143 foreach my $path (@$new) {
144 $imap->imap_deliver($git->cat_blob("$tip:$path", $git_pm));
150 my ($self, $target, $since) = @_;
151 $self->{git}->tmp_git_do(sub { $self->_extract($target, $since) });
155 my @rv = Email::LocalDelivery->deliver(@_);
156 (scalar @rv == 1 && -f $rv[0]) or
157 die "delivery to $_[1] failed: $!\n";
160 # implements "ssoma cat MESSAGE-ID"
162 my ($self, $message_id, $mbox) = @_;
163 $self->{git}->tmp_git_do(sub {
164 $self->_midextract($message_id, $mbox);
169 my ($self, $message_id, $mbox) = @_;
170 my $git = $self->{git};
171 my $path = $git->mid2path($message_id);
172 my $ref = $self->{ref};
173 my $tip = $git->qx_sha1("git rev-parse $ref^0");
174 my $obj = "$tip:$path";
175 my $type = $git->type($obj);
176 if ($type eq "tree") { # unlikely
177 $git->each_in_tree($obj, sub {
178 my ($blob_id, $xpath) = ($1, $2);
179 _deliver_die($git->cat_blob($blob_id), $mbox);
181 } elsif ($type eq "blob") {
182 _deliver_die($git->cat_blob($obj), $mbox);
184 die "unhandled type: $type (obj=$obj)\n";