+++ /dev/null
-% ssoma-mda(1) ssoma user manual
-
-# NAME
-
-ssoma-mda - mail delivery agent for ssoma
-
-# SYNOPSIS
-
-ssoma-mda [-1] /path/to/ssoma/repository.git < message
-
-# DESCRIPTION
-
-ssoma-mda delivers messages to a git repository as described by
-ssoma_repository(5). It reads messages from STDIN and takes no
-command-line arguments. It may be invoked by the MTA (mail transport
-agent, e.g. postfix or exim) or as part of another MDA (e.g. procmail or
-maildrop)
-
-ssoma-mda takes no command-line options and does not alter its own
-permissions. This must be done by the MTA or MDA which invokes
-ssoma-mda.
-
-# OPTIONS
-
--1
-: Only allow a Message-ID to appear once in the database.
- Future messages with an identical Message-ID will not be allowed.
-
-# FILES
-
-See ssoma_repository(5) for details.
-
-# ENVIRONMENT
-
-ssoma-mda depends on no environment variables
-
-# CONTACT
-
-All feedback welcome via plain-text mail to <meta@public-inbox.org>\
-The mail archives are hosted at git://public-inbox.org/meta
-See ssoma(1) for instructions on how to subscribe.
-
-# COPYRIGHT
-
-Copyright 2013, Eric Wong <normalperson@yhbt.net> and all contributors.\
-License: AGPLv3 or later <http://www.gnu.org/licenses/agpl-3.0.txt>
-
-# SEE ALSO
-
-git(1), ssoma(1), ssoma_repository(5)
-ssoma (client-side) installation
---------------------------------
-This is for users who wish to follow archives hosted on public-inbox.
-We only use commonly-available Perl modules available on Debian-based
-distributions (and not necessarily modern ones!)
+ssoma-mda installation
+----------------------
+This is for users who wish to add email to archives.
If you have problems or comments on installation, please send a
plain-text email to: meta@public-inbox.org
git clone git://80x24.org/ssoma
-standard MakeMaker installation (Perl)
---------------------------------------
+standard distutils installation
+-------------------------------
- perl Makefile.PL
- make
- make test
- make install # root permissions may be needed
+ TODO
Requirements
------------
-All packages should be easily available in Debian GNU/Linux and derived
-distros. Debian package names for Perl modules are are listed for
-convenience.
-
-* git (https://git-scm.com/)
-* any MUA capable of reading/importing IMAP, mbox(5) or Maildir
-* Perl and several modules: (Debian package name (7.0))
- - Digest::SHA perl
- - Email::LocalDelivery libemail-localdelivery-perl
- - Email::Simple libemail-simple-perl
- - File::Path::Expand libfile-path-expand-perl
- - Net::IMAP::Simple libnet-imap-simple-perl
+It requires Python 3.3 or later (currently only packaged for Debian
+Sid) and pygit2 (tested on version 0.21.3).
Copyright
---------
+++ /dev/null
-.gitignore
-COPYING
-Documentation/include.mk
-Documentation/ssoma-mda.txt
-Documentation/ssoma-rm.txt
-Documentation/ssoma.txt
-Documentation/ssoma_repository.txt
-Documentation/txt2pre
-INSTALL
-MANIFEST
-Makefile.PL
-README
-lib/Ssoma/Extractor.pm
-lib/Ssoma/Git.pm
-lib/Ssoma/GitIndexInfo.pm
-lib/Ssoma/IMAP.pm
-lib/Ssoma/MDA.pm
-lib/Ssoma/Remover.pm
-ssoma
-ssoma-mda
-ssoma-rm
-t/all.t
-t/extractor.t
-t/git.t
-t/imap.t
-t/mda-badheaders.t
-t/mda-conflict.t
-t/mda-missing-mid.t
-t/remover.t
Features
--------
* stores email in git, so readers have a full history of the mailing list
-* mail user-agent (MUA) users may choose from IMAP, mbox(5), and Maildir
* uses only well-documented and easy-to-implement data formats
Install
-------
Installation should be easy and require only a few, commonly-available
-packages. See http://ssoma.public-inbox.org/INSTALL for details.
+packages. See http://ssoma-mda.public-inbox.org/INSTALL for details.
Hacking
-------
Source code is available via git:
- git clone git://80x24.org/ssoma
+ git clone git://80x24.org/ssoma-mda
See below for contact info.
tangentially related projects we depend on (e.g. git developers on
git@vger.kernel.org).
-You can subscribe via ssoma, LISTNAME is a name of your choosing:
-
- URL=git://public-inbox.org/meta
- LISTNAME=public-inbox
-
- # to initialize a maildir (this may be a new or existing maildir,
- # ssoma will not touch existing messages)
- # If you prefer mbox, use mbox:/path/to/mbox as the last argument
- ssoma add $LISTNAME $URL maildir:/path/to/maildir
-
- # read with your favorite MUA (only using mutt as an example)
- mutt -f /path/to/maildir # (or /path/to/mbox)
-
- # to keep your mbox or maildir up-to-date, periodically run the following:
- ssoma sync $LISTNAME
-
- # your MUA may modify and delete messages from the maildir or mbox,
- # this does not affect ssoma functionality at all
-
- # to sync all your ssoma subscriptions
- ssoma sync
-
- # You may wish to sync in your cronjob
- ssoma sync --cron
-
Mail repository format
----------------------
If you are uncomfortable running code in ssoma for any reason and
would rather read directly from the git repository, the following
document describes it:
- http://ssoma.public-inbox.org/ssoma_repository.txt
+ http://ssoma-mda.public-inbox.org/ssoma_repository.txt
Copyright
---------
+++ /dev/null
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: GPLv2 or later (https://www.gnu.org/licenses/gpl-2.0.txt)
-#
-# Note: some trivial code here stolen from git-svn + Perl modules
-# distributed with git. This remains GPLv2+ so improvements may flow
-# back into git. Note: git-svn has always been GPLv2+, unlike most
-# of the rest of git being GPLv2-only.
-
-package Ssoma::Git;
-use strict;
-use warnings;
-use File::Path qw/mkpath/;
-use Fcntl qw/:DEFAULT :flock SEEK_END/;
-use IO::Handle;
-use Email::Simple;
-use Digest::SHA qw/sha1_hex/;
-
-# Future versions of Ssoma will always be able to handle this version, at least
-our $REPO_VERSION = 1;
-
-sub new {
- my ($class, $git_dir) = @_;
- bless {
- git_dir => $git_dir,
- index => "$git_dir/ssoma.index",
- }, $class;
-}
-
-# initialize a git repository
-sub init_db {
- my ($self, @opts) = @_;
-
- my @cmd = (qw(git init --bare), @opts);
- push @cmd, $self->{git_dir};
-
- system(@cmd) == 0 or die join(' ', @cmd)." failed: $?\n";
-
- $self->tmp_git_do(sub {
- @cmd = (qw(git config ssoma.repoversion), $REPO_VERSION);
- system(@cmd) == 0 or die "command: ". join(' ', @cmd) . ": $?\n";
- });
-}
-
-sub lockfile { $_[0]->{git_dir} . "/ssoma.lock" }
-
-sub sync_do {
- my ($self, $sub) = @_;
-
- my $path = $self->lockfile;
- my $lock;
-
- # we must not race here because this is concurrent:
- sysopen($lock, $path, O_WRONLY) or
- sysopen($lock, $path, O_CREAT|O_EXCL|O_WRONLY) or
- sysopen($lock, $path, O_WRONLY) or
- die "failed to open lock $path: $!\n";
-
- # wait for other processes to be done
- flock($lock, LOCK_EX) or die "lock failed: $!\n";
-
- # run the sub!
- my @ret = eval { &$sub };
- my $err = $@;
-
- # these would happen anyways, but be explicit so we can detect errors
- flock($lock, LOCK_UN) or die "unlock failed: $!\n";
- close $lock or die "close lockfile($path) failed: $!\n";
-
- die $err if $err;
-
- wantarray ? @ret : $ret[0];
-}
-
-# perform sub with the given GIT_DIR
-sub tmp_git_do {
- my ($self, $sub) = @_;
- local $ENV{GIT_DIR} = $self->{git_dir};
- &$sub;
-}
-
-# perform sub with a temporary index
-sub tmp_index_do {
- my ($self, $sub) = @_;
- local $ENV{GIT_INDEX_FILE} = $self->{index};
-
- my ($dir, $base) = ($self->{index} =~ m#^(.*?)/?([^/]+)$#);
- mkpath([$dir]) unless -d $dir;
- -d $dir or die "$dir creation failed $!\n";
- &$sub;
-}
-
-# bidirectional pipe, output would be SHA-1 hexdigest
-sub bidi_sha1 {
- my ($self, @cmd) = @_;
- my $sub = pop @cmd;
- my $cmd = join(' ', @cmd);
- my ($in_0, $in_1, $out_0, $out_1);
-
- pipe($in_0, $in_1) or die "pipe failed: $!\n";
- pipe($out_0, $out_1) or die "pipe failed: $!\n";
-
- my $pid = fork;
- defined $pid or die "fork failed: $!\n";
-
- if ($pid == 0) {
- open STDIN, '<&', $in_0 or die "redirect stdin failed: $!\n";
- open STDOUT, '>&', $out_1 or die "redirect stdout failed: $!\n";
- exec @cmd;
- die "exec($cmd) failed: $!\n";
- }
-
- close $in_0 or die "close in_0 failed: $!\n";
- close $out_1 or die "close out_1 failed: $!\n";
- $sub->($in_1);
- close $in_1 or die "close in_1 failed: $!\n";
- my $sha1 = <$out_0>;
- close $out_0 or die "close out_0 failed: $!\n";
- waitpid($pid, 0) or die "waitpid $pid failed: $!\n";
- $? == 0 or die "$cmd failed: $?\n";
- chomp $sha1;
- $sha1 =~ /\A[a-f0-9]{40}\z/i or die "not a SHA-1: $sha1\n";
- $sha1;
-}
-
-# run a command described by str and return the SHA-1 hexdigest output
-sub qx_sha1 {
- my ($self, $str) = @_;
- my $sha1 = `$str`;
-
- die "$str failed: $?\n" if $?;
- chomp $sha1;
- $sha1 =~ /\A[a-f0-9]{40}\z/i or
- die "not a SHA-1 hexdigest from: $str\n";
- $sha1;
-}
-
-# returns a blob identifier the new message
-sub simple_to_blob {
- my ($self, $simple) = @_;
- $self->bidi_sha1(qw/git hash-object -w --stdin/, sub {
- my ($io) = @_;
- print $io $simple->as_string or die "print failed: $!\n";
- });
-}
-
-# converts the given object name to an Email::Simple object
-sub blob_to_simple {
- my ($self, $obj) = @_;
- Email::Simple->new($self->cat_blob($obj));
-}
-
-# returns key-value pairs of config directives in a hash
-sub config_list {
- my ($self, $file) = @_;
-
- local $ENV{GIT_CONFIG} = $file;
-
- my @cfg = `git config -l`;
- $? == 0 or die "git config -l failed: $?\n";
- chomp @cfg;
- my %rv = map { split(/=/, $_, 2) } @cfg;
- \%rv;
-}
-
-# used to hash the relevant portions of a message when there are conflicts
-sub hash_simple2 {
- my ($self, $simple) = @_;
- my $dig = Digest::SHA->new("SHA-1");
- $dig->add($simple->header("Subject"));
- $dig->add($simple->body);
- $dig->hexdigest;
-}
-
-# we currently only compare messages for equality based on
-# Message-ID, Subject: header and body, nothing else.
-# both args are Email::Simple objects
-sub simple_eq {
- my ($self, $cur, $new) = @_;
-
- (($cur->header("Subject") eq $new->header("Subject")) &&
- ($cur->body eq $new->body));
-}
-
-# kills leading/trailing space in-place
-sub stripws {
- $_[0] =~ s/\A\s*//;
- $_[0] =~ s/\s*\z//;
-}
-
-sub mid2path {
- my ($self, $message_id) = @_;
- stripws($message_id);
- $message_id =~ s/\A<//;
- $message_id =~ s/>\z//;
- my $hex = sha1_hex($message_id);
- $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or
- die "BUG: not a SHA-1 hex: $hex";
- "$1/$2";
-}
-
-sub cat_blob {
- my ($self, $blob_id, $git_pm) = @_;
- my $str;
- if ($git_pm) {
- open my $fh, '>', \$str or
- die "failed to setup string handle: $!\n";
- binmode $fh;
- my $bytes = $git_pm->cat_blob($blob_id, $fh);
- close $fh or die "failed to close string handle: $!\n";
- die "$blob_id invalid\n" if $bytes <= 0;
- } else {
- my $cmd = "git cat-file blob $blob_id";
- $str = `$cmd`;
- die "$cmd failed: $?\n" if $?;
- }
- $str;
-}
-
-sub type {
- my ($self, $obj) = @_;
- my $cmd = "git cat-file -t $obj";
- my $str = `$cmd`;
- die "$cmd failed: $?\n" if $?;
- chomp $str;
- $str;
-}
-
-# only used for conflict resolution
-sub each_in_tree {
- my ($self, $obj, $sub) = @_;
- my $cmd = "git ls-tree $obj";
- my @tree = `$cmd`;
- $? == 0 or die "$cmd failed: $!\n";
- my $x40 = '[a-f0-9]{40}';
- foreach my $line (@tree) {
- if ($line =~ m!\A100644 blob ($x40)\t($x40)$!o) {
- my ($blob_id, $path) = ($1, $2);
- $sub->($blob_id, $path);
- } else {
- warn "unexpected: bad line from $cmd:\n$line";
- }
- }
-}
-
-sub commit_index {
- my ($self, $gii, $need_parent, $ref, $message) = @_;
-
- # this is basically what git commit(1) does,
- # but we use git plumbing, not porcelain
- $gii->done;
- my $tree = $self->qx_sha1("git write-tree");
-
- # can't rely on qx_sha1 since we initial commit may not have a parent
- my $cmd = "git rev-parse $ref^0";
- my $parent;
- if ($need_parent) {
- $parent = $self->qx_sha1($cmd);
- } else {
- $parent = eval { $self->qx_sha1("$cmd 2>/dev/null") };
- if (defined $parent && $parent !~ /\A[a-f0-9]{40}\z/) {
- die "$cmd returned bad SHA-1: $parent\n";
- }
- }
-
- # make the commit
- my @cmd = qw/git commit-tree/;
- push @cmd, $tree;
- push @cmd, '-p', $parent if $parent;
- push @cmd, '-m', "'$message'";
-
- my $commit = $self->qx_sha1(join(' ', @cmd));
-
- # update the ref
- @cmd = (qw/git update-ref/, $ref, $commit);
- push @cmd, $parent if $parent; # verification
- system(@cmd) == 0 or die "command: ". join(' ', @cmd) . ": $?\n";
-
- # gc if needed
- @cmd = qw/git gc --auto/;
- system(@cmd) == 0 or die "command: ". join(' ', @cmd) . ": $?\n";
-}
-
-# keep Git.pm optional, not all installations of git have it
-sub try_git_pm {
- my ($self) = @_;
- eval {
- require Git;
- Git->repository(Directory => $self->{git_dir});
- };
-}
-
-1;
+++ /dev/null
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: GPLv2 or later (https://www.gnu.org/licenses/gpl-2.0.txt)
-#
-# Note: some trivial code here stolen from git-svn + Perl modules
-# distributed with git. This remains GPLv2+ so improvements may flow
-# back into git. Note: git-svn has always been GPLv2+, unlike most
-# of the rest of git being GPLv2-only.
-#
-# Not using Git.pm and friends directly because some git installations may use
-# a different Perl than this (and I might end up rewriting this entirely
-# in another language). Git::IndexInfo is also somewhat recent, so folks
-# on LTS distros may not have it, yet.
-
-package Ssoma::GitIndexInfo;
-use strict;
-use warnings;
-
-sub new {
- my ($class) = @_;
- my $pid = open my $gui, '|-';
- defined $pid or die "failed to pipe + fork: $!\n";
- if ($pid == 0) {
- exec(qw/git update-index -z --index-info/);
- die "exec failed: $!\n";
- }
- bless { gui => $gui, pid => $pid, nr => 0}, $class;
-}
-
-sub remove {
- my ($self, $path) = @_;
- print { $self->{gui} } '0 ', 0 x 40, "\t", $path, "\0" or
- die "failed to print to git update-index pipe: $!\n";
- ++$self->{nr};
-}
-
-sub update {
- my ($self, $mode, $hash, $path) = @_;
- print { $self->{gui} } $mode, ' ', $hash, "\t", $path, "\0" or
- die "failed to print to git update-index pipe: $!\n";
- ++$self->{nr};
-}
-
-sub done {
- my ($self) = @_;
- close $self->{gui} or die "close pipe: $!\n";
- $? == 0 or die "git update-index failed: $?\n";
-}
-
-1;
+++ /dev/null
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-#
-# Mail Delivery Agent module, delivers mail into a ssoma git repo
-package Ssoma::MDA;
-use strict;
-use warnings;
-use Ssoma::GitIndexInfo;
-
-sub new {
- my ($class, $git) = @_;
- bless { git => $git, ref => "refs/heads/master" }, $class;
-}
-
-# may convert existing blob to a tree
-# returns false if message already exists
-# returns true on successful delivery
-sub blob_upgrade {
- my ($self, $gii, $new, $path) = @_;
-
- my $git = $self->{git};
- my $obj = "$self->{ref}^0:$path";
- my $cur = $git->blob_to_simple($obj);
-
- # do nothing if the messages match:
- return 0 if $git->simple_eq($cur, $new);
-
- # kill the old blob
- $gii->remove($path);
-
- # implicitly create a new tree via index with two messages
- foreach my $simple ($cur, $new) {
- my $id = $git->simple_to_blob($simple);
- my $path2 = $git->hash_simple2($simple);
- $gii->update("100644", $id, "$path/$path2");
- }
- 1;
-}
-
-# used to update existing trees, which only happen when we have Message-ID
-# conflicts
-sub tree_update {
- my ($self, $gii, $new, $path) = @_;
- my $git = $self->{git};
- my $obj = "$self->{ref}^0:$path";
- my $cmd = "git ls-tree $obj";
- my @tree = `$cmd`;
- $? == 0 or die "$cmd failed: $!\n";
- chomp @tree;
-
- my $id = $git->simple_to_blob($new);
- my $path2 = $git->hash_simple2($new);
-
- # go through the existing tree and look for duplicates
- foreach my $line (@tree) {
- $line =~ m!\A100644 blob ([a-f0-9]{40})\t(([a-f0-9]{40}))\z! or
- die "corrupt repo: bad line from $cmd: $line\n";
- my ($xid, $xpath2) = ($1, $2);
-
- # do nothing if most of the message matches
- return 0 if $path2 eq $xpath2 || $id eq $xid;
- }
-
- # no duplicates found, add to the index
- $gii->update("100644", $id, "$path/$path2");
-}
-
-# this appends the given message-id to the git repo, requires locking
-# (Ssoma::Git::sync_do)
-sub append {
- my ($self, $path, $simple, $once) = @_;
-
- my $git = $self->{git};
- my $ref = $self->{ref};
-
- # $path is a path name we generated, so it's sanitized
- my $gii = Ssoma::GitIndexInfo->new;
-
- my $obj = "$ref^0:$path";
- my $cmd = "git cat-file -t $obj";
- my $type = `$cmd 2>/dev/null`;
-
- if ($? == 0) { # rare, object already exists
- chomp $type;
- if ($once) {
- my $mid = $simple->header("Message-ID");
- die "CONFLICT: Message-ID: $mid exists ($path)\n";
- }
-
- # we return undef here if the message already exists
- if ($type eq "blob") {
- # this may upgrade the existing blob to a tree
- $self->blob_upgrade($gii, $simple, $path) or return;
- } elsif ($type eq "tree") {
- # possibly add object to an existing tree
- $self->tree_update($gii, $simple, $path) or return;
- } else {
- # we're screwed if a commit/tag has the same SHA-1
- die "CONFLICT: `$cmd' returned: $type\n";
- }
- } else { # new message, just create a blob, common
- my $id = $git->simple_to_blob($simple);
- $gii->update('100644', $id, $path);
- }
- my $subject = $simple->header("Subject");
- $git->commit_index($gii, 0, $ref, $subject);
-}
-
-# the main entry point takes an Email::Simple object
-sub deliver {
- my ($self, $simple, $once) = @_;
- my $git = $self->{git};
-
- # convert the Message-ID into a path
- my $mid = $simple->header("Message-ID");
-
- # if there's no Message-ID, generate one to avoid too many conflicts
- # leading to trees
- if (!defined $mid || $mid =~ /\A\s*\z/) {
- $mid = '<' . $git->hash_simple2($simple) . '@localhost>';
- $simple->header_set("Message-ID", $mid);
- }
- my $path = $git->mid2path($mid);
-
- # kill potentially confusing/misleading headers
- foreach my $d (qw(lines content-length)) {
- $simple->header_set($d);
- }
-
- my $sub = sub {
- $git->tmp_index_do(sub {
- $self->append($path, $simple, $once);
- });
- };
- $git->sync_do(sub { $git->tmp_git_do($sub) });
-}
-
-1;
-#!/usr/bin/perl -w
+#!/usr/bin/env python
# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-# This is the command-line mail delivery agent for servers.
# Try to keep this small as it may be invoked frequently for each message
# delivered.
-my $usage = "ssoma-mda [-1] /path/to/git/repo < /path/to/rfc2822_message";
-use strict;
-use warnings;
-use Ssoma::MDA;
-use Ssoma::Git;
-use Email::Simple;
-my $once = $ARGV[0] eq "-1";
-my $repo = pop @ARGV or die "Usage: $usage\n";
-my $git = Ssoma::Git->new($repo);
-my $mda = Ssoma::MDA->new($git);
-my $simple;
-{
- local $/;
- $simple = Email::Simple->new(<STDIN>);
-}
-$mda->deliver($simple, $once);
+
+"""Mail delivery agent for adding mail to a ssoma Git repository.
+
+ssoma-mda reads a message from standard input and delivers it to a Git
+repository as described by ssoma_repository(5). It may be invoked by
+the MTA (mail transport agent, e.g. Postfix or Exim) or as part of
+another MDA (e.g. procmail or maildrop).
+
+ssoma-mda does not alter its own permissions. This must be done by
+the MTA or MDA which invokes ssoma-mda.
+
+# FILES
+
+See ssoma_repository(5) for details.
+
+# ENVIRONMENT
+
+ssoma-mda depends on no environment variables directly, but it uses
+your PATH to find your local Git.
+
+# CONTACT
+
+All feedback welcome via plain-text mail to <meta@public-inbox.org>.
+The mail archives are hosted at git://public-inbox.org/meta See
+ssoma(1) for instructions on how to subscribe.
+
+# COPYRIGHT
+
+Copyright 2013, Eric Wong <normalperson@yhbt.net> and all contributors.
+License: AGPLv3 or later <http://www.gnu.org/licenses/agpl-3.0.txt>
+
+# SEE ALSO
+
+git(1), ssoma(1), ssoma_repository(5)
+"""
+
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse as _argparse
+import email as _email
+import email.message as _email_message
+import email.policy as _email_policy
+import email.utils as _email_utils
+import hashlib as _hashlib
+import logging as _logging
+import os.path as _os_path
+import sys as _sys
+import pygit2 as _pygit2
+
+
+__version__ = '0.2.0'
+
+_LOG = _logging.getLogger('ssoma-mda')
+_LOG.setLevel(_logging.ERROR)
+_LOG.addHandler(_logging.StreamHandler())
+
+_COMMIT_MESSAGE_ENCODING = 'UTF-8'
+
+
+class DirtyIndex(RuntimeError):
+ def __init__(self, repository, diff):
+ self.repository = repository
+ self.diff = diff
+ status = '\n'.join(
+ ' {} {}'.format(patch.status, patch.old_file_path)
+ for patch in diff)
+ super(DirtyIndex, self).__init__('dirty index:\n{}'.format(status))
+
+
+class MessagePathConflict(RuntimeError):
+ """Different messages with the same target path."""
+ def __init__(self, repository, path, message=None):
+ self.repository = repository
+ self.path = path
+ self.message = message
+ super(MessagePathConflict, self).__init__(
+ 'duplicate message for {}'.format(path))
+
+
+def _add_message(repository, index, path, message_bytes):
+ """Add a message to the repository.
+
+ For messages without a Message-ID hash conflict (most messages).
+ """
+ oid = repository.write(_pygit2.GIT_OBJ_BLOB, message_bytes)
+ _LOG.debug('add message at {} ({})'.format(path, oid.hex[:8]))
+ entry = _pygit2.IndexEntry(path, oid, _pygit2.GIT_FILEMODE_BLOB)
+ index.add(entry)
+ index.write()
+
+
+def _upgrade_blob(repository, index, path, obj, message_bytes, once=False):
+ """Possibly upgrade an existing blob to a tree.
+
+ To handle conflicting Message-ID hashes.
+ """
+ old_message_bytes = obj.read_raw()
+ if message_bytes == old_message_bytes:
+ _LOG.info('skipping byte-duplicate message for {}'.format(path))
+ return
+ if once:
+ raise MessagePathConflict(
+ repository=repository, path=path, message=message_bytes)
+ _LOG.debug('upgrade {} to a directory'.format(path))
+ index.remove(path)
+ for action, bytes in [
+ ('upgrade', old_message_bytes),
+ ('add', message_bytes)
+ ]:
+ oid = repository.write(_pygit2.GIT_OBJ_BLOB, bytes)
+ p = _os_path.join(path, oid.hex)
+ _LOG.debug('{} message to tree at {}'.format(action, p))
+ entry = _pygit2.IndexEntry(p, oid, _pygit2.GIT_FILEMODE_BLOB)
+ index.add(entry)
+ index.write()
+
+
+def _update_tree(repository, index, path, obj, message_bytes, once=False):
+ """Possibly update an existing tree.
+
+ To handle conflicting Message-ID hashes.
+ """
+ if once: # we shouldn't have this tree at all
+ raise MessagePathConflict(
+ repository=repository, path=path, message=message_bytes)
+ oid = repository.write(_pygit2.GIT_OBJ_BLOB, message_bytes)
+ p = _os_path.join(path, oid.hex)
+ _LOG.debug('add message to tree at {}'.format(p))
+ entry = _pygit2.IndexEntry(p, oid, _pygit2.GIT_FILEMODE_BLOB)
+ index.add(entry)
+ index.write()
+
+
+def append(repository, path, message_bytes, commit_message,
+ author=None, **kwargs):
+ """Append the given message to the Git repo at 'path' (or a subpath).
+
+ Additional keyword arguments are passed through to index-updating
+ function.
+ """
+ index = repository.index
+ index.read()
+ reference_name = 'HEAD'
+ try:
+ reference = repository.head
+ except _pygit2.GitError as e:
+ if 'not found' in str(e): # no HEAD commit (so this will be the first)
+ reference = None
+ commit = None
+ _add_message(
+ repository=repository, index=index, path=path,
+ message_bytes=message_bytes)
+ else: # we have a HEAD commit to build on
+ commit = reference.get_object()
+ diff = index.diff_to_tree(commit.tree)
+ if len(diff):
+ raise DirtyIndex(repository=repository, diff=diff)
+ try:
+ entry = commit.tree[path]
+ except KeyError: # new message, just create a blob
+ _add_message(
+ repository=repository, index=index, path=path,
+ message_bytes=message_bytes)
+ else: # object already exists
+ obj = repository.get(entry.oid)
+ kwargs = kwargs.copy()
+ kwargs.update({
+ 'repository': repository,
+ 'index': index,
+ 'path': path,
+ 'obj': obj,
+ 'message_bytes': message_bytes,
+ })
+ if obj.type == _pygit2.GIT_OBJ_BLOB:
+ _upgrade_blob(**kwargs)
+ elif obj.type == _pygit2.GIT_OBJ_TREE:
+ _update_tree(**kwargs)
+ else:
+ raise NotImplementedError(
+ 'tree entry for {} has type {}'.format(
+ path, type(obj).__name__.lower()))
+ tree = index.write_tree()
+ if commit is None or tree != commit.tree.oid: # we've changed something
+ committer = repository.default_signature
+ if author is None:
+ author = committer
+ if commit is None:
+ parents = []
+ else:
+ parents = [commit.oid]
+ _LOG.debug('create a new commit for tree {}: {}'.format(
+ tree.hex[:8], commit_message))
+ new_commit = repository.create_commit(
+ reference_name, author, committer, commit_message, tree,
+ parents, _COMMIT_MESSAGE_ENCODING)
+ _LOG.debug('new commit {} advances {}'.format(
+ new_commit.hex[:8], reference_name))
+ else:
+ _LOG.info('no changes to commit')
+
+
+def message_id_path(message_id):
+ """Calculate the default path from a Message-ID
+
+ >>> message_id_path('<20131106023245.GA20224@dcvr.yhbt.net>')
+ 'f2/8c6cfd2b0a65f994c3e1be266105413b3d3f63'
+ """
+ message_id = message_id.lstrip('<').rstrip('>')
+ hash = _hashlib.sha1(message_id.encode('UTF-8')).hexdigest()
+ return _os_path.join(hash[:2], hash[2:])
+
+
+def get_author(message):
+ """Create a pygit2.Signature for the message author."""
+ author_name, author_email = _email_utils.parseaddr(
+ message['From'])
+ date = message['Date']
+ datetime = _email_utils.parsedate_to_datetime(date)
+ time = int(datetime.timestamp())
+ offset = datetime.utcoffset().seconds // 60
+ return _pygit2.Signature(
+ name=author_name,
+ email=author_email,
+ time=time,
+ offset=offset)
+
+
+def deliver(message=None, message_bytes=None, **kwargs):
+ """Deliver a message to a ssoma repository.
+
+ The input message can be an email.message.Message instance (use
+ 'message'), the raw SMTP byte stream (use 'message_bytes'), or
+ both (in which case 'message' is used to extract the message data,
+ and 'message_bytes' is written to the repository.
+
+ Additional keyword arguments are passed through to append().
+ """
+ if message is None:
+ if message_bytes is None:
+ raise ValueError('no message arguments')
+ message = _email.message_from_bytes(
+ message_bytes, policy=_email_policy.SMTP)
+ elif message_bytes is None:
+ message_bytes = message.as_bytes(policy=_email_policy.SMTP)
+
+ message_id = message.get('Message-ID', '')
+ path = message_id_path(message_id=message_id)
+ _LOG.info('deliver {} to {}'.format(message_id, path))
+ commit_message = message.get('Subject', '<no subject>')
+ author = get_author(message=message)
+ repository = _pygit2.Repository(_os_path.curdir)
+ append(
+ repository=repository, path=path, message_bytes=message_bytes,
+ commit_message=commit_message, author=author, **kwargs)
+
+
+def main(stream=_sys.stdin.buffer):
+ """Command-line entry point."""
+ parser = _argparse.ArgumentParser(
+ description=__doc__.strip(),
+ formatter_class=_argparse.RawDescriptionHelpFormatter)
+ parser.add_argument(
+ '-v', '--version', action='version',
+ version='%(prog)s {}'.format(__version__))
+ parser.add_argument(
+ '-l', '--log-level',
+ choices=['critical', 'error', 'warning', 'info', 'debug'],
+ help='Log verbosity. Defaults to {!r}.'.format(
+ _logging.getLevelName(_LOG.level).lower()))
+ parser.add_argument(
+ '-1', '--once', action='store_true',
+ help='Die if the incoming Message-ID is already in the repository.')
+
+ args = parser.parse_args()
+
+ if args.log_level:
+ level = getattr(_logging, args.log_level.upper())
+ _LOG.setLevel(level)
+
+ try:
+ deliver(message_bytes=stream.read(), once=args.once)
+ except (DirtyIndex, MessagePathConflict) as e:
+ _LOG.error(e)
+ raise SystemExit(1)
+
+
+if __name__ == '__main__':
+ main()
+++ /dev/null
-#!/usr/bin/perl -w
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-use strict;
-use warnings;
-use Test::More;
-use Ssoma::MDA;
-use Ssoma::Git;
-use Email::Simple;
-use Digest::SHA qw/sha1_hex/;
-use File::Temp qw/tempdir/;
-
-my $tmpdir = tempdir(CLEANUP => 1);
-my $git = Ssoma::Git->new("$tmpdir/gittest");
-$git->init_db;
-my $mda = Ssoma::MDA->new($git);
-
-my $email = Email::Simple->new("From: U <u\@example.com>\n\nHIHI\n");
-my %headers = (
- "To" => "Me <me\@example.com>",
- "From" => "You <you\@example.com>",
- "Message-ID" => "<666\@example.com>",
- "Subject" => ":o",
- "Lines" => "666",
- "Content-Length" => "666",
-);
-
-my %discard = map { $_ => 1 } qw(Lines Content-Length);
-
-while (my ($key, $val) = each %headers) {
- $email->header_set($key, $val);
-}
-
-$mda->deliver($email);
-
-local $ENV{GIT_DIR} = "$tmpdir/gittest";
-
-my $blob_id = sha1_hex("666\@example.com");
-my ($dir, $base) = ($blob_id =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!);
-ok(defined $dir && defined $base, "bad sha1: $blob_id");
-
-my $raw = `git cat-file blob HEAD:$dir/$base`;
-is(0, $?, "git cat-file returned: $?");
-
-my $delivered = Email::Simple->new($raw);
-is("HIHI\n", $delivered->body, "body matches");
-
-while (my ($key, $val) = each %headers) {
- if ($discard{$key}) {
- is($delivered->header($key), undef, "header $key discarded");
- } else {
- is($delivered->header($key), $val, "header $key not discarded");
- }
-}
-
-done_testing();
-
+++ /dev/null
-#!/usr/bin/perl -w
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-use strict;
-use warnings;
-use Test::More;
-use Ssoma::MDA;
-use Ssoma::Git;
-use Email::Simple;
-use Digest::SHA qw/sha1_hex/;
-use File::Temp qw/tempdir/;
-
-my $tmpdir = tempdir(CLEANUP => 1);
-my $git = Ssoma::Git->new("$tmpdir/gittest");
-$git->init_db;
-my $mda = Ssoma::MDA->new($git);
-
-my $email = Email::Simple->new("From: U <u\@example.com>\n\nHIHI\n");
-$email->header_set("To", "Me <me\@example.com>");
-$email->header_set("Subject", ":o");
-$email->header_set("Message-ID", "<12345\@example.com>");
-
-$mda->deliver($email);
-
-local $ENV{GIT_DIR} = "$tmpdir/gittest";
-my @orig = `git rev-list HEAD`;
-is(1, scalar @orig, "one revision exists");
-
-# deliver a second message
-$email->header_set("message-ID", "<666\@example.com>");
-$email->body_set("BYEBYE\nBYEYBE\n");
-
-$mda->deliver($email);
-
-# validate delivery results and history
-my @two = ` git rev-list HEAD`;
-is(2, scalar @two, "two revisions exist");
-is($orig[0], $two[1], "history is correct");
-
-my @tree = `git ls-tree -r HEAD`;
-is(0, $?, "git ls-tree -r HEAD succeeded");
-chomp @tree;
-is(2, scalar @tree, "two entries in tree");
-
-# ensure path Message-ID -> path mapping works
-foreach my $line (@tree) {
- my ($mode, $type, $blob, $path) = split(/\s+/, $line);;
- my $raw = `git cat-file blob $blob`;
- my $simple = Email::Simple->new($raw);
- my $mid = $simple->header("message-id");
- my $path_sha1 = $path;
- $path_sha1 =~ tr!/!!d;
- $mid =~ tr/<>//d;
- is($path_sha1, sha1_hex($mid), "path mapping works $mid");
-}
-
-# delivery again with identical Message-ID
-$mda->deliver($email);
-
-# duplicate detected
-chomp(my @curr = `git ls-tree -r HEAD`);
-is_deeply(\@tree, \@curr, "duplicate not stored");
-
-# repeat message-ID but different content
-$email->body_set("different\n");
-$mda->deliver($email);
-
-my @prev = @curr;
-my @prev_blobs = map { (split(/\s+/, $_))[2] } @prev;
-
-chomp(@curr = `git ls-tree -r HEAD`);
-my %curr_blobs = map { (split(/\s+/, $_))[2] => 1 } @curr;
-is(3, scalar @curr, "mismatch stored with identical Message-ID");
-
-foreach my $prev (@prev_blobs) {
- ok(delete $curr_blobs{$prev}, "prev=$prev blob exists");
-}
-
-my @only = keys %curr_blobs;
-is(1, scalar @only, "one new blob stored");
-
-my $body_3 = "3rd message with identical Message-ID, ridiculous\n";
-$email->body_set($body_3);
-$mda->deliver($email);
-
-@prev = @curr;
-@prev_blobs = map { (split(/\s+/, $_))[2] } @prev;
-chomp(@curr = `git ls-tree -r HEAD`);
-%curr_blobs = map { (split(/\s+/, $_))[2] => 1 } @curr;
-is(4, scalar @curr, "another stored with identical Message-ID");
-
-foreach my $prev (@prev_blobs) {
- ok(delete $curr_blobs{$prev}, "prev=$prev blob exists");
-}
-@only = keys %curr_blobs;
-is(1, scalar @only, "one new blob stored");
-
-my $want = sha1_hex($email->header("Subject") . $email->body);
-my @want = grep(m!/\Q$want\E!, @curr);
-is(1, scalar @want, "wanted message is unique");
-my $blob = (split(/\s+/, $want[0]))[2];
-my $s = `git cat-file blob $blob`;
-$s = Email::Simple->new($s);
-is("<666\@example.com>", $s->header("message-id"), "MID matches");
-is($body_3, $s->body, "body matches");
-
-done_testing();
+++ /dev/null
-#!/usr/bin/perl -w
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-use strict;
-use warnings;
-use Test::More;
-use Ssoma::MDA;
-use Ssoma::Git;
-use Email::Simple;
-use File::Temp qw/tempdir/;
-my $tmpdir = tempdir(CLEANUP => 1);
-my $git = Ssoma::Git->new("$tmpdir/gittest");
-$git->init_db;
-my $mda = Ssoma::MDA->new($git);
-my $email = Email::Simple->new("From: U <u\@example.com>\n\nHIHI\n");
-$mda->deliver($email);
-
-local $ENV{GIT_DIR} = "$tmpdir/gittest";
-my @tree = `git ls-tree -r HEAD`;
-is(scalar @tree, 1, "one item in tree");
-my @line = split(/\s+/, $tree[0]);
-my $msg = Email::Simple->new($git->cat_blob($line[2]));
-like($msg->header("message-id"), qr/\A<[a-f0-9]{40}\@localhost>\z/,
- "message-id generated for message missing it");
-
-done_testing();