#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2013, Eric Wong and all contributors # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) # Try to keep this small as it may be invoked frequently for each message # delivered. """Mail delivery agent for adding mail to a ssoma Git repository. ssoma-mda reads a message from standard input and delivers it to a Git repository as described by ssoma_repository(5). It may be invoked by the MTA (mail transport agent, e.g. Postfix or Exim) or as part of another MDA (e.g. procmail or maildrop). ssoma-mda does not alter its own permissions. This must be done by the MTA or MDA which invokes ssoma-mda. # FILES See ssoma_repository(5) for details. # ENVIRONMENT ssoma-mda depends on no environment variables directly, but it uses your PATH to find your local Git. # CONTACT All feedback welcome via plain-text mail to . The mail archives are hosted at git://public-inbox.org/meta See ssoma(1) for instructions on how to subscribe. # COPYRIGHT Copyright 2013, Eric Wong and all contributors. License: AGPLv3 or later # SEE ALSO git(1), ssoma(1), ssoma_repository(5) """ from __future__ import print_function from __future__ import unicode_literals import argparse as _argparse import email as _email import email.header as _email_header import email.message as _email_message import email.policy as _email_policy import email.utils as _email_utils import hashlib as _hashlib import logging as _logging import os.path as _os_path import re as _re import sys as _sys import pygit2 as _pygit2 __version__ = '0.2.0' _LOG = _logging.getLogger('ssoma-mda') _LOG.setLevel(_logging.ERROR) _LOG.addHandler(_logging.StreamHandler()) _COMMIT_MESSAGE_ENCODING = 'UTF-8' _FOLDING_WHITESPACE_REGEX = _re.compile('\s*\n\s*') _FALLBACK_ENCODINGS = { # support non-standard RFC 2047 encodings 'no': 'ASCII', '\x10\x10': 'ASCII', } class DirtyIndex(RuntimeError): def __init__(self, repository, diff): self.repository = repository self.diff = diff status = '\n'.join( ' {} {}'.format(patch.status, patch.old_file_path) for patch in diff) super(DirtyIndex, self).__init__('dirty index:\n{}'.format(status)) class MessagePathConflict(RuntimeError): """Different messages with the same target path.""" def __init__(self, repository, path, message=None): self.repository = repository self.path = path self.message = message super(MessagePathConflict, self).__init__( 'duplicate message for {}'.format(path)) def _add_message(repository, index, path, message_bytes): """Add a message to the repository. For messages without a Message-ID hash conflict (most messages). """ oid = repository.write(_pygit2.GIT_OBJ_BLOB, message_bytes) _LOG.debug('add message at {} ({})'.format(path, oid.hex[:8])) entry = _pygit2.IndexEntry(path, oid, _pygit2.GIT_FILEMODE_BLOB) index.add(entry) index.write() def _upgrade_blob(repository, index, path, obj, message_bytes, once=False): """Possibly upgrade an existing blob to a tree. To handle conflicting Message-ID hashes. """ old_message_bytes = obj.read_raw() if message_bytes == old_message_bytes: _LOG.info('skipping byte-duplicate message for {}'.format(path)) return if once: raise MessagePathConflict( repository=repository, path=path, message=message_bytes) _LOG.debug('upgrade {} to a directory'.format(path)) index.remove(path) for action, bytes in [ ('upgrade', old_message_bytes), ('add', message_bytes) ]: oid = repository.write(_pygit2.GIT_OBJ_BLOB, bytes) p = _os_path.join(path, oid.hex) _LOG.debug('{} message to tree at {}'.format(action, p)) entry = _pygit2.IndexEntry(p, oid, _pygit2.GIT_FILEMODE_BLOB) index.add(entry) index.write() def _update_tree(repository, index, path, obj, message_bytes, once=False): """Possibly update an existing tree. To handle conflicting Message-ID hashes. """ if once: # we shouldn't have this tree at all raise MessagePathConflict( repository=repository, path=path, message=message_bytes) oid = repository.write(_pygit2.GIT_OBJ_BLOB, message_bytes) p = _os_path.join(path, oid.hex) _LOG.debug('add message to tree at {}'.format(p)) entry = _pygit2.IndexEntry(p, oid, _pygit2.GIT_FILEMODE_BLOB) index.add(entry) index.write() def append(repository, path, message_bytes, commit_message, author=None, **kwargs): """Append the given message to the Git repo at 'path' (or a subpath). Additional keyword arguments are passed through to index-updating function. """ index = repository.index index.read() reference_name = 'HEAD' try: reference = repository.head except _pygit2.GitError as e: if 'not found' in str(e): # no HEAD commit (so this will be the first) reference = None commit = None _add_message( repository=repository, index=index, path=path, message_bytes=message_bytes) else: # we have a HEAD commit to build on commit = reference.get_object() diff = index.diff_to_tree(commit.tree) if len(diff): raise DirtyIndex(repository=repository, diff=diff) try: entry = commit.tree[path] except KeyError: # new message, just create a blob _add_message( repository=repository, index=index, path=path, message_bytes=message_bytes) else: # object already exists obj = repository.get(entry.oid) kwargs = kwargs.copy() kwargs.update({ 'repository': repository, 'index': index, 'path': path, 'obj': obj, 'message_bytes': message_bytes, }) if obj.type == _pygit2.GIT_OBJ_BLOB: _upgrade_blob(**kwargs) elif obj.type == _pygit2.GIT_OBJ_TREE: _update_tree(**kwargs) else: raise NotImplementedError( 'tree entry for {} has type {}'.format( path, type(obj).__name__.lower())) tree = index.write_tree() if commit is None or tree != commit.tree.oid: # we've changed something committer = repository.default_signature if author is None: author = committer if commit is None: parents = [] else: parents = [commit.oid] _LOG.debug('create a new commit for tree {}: {}'.format( tree.hex[:8], commit_message)) new_commit = repository.create_commit( reference_name, author, committer, commit_message, tree, parents, _COMMIT_MESSAGE_ENCODING) _LOG.debug('new commit {} advances {}'.format( new_commit.hex[:8], reference_name)) else: _LOG.info('no changes to commit') def message_id_path(message_id): """Calculate the default path from a Message-ID >>> message_id_path('<20131106023245.GA20224@dcvr.yhbt.net>') 'f2/8c6cfd2b0a65f994c3e1be266105413b3d3f63' """ message_id = message_id.lstrip('<').rstrip('>') hash = _hashlib.sha1(message_id.encode('UTF-8')).hexdigest() return _os_path.join(hash[:2], hash[2:]) def _decode_header(string): """Wrap email.header.decode_header to assemble a string. >>> _decode_header(string='hello') 'hello' >>> _decode_header(string='=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=') 'Keld Jørn Simonsen' >>> _decode_header(string='Keld =?ISO-8859-1?Q?J=F8rn_Simonsen?=') 'Keld Jørn Simonsen' We also support a few non-standard cases for compatibility with existing messages: >>> _decode_header(string='=?no?q?=5BPATCH=203/3=5D=20Add=20=27compose=27=20command?=') "[PATCH 3/3] Add 'compose' command" """ chunks = [] for decoded, charset in _email_header.decode_header(string): if isinstance(decoded, bytes) and not charset: charset = 'ASCII' if charset: if charset in _FALLBACK_ENCODINGS: charset = _FALLBACK_ENCODINGS[charset] decoded = str(decoded, charset) chunks.append(decoded) return ''.join(chunks) def get_commit_message(message): r"""Unwrap and decode the message subject for use as a commit message. >>> get_commit_message( ... message={'Subject': '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\t=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='}) 'If you can read this you understand the example.' """ commit_message = message.get('Subject', '') commit_message = _FOLDING_WHITESPACE_REGEX.sub(' ', commit_message) return _decode_header(string=commit_message) def get_author(message): """Create a pygit2.Signature for the message author. >>> author = get_author(message={ ... 'From': '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ', ... 'Date': 'Fri, 21 Nov 1997 09:55:06 -0600', ... }) >>> author.name 'Keld Jørn Simonsen' >>> author.email 'keld@dkuug.dk' >>> author.time 880127706 >>> author.offset 1080 """ author_name, author_email = _email_utils.parseaddr( message['From']) if not author_name: author_name = author_email.split('@')[0] author_name = _decode_header(string=author_name) date = message['Date'] datetime = _email_utils.parsedate_to_datetime(date) time = int(datetime.timestamp()) if datetime.utcoffset(): offset = datetime.utcoffset().seconds // 60 else: offset = 0 return _pygit2.Signature( name=author_name, email=author_email, time=time, offset=offset, encoding=_COMMIT_MESSAGE_ENCODING) def deliver(message=None, message_bytes=None, **kwargs): """Deliver a message to a ssoma repository. The input message can be an email.message.Message instance (use 'message'), the raw SMTP byte stream (use 'message_bytes'), or both (in which case 'message' is used to extract the message data, and 'message_bytes' is written to the repository. Additional keyword arguments are passed through to append(). """ if message is None: if message_bytes is None: raise ValueError('no message arguments') message = _email.message_from_bytes( message_bytes, policy=_email_policy.SMTP) elif message_bytes is None: message_bytes = message.as_bytes(policy=_email_policy.SMTP) message_id = message.get('Message-ID', '') path = message_id_path(message_id=message_id) _LOG.info('deliver {} to {}'.format(message_id, path)) commit_message = get_commit_message(message=message) author = get_author(message=message) repository = _pygit2.Repository(_os_path.curdir) append( repository=repository, path=path, message_bytes=message_bytes, commit_message=commit_message, author=author, **kwargs) def main(stream=_sys.stdin.buffer): """Command-line entry point.""" parser = _argparse.ArgumentParser( description=__doc__.strip(), formatter_class=_argparse.RawDescriptionHelpFormatter) parser.add_argument( '-v', '--version', action='version', version='%(prog)s {}'.format(__version__)) parser.add_argument( '-l', '--log-level', choices=['critical', 'error', 'warning', 'info', 'debug'], help='Log verbosity. Defaults to {!r}.'.format( _logging.getLevelName(_LOG.level).lower())) parser.add_argument( '-1', '--once', action='store_true', help='Die if the incoming Message-ID is already in the repository.') args = parser.parse_args() if args.log_level: level = getattr(_logging, args.log_level.upper()) _LOG.setLevel(level) try: deliver(message_bytes=stream.read(), once=args.once) except (DirtyIndex, MessagePathConflict) as e: _LOG.error(e) raise SystemExit(1) if __name__ == '__main__': main()