2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
4 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
5 # Try to keep this small as it may be invoked frequently for each message
8 """Mail delivery agent for adding mail to a ssoma Git repository.
10 ssoma-mda reads a message from standard input and delivers it to a Git
11 repository as described by ssoma_repository(5). It may be invoked by
12 the MTA (mail transport agent, e.g. Postfix or Exim) or as part of
13 another MDA (e.g. procmail or maildrop).
15 ssoma-mda does not alter its own permissions. This must be done by
16 the MTA or MDA which invokes ssoma-mda.
20 See ssoma_repository(5) for details.
24 ssoma-mda depends on no environment variables directly, but it uses
25 your PATH to find your local Git.
29 All feedback welcome via plain-text mail to <meta@public-inbox.org>.
30 The mail archives are hosted at git://public-inbox.org/meta See
31 ssoma(1) for instructions on how to subscribe.
35 Copyright 2013, Eric Wong <normalperson@yhbt.net> and all contributors.
36 License: AGPLv3 or later <http://www.gnu.org/licenses/agpl-3.0.txt>
40 git(1), ssoma(1), ssoma_repository(5)
43 from __future__ import print_function
44 from __future__ import unicode_literals
46 import argparse as _argparse
47 import email as _email
48 import email.header as _email_header
49 import email.message as _email_message
50 import email.policy as _email_policy
51 import email.utils as _email_utils
52 import hashlib as _hashlib
53 import logging as _logging
54 import os.path as _os_path
57 import pygit2 as _pygit2
# Module-level logger.  Only errors are reported by default; records go
# through a plain StreamHandler (stderr unless a stream is given).
_LOG = _logging.getLogger('ssoma-mda')
_LOG.setLevel(_logging.ERROR)
_LOG.addHandler(_logging.StreamHandler())

# Encoding name passed along with the commit message when creating commits.
_COMMIT_MESSAGE_ENCODING = 'UTF-8'

# Matches a line break together with all whitespace surrounding it, i.e.
# the folding whitespace of a wrapped header value.  Written as a raw
# string so that '\s' is handed to the regex engine instead of being
# parsed as an (invalid) string escape.
_FOLDING_WHITESPACE_REGEX = _re.compile(r'\s*\n\s*')
70 class DirtyIndex(RuntimeError):
71 def __init__(self, repository, diff):
72 self.repository = repository
75 ' {} {}'.format(patch.status, patch.old_file_path)
77 super(DirtyIndex, self).__init__('dirty index:\n{}'.format(status))
80 class MessagePathConflict(RuntimeError):
81 """Different messages with the same target path."""
82 def __init__(self, repository, path, message=None):
83 self.repository = repository
85 self.message = message
86 super(MessagePathConflict, self).__init__(
87 'duplicate message for {}'.format(path))
90 def _add_message(repository, index, path, message_bytes):
91 """Add a message to the repository.
93 For messages without a Message-ID hash conflict (most messages).
95 oid = repository.write(_pygit2.GIT_OBJ_BLOB, message_bytes)
96 _LOG.debug('add message at {} ({})'.format(path, oid.hex[:8]))
97 entry = _pygit2.IndexEntry(path, oid, _pygit2.GIT_FILEMODE_BLOB)
102 def _upgrade_blob(repository, index, path, obj, message_bytes, once=False):
103 """Possibly upgrade an existing blob to a tree.
105 To handle conflicting Message-ID hashes.
107 old_message_bytes = obj.read_raw()
108 if message_bytes == old_message_bytes:
109 _LOG.info('skipping byte-duplicate message for {}'.format(path))
112 raise MessagePathConflict(
113 repository=repository, path=path, message=message_bytes)
114 _LOG.debug('upgrade {} to a directory'.format(path))
116 for action, bytes in [
117 ('upgrade', old_message_bytes),
118 ('add', message_bytes)
120 oid = repository.write(_pygit2.GIT_OBJ_BLOB, bytes)
121 p = _os_path.join(path, oid.hex)
122 _LOG.debug('{} message to tree at {}'.format(action, p))
123 entry = _pygit2.IndexEntry(p, oid, _pygit2.GIT_FILEMODE_BLOB)
128 def _update_tree(repository, index, path, obj, message_bytes, once=False):
129 """Possibly update an existing tree.
131 To handle conflicting Message-ID hashes.
133 if once: # we shouldn't have this tree at all
134 raise MessagePathConflict(
135 repository=repository, path=path, message=message_bytes)
136 oid = repository.write(_pygit2.GIT_OBJ_BLOB, message_bytes)
137 p = _os_path.join(path, oid.hex)
138 _LOG.debug('add message to tree at {}'.format(p))
139 entry = _pygit2.IndexEntry(p, oid, _pygit2.GIT_FILEMODE_BLOB)
144 def append(repository, path, message_bytes, commit_message,
145 author=None, **kwargs):
146 """Append the given message to the Git repo at 'path' (or a subpath).
148 Additional keyword arguments are passed through to index-updating
151 index = repository.index
153 reference_name = 'HEAD'
155 reference = repository.head
156 except _pygit2.GitError as e:
157 if 'not found' in str(e): # no HEAD commit (so this will be the first)
161 repository=repository, index=index, path=path,
162 message_bytes=message_bytes)
163 else: # we have a HEAD commit to build on
164 commit = reference.get_object()
165 diff = index.diff_to_tree(commit.tree)
167 raise DirtyIndex(repository=repository, diff=diff)
169 entry = commit.tree[path]
170 except KeyError: # new message, just create a blob
172 repository=repository, index=index, path=path,
173 message_bytes=message_bytes)
174 else: # object already exists
175 obj = repository.get(entry.oid)
176 kwargs = kwargs.copy()
178 'repository': repository,
182 'message_bytes': message_bytes,
184 if obj.type == _pygit2.GIT_OBJ_BLOB:
185 _upgrade_blob(**kwargs)
186 elif obj.type == _pygit2.GIT_OBJ_TREE:
187 _update_tree(**kwargs)
189 raise NotImplementedError(
190 'tree entry for {} has type {}'.format(
191 path, type(obj).__name__.lower()))
192 tree = index.write_tree()
193 if commit is None or tree != commit.tree.oid: # we've changed something
194 committer = repository.default_signature
200 parents = [commit.oid]
201 _LOG.debug('create a new commit for tree {}: {}'.format(
202 tree.hex[:8], commit_message))
203 new_commit = repository.create_commit(
204 reference_name, author, committer, commit_message, tree,
205 parents, _COMMIT_MESSAGE_ENCODING)
206 _LOG.debug('new commit {} advances {}'.format(
207 new_commit.hex[:8], reference_name))
209 _LOG.info('no changes to commit')
def message_id_path(message_id):
    """Calculate the default path from a Message-ID.

    Any leading '<' and trailing '>' characters are removed, the remainder
    is encoded as UTF-8 and hashed with SHA-1.  The first two hex digits of
    the digest form the directory name and the remaining 38 the file name.

    The two parts are always joined with '/': the result is used as a path
    inside a Git tree, not as a path on the local filesystem, so the
    platform's path separator must not leak into it.

    >>> message_id_path('<20131106023245.GA20224@dcvr.yhbt.net>')
    'f2/8c6cfd2b0a65f994c3e1be266105413b3d3f63'
    """
    bare_id = message_id.lstrip('<').rstrip('>')
    digest = _hashlib.sha1(bare_id.encode('UTF-8')).hexdigest()
    return '{}/{}'.format(digest[:2], digest[2:])
223 def _decode_header(string):
224 """Wrap email.header.decode_header to assemble a string.
226 >>> _decode_header(string='hello')
228 >>> _decode_header(string='=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=')
232 for decoded, charset in _email_header.decode_header(string):
234 decoded = str(decoded, charset)
235 chunks.append(decoded)
236 return ''.join(chunks)
def get_commit_message(message):
    r"""Build a single-line commit message from the message's Subject.

    Falls back to '<no subject>' when there is no Subject header.  Every
    folded line break (with the whitespace around it) is collapsed into
    one space before the header is decoded.

    >>> get_commit_message(
    ...     message={'Subject': '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\t=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='})
    'If you can read this you understand the example.'
    """
    subject = message.get('Subject', '<no subject>')
    unfolded = _FOLDING_WHITESPACE_REGEX.sub(' ', subject)
    return _decode_header(string=unfolded)
251 def get_author(message):
252 """Create a pygit2.Signature for the message author."""
253 author_name, author_email = _email_utils.parseaddr(
256 author_name = author_email.split('@')[0]
257 date = message['Date']
258 datetime = _email_utils.parsedate_to_datetime(date)
259 time = int(datetime.timestamp())
260 if datetime.utcoffset():
261 offset = datetime.utcoffset().seconds // 60
264 return _pygit2.Signature(
271 def deliver(message=None, message_bytes=None, **kwargs):
272 """Deliver a message to a ssoma repository.
The input message can be an email.message.Message instance (use
'message'), the raw SMTP byte stream (use 'message_bytes'), or
both (in which case 'message' is used to extract the message data,
and 'message_bytes' is written to the repository).
279 Additional keyword arguments are passed through to append().
282 if message_bytes is None:
283 raise ValueError('no message arguments')
284 message = _email.message_from_bytes(
285 message_bytes, policy=_email_policy.SMTP)
286 elif message_bytes is None:
287 message_bytes = message.as_bytes(policy=_email_policy.SMTP)
289 message_id = message.get('Message-ID', '')
290 path = message_id_path(message_id=message_id)
291 _LOG.info('deliver {} to {}'.format(message_id, path))
292 commit_message = get_commit_message(message=message)
293 author = get_author(message=message)
294 repository = _pygit2.Repository(_os_path.curdir)
296 repository=repository, path=path, message_bytes=message_bytes,
297 commit_message=commit_message, author=author, **kwargs)
300 def main(stream=_sys.stdin.buffer):
301 """Command-line entry point."""
302 parser = _argparse.ArgumentParser(
303 description=__doc__.strip(),
304 formatter_class=_argparse.RawDescriptionHelpFormatter)
306 '-v', '--version', action='version',
307 version='%(prog)s {}'.format(__version__))
310 choices=['critical', 'error', 'warning', 'info', 'debug'],
311 help='Log verbosity. Defaults to {!r}.'.format(
312 _logging.getLevelName(_LOG.level).lower()))
314 '-1', '--once', action='store_true',
315 help='Die if the incoming Message-ID is already in the repository.')
317 args = parser.parse_args()
320 level = getattr(_logging, args.log_level.upper())
324 deliver(message_bytes=stream.read(), once=args.once)
325 except (DirtyIndex, MessagePathConflict) as e:
330 if __name__ == '__main__':