1 # Copyright (C) 2012 W. Trevor King <wking@tremily.us>
3 # This file is part of pygrader.
5 # pygrader is free software: you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation, either version 3 of the License, or (at your option) any later
10 # pygrader is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # pygrader. If not, see <http://www.gnu.org/licenses/>.
17 """Extract message parts with a given MIME type from a mailbox.
20 from __future__ import absolute_import
22 import email.utils as _email_utils
23 import hashlib as _hashlib
24 import mailbox as _mailbox
26 import os.path as _os_path
29 from . import LOG as _LOG
def message_time(message):
    """Get the Unix time when ``message`` was received.

    The timestamp is read from the date that follows the first ``;``
    in the message's ``Received`` header.  The UTC offset given in the
    header is honoured, so the result does not depend on the local
    timezone of the machine doing the parsing.

    Returns the time as a float (seconds since the epoch), or ``None``
    if the message has no ``Received`` header or the header does not
    contain a parseable date.

    >>> from email.utils import formatdate
    >>> from pgp_mime.email import encodedMIMEText
    >>> msg = encodedMIMEText('Ping!')
    >>> msg['Received'] = (
    ...     'from smtp.home.net (smtp.home.net [123.456.123.456]) '
    ...     'by smtp.mail.uu.edu (Postfix) with ESMTP id 5BA225C83EF '
    ...     'for <wking@tremily.us>; Sun, 09 Oct 2011 11:50:46 -0400 (EDT)')
    >>> time = message_time(msg)
    >>> time
    1318175446.0
    >>> formatdate(time)
    'Sun, 09 Oct 2011 15:50:46 -0000'
    """
    received = message['Received']  # RFC 822
    if received is None:
        mid = message['Message-ID']
        _LOG.debug('no Received in {}'.format(mid))
        return None
    # The date stamp is whatever follows the first semicolon.
    _, separator, date = received.partition(';')
    # parsedate_tz() keeps the UTC offset that parsedate() throws away.
    parsed = _email_utils.parsedate_tz(date) if separator else None
    if parsed is None:
        _LOG.debug('no parseable date in Received: {!r}'.format(received))
        return None
    # mktime_tz() applies the offset; float() keeps the historical
    # return type (time.mktime() returned a float).
    return float(_email_utils.mktime_tz(parsed))
def extract_mime(message, mime_type=None, output='.', dry_run=False):
    """Save the named parts of ``message`` into the ``output`` directory.

    Every part yielded by ``message.walk()`` that has a filename and,
    when ``mime_type`` is not ``None``, a matching content type is
    written to ``output`` under its own filename.

    If a file with that name already exists and has the same SHA-1
    digest, the part is treated as already extracted and skipped.  If
    the existing file differs, numeric suffixes (``name.1``,
    ``name.2``, ...) are tried until a free name or an identical copy
    is found.

    When ``dry_run`` is true nothing is written; the decisions are
    only logged.  Otherwise, if ``message_time()`` returned a time for
    the message, the access and modification times of each written
    file are set to it.
    """
    _LOG.debug('parsing {}'.format(message['Subject']))
    mtime = message_time(message=message)
    for part in message.walk():
        fname = part.get_filename()
        if not fname:
            continue  # don't extract parts without filenames
        # The filename is supplied by the message itself, so keep only
        # its last path component: a name such as '../x' or '/etc/x'
        # must not be able to point outside of ``output``.
        safe_name = _os_path.basename(fname)
        if safe_name in ('', _os.curdir, _os.pardir):
            _LOG.debug('skipping unusable filename {!r}'.format(fname))
            continue
        ffname = _os_path.join(output, safe_name)  # full file name
        ctype = part.get_content_type()
        if mime_type is not None and ctype != mime_type:
            continue
        contents = part.get_payload(decode=True)
        if contents is None:
            # get_payload(decode=True) yields None for multipart
            # containers; there is nothing to write for those.
            continue
        # Hash the new contents once rather than on every iteration.
        new_digest = _hashlib.sha1(contents).digest()
        base_ffname = ffname
        count = 0
        is_copy = False
        while _os_path.exists(ffname):
            with open(ffname, 'rb') as existing:
                old_digest = _hashlib.sha1(existing.read()).digest()
            if old_digest == new_digest:
                is_copy = True
                break
            count += 1
            ffname = '{}.{}'.format(base_ffname, count)
        if is_copy:
            _LOG.debug('{} already extracted as {}'.format(fname, ffname))
            continue
        _LOG.debug('extract {} to {}'.format(fname, ffname))
        if not dry_run:
            with open(ffname, 'wb') as f:
                f.write(contents)
            if mtime is not None:
                _os.utime(ffname, (mtime, mtime))