pygrader/extract_mime.py

   1 # Copyright (C) 2012 W. Trevor King <wking@drexel.edu>
   2 #
   3 # This file is part of pygrader.
   4 #
   5 # pygrader is free software: you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation, either version 3 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # pygrader is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # pygrader.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 """Extract message parts with a given MIME type from a mailbox.
  18 """
  19
  20 from __future__ import absolute_import
  21
  22 import email.utils as _email_utils
  23 import hashlib as _hashlib
  24 import mailbox as _mailbox
  25 import os as _os
  26 import os.path as _os_path
  27 import time as _time
  28
  29 from . import LOG as _LOG
  30 from .color import color_string as _color_string
  31 from .color import standard_colors as _standard_colors
  32
  33
  34 def message_time(message, use_color=None):
  35     highlight,lowlight,good,bad = _standard_colors(use_color=use_color)
  36     received = message['Received']  # RFC 822
  37     if received is None:
  38         mid = message['Message-ID']
  39         _LOG.debug(_color_string(
  40                 string='no Received in {}'.format(mid), color=lowlight))
  41         return None
  42     date = received.split(';', 1)[1]
  43     return _time.mktime(_email_utils.parsedate(date))
  44
  45 def extract_mime(message, mime_type=None, output='.', dry_run=False):
  46     _LOG.debug('parsing {}'.format(message['Subject']))
  47     time = message_time(message=message)
  48     for part in message.walk():
  49         fname = part.get_filename()
  50         if not fname:
  51             continue  # don't extract parts without filenames
  52         ffname = _os_path.join(output, fname)  # full file name
  53         ctype = part.get_content_type()
  54         if mime_type is None or ctype == mime_type:
  55             contents = part.get_payload(decode=True)
  56             count = 0
  57             base_ffname = ffname
  58             is_copy = False
  59             while _os_path.exists(ffname):
  60                 old = _hashlib.sha1(open(ffname, 'rb').read())
  61                 new = _hashlib.sha1(contents)
  62                 if old.digest() == new.digest():
  63                     is_copy = True
  64                     break
  65                 count += 1
  66                 ffname = '{}.{}'.format(base_ffname, count)
  67             if is_copy:
  68                 _LOG.debug('{} already extracted as {}'.format(fname, ffname))
  69                 continue
  70             _LOG.debug('extract {} to {}'.format(fname, ffname))
  71             if not dry_run:
  72                 with open(ffname, 'wb') as f:
  73                     f.write(contents)
  74                 if time is not None:
  75                     _os.utime(ffname, (time, time))