mailpipe|handler: centralize student/course extraction from subjects.
[pygrader.git] / pygrader / mailpipe.py
1 # Copyright (C) 2012 W. Trevor King <wking@drexel.edu>
2 #
3 # This file is part of pygrader.
4 #
5 # pygrader is free software: you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation, either version 3 of the License, or (at your option) any later
8 # version.
9 #
10 # pygrader is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License along with
15 # pygrader.  If not, see <http://www.gnu.org/licenses/>.
16
17 "Incoming email processing."
18
19 from __future__ import absolute_import
20
21 from email import message_from_file as _message_from_file
22 from email.header import decode_header as _decode_header
23 from email.mime.text import MIMEText as _MIMEText
24 import mailbox as _mailbox
25 import re as _re
26 import sys as _sys
27
28 import pgp_mime as _pgp_mime
29 from lxml import etree as _etree
30
31 from . import LOG as _LOG
32 from .email import construct_email as _construct_email
33 from .email import construct_response as _construct_response
34 from .extract_mime import message_time as _message_time
35 from .model.person import Person as _Person
36
37 from .handler import InsecureMessage as _InsecureMessage
38 from .handler import InvalidAssignmentSubject as _InvalidAssignmentSubject
39 from .handler import InvalidMessage as _InvalidMessage
40 from .handler import InvalidStudentSubject as _InvalidStudentSubject
41 from .handler import InvalidSubjectMessage as _InvalidSubjectMessage
42 from .handler import Response as _Response
43 from .handler import UnsignedMessage as _UnsignedMessage
44 from .handler.get import run as _handle_get
45 from .handler.submission import run as _handle_submission
46 from .handler.submission import InvalidSubmission as _InvalidSubmission
47
48
49 _TAG_REGEXP = _re.compile('^.*\[([^]]*)\].*$')
50
51
class NoReturnPath (_InvalidMessage):
    """Invalid message that carries no ``Return-Path`` header.

    ``address`` is a required positional parameter, but it is neither
    stored nor forwarded to the base class.  Remaining keyword
    arguments go to the base class, with ``error`` defaulting to
    ``'no Return-Path'``.
    """
    def __init__(self, address, **kwargs):
        kwargs.setdefault('error', 'no Return-Path')
        super().__init__(**kwargs)
57
58
class UnregisteredAddress (_InvalidMessage):
    """Invalid message whose sender address matched nobody.

    The offending address is kept on ``self.address`` and is used to
    build the default ``error`` text.
    """
    def __init__(self, address, **kwargs):
        default_error = 'unregistered address {}'.format(address)
        kwargs.setdefault('error', default_error)
        super().__init__(**kwargs)
        self.address = address
65
66
class AmbiguousAddress (_InvalidMessage):
    """Invalid message whose sender address matched several people.

    ``self.address`` holds the address and ``self.people`` holds the
    matches that were passed in.
    """
    def __init__(self, address, people, **kwargs):
        default_error = 'ambiguous address {}'.format(address)
        kwargs.setdefault('error', default_error)
        super().__init__(**kwargs)
        self.people = people
        self.address = address
74
75
class WrongSignatureMessage (_InsecureMessage):
    """Insecure message that was not signed by the expected key.

    The ``pgp_key``, ``fingerprints``, and ``decrypted`` arguments are
    stored unchanged on the instance for later reporting.
    """
    def __init__(self, pgp_key=None, fingerprints=None, decrypted=None,
                 **kwargs):
        kwargs.setdefault('error', 'not signed by the expected key')
        super().__init__(**kwargs)
        self.decrypted = decrypted
        self.fingerprints = fingerprints
        self.pgp_key = pgp_key

85
class UnverifiedSignatureMessage (_InsecureMessage):
    """Insecure message whose signature could not be verified.

    The ``signature`` and ``decrypted`` arguments are stored unchanged
    on the instance.
    """
    def __init__(self, signature=None, decrypted=None, **kwargs):
        kwargs.setdefault('error', 'unverified signature')
        super().__init__(**kwargs)
        self.decrypted = decrypted
        self.signature = signature
93
94
class SubjectlessMessage (_InvalidSubjectMessage):
    """Invalid-subject error for a message with no subject at all.

    ``error`` defaults to ``'no subject'`` unless the caller gives one.
    """
    def __init__(self, **kwargs):
        kwargs.setdefault('error', 'no subject')
        super().__init__(**kwargs)
100
101
class InvalidHandlerMessage (_InvalidSubjectMessage):
    """Invalid-subject error for a target that has no handler.

    ``self.target`` is the requested target and ``self.handlers`` is
    whatever handler collection the caller supplied.
    """
    def __init__(self, target=None, handlers=None, **kwargs):
        default_error = 'no handler for {!r}'.format(target)
        kwargs.setdefault('error', default_error)
        super().__init__(**kwargs)
        self.handlers = handlers
        self.target = target
109
110
def mailpipe(basedir, course, stream=None, mailbox=None, input_=None,
             output=None, continue_after_invalid_message=False, max_late=0,
             trust_email_infrastructure=False,
             handlers={
        'get': _handle_get,
        'submit': _handle_submission,
        }, respond=None, dry_run=False, **kwargs):
    """Run from procmail to sort incoming submissions

    For example, you can setup your ``.procmailrc`` like this::

      SHELL=/bin/sh
      DEFAULT=$MAIL
      MAILDIR=$HOME/mail
      DEFAULT=$MAILDIR/mbox
      LOGFILE=$MAILDIR/procmail.log
      #VERBOSE=yes
      PYGRADE_MAILPIPE="pg.py -d $HOME/grades/phys160"

      # Grab all incoming homeworks emails.  This rule eats matching emails
      # (i.e. no further procmail processing).
      :0
      * ^Subject:.*\\[phys160:submit]
      | "$PYGRADE_MAILPIPE" mailpipe

    If you don't want procmail to eat the message, you can use the
    ``c`` flag (carbon copy) by starting your rule off with ``:0 c``.

    Parameters:

    * ``basedir``, ``max_late``: passed through to the selected handler.
    * ``course``: passed to the message loader and to the handler;
      ``course.robot`` is the author of any response that still needs
      to be completed.
    * ``stream``: file-like object holding a single message, read when
      ``mailbox`` is ``None``.  Defaults to ``sys.stdin``.
    * ``mailbox``: ``None``, ``'mbox'``, or ``'maildir'``; selects how
      ``input_`` is opened.
    * ``input_``, ``output``: locations of the input and output
      mailboxes.  Successfully parsed messages are moved to ``output``
      unless ``dry_run`` is set.
    * ``continue_after_invalid_message``: when false, an invalid
      message is re-raised; when true it is logged and, if ``respond``
      is set, answered with an error response.
    * ``trust_email_infrastructure``: passed to the parser and the
      handler; the parser then tolerates unsigned messages from people
      who have a PGP key.
    * ``handlers``: maps a subject target to the handler to call.
    * ``respond``: callable taking a message to send back; no
      responses are sent when it is ``None``.
    * ``dry_run``: passed to the loader and the handler.
    * ``**kwargs``: accepted and ignored.

    >>> from io import StringIO
    >>> from pgp_mime.email import encodedMIMEText
    >>> from .handler import InvalidMessage, Response
    >>> from .test.course import StubCourse

    >>> course = StubCourse()
    >>> def respond(message):
    ...     print('respond with:\\n{}'.format(message.as_string()))
    >>> def process(message):
    ...     mailpipe(
    ...         basedir=course.basedir, course=course.course,
    ...         stream=StringIO(message.as_string()),
    ...         output=course.mailbox,
    ...         continue_after_invalid_message=True,
    ...         respond=respond)
    >>> message = encodedMIMEText('The answer is 42.')
    >>> message['Message-ID'] = '<123.456@home.net>'
    >>> message['Received'] = (
    ...     'from smtp.home.net (smtp.home.net [123.456.123.456]) '
    ...     'by smtp.mail.uu.edu (Postfix) with ESMTP id 5BA225C83EF '
    ...     'for <wking@tremily.us>; Sun, 09 Oct 2011 11:50:46 -0400 (EDT)')
    >>> message['From'] = 'Billy B <bb@greyhavens.net>'
    >>> message['To'] = 'phys101 <phys101@tower.edu>'
    >>> message['Subject'] = '[submit] assignment 1'

    Messages without a ``Return-Path`` are silently dropped:

    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    >>> course.print_tree()  # doctest: +REPORT_UDIFF, +ELLIPSIS
    course.conf
    mail
    mail/cur
    mail/new
    mail/tmp

    Response to a message from an unregistered person:

    >>> message['Return-Path'] = '<invalid.return.path@home.net>'
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/signed; protocol="application/pgp-signature"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: "invalid.return.path@home.net" <invalid.return.path@home.net>
    Subject: unregistered address invalid.return.path@home.net
    <BLANKLINE>
    --===============...==
    Content-Type: multipart/mixed; boundary="===============...=="
    MIME-Version: 1.0
    <BLANKLINE>
    --===============...==
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    <BLANKLINE>
    invalid.return.path@home.net,
    <BLANKLINE>
    Your email address is not registered with pygrader for
    Physics 101.  If you feel it should be, contact your professor
    or TA.
    <BLANKLINE>
    Yours,
    phys-101 robot
    <BLANKLINE>
    --===============...==
    Content-Type: message/rfc822
    MIME-Version: 1.0
    <BLANKLINE>
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    Message-ID: <123.456@home.net>
    Received: from smtp.home.net (smtp.home.net [123.456.123.456]) by smtp.mail.uu.edu (Postfix) with ESMTP id 5BA225C83EF for <wking@tremily.us>; Sun, 09 Oct 2011 11:50:46 -0400 (EDT)
    From: Billy B <bb@greyhavens.net>
    To: phys101 <phys101@tower.edu>
    Subject: [submit] assignment 1
    Return-Path: <invalid.return.path@home.net>
    <BLANKLINE>
    The answer is 42.
    --===============...==--
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP digital signature
    Content-Type: application/pgp-signature; name="signature.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP SIGNATURE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP SIGNATURE-----
    <BLANKLINE>
    --===============...==--

    If we add a valid ``Return-Path``, we get the expected delivery:

    >>> del message['Return-Path']
    >>> message['Return-Path'] = '<bb@greyhavens.net>'
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/signed; protocol="application/pgp-signature"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: Bilbo Baggins <bb@shire.org>
    Subject: Received Assignment 1 submission
    <BLANKLINE>
    --===============...==
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Disposition: inline
    Content-Transfer-Encoding: 7bit
    <BLANKLINE>
    Billy,
    <BLANKLINE>
    We received your submission for Assignment 1 on Sun, 09 Oct 2011 15:50:46 -0000.
    <BLANKLINE>
    Yours,
    phys-101 robot
    <BLANKLINE>
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP digital signature
    Content-Type: application/pgp-signature; name="signature.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP SIGNATURE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP SIGNATURE-----
    <BLANKLINE>
    --===============...==--

    >>> course.print_tree()  # doctest: +REPORT_UDIFF, +ELLIPSIS
    Bilbo_Baggins
    Bilbo_Baggins/Assignment_1
    Bilbo_Baggins/Assignment_1/mail
    Bilbo_Baggins/Assignment_1/mail/cur
    Bilbo_Baggins/Assignment_1/mail/new
    Bilbo_Baggins/Assignment_1/mail/new/...:2,S
    Bilbo_Baggins/Assignment_1/mail/tmp
    course.conf
    mail
    mail/cur
    mail/new
    mail/new/...
    mail/tmp

    The last ``Received`` is used to timestamp the message:

    >>> del message['Message-ID']
    >>> message['Message-ID'] = '<abc.def@home.net>'
    >>> del message['Received']
    >>> message['Received'] = (
    ...     'from smtp.mail.uu.edu (localhost.localdomain [127.0.0.1]) '
    ...     'by smtp.mail.uu.edu (Postfix) with SMTP id 68CB45C8453 '
    ...     'for <wking@tremily.us>; Mon, 10 Oct 2011 12:50:46 -0400 (EDT)')
    >>> message['Received'] = (
    ...     'from smtp.home.net (smtp.home.net [123.456.123.456]) '
    ...     'by smtp.mail.uu.edu (Postfix) with ESMTP id 5BA225C83EF '
    ...     'for <wking@tremily.us>; Mon, 09 Oct 2011 11:50:46 -0400 (EDT)')
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/signed; protocol="application/pgp-signature"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: Bilbo Baggins <bb@shire.org>
    Subject: Received Assignment 1 submission
    <BLANKLINE>
    --===============...==
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Disposition: inline
    Content-Transfer-Encoding: 7bit
    <BLANKLINE>
    Billy,
    <BLANKLINE>
    We received your submission for Assignment 1 on Mon, 10 Oct 2011 16:50:46 -0000.
    <BLANKLINE>
    Yours,
    phys-101 robot
    <BLANKLINE>
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP digital signature
    Content-Type: application/pgp-signature; name="signature.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP SIGNATURE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP SIGNATURE-----
    <BLANKLINE>
    --===============...==--

    >>> course.print_tree()  # doctest: +REPORT_UDIFF, +ELLIPSIS
    Bilbo_Baggins
    Bilbo_Baggins/Assignment_1
    Bilbo_Baggins/Assignment_1/late
    Bilbo_Baggins/Assignment_1/mail
    Bilbo_Baggins/Assignment_1/mail/cur
    Bilbo_Baggins/Assignment_1/mail/new
    Bilbo_Baggins/Assignment_1/mail/new/...:2,S
    Bilbo_Baggins/Assignment_1/mail/new/...:2,S
    Bilbo_Baggins/Assignment_1/mail/tmp
    course.conf
    mail
    mail/cur
    mail/new
    mail/new/...
    mail/new/...
    mail/tmp

    You can send receipts to acknowledge incoming messages, which
    includes warnings about dropped messages (except for messages
    without ``Return-Path`` and messages where the ``Return-Path``
    email belongs to multiple ``People``.  The former should only
    occur with malicious emails, and the latter with improper pygrader
    configurations).

    Response to a successful submission:

    >>> del message['Message-ID']
    >>> message['Message-ID'] = '<hgi.jlk@home.net>'
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/signed; protocol="application/pgp-signature"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: Bilbo Baggins <bb@shire.org>
    Subject: Received Assignment 1 submission
    <BLANKLINE>
    --===============...==
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Disposition: inline
    Content-Transfer-Encoding: 7bit
    <BLANKLINE>
    Billy,
    <BLANKLINE>
    We received your submission for Assignment 1 on Mon, 10 Oct 2011 16:50:46 -0000.
    <BLANKLINE>
    Yours,
    phys-101 robot
    <BLANKLINE>
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP digital signature
    Content-Type: application/pgp-signature; name="signature.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP SIGNATURE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP SIGNATURE-----
    <BLANKLINE>
    --===============...==--

    Response to a submission on an unsubmittable assignment:

    >>> del message['Subject']
    >>> message['Subject'] = '[submit] attendance 1'
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/signed; protocol="application/pgp-signature"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: Bilbo Baggins <bb@shire.org>
    Subject: Received invalid Attendance 1 submission
    <BLANKLINE>
    --===============...==
    Content-Type: multipart/mixed; boundary="===============...=="
    MIME-Version: 1.0
    <BLANKLINE>
    --===============...==
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    <BLANKLINE>
    Billy,
    <BLANKLINE>
    We received your submission for Attendance 1, but you are not
    allowed to submit that assignment via email.
    <BLANKLINE>
    Yours,
    phys-101 robot
    <BLANKLINE>
    --===============...==
    Content-Type: message/rfc822
    MIME-Version: 1.0
    <BLANKLINE>
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    From: Billy B <bb@greyhavens.net>
    To: phys101 <phys101@tower.edu>
    Return-Path: <bb@greyhavens.net>
    Received: from smtp.mail.uu.edu (localhost.localdomain [127.0.0.1]) by smtp.mail.uu.edu (Postfix) with SMTP id 68CB45C8453 for <wking@tremily.us>; Mon, 10 Oct 2011 12:50:46 -0400 (EDT)
    Received: from smtp.home.net (smtp.home.net [123.456.123.456]) by smtp.mail.uu.edu (Postfix) with ESMTP id 5BA225C83EF for <wking@tremily.us>; Mon, 09 Oct 2011 11:50:46 -0400 (EDT)
    Message-ID: <hgi.jlk@home.net>
    Subject: [submit] attendance 1
    <BLANKLINE>
    The answer is 42.
    --===============...==--
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP digital signature
    Content-Type: application/pgp-signature; name="signature.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP SIGNATURE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP SIGNATURE-----
    <BLANKLINE>
    --===============...==--

    Response to a bad subject:

    >>> del message['Subject']
    >>> message['Subject'] = 'need help for the first homework'
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/signed; protocol="application/pgp-signature"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: Bilbo Baggins <bb@shire.org>
    Subject: no tag in 'need help for the first homework'
    <BLANKLINE>
    --===============...==
    Content-Type: multipart/mixed; boundary="===============...=="
    MIME-Version: 1.0
    <BLANKLINE>
    --===============...==
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    <BLANKLINE>
    Billy,
    <BLANKLINE>
    We received an email message from you with an invalid
    subject.
    <BLANKLINE>
    Yours,
    phys-101 robot
    <BLANKLINE>
    --===============...==
    Content-Type: message/rfc822
    MIME-Version: 1.0
    <BLANKLINE>
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    From: Billy B <bb@greyhavens.net>
    To: phys101 <phys101@tower.edu>
    Return-Path: <bb@greyhavens.net>
    Received: from smtp.mail.uu.edu (localhost.localdomain [127.0.0.1]) by smtp.mail.uu.edu (Postfix) with SMTP id 68CB45C8453 for <wking@tremily.us>; Mon, 10 Oct 2011 12:50:46 -0400 (EDT)
    Received: from smtp.home.net (smtp.home.net [123.456.123.456]) by smtp.mail.uu.edu (Postfix) with ESMTP id 5BA225C83EF for <wking@tremily.us>; Mon, 09 Oct 2011 11:50:46 -0400 (EDT)
    Message-ID: <hgi.jlk@home.net>
    Subject: need help for the first homework
    <BLANKLINE>
    The answer is 42.
    --===============...==--
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP digital signature
    Content-Type: application/pgp-signature; name="signature.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP SIGNATURE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP SIGNATURE-----
    <BLANKLINE>
    --===============...==--

    Response to a missing subject:

    >>> del message['Subject']
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/signed; protocol="application/pgp-signature"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: Bilbo Baggins <bb@shire.org>
    Subject: no subject in <hgi.jlk@home.net>
    <BLANKLINE>
    --===============...==
    Content-Type: multipart/mixed; boundary="===============...=="
    MIME-Version: 1.0
    <BLANKLINE>
    --===============...==
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    <BLANKLINE>
    Billy,
    <BLANKLINE>
    We received an email message from you without a subject.
    <BLANKLINE>
    Yours,
    phys-101 robot
    <BLANKLINE>
    --===============...==
    Content-Type: message/rfc822
    MIME-Version: 1.0
    <BLANKLINE>
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    From: Billy B <bb@greyhavens.net>
    To: phys101 <phys101@tower.edu>
    Return-Path: <bb@greyhavens.net>
    Received: from smtp.mail.uu.edu (localhost.localdomain [127.0.0.1]) by smtp.mail.uu.edu (Postfix) with SMTP id 68CB45C8453 for <wking@tremily.us>; Mon, 10 Oct 2011 12:50:46 -0400 (EDT)
    Received: from smtp.home.net (smtp.home.net [123.456.123.456]) by smtp.mail.uu.edu (Postfix) with ESMTP id 5BA225C83EF for <wking@tremily.us>; Mon, 09 Oct 2011 11:50:46 -0400 (EDT)
    Message-ID: <hgi.jlk@home.net>
    <BLANKLINE>
    The answer is 42.
    --===============...==--
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP digital signature
    Content-Type: application/pgp-signature; name="signature.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP SIGNATURE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP SIGNATURE-----
    <BLANKLINE>
    --===============...==--

    Response to an insecure message from a person with a PGP key:

    >>> student = course.course.person(email='bb@greyhavens.net')
    >>> student.pgp_key = '4332B6E3'
    >>> del message['Subject']
    >>> process(message)  # doctest: +REPORT_UDIFF, +ELLIPSIS
    respond with:
    Content-Type: multipart/encrypted; protocol="application/pgp-encrypted"; micalg="pgp-sha1"; boundary="===============...=="
    MIME-Version: 1.0
    Content-Disposition: inline
    Date: ...
    From: Robot101 <phys101@tower.edu>
    Reply-to: Robot101 <phys101@tower.edu>
    To: Bilbo Baggins <bb@shire.org>
    Subject: unsigned message <hgi.jlk@home.net>
    <BLANKLINE>
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Type: application/pgp-encrypted; charset="us-ascii"
    <BLANKLINE>
    Version: 1
    <BLANKLINE>
    --===============...==
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Description: OpenPGP encrypted message
    Content-Type: application/octet-stream; name="encrypted.asc"; charset="us-ascii"
    <BLANKLINE>
    -----BEGIN PGP MESSAGE-----
    Version: GnuPG v2.0.19 (GNU/Linux)
    <BLANKLINE>
    ...
    -----END PGP MESSAGE-----
    <BLANKLINE>
    --===============...==--

    >>> course.cleanup()
    """
    if stream is None:
        stream = _sys.stdin
    for original,message,person,subject,target in _load_messages(
        course=course, stream=stream, mailbox=mailbox, input_=input_,
        output=output, dry_run=dry_run,
        continue_after_invalid_message=continue_after_invalid_message,
        trust_email_infrastructure=trust_email_infrastructure,
        respond=respond):
        try:
            handler = _get_handler(handlers=handlers, target=target)
            _LOG.debug('handling {}'.format(target))
            handler(
                basedir=basedir, course=course, message=message,
                person=person, subject=subject,
                max_late=max_late,
                trust_email_infrastructure=trust_email_infrastructure,
                dry_run=dry_run)
        except _InvalidMessage as error:
            # Attach whatever context the handler did not already set,
            # so the error response can be addressed and described.
            error.course = course
            error.message = original
            for attribute,value in [('person', person),
                                    ('subject', subject),
                                    ('target', target)]:
                if (value is not None and
                    getattr(error, attribute, None) is None):
                    setattr(error, attribute, value)
            # Logger.warn() is a deprecated alias that newer Python
            # releases no longer provide; use warning().
            _LOG.warning('invalid message {}'.format(error.message_id()))
            if not continue_after_invalid_message:
                raise
            _LOG.warning('{}'.format(error))
            if respond:
                response = _get_error_response(error)
                # Same guard as in _load_messages: some errors have no
                # response to send.
                if response is not None:
                    respond(response)
        except _Response as response:
            # Handlers raise a Response to hand a reply back to us.
            if respond:
                msg = response.message
                if not response.complete:
                    # The handler supplied only a body (and a Subject
                    # header); wrap it into a full email from the
                    # course robot to the sender.
                    author = course.robot
                    if isinstance(msg, _MIMEText):
                        # Manipulate body (based on pgp_mime.append_text)
                        original_encoding = msg.get_charset().input_charset
                        original_payload = str(
                            msg.get_payload(decode=True), original_encoding)
                        new_payload = (
                            '{},\n\n'
                            '{}\n\n'
                            'Yours,\n'
                            '{}\n').format(
                            person.alias(), original_payload, author.alias())
                        new_encoding = _pgp_mime.guess_encoding(new_payload)
                        if msg.get('content-transfer-encoding', None):
                            # clear CTE so set_payload will set it properly
                            del msg['content-transfer-encoding']
                        msg.set_payload(new_payload, new_encoding)
                    # Move the Subject from the body part to the
                    # constructed envelope.  A separate local name
                    # avoids clobbering the loop's ``subject``.
                    response_subject = msg['Subject']
                    assert response_subject is not None, msg
                    del msg['Subject']
                    msg = _construct_email(
                        author=author, targets=[person],
                        subject=response_subject, message=msg)
                respond(msg)

707
def _load_messages(course, stream, mailbox=None, input_=None, output=None,
                   continue_after_invalid_message=False,
                   trust_email_infrastructure=False, respond=None,
                   dry_run=False):
    """Yield the parsed form of each incoming message, oldest first.

    Messages come from ``stream`` (a single message, when ``mailbox``
    is ``None``) or from the ``'mbox'`` / ``'maildir'`` mailbox at
    ``input_``.  They are sorted with ``_get_message_time`` and run
    through ``_parse_message``; each successful result is yielded.

    A message that fails to parse raises its ``InvalidMessage`` unless
    ``continue_after_invalid_message`` is true, in which case it is
    logged, optionally answered through ``respond``, and skipped.

    When ``output`` is given and ``dry_run`` is ``False``, each parsed
    message is added to the output mailbox and removed from the input
    mailbox before it is yielded.

    Raises ``ValueError`` for an unknown ``mailbox`` type.
    """
    mbox = ombox = None
    if mailbox is None:
        _LOG.debug('loading message from {}'.format(stream))
        messages = [(None,_message_from_file(stream))]
        if output is not None:
            ombox = _mailbox.Maildir(output, factory=None, create=True)
    elif mailbox == 'mbox':
        mbox = _mailbox.mbox(input_, factory=None, create=False)
        messages = mbox.items()
        if output is not None:
            ombox = _mailbox.mbox(output, factory=None, create=True)
    elif mailbox == 'maildir':
        mbox = _mailbox.Maildir(input_, factory=None, create=False)
        messages = []
        for key,msg in mbox.items():
            subpath = mbox._lookup(key)
            if subpath.endswith('.gitignore'):
                _LOG.debug('skipping non-message {}'.format(subpath))
                continue
            messages.append((key, msg))
        if output is not None:
            ombox = _mailbox.Maildir(output, factory=None, create=True)
    else:
        raise ValueError(mailbox)
    try:
        messages.sort(key=_get_message_time)
        for key,msg in messages:
            try:
                ret = _parse_message(
                    course=course, message=msg,
                    trust_email_infrastructure=trust_email_infrastructure)
            except _InvalidMessage as error:
                error.message = msg
                # Logger.warn() is a deprecated alias that newer Python
                # releases no longer provide; use warning().
                _LOG.warning('invalid message {}'.format(error.message_id()))
                if not continue_after_invalid_message:
                    raise
                _LOG.warning('{}'.format(error))
                if respond:
                    response = _get_error_response(error)
                    if response is not None:
                        respond(response)
                continue
            if ombox is not None and dry_run is False:
                # move message from input mailbox to output mailbox
                ombox.add(msg)
                if mbox is not None:
                    del mbox[key]
            yield ret
    finally:
        # Single-file mailboxes (mbox) only record deletions in memory
        # until flushed, so close (which flushes) both mailboxes even
        # when the consumer stops early or an error propagates.
        for box in (ombox, mbox):
            if box is not None:
                box.close()

759
def _parse_message(course, message, trust_email_infrastructure=False):
    """Identify the sender, verify the signature, and extract the target.

    Return ``(original, message, person, subject, target)`` on
    successful parsing, where ``original`` is the message as received
    and ``message`` is the verified message when the sender has a PGP
    key and verification succeeded (otherwise the same object as
    ``original``).

    Raise ``InvalidMessage`` (or a subclass) on failure.  Before it is
    re-raised, the error is annotated with ``course``, the original
    ``message``, and whichever of ``person``, ``subject``, and
    ``target`` were determined before the failure and are not already
    set on the error.

    With ``trust_email_infrastructure`` set, an unsigned message from
    a person with a PGP key is logged as a warning instead of being
    rejected.
    """
    original = message
    person = subject = target = None
    try:
        person = _get_message_person(course=course, message=message)
        if person.pgp_key:
            _LOG.debug('verify message is from {}'.format(person))
            try:
                message = _get_verified_message(message, person.pgp_key)
            except _UnsignedMessage as error:
                if trust_email_infrastructure:
                    # Logger.warn() is a deprecated alias that newer
                    # Python releases no longer provide.
                    _LOG.warning('{}'.format(error))
                else:
                    raise
        subject = _get_message_subject(message=message)
        target = _get_message_target(subject=subject)
    except _InvalidMessage as error:
        error.course = course
        error.message = original
        for attribute,value in [('person', person),
                                ('subject', subject),
                                ('target', target)]:
            if (value is not None and
                getattr(error, attribute, None) is None):
                setattr(error, attribute, value)
        raise
    return (original, message, person, subject, target)

792
def _get_message_person(course, message):
    """Return the single person in ``course`` who sent ``message``.

    The sender address is parsed out of the ``Return-Path`` header
    (RFC 822) and looked up with ``course.find_people(email=...)``.

    Raises ``NoReturnPath`` if the header is missing,
    ``UnregisteredAddress`` if nobody matches the address, and
    ``AmbiguousAddress`` if more than one person matches it.
    """
    # function-scope import keeps this block self-contained
    from email.utils import parseaddr as _parseaddr

    sender = message['Return-Path']  # RFC 822
    if sender is None:
        raise NoReturnPath(message)
    # Parse the address instead of blindly slicing off the first and
    # last characters, which mangles values that lack the wrapping
    # '<' and '>' or that carry surrounding whitespace.
    sender = _parseaddr(sender)[1]
    people = list(course.find_people(email=sender))
    if not people:
        raise UnregisteredAddress(message=message, address=sender)
    if len(people) > 1:
        raise AmbiguousAddress(message=message, address=sender, people=people)
    return people[0]
804
def _get_message_subject(message):
    """Return the decoded, normalized ``Subject`` header of ``message``.

    RFC 2047 encoded words are decoded and joined, then the result is
    lower-cased and every ``#`` is removed.  Bytes that cannot be
    decoded with the declared charset are replaced (U+FFFD) and an
    unknown charset falls back to ASCII, so a malformed subject does
    not abort processing with a codec error.

    Raises ``SubjectlessMessage`` if there is no ``Subject`` header.

    >>> from email.header import Header
    >>> from pgp_mime.email import encodedMIMEText
    >>> message = encodedMIMEText('The answer is 42.')
    >>> message['Message-ID'] = 'msg-id'
    >>> _get_message_subject(message=message)
    Traceback (most recent call last):
      ...
    pygrader.mailpipe.SubjectlessMessage: no subject
    >>> del message['Subject']
    >>> subject = Header('unicode part', 'utf-8')
    >>> subject.append('-ascii part', 'ascii')
    >>> message['Subject'] = subject.encode()
    >>> _get_message_subject(message=message)
    'unicode part-ascii part'
    >>> del message['Subject']
    >>> message['Subject'] = 'clean subject'
    >>> _get_message_subject(message=message)
    'clean subject'
    """
    if message['Subject'] is None:
        raise SubjectlessMessage(subject=None, message=message)

    parts = _decode_header(message['Subject'])
    part_strings = []
    for string,encoding in parts:
        if not isinstance(string, str):
            # The header comes from an untrusted sender: tolerate
            # undecodable bytes and bogus charset names instead of
            # letting a codec error escape.
            try:
                string = str(string, encoding or 'ascii', 'replace')
            except LookupError:  # unknown charset name
                string = str(string, 'ascii', 'replace')
        part_strings.append(string)
    subject = ''.join(part_strings)
    _LOG.debug('decoded header {} -> {}'.format(parts[0], subject))
    return subject.lower().replace('#', '')
840
def _get_message_target(subject):
    """Extract the target name from the tag that starts ``subject``.

    The tag is the first group of ``_TAG_REGEXP`` matched at the
    beginning of ``subject``.  If the tag contains colons, only the
    text after the last colon is returned.

    Raises ``_InvalidSubjectMessage`` when there is no tag or the tag
    is empty.

    >>> _get_message_target(subject='no tag')
    Traceback (most recent call last):
      ...
    pygrader.handler.InvalidSubjectMessage: no tag in 'no tag'
    >>> _get_message_target(subject='[] empty tag')
    Traceback (most recent call last):
      ...
    pygrader.handler.InvalidSubjectMessage: empty tag in '[] empty tag'
    >>> _get_message_target(subject='[abc] empty tag')
    'abc'
    >>> _get_message_target(subject='[phys160:abc] empty tag')
    'abc'
    """
    found = _TAG_REGEXP.match(subject)
    if found is None:
        reason = 'no tag in {!r}'.format(subject)
        raise _InvalidSubjectMessage(subject=subject, error=reason)
    tag = found.group(1)
    if tag == '':
        reason = 'empty tag in {!r}'.format(subject)
        raise _InvalidSubjectMessage(subject=subject, error=reason)
    # keep only what follows the last ':' (the whole tag if there is none)
    _, _, target = tag.rpartition(':')
    _LOG.debug('extracted target {} -> {}'.format(subject, target))
    return target
867
def _get_handler(handlers, target):
    """Return ``handlers[target]``.

    A missing ``target`` is reported as ``InvalidHandlerMessage``
    (chained to the original ``KeyError``).
    """
    try:
        return handlers[target]
    except KeyError as missing:
        raise InvalidHandlerMessage(
            target=target, handlers=handlers) from missing
875
def _get_verified_message(message, pgp_key):
    """Verify that ``message`` is signed by ``pgp_key``; return the payload.

    ``pgp_key`` is matched against the end of each signature
    fingerprint reported by ``pgp_mime.verify``.  On success the
    decrypted/verified payload is returned with the non-MIME headers
    of ``message`` copied over and ``authenticated`` set to ``True``.

    Raises ``_UnsignedMessage`` if verification cannot be performed,
    ``WrongSignatureMessage`` if no fingerprint matches ``pgp_key``,
    and ``UnverifiedSignatureMessage`` if the matching signature has
    a non-zero summary.

    >>> from pgp_mime import sign, encodedMIMEText

    The student composes a message...

    >>> message = encodedMIMEText('1.23 joules')

    ... and signs it (with the pgp-mime test key).

    >>> signed = sign(message, signers=['pgp-mime-test'])

    As it is being delivered, the message picks up extra headers.

    >>> signed['Message-ID'] = '<01234567@home.net>'
    >>> signed['Received'] = 'from smtp.mail.uu.edu ...'
    >>> signed['Received'] = 'from smtp.home.net ...'

    We check that the message is signed, and that it is signed by the
    appropriate key.

    >>> signed.authenticated
    Traceback (most recent call last):
      ...
    AttributeError: 'MIMEMultipart' object has no attribute 'authenticated'
    >>> our_message = _get_verified_message(signed, pgp_key='4332B6E3')
    >>> print(our_message.as_string())  # doctest: +REPORT_UDIFF
    Content-Type: text/plain; charset="us-ascii"
    MIME-Version: 1.0
    Content-Transfer-Encoding: 7bit
    Content-Disposition: inline
    Message-ID: <01234567@home.net>
    Received: from smtp.mail.uu.edu ...
    Received: from smtp.home.net ...
    <BLANKLINE>
    1.23 joules
    >>> our_message.authenticated
    True

    If it is signed, but not by the right key, we get an error.

    >>> print(_get_verified_message(signed, pgp_key='01234567'))
    Traceback (most recent call last):
      ...
    pygrader.mailpipe.WrongSignatureMessage: not signed by the expected key

    If it is not signed at all, we get another error.

    >>> print(_get_verified_message(message, pgp_key='4332B6E3'))
    Traceback (most recent call last):
      ...
    pygrader.handler.UnsignedMessage: unsigned message
    """
    try:
        decrypted,verified,result = _pgp_mime.verify(message=message)
    except (ValueError, AssertionError) as error:
        raise _UnsignedMessage(message=message) from error
    _LOG.debug(str(result, 'utf-8'))
    # NUL bytes are stripped from the verification result before parsing
    tree = _etree.fromstring(result.replace(b'\x00', b''))
    fingerprints = [
        fingerprint
        for signature in tree.findall('.//signature')
        for fingerprint in signature.iterchildren('fpr')]
    # an empty <fpr/> has text None; treat it as matching nothing
    matches = [f for f in fingerprints
               if (f.text or '').endswith(pgp_key)]
    if not matches:
        raise WrongSignatureMessage(
            message=message, pgp_key=pgp_key, fingerprints=fingerprints,
            decrypted=decrypted)
    if not verified:
        # Inspect the signature that owns the matching fingerprint,
        # not whichever signature happened to be iterated last.
        signature = matches[0].getparent()
        sumhex = list(signature.iterchildren('summary'))[0].get('value')
        summary = int(sumhex, 16)
        if summary != 0:
            raise UnverifiedSignatureMessage(
                message=message, signature=signature, decrypted=decrypted)
        # otherwise, we may have an untrusted key.  We'll count that
        # as verified here, because the caller is explicitly looking
        # for signatures by this fingerprint.
    for k,v in message.items(): # copy over useful headers
        if k.lower() not in ['content-type',
                             'mime-version',
                             'content-disposition',
                             ]:
            decrypted[k] = v
    decrypted.authenticated = True
    return decrypted
965
def _get_error_response(error):
    """Build the email response that explains ``error`` to its sender.

    ``error`` is expected to carry ``course`` and ``message``
    attributes, plus whatever extra attributes its type implies
    (``person``, ``subject``, ``handlers``, ``assignments``, ...).

    Returns the message built by ``_construct_response``, or ``None``
    for ``NoReturnPath`` errors (there is nobody to reply to).

    Raises ``NotImplementedError`` for unrecognized error types, and
    when no recipient can be determined for the response.
    """
    # NOTE(review): the order of the isinstance checks below is
    # preserved from the original; presumably the more specific error
    # classes must be tested before their base classes -- confirm
    # against the exception hierarchy before reordering.
    author = error.course.robot
    target = getattr(error, 'person', None)
    subject = str(error)
    if isinstance(error, _InvalidSubmission):
        subject = 'Received invalid {} submission'.format(
            error.assignment.name)
        text = (
            'We received your submission for {}, but you are not\n'
            'allowed to submit that assignment via email.'
            ).format(error.assignment.name)
    elif isinstance(error, InvalidHandlerMessage):
        targets = sorted(error.handlers.keys())
        if not targets:
            hint = (
                'In fact, there are no available handlers for this\n'
                'course!')
        else:
            hint = (
                'Perhaps you meant to use one of the following:\n'
                '  {}').format('\n  '.join(targets))
        text = (
            'We got an email from you with the following subject:\n'
            '  {!r}\n'
            'which does not match any submittable handler name for\n'
            '{}.\n'
            '{}').format(error.subject, error.course.name, hint)
    elif isinstance(error, SubjectlessMessage):
        subject = 'no subject in {}'.format(error.message['Message-ID'])
        text = 'We received an email message from you without a subject.'
    elif isinstance(error, AmbiguousAddress):
        text = (
            'Multiple people match {} ({})'.format(
                error.address, ', '.join(p.name for p in error.people)))
    elif isinstance(error, UnregisteredAddress):
        # the sender is not a known person, so reply to the raw address
        target = _Person(name=error.address, emails=[error.address])
        text = (
            'Your email address is not registered with pygrader for\n'
            '{}.  If you feel it should be, contact your professor\n'
            'or TA.').format(error.course.name)
    elif isinstance(error, NoReturnPath):
        return
    elif isinstance(error, _InvalidAssignmentSubject):
        if error.assignments:
            hint = (
                'but it matches several assignments:\n'
                '  * {}').format('\n  * '.join(
                    a.name for a in error.assignments))
        else:
            # prefer a submittable example assignment
            assignments = [
                a for a in error.course.assignments if a.submittable]
            # but fall back to any one (the course lives on the error;
            # there is no local ``course`` in this function)
            assignments += error.course.assignments
            hint = (
                'Remember to use the full name for the assignment in the\n'
                'subject.')
            if assignments:  # only give an example if the course has one
                hint += (
                    '  For example:\n'
                    '  {} submission').format(assignments[0].name)
        text = (
            'We got an email from you with the following subject:\n'
            '  {!r}\n{}').format(error.subject, hint)
    elif isinstance(error, _InvalidStudentSubject):
        text = (
            'We got an email from you with the following subject:\n'
            '  {!r}\n'
            'but it matches several students:\n'
            '  * {}').format(
            error.subject, '\n  * '.join(s.name for s in error.students))
    elif isinstance(error, _InvalidSubjectMessage):
        text = (
            'We received an email message from you with an invalid\n'
            'subject.')
    elif isinstance(error, _UnsignedMessage):
        subject = 'unsigned message {}'.format(error.message['Message-ID'])
        text = (
            'We received an email message from you without a valid\n'
            'PGP signature.')
    elif isinstance(error, _InvalidMessage):
        text = subject
    else:
        raise NotImplementedError((type(error), error))
    if target is None:
        # nobody to address the response to
        raise NotImplementedError((type(error), error))
    return _construct_response(
        author=author,
        targets=[target],
        subject=subject,
        text=(
            '{},\n\n'
            '{}\n\n'
            'Yours,\n'
            '{}\n'.format(target.alias(), text, author.alias())),
        original=error.message)
1058
def _get_message_time(key_message):
    "Sort key for mailbox ``(key, message)`` pairs: the message's time."
    (_key, msg) = key_message
    return _message_time(msg)