From: W. Trevor King
Date: Fri, 7 Nov 2014 07:32:03 +0000 (-0800)
Subject: ssoma-mda: Handle Subject:s that aren't RFC-2047-encoded too
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=22ae96b272be1e0337859286768833a0b073ea75;p=ssoma-mda.git

ssoma-mda: Handle Subject:s that aren't RFC-2047-encoded too

If the string is not RFC-2047 encoded, the charset from decode_header
is None:

  >>> import email.header
  >>> email.header.decode_header('hello')
  [('hello', None)]

so str(decoded, charset) will fail with:

  TypeError: str() argument 2 must be str, not None

Avoid that by checking charset before attempting to decode with
charset. Since that's a bit awkward, pull it out into its own
_decode_header function.

The Simonsen example is from RFC 2047 [1].

[1]: http://tools.ietf.org/html/rfc2047#section-8
---

diff --git a/ssoma-mda b/ssoma-mda
index abae180..f02b1b1 100755
--- a/ssoma-mda
+++ b/ssoma-mda
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 # Copyright (C) 2013, Eric Wong and all contributors
 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
 # Try to keep this small as it may be invoked frequently for each message
@@ -219,14 +220,32 @@ def message_id_path(message_id):
     return _os_path.join(hash[:2], hash[2:])
 
 
+def _decode_header(string):
+    """Wrap email.header.decode_header to assemble a string.
+
+    >>> _decode_header(string='hello')
+    'hello'
+    >>> _decode_header(string='=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=')
+    'Keld Jørn Simonsen'
+    """
+    chunks = []
+    for decoded, charset in _email_header.decode_header(string):
+        if charset:
+            decoded = str(decoded, charset)
+        chunks.append(decoded)
+    return ''.join(chunks)
+
+
 def get_commit_message(message):
-    """Unwrap and decode the message subject for use as a commit message."""
+    r"""Unwrap and decode the message subject for use as a commit message.
+
+    >>> get_commit_message(
+    ...     message={'Subject': '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\t=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='})
+    'If you can read this you understand the example.'
+    """
     commit_message = message.get('Subject', '')
     commit_message = _FOLDING_WHITESPACE_REGEX.sub(' ', commit_message)
-    commit_message = ''.join(
-        str(decoded, charset) for decoded, charset
-        in _email_header.decode_header(commit_message))
-    return commit_message
+    return _decode_header(string=commit_message)
 
 
 def get_author(message):