From 22ae96b272be1e0337859286768833a0b073ea75 Mon Sep 17 00:00:00 2001
From: "W. Trevor King"
Date: Thu, 6 Nov 2014 23:32:03 -0800
Subject: [PATCH] ssoma-mda: Handle Subject:s that aren't RFC-2047-encoded too

If the string is not RFC-2047 encoded, the charset from decode_header
is None:

  >>> import email.header
  >>> email.header.decode_header('hello')
  [('hello', None)]

so str(decoded, charset) will fail with:

  TypeError: str() argument 2 must be str, not None

Avoid that by checking charset before attempting to decode with
charset. Since that's a bit awkward, pull it out into its own
_decode_header function.

When a header mixes plain text with encoded words, decode_header
returns the plain chunks as bytes with a None charset:

  >>> email.header.decode_header('Re: =?ISO-8859-1?Q?J=F8rn?=')
  [(b'Re: ', None), (b'J\xf8rn', 'iso-8859-1')]

so joining the chunks would fail with:

  TypeError: sequence item 0: expected str instance, bytes found

Decode those chunks as ASCII so that only str is ever joined.

The Simonsen example is from RFC 2047 [1].

[1]: http://tools.ietf.org/html/rfc2047#section-8
---
 ssoma-mda | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/ssoma-mda b/ssoma-mda
index abae180..f02b1b1 100755
--- a/ssoma-mda
+++ b/ssoma-mda
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 # Copyright (C) 2013, Eric Wong and all contributors
 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
 # Try to keep this small as it may be invoked frequently for each message
@@ -219,14 +220,42 @@ def message_id_path(message_id):
     return _os_path.join(hash[:2], hash[2:])
 
 
+def _decode_header(string):
+    """Wrap email.header.decode_header to assemble a string.
+
+    When a header mixes plain text with RFC 2047 encoded words,
+    decode_header returns the plain chunks as bytes with a None
+    charset; those are decoded as ASCII so the join below only ever
+    sees str.
+
+    >>> _decode_header(string='hello')
+    'hello'
+    >>> _decode_header(string='=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=')
+    'Keld Jørn Simonsen'
+    >>> _decode_header(string='Re: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=')
+    'Re: Keld Jørn Simonsen'
+    """
+    chunks = []
+    for decoded, charset in _email_header.decode_header(string):
+        if charset:
+            decoded = str(decoded, charset)
+        elif isinstance(decoded, bytes):
+            # Unencoded text next to an encoded word arrives as bytes.
+            decoded = str(decoded, 'ascii', 'replace')
+        chunks.append(decoded)
+    return ''.join(chunks)
+
+
 def get_commit_message(message):
-    """Unwrap and decode the message subject for use as a commit message."""
+    r"""Unwrap and decode the message subject for use as a commit message.
+
+    >>> get_commit_message(
+    ...     message={'Subject': '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\t=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='})
+    'If you can read this you understand the example.'
+    """
     commit_message = message.get('Subject', '')
     commit_message = _FOLDING_WHITESPACE_REGEX.sub(' ', commit_message)
-    commit_message = ''.join(
-        str(decoded, charset) for decoded, charset
-        in _email_header.decode_header(commit_message))
-    return commit_message
+    return _decode_header(string=commit_message)
 
 
 def get_author(message):
-- 
2.26.2