From: W. Trevor King Date: Thu, 24 Jan 2013 19:01:52 +0000 (-0500) Subject: mutt_ldap.py: Add an output-encoding option X-Git-Tag: v0.1^0 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=abcdc67e7bcdd3e0ac7cdc252d6d34c1e80c3f8f;p=mutt-ldap.git mutt_ldap.py: Add an output-encoding option This avoids crashing with: UnicodeEncodeError: 'ascii' codec can't encode characters when mutt_ldap.py's stdout is not connected to a terminal (and therefore doesn't have sys.stdout.encoding set up). $ python2.7 -c "print(u'α')" | cat Traceback (most recent call last): File "<string>", line 1, in <module> UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128) $ python2.7 -c 'import sys; print(sys.stdout.encoding)' UTF-8 $ python2.7 -c 'import sys; print(sys.stdout.encoding)' | cat None $ python3.2 -c "print('α')" | cat α $ python3.2 -c 'import sys; print(sys.stdout.encoding)' UTF-8 $ python3.2 -c 'import sys; print(sys.stdout.encoding)' | cat UTF-8 Both settings should match the value of the Mutt configuration variable $charset [1,2]: On Thu, Jan 24, 2013 at 06:25:24PM +0000, Michael Elkins wrote: > On Thu, Jan 24, 2013 at 12:36:02PM -0500, W. Trevor King wrote: > > I have a script that queries for address completion using the > > query_command setting, but the docs are silent on the encoding for > > the arguments and expected output [1]. ... > > It expects the output to be in the same encoding as $charset. > Mutt performs the RFC2047 encoding automatically. On Thu, Jan 24, 2013 at 07:00:53PM +0000, Michael Elkins wrote: > On Thu, Jan 24, 2013 at 01:42:34PM -0500, W. Trevor King wrote: > > ...Is it also $charset for the argv input? > > Yes. If you don't specify a $charset in your Mutt config, you can probably get away with not specifying charsets for these config options, because Mutt's default charset detection will likely match up with Python's locale.getpreferredencoding(). 
[1]: http://marc.info/?l=mutt-dev&m=135905197022081&w=2 [2]: http://marc.info/?l=mutt-dev&m=135905407723082&w=2 --- diff --git a/mutt_ldap.py b/mutt_ldap.py index f3bb5ba..21a1174 100755 --- a/mutt_ldap.py +++ b/mutt_ldap.py @@ -60,8 +60,10 @@ CONFIG.set('cache', 'path', '~/.mutt-ldap.cache') # cache results here CONFIG.set('cache', 'fields', '') # fields to cache (if empty, setup in the main block) CONFIG.set('cache', 'longevity-days', '14') # TODO: cache results for 14 days by default CONFIG.add_section('system') +# HACK: Python 2.x support, see http://bugs.python.org/issue13329#msg147475 +CONFIG.set('system', 'output-encoding', '') # match .muttrc's $charset # HACK: Python 2.x support, see http://bugs.python.org/issue2128 -CONFIG.set('system', 'argv-encoding', 'utf-8') +CONFIG.set('system', 'argv-encoding', '') CONFIG.read(_os_path.expanduser('~/.mutt-ldap.rc')) @@ -264,8 +266,20 @@ def format_entry(entry): if __name__ == '__main__': + import codecs as _codecs + import locale as _locale import sys + default_encoding = _locale.getpreferredencoding(do_setlocale=True) + for key in ['output-encoding', 'argv-encoding']: + CONFIG.set( + 'system', key, + CONFIG.get('system', key, raw=True) or default_encoding) + + # HACK: convert sys.stdout to Unicode (not needed in Python 3) + output_encoding = CONFIG.get('system', 'output-encoding') + sys.stdout = _codecs.getwriter(output_encoding)(sys.stdout) + # HACK: convert sys.argv to Unicode (not needed in Python 3) argv_encoding = CONFIG.get('system', 'argv-encoding') sys.argv = [unicode(arg, argv_encoding) for arg in sys.argv]