From: Bernhard Reutner-Fischer Date: Wed, 9 Jan 2013 11:32:39 +0000 (+0100) Subject: irkerhook: Fix handling of non-ascii author name X-Git-Tag: 1.16~3 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=f6fbbca0f653ca7820f08a7afbba8ba59ebbb08f;p=irker.git irkerhook: Fix handling of non-ascii author name Signed-off-by: Bernhard Reutner-Fischer --- For non-ASCII input, irkerhook.py dies with: remote: Traceback (most recent call last): remote: File "/usr/bin/irkerhook.py", line 484, in <module> remote: ship(extractor, commit, not notify) remote: File "/usr/bin/irkerhook.py", line 391, in ship remote: metadata = extractor.commit_factory(commit) remote: File "/usr/bin/irkerhook.py", line 262, in commit_factory remote: metainfo = do("git log -1 '--pretty=format:%an <%ae>|%s' " + shellquote(commit.commit)) remote: File "/usr/bin/irkerhook.py", line 50, in do remote: return unicode(commands.getstatusoutput(command)[1], locale.getpreferredencoding() or 'UTF-8') remote: UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 8: ordinal not in range(128) e.g. for this author name: $ git log -n1 --pretty="format:%an" | cat;echo Timo Teräs $ git log -n1 --pretty="format:%an" | od -x;echo 0000000 6954 6f6d 5420 7265 a4c3 0073 0000013 So: $ for i in C POSIX en_US.UTF-8;do echo "# $i";LC_ALL=$i python -c "import locale;print locale.getlocale()[1];print unicode(\"ä\", locale.getlocale()[1] or 'UTF-8').encode(locale.getlocale()[1] or 'UTF-8')";done None ä None ä None ä --- diff --git a/irkerhook.py b/irkerhook.py index 59bd796..96f7d1f 100755 --- a/irkerhook.py +++ b/irkerhook.py @@ -47,7 +47,7 @@ except ImportError: import json def do(command): - return unicode(commands.getstatusoutput(command)[1], locale.getpreferredencoding() or 'UTF-8') + return unicode(commands.getstatusoutput(command)[1], locale.getlocale()[1] or 'UTF-8').encode(locale.getlocale()[1] or 'UTF-8') class Commit: def __init__(self, extractor, commit):