From f6fbbca0f653ca7820f08a7afbba8ba59ebbb08f Mon Sep 17 00:00:00 2001 From: Bernhard Reutner-Fischer Date: Wed, 9 Jan 2013 12:32:39 +0100 Subject: [PATCH] irkerhook: Fix handling of non-ascii author name MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernhard Reutner-Fischer --- For non-ascii input, irkerhook.py dies with: remote: Traceback (most recent call last): remote: File "/usr/bin/irkerhook.py", line 484, in <module> remote: ship(extractor, commit, not notify) remote: File "/usr/bin/irkerhook.py", line 391, in ship remote: metadata = extractor.commit_factory(commit) remote: File "/usr/bin/irkerhook.py", line 262, in commit_factory remote: metainfo = do("git log -1 '--pretty=format:%an <%ae>|%s' " + shellquote(commit.commit)) remote: File "/usr/bin/irkerhook.py", line 50, in do remote: return unicode(commands.getstatusoutput(command)[1], locale.getpreferredencoding() or 'UTF-8') remote: UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 8: ordinal not in range(128) e.g. for
this author name: $ git log -n1 --pretty="format:%an" | cat;echo Timo Teräs $ git log -n1 --pretty="format:%an" | od -x;echo 0000000 6954 6f6d 5420 7265 a4c3 0073 0000013 So: $ for i in C POSIX en_US.UTF-8;do echo "# $i";LC_ALL=$i python -c "import locale;print locale.getlocale()[1];print unicode(\"ä\", locale.getlocale()[1] or 'UTF-8').encode(locale.getlocale()[1] or 'UTF-8')";done None ä None ä None ä --- irkerhook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irkerhook.py b/irkerhook.py index 59bd796..96f7d1f 100755 --- a/irkerhook.py +++ b/irkerhook.py @@ -47,7 +47,7 @@ except ImportError: import json def do(command): - return unicode(commands.getstatusoutput(command)[1], locale.getpreferredencoding() or 'UTF-8') + return unicode(commands.getstatusoutput(command)[1], locale.getlocale()[1] or 'UTF-8').encode(locale.getlocale()[1] or 'UTF-8') class Commit: def __init__(self, extractor, commit): -- 2.26.2