From: Laurent Bachelier Date: Tue, 9 Oct 2012 19:35:54 +0000 (+0200) Subject: Use unicode as much as possible. X-Git-Tag: 1.11~7 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=5864963f04cb863487078a94b404b7170c202c49;p=irker.git Use unicode as much as possible. This fixes a corner case where, if you had a non-ASCII character in a commit and a filter command, you would end up with a unicode string and irkerhook.py would crash. But it would somehow work if you did not have a filter command. For both cases to work, forcing unicode in do() and then using unicode methods is required. JSON is unicode by default, and this is why the issues happened. Signed-off-by: Eric S. Raymond --- diff --git a/irkerhook.py b/irkerhook.py index 0a87565..755a451 100755 --- a/irkerhook.py +++ b/irkerhook.py @@ -39,7 +39,7 @@ default_channels = "irc://chat.freenode.net/%(project)s,irc://chat.freenode.net/ version = "1.10" -import os, sys, commands, socket, urllib, subprocess +import os, sys, commands, socket, urllib, subprocess, locale from pipes import quote as shellquote try: import simplejson as json # Faster, also makes us Python-2.5-compatible @@ -47,7 +47,7 @@ except ImportError: import json def do(command): - return commands.getstatusoutput(command)[1] + return unicode(commands.getstatusoutput(command)[1], locale.getpreferredencoding() or 'UTF-8') class Commit: def __init__(self, extractor, commit): @@ -61,7 +61,7 @@ class Commit: self.logmsg = None self.url = None self.__dict__.update(extractor.__dict__) - def __str__(self): + def __unicode__(self): "Produce a notification string from this commit." if self.urlprefix.lower() == "none": self.url = "" @@ -401,10 +401,10 @@ def ship(extractor, commit, debug): # purposes the commit text is more important. If it's still too long # there's nothing much can be done other than ship it expecting the IRC # server to truncate. 
- privmsg = str(metadata) + privmsg = unicode(metadata) if len(privmsg) > 510: metadata.files = "" - privmsg = str(metadata) + privmsg = unicode(metadata) # Anti-spamming guard. It's deliberate that we get maxchannels not from # the user-filtered metadata but from the extractor data - means repo