From e29c46a94edfb6f3ca7ef61cb2d200c359bd7755 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 6 Mar 2014 20:21:21 -0800 Subject: [PATCH] irkerd: Add Python-3-compatible string handling This implements the necessary changes to work around the (str, unicode) -> (bytes, str) transition. We decode the bytes as soon as possible after receiving them in the Irker*Handler classes. For IRC-side connections, we still encode outgoing data right before sending it in IRCServerConnection.ship. We decode incoming IRC-side bytes in IRCServerConnection.consume, after storing them as bytes in the LineBufferedStream IRCServerConnection.buffer. That ensures that we don't try and decode partial code points which are split across two socket messages. --- irkerd | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/irkerd b/irkerd index 3b30a20..34977ff 100755 --- a/irkerd +++ b/irkerd @@ -19,6 +19,7 @@ Requires Python 2.7, or: * 2.6 with the argparse package installed. """ +from __future__ import unicode_literals from __future__ import with_statement # These things might need tuning @@ -70,6 +71,11 @@ LOG = logging.getLogger(__name__) LOG.setLevel(logging.ERROR) LOG_LEVELS = ['critical', 'error', 'warning', 'info', 'debug'] +try: # Python 2 + UNICODE_TYPE = unicode +except NameError: # Python 3 + UNICODE_TYPE = str + # Sketch of implementation: # @@ -188,7 +194,7 @@ class LineBufferedStream(): _crlf_re = re.compile(b'\r?\n') def __init__(self): - self.buffer = '' + self.buffer = b'' def append(self, newbytes): self.buffer += newbytes @@ -270,6 +276,8 @@ class IRCServerConnection(): self.buffer.append(incoming) for line in self.buffer: + if not isinstance(line, UNICODE_TYPE): + line = UNICODE_TYPE(line, 'utf-8') LOG.debug("FROM: %s" % line) if not line: @@ -574,7 +582,7 @@ class Connection: LOG.warning(( "irclib rejected a message to %s on %s " "because: %s") % ( - channel, self.target, str(err))) + channel, self.target, UNICODE_TYPE(err))) LOG.debug(err.format_exc()) time.sleep(ANTI_FLOOD_DELAY) self.last_xmit = self.channels_joined[channel] = time.time() @@ -802,10 +810,10 @@ class Irker: "malformed request - 'to' or 'privmsg' missing: %r" % request) channels = request['to'] message = request['privmsg'] - if not isinstance(channels, (list, basestring)): + if not isinstance(channels, (list, UNICODE_TYPE)): raise InvalidRequest( "malformed request - unexpected channel type: %r" % channels) - if not isinstance(message, basestring): + if not isinstance(message, UNICODE_TYPE): raise InvalidRequest( "malformed request - unexpected message type: %r" % message) if not isinstance(channels, list): @@ -813,14 +821,14 @@ class Irker: targets = [] for url in channels: try: - if not isinstance(url, basestring): + if not isinstance(url, UNICODE_TYPE): raise InvalidRequest( "malformed request - URL has unexpected type: %r" % url) target = Target(url) target.validate() except InvalidRequest as e: - LOG.error(str(e)) + LOG.error(UNICODE_TYPE(e)) else: targets.append(target) return (targets, message) @@ -857,7 +865,7 @@ class Irker: key=lambda name: self.servers[name].last_xmit()) del self.servers[oldest] except InvalidRequest as e: - LOG.error(str(e)) + LOG.error(UNICODE_TYPE(e)) except ValueError: self.logerr("can't recognize JSON on input: %r" % line) except RuntimeError: @@ -869,13 +877,17 @@ class IrkerTCPHandler(socketserver.StreamRequestHandler): line = self.rfile.readline() if not line: break - irker.handle(line.strip()) + if not isinstance(line, UNICODE_TYPE): + line = UNICODE_TYPE(line, 'utf-8') + irker.handle(line=line.strip()) class IrkerUDPHandler(socketserver.BaseRequestHandler): def handle(self): - data = self.request[0].strip() + line = self.request[0].strip() #socket = self.request[1] - irker.handle(data) + if not isinstance(line, UNICODE_TYPE): + line = UNICODE_TYPE(line, 'utf-8') + irker.handle(line=line.strip()) if __name__ == '__main__': -- 2.26.2