Wrap library calls in a mutex for thread safety.
[irker.git] / irkerd
diff --git a/irkerd b/irkerd
index 598017b55801ede85bdbd6c46515ef7649c98feb..b9814c1f21d62125e5cb2ae55cf04ddfa4d4676f 100755 (executable)
--- a/irkerd
+++ b/irkerd
@@ -30,20 +30,25 @@ PORT = 6659
 NAMESTYLE = "irker%03d"                # IRC nick template - must contain '%d'
 XMIT_TTL = (3 * 60 * 60)       # Time to live, seconds from last transmit
 PING_TTL = (15 * 60)           # Time to live, seconds from last PING
+HANDSHAKE_TTL = 60             # Time to live, seconds from nick transmit
+CHANNEL_TTL = (3 * 60 * 60)    # Time to live, seconds from last transmit
 DISCONNECT_TTL = (24 * 60 * 60)        # Time to live, seconds from last connect
 UNSEEN_TTL = 60                        # Time to live, seconds since first request
 CHANNEL_MAX = 18               # Max channels open per socket (default)
-ANTI_FLOOD_DELAY = 0.125       # Anti-flood delay after transmissions, seconds
+ANTI_FLOOD_DELAY = 0.5         # Anti-flood delay after transmissions, seconds
 ANTI_BUZZ_DELAY = 0.09         # Anti-buzz delay after queue-empty check
 
 # No user-serviceable parts below this line
 
+version = "1.8"
+
 # This black magic imports support for green threads (coroutines),
 # then has kinky sex with the import library internals, replacing
-# "threading" with a coroutine-using impostor.  Threads then become
+# "threading" with a coroutine-using imposter.  Threads then become
 # ultra-light-weight and cooperatively scheduled.
 try:
-    import eventlet; eventlet.monkey_patch()
+    import eventlet
+    eventlet.monkey_patch()
     green_threads = True
     # With greenlets we don't worry about thread exhaustion, only the
     # file descriptor limit (typically 1024 on modern Unixes). Thus we
@@ -57,11 +62,13 @@ except ImportError:
     CONNECTION_MAX = 200
     green_threads = False
 
-import sys, json, getopt, urlparse, time, random
+import sys, getopt, urlparse, time, random, socket
 import threading, Queue, SocketServer
 import irc.client, logging
-
-version = "1.2"
+try:
+    import simplejson as json  # Faster, also makes us Python-2.4-compatible
+except ImportError:
+    import json
 
 # Sketch of implementation:
 #
@@ -97,23 +104,24 @@ version = "1.2"
 # DEAF and CHANLIMIT and (obsolete) MAXCHANNELS features.  CHANLIMIT
 # is as described in the Internet RFC draft
 # draft-brocklesby-irc-isupport-03 at <http://www.mirc.com/isupport.html>.
+# The ",isnick" feature is as described in
+# <http://ftp.ics.uci.edu/pub/ietf/uri/draft-mirashi-url-irc-01.txt>.
 
 class Connection:
-    def __init__(self, irkerd, servername, port, nick_base=1):
+    def __init__(self, irkerd, servername, port):
         self.irker = irkerd
         self.servername = servername
         self.port = port
-        self.nick_trial = nick_base
+        self.nick_trial = None
         self.connection = None
-        self.status = "unseen"
+        self.status = None
         self.last_xmit = time.time()
         self.last_ping = time.time()
-        self.channels_joined = []
+        self.channels_joined = {}
         self.channel_limits = {}
         # The consumer thread
         self.queue = Queue.Queue()
-        self.thread = threading.Thread(target=self.dequeue)
-        self.thread.start()
+        self.thread = None
     def nickname(self, n=None):
         "Return a name for the nth server connection."
         if n is None:
@@ -132,85 +140,133 @@ class Connection:
         # Randomness prevents a malicious user or bot from antcipating the
         # next trial name in order to block us from completing the handshake.
         self.nick_trial += random.randint(1, 3)
+        self.last_xmit = time.time()
         self.connection.nick(self.nickname())
+    def handle_disconnect(self):
+        "Server disconnected us for flooding or some other reason."
+        self.connection = None
+        self.status = "disconnected"
+    def handle_kick(self, outof):
+        "We've been kicked: forget the channel and drop its queued messages."
+        self.status = "handshaking"
+        try:
+            del self.channels_joined[outof]
+        except KeyError:
+            self.irker.logerr("kicked by %s from %s that's not joined"
+                              % (self.servername, outof))
+        qcopy = []
+        while not self.queue.empty():
+            (channel, message) = self.queue.get()
+            if channel != outof:
+                qcopy.append((channel, message))
+        for (channel, message) in qcopy:
+            self.queue.put((channel, message))
+        self.status = "ready"
     def enqueue(self, channel, message):
         "Enque a message for transmission."
+        if self.thread is None or not self.thread.is_alive():
+            self.status = "unseen"
+            self.thread = threading.Thread(target=self.dequeue)
+            self.thread.setDaemon(True)
+            self.thread.start()
         self.queue.put((channel, message))
     def dequeue(self):
         "Try to ship pending messages from the queue."
-        while True:
-            # We want to be kind to the IRC servers and not hold unused
-            # sockets open forever, so they have a time-to-live.  The
-            # loop is coded this particular way so that we can drop
-            # the actual server connection when its time-to-live
-            # expires, then reconnect and resume transmission if the
-            # queue fills up again.
-            if not self.connection:
-                self.connection = self.irker.irc.server()
-                self.connection.context = self
-                self.nick_trial = 1
-                self.channels_joined = []
-                # This will throw irc.client.ServerConnectionError on failure
-                try:
-                    self.connection.connect(self.servername,
-                                        self.port,
-                                        nickname=self.nickname(),
-                                        username="irker",
-                                        ircname="irker relaying client")
-                    self.status = "handshaking"
-                    self.irker.debug(1, "XMIT_TTL bump (%s connection) at %s" % (self.servername, time.asctime()))
-                    self.last_xmit = time.time()
-                except irc.client.ServerConnectionError:
-                    self.status = "disconnected"
-            elif self.status == "handshaking":
-                # Don't buzz on the empty-queue test while we're handshaking 
-                time.sleep(ANTI_BUZZ_DELAY)
-            elif self.queue.empty():
-                # Queue is empty, at some point we want to time out
-                # the connection rather than holding a socket open in
-                # the server forever.
-                now = time.time()
-                if now > self.last_xmit + XMIT_TTL \
-                       or now > self.last_ping + PING_TTL:
-                    self.irker.debug(1, "timing out inactive connection to %s at %s" % (self.servername, time.asctime()))
-                    self.connection.context = None
-                    self.connection.close()
-                    self.connection = None
-                    self.status = "disconnected"
-                else:
-                    # Prevent this thread from hogging the CPU by pausing
-                    # for just a little bit after the queue-empty check.
-                    # As long as this is less that the duration of a human
-                    # reflex arc it is highly unlikely any human will ever
-                    # notice.
-                    time.sleep(ANTI_BUZZ_DELAY)
-            elif self.status == "disconnected" \
-                     and time.time() > self.last_xmit + DISCONNECT_TTL:
-                # Queue is nonempty, but the IRC server might be down. Letting
-                # failed connections retain queue space forever would be a
-                # memory leak.  
-                self.status = "expired"
-                break
-            elif self.status == "unseen" \
-                     and time.time() > self.last_xmit + UNSEEN_TTL:
-                # Nasty people could attempt a denial-of-service
-                # attack by flooding us with requests with invalid
-                # servernames. We guard against this by rapidly
-                # expiring connections that have a nonempty queue but
-                # have never had a successful open.
-                self.status = "expired"
-                break
-            elif self.status == "ready":
-                (channel, message) = self.queue.get()
-                if channel not in self.channels_joined:
-                    self.channels_joined.append(channel)
-                    self.connection.join(channel)
-                for segment in message.split("\n"):
-                    self.connection.privmsg(channel, segment)
-                self.last_xmit = time.time()
-                self.irker.debug(1, "XMIT_TTL bump (%s transmission) at %s" % (self.servername, time.asctime()))
-                self.queue.task_done()
-                time.sleep(ANTI_FLOOD_DELAY)
+        try:
+            while True:
+                # We want to be kind to the IRC servers and not hold unused
+                # sockets open forever, so they have a time-to-live.  The
+                # loop is coded this particular way so that we can drop
+                # the actual server connection when its time-to-live
+                # expires, then reconnect and resume transmission if the
+                # queue fills up again.
+                if self.queue.empty():
+                    # Queue is empty, at some point we want to time out
+                    # the connection rather than holding a socket open in
+                    # the server forever.
+                    now = time.time()
+                    xmit_timeout = now > self.last_xmit + XMIT_TTL
+                    ping_timeout = now > self.last_ping + PING_TTL
+                    if (xmit_timeout or ping_timeout) and self.status != "disconnected":
+                        self.irker.debug(1, "timing out connection to %s at %s (ping_timeout=%s, xmit_timeout=%s)" % (self.servername, time.asctime(), ping_timeout, xmit_timeout))
+                        with self.irker.library_lock:
+                            self.connection.context = None
+                            self.connection.quit("transmission timeout")
+                            self.connection.close()
+                            self.connection = None
+                        self.status = "disconnected"
+                    else:
+                        # Prevent this thread from hogging the CPU by pausing
+                        # for just a little bit after the queue-empty check.
+                        # As long as this is less than the duration of a human
+                        # reflex arc it is highly unlikely any human will ever
+                        # notice.
+                        time.sleep(ANTI_BUZZ_DELAY)
+                elif not self.connection:
+                    # Queue is nonempty but server isn't connected.
+                    with self.irker.library_lock:
+                        self.connection = self.irker.irc.server()
+                        self.connection.context = self
+                        # Try to avoid colliding with other instances
+                        self.nick_trial = random.randint(1, 990)
+                        self.channels_joined = {}
+                        try:
+                            # This will throw
+                            # irc.client.ServerConnectionError on failure
+                            self.connection.connect(self.servername,
+                                                self.port,
+                                                nickname=self.nickname(),
+                                                username="irker",
+                                                ircname="irker relaying client")
+                            self.status = "handshaking"
+                            self.irker.debug(1, "XMIT_TTL bump (%s connection) at %s" % (self.servername, time.asctime()))
+                            self.last_xmit = time.time()
+                        except irc.client.ServerConnectionError:
+                            self.status = "disconnected"
+                elif self.status == "handshaking":
+                    if time.time() > self.last_xmit + HANDSHAKE_TTL:
+                        self.status = "expired"
+                        break
+                    else:
+                        # Don't buzz on the empty-queue test while we're
+                        # handshaking
+                        time.sleep(ANTI_BUZZ_DELAY)
+                elif self.status == "disconnected" \
+                         and time.time() > self.last_xmit + DISCONNECT_TTL:
+                    # Queue is nonempty, but the IRC server might be
+                    # down. Letting failed connections retain queue
+                    # space forever would be a memory leak.
+                    self.status = "expired"
+                    break
+                elif self.status == "unseen" \
+                         and time.time() > self.last_xmit + UNSEEN_TTL:
+                    # Nasty people could attempt a denial-of-service
+                    # attack by flooding us with requests with invalid
+                    # servernames. We guard against this by rapidly
+                    # expiring connections that have a nonempty queue but
+                    # have never had a successful open.
+                    self.status = "expired"
+                    break
+                elif self.status == "ready":
+                    with self.irker.library_lock:
+                        (channel, message) = self.queue.get()
+                        if channel not in self.channels_joined:
+                            self.connection.join(channel)
+                            self.irker.debug(1, "joining %s on %s." % (channel, self.servername))
+                        for segment in message.split("\n"):
+                            self.connection.privmsg(channel, segment)
+                            time.sleep(ANTI_FLOOD_DELAY)
+                        self.last_xmit = self.channels_joined[channel] = time.time()
+                        self.irker.debug(1, "XMIT_TTL bump (%s transmission) at %s" % (self.servername, time.asctime()))
+                        self.queue.task_done()
+        except:
+            (exc_type, _exc_value, exc_traceback) = sys.exc_info()
+            self.irker.logerr("exception %s in thread for %s" % \
+                              (exc_type, self.servername))
+            # Re-raise so tracebacks from inside the thread are visible when
+            # debugging; use the Irker's own level, not a __main__ global.
+            if self.irker.debuglevel > 0:
+                raise exc_type, _exc_value, exc_traceback
     def live(self):
         "Should this connection not be scavenged?"
         return self.status != "expired"
@@ -236,8 +292,16 @@ class Target():
         if not ircport:
             ircport = 6667
         self.servername = irchost
-        self.channel = parsed.path.lstrip('/')
-        if self.channel[0] not in "#&+":
+        # IRC channel names are case-insensitive.  If we don't smash
+        # case here we may run into problems later. There was a bug
+        # observed on irc.rizon.net where an irkerd user specified #Channel,
+        # got kicked, and irkerd crashed because the server returned
+        # "#channel" in the notification that our kick handler saw.
+        self.channel = parsed.path.lstrip('/').lower()
+        isnick = self.channel.endswith(",isnick")
+        if isnick:
+            self.channel = self.channel[:-7]
+        if self.channel and not isnick and self.channel[0] not in "#&+":
             self.channel = "#" + self.channel
         self.port = int(ircport)
     def valid(self):
@@ -256,21 +320,42 @@ class Dispatcher:
         self.connections = []
     def dispatch(self, channel, message):
         "Dispatch messages for our server-port combination."
+        # First, check if there is room for another channel
+        # on any of our existing connections.
         connections = [x for x in self.connections if x.live()]
         eligibles = [x for x in connections if x.joined_to(channel)] \
                     or [x for x in connections if x.accepting(channel)]
-        if not eligibles:
-            newconn = Connection(self.irker,
-                                 self.servername,
-                                 self.port,
-                                 len(self.connections)+1)
-            self.connections.append(newconn)
-            eligibles = [newconn]
-        eligibles[0].enqueue(channel, message)
+        if eligibles:
+            eligibles[0].enqueue(channel, message)
+            return
+        # All connections are full up. Look for one old enough to be
+        # scavenged.
+        ancients = []
+        for connection in connections:
+            for (chan, age) in connections.channels_joined.items():
+                if age < time.time() - CHANNEL_TTL:
+                    ancients.append((connection, chan, age))
+        if ancients:
+            ancients.sort(key=lambda x: x[2]) 
+            (found_connection, drop_channel, _drop_age) = ancients[0]
+            found_connection.part(drop_channel, "scavenged by irkerd")
+            del found_connection.channels_joined[drop_channel]
+            #time.sleep(ANTI_FLOOD_DELAY)
+            found_connection.enqueue(channel, message)
+            return
+        # Didn't find any channels with no recent activity
+        newconn = Connection(self.irker,
+                             self.servername,
+                             self.port)
+        self.connections.append(newconn)
+        newconn.enqueue(channel, message)
     def live(self):
         "Does this server-port combination have any live connections?"
         self.connections = [x for x in self.connections if x.live()]
         return len(self.connections) > 0
+    def last_xmit(self):
+        "Return the time of the most recent transmission (0 if no connections)."
+        return max([x.last_xmit for x in self.connections] + [0])
 
 class Irker:
     "Persistent IRC multiplexer."
@@ -284,7 +369,11 @@ class Irker:
         self.irc.add_global_handler("nickcollision", self._handle_badnick)
         self.irc.add_global_handler("unavailresource", self._handle_badnick)
         self.irc.add_global_handler("featurelist", self._handle_features)
-        thread = threading.Thread(target=self.irc.process_forever)
+        self.irc.add_global_handler("disconnect", self._handle_disconnect)
+        self.irc.add_global_handler("kick", self._handle_kick)
+        self.library_lock = threading.Lock()
+        thread = threading.Thread(target=self._process_forever)
+        thread.setDaemon(True)
         self.irc._thread = thread
         thread.start()
         self.servers = {}
@@ -295,6 +384,12 @@ class Irker:
         "Debugging information."
         if self.debuglevel >= level:
             sys.stderr.write("irkerd: %s\n" % errmsg)
+    def _process_forever(self):
+        "IRC library process_forever with mutex; polls with a short timeout."
+        self.debug(1, "process_forever()")
+        while True:
+            with self.library_lock:
+                self.irc.process_once(ANTI_BUZZ_DELAY)
     def _handle_ping(self, connection, _event):
         "PING arrived, bump the last-received time for the connection."
         if connection.context:
@@ -318,7 +413,7 @@ class Irker:
                     m = int(lump[12:])
                     for pref in "#&+":
                         cxt.channel_limits[pref] = m
-                    self.debug(1, "%s maxchannels is %d" \
+                    self.debug(1, "%s maxchannels is %d"
                                % (connection.server, m))
                 elif lump.startswith("CHANLIMIT=#:"):
                     limits = lump[10:].split(",")
@@ -328,30 +423,42 @@ class Irker:
                             limit = int(limit)
                             for c in prefixes:
                                 cxt.channel_limits[c] = limit
-                        self.debug(1, "%s channel limit map is %s" \
+                        self.debug(1, "%s channel limit map is %s"
                                    % (connection.server, cxt.channel_limits))
                     except ValueError:
                         self.logerr("ill-formed CHANLIMIT property")
+    def _handle_disconnect(self, connection, _event):
+        "Server hung up the connection."
+        self.debug(1, "server %s disconnected" % connection.server)
+        connection.close()
+        if connection.context:
+            connection.context.handle_disconnect()
+    def _handle_kick(self, connection, event):
+        "We've been kicked from a channel; tell the owning Connection."
+        self.debug(1, "irker has been kicked from %s on %s" % (event.target(), connection.server))
+        if connection.context:
+            connection.context.handle_kick(event.target())
     def handle(self, line):
         "Perform a JSON relay request."
         try:
             request = json.loads(line.strip())
-            if type(request) != type({}):
-                self.logerr("request in tot a JSON dictionary: %s" % repr(request))
+            if not isinstance(request, dict):
+                self.logerr("request is not a JSON dictionary: %r" % request)
             elif "to" not in request or "privmsg" not in request:
-                self.logerr("malformed reqest - 'to' or 'privmsg' missing: %s" % repr(request))
+                self.logerr("malformed request - 'to' or 'privmsg' missing: %r" % request)
             else:
                 channels = request['to']
                 message = request['privmsg']
-                if type(channels) not in (type([]), type(u"")) \
-                       or type(message) != type(u""):
-                    self.logerr("malformed request - unexpected types: %s" % repr(request))
+                if not isinstance(channels, (list, basestring)):
+                    self.logerr("malformed request - unexpected channel type: %r" % channels)
+                elif not isinstance(message, basestring):
+                    self.logerr("malformed request - unexpected message type: %r" % message)
                 else:
-                    if type(channels) == type(u""):
+                    if not isinstance(channels, list):
                         channels = [channels]
                     for url in channels:
-                        if type(url) != type(u""):
-                            self.logerr("malformed request - unexpected type: %s" % repr(request))
+                        if not isinstance(url, basestring):
+                            self.logerr("malformed request - URL has unexpected type: %r" % url)
                         else:
                             target = Target(url)
                             if not target.valid():
@@ -379,13 +486,17 @@ class Irker:
                             # assumption that message activity is likely
                             # to be clumpy.
                             oldest = None
+                            oldtime = float("inf")
                             if len(self.servers) >= CONNECTION_MAX:
                                 for (name, server) in self.servers.items():
-                                    if not oldest or server.last_xmit < self.servers[oldest].last_xmit:
+                                    if server.last_xmit() < oldtime:
                                         oldest = name
+                                        oldtime = server.last_xmit()
                                 del self.servers[oldest]
         except ValueError:
-            self.logerr("can't recognize JSON on input: %s" % repr(line))
+            self.logerr("can't recognize JSON on input: %r" % line)
+        except RuntimeError:
+            self.logerr("wildly malformed JSON blew the parser stack.")
 
 class IrkerTCPHandler(SocketServer.StreamRequestHandler):
     def handle(self):
@@ -413,14 +524,20 @@ if __name__ == '__main__':
             sys.stdout.write("irkerd version %s\n" % version)
             sys.exit(0)
     irker = Irker(debuglevel=debuglvl)
-    tcpserver = SocketServer.TCPServer((HOST, PORT), IrkerTCPHandler)
-    udpserver = SocketServer.UDPServer((HOST, PORT), IrkerUDPHandler)
-    threading.Thread(target=tcpserver.serve_forever).start()
-    threading.Thread(target=udpserver.serve_forever).start()
-    # Main thread has to stay alive forever for the cooperative
-    # scheduling of the green threads to work.
-    if green_threads:
-        while True:
-            time.sleep(10)
+    irker.debug(1, "irkerd version %s" % version)
+    try:
+        tcpserver = SocketServer.TCPServer((HOST, PORT), IrkerTCPHandler)
+        udpserver = SocketServer.UDPServer((HOST, PORT), IrkerUDPHandler)
+        for server in [tcpserver, udpserver]:
+            server = threading.Thread(target=server.serve_forever)
+            server.setDaemon(True)
+            server.start()
+        try:
+            while True:
+                time.sleep(10)
+        except KeyboardInterrupt:
+            raise SystemExit(1)
+    except socket.error, e:
+        sys.stderr.write("irkerd: server launch failed: %r\n" % e)
 
 # end