# Reconstructed from the patch "Add apachelog.resolve.Resolver for
# reverse-DNS." (W. Trevor King, 2012-02-18), which creates
# apachelog/resolve.py.
import re as _re
import socket as _socket


class Resolver (object):
    """A simple reverse-DNS resolver.

    Maintains a class-level cache of resolved IPs to avoid repeated
    lookups on the same IP address.

    >>> r = Resolver()
    >>> r.resolve('198.41.0.4')
    'a.root-servers.net'
    >>> r.IP
    {'198.41.0.4': ('a.root-servers.net', [], ['198.41.0.4'])}

    If you want to give shorter names to various DNS names, you can
    add an entry to the class-level ``REGEXPS``.  The entry should use
    your name as the key, and a list of matching regexps as the value.
    You need to enable this enhanced resolution using the ``smart``
    argument.

    >>> r.resolve('66.249.68.33')
    'crawl-66-249-68-33.googlebot.com'
    >>> r = Resolver(smart=True)
    >>> r.resolve('66.249.68.34')
    'googlebot'

    The cache is shared by every instance.  A smart resolver rewrites
    the cached entry in place, so once an IP has been shortened, plain
    (non-smart) resolvers will also report the short name for it.
    """
    # ip -> (name, aliases, addresses), the tuple layout returned by
    # socket.gethostbyaddr.  Shared by all instances.
    IP = {}

    # IPs whose cache entry needs no (further) smart resolution: either
    # it has already been matched against REGEXPS, or the DNS lookup
    # failed and there is no hostname to shorten.
    _SMART_RESOLVED = set()

    # short name -> list of compiled patterns recognising that name.
    REGEXPS = {
        'feedburner': [
            _re.compile(r'.*rate-limited-proxy-.*\.google\.com.*')],
    }
    # A generator expression (rather than a ``for`` statement) keeps the
    # loop variable out of the class namespace.
    REGEXPS.update(
        (bot, [_re.compile('.*{}.*'.format(bot))])
        for bot in ('googlebot', 'yandex', 'baiduspider', 'msnbot'))

    def __init__(self, smart=False):
        """Create a resolver.

        ``smart`` enables replacing hostnames that match ``REGEXPS``
        with their short names.
        """
        self._smart = smart

    def resolve(self, ip):
        """Return the name cached for ``ip``, looking it up if needed.

        If the reverse lookup fails (``socket.herror`` or
        ``socket.gaierror``), the IP itself is cached and returned as
        the name.  For a smart resolver, a hostname matching one of the
        ``REGEXPS`` patterns is replaced by its short name, even when
        the entry was first cached by a non-smart resolver.
        """
        if ip not in self.IP:
            try:
                self.IP[ip] = _socket.gethostbyaddr(ip)
            except (_socket.herror, _socket.gaierror):
                self.IP[ip] = (ip, [], [ip])
                # No hostname was found, so there is nothing to shorten.
                self._SMART_RESOLVED.add(ip)
            else:
                # Fresh entry (e.g. after the cache was cleared): any
                # earlier smart resolution no longer applies to it.
                self._SMART_RESOLVED.discard(ip)
        if self._smart and ip not in self._SMART_RESOLVED:
            self._smart_resolve(ip)
            self._SMART_RESOLVED.add(ip)
        return self.IP[ip][0]

    def _smart_resolve(self, ip):
        """Replace the cached name for ``ip`` with a ``REGEXPS`` key.

        Patterns are tested against the name currently cached for
        ``ip``; the first short name with a matching pattern wins.
        The entry is left untouched if nothing matches or if this
        resolver is not smart.
        """
        if not self._smart:
            return
        entry = self.IP[ip]
        hostname = entry[0]
        for name, regexps in self.REGEXPS.items():
            if any(regexp.match(hostname) for regexp in regexps):
                self.IP[ip] = (name, entry[1], entry[2])
                # Stop here so later patterns are never tested against
                # the short name we just substituted.
                return

    def ips(self, name):
        "Return a set of IP addresses used by a smart-resolved name."
        return {
            address
            for values in self.IP.values()
            if values[0] == name
            for address in values[2]}