1 import os.path as _os_path
2 import pickle as _pickle
4 import socket as _socket
7 class Resolver (object):
8 """A simple reverse-DNS resolver.
10 Maintains a class-level cache of resolved IPs to avoid repeated
11 lookups on the same IP address.
13 Avoid hanging if we can't resolve a name.
16 >>> if hasattr(_socket, 'setdefaulttimeout'):
17 ... socket.setdefaulttimeout(5) # set 5 second timeout
20 >>> r.IP = {} # clear cache of data from previous tests
21 >>> r.resolve('198.41.0.4')
24 {'198.41.0.4': ('a.root-servers.net', [], ['198.41.0.4'])}
26 If you want to give shorter names to various DNS names, you can
27 add an entry to the class-level ``REGEXPS``. The entry should use
28 your name as the key, and a list of matching regexps as the value.
29 You need to enable this enhanced resolution using the ``smart``
32 >>> r.resolve('66.249.68.33')
33 'crawl-66-249-68-33.googlebot.com'
34 >>> r = Resolver(smart=True)
35 >>> r.resolve('66.249.68.34')
41 'feedburner': [_re.compile('.*rate-limited-proxy-.*.google.com.*')],
42 'yahoo': [_re.compile('.*crawl.yahoo.*')], # slurp
47 'msnbot', # a.k.a: bingbot
50 REGEXPS[bot] = [_re.compile('.*{}.*'.format(bot))]
52 _cache_file = _os_path.expanduser(
53 _os_path.join('~', '.apachelog-resolver.cache'))
57 def __init__(self, smart=False):
63 if not self._cache_loaded:
64 self._cache_loaded = True
66 with open(self._cache_file, 'rb') as f:
67 self.IP = _pickle.load(f)
68 self._cache_dirty = False
76 self.load_cache() # avoid clobbering unloaded content
78 with open(self._cache_file, 'wb') as f:
79 _pickle.dump(self.IP, f)
81 def resolve(self, ip):
83 Resolver._cache_dirty = True
85 self.IP[ip] = _socket.gethostbyaddr(ip)
86 except _socket.herror as e:
87 self.IP[ip] = (ip, [], [ip])
88 except _socket.gaierror as e:
89 self.IP[ip] = (ip, [], [ip])
92 self._smart_resolve(ip)
95 def _smart_resolve(self, ip):
98 for name,regexps in self.REGEXPS.items():
99 for regexp in regexps:
100 if regexp.match(self.IP[ip][0]):
101 self.IP[ip] = (name, x[1], x[2])
104 "Return a set of IP addresses used by a smart-resolved name."
106 for ip,values in self.IP.items():
107 if values[0] == name: