"""Process Apache (or similarly formatted) log files using ``apachelog``.

Use the options to build a parser and list of processors. Each file
listed on the command line will be parsed by this parser and processed
by each processor. After processing is complete, interesting
information from each processor will be printed to stdout.
"""
11 import socket as _socket
13 from apachelog import __version__
14 from apachelog.file import open as _open
15 from apachelog.parser import FORMATS as _FORMATS
16 from apachelog.parser import Parser as _Parser
17 from apachelog.processor import process as _process
18 from apachelog.processor.bandwidth import (
19 BandwidthProcessor as _BandwidthProcessor)
20 from apachelog.processor.bandwidth import (
21 IPBandwidthProcessor as _IPBandwidthProcessor)
22 from apachelog.processor.set import SetProcessor as _SetProcessor
23 from apachelog.processor.status import StatusProcessor as _StatusProcessor
24 from apachelog.processor.time import LogTimeProcessor as _LogTimeProcessor
25 from apachelog.resolve import Resolver as _Resolver
# Registry of the available processors, keyed by the name used for the
# matching ``--<name>`` command-line switch.  Each name, with ``-``
# replaced by ``_``, also selects the ``display_<name>`` function that
# prints the processor's results.
PROCESSORS = {
    'bandwidth': _BandwidthProcessor,
    'ip-bandwidth': _IPBandwidthProcessor,
    'set': _SetProcessor,
    'status': _StatusProcessor,
}
def display_processor(processor, **kwargs):
    """Print `processor` using the ``display_*`` function registered for it.

    The exact class of `processor` is looked up in ``PROCESSORS``.  The
    name it is registered under, with ``-`` replaced by ``_``, selects
    the module-level function ``display_<name>``, which is called with
    ``processor=processor`` plus every extra keyword argument.

    Returns whatever the selected display function returns, or ``None``
    if the class of `processor` is not registered in ``PROCESSORS``.
    """
    processor_type = type(processor)
    for name, type_ in PROCESSORS.items():
        # Exact-class comparison on purpose: an isinstance() check would
        # also accept subclasses of a registered class.
        if processor_type is not type_:
            continue
        display_name = 'display_{}'.format(name.replace('-', '_'))
        display = globals()[display_name]
        return display(processor=processor, **kwargs)
    return None
def display_bandwidth(stream, processor, args, **kwargs):
    """Write the bandwidth reported by `processor` to `stream`.

    `stream` only needs a ``write()`` method.  `processor` must provide
    ``bandwidth(scale=...)``.  `args` is the parsed command-line
    namespace; only ``args.scale`` is read.  Additional keyword
    arguments are accepted and ignored so that every ``display_*``
    function can be called with the same arguments.
    """
    scale = args.scale
    # NOTE(review): this header is identical to the one written by
    # display_ip_bandwidth; it is kept as-is so the output is unchanged.
    stream.write('# IP bandwidth ({})\n'.format(scale))
    stream.write('{}\n'.format(processor.bandwidth(scale=scale)))
def display_ip_bandwidth(stream, processor, resolver, args):
    """Write the per-IP bandwidth table of `processor` to `stream`.

    One line is written for each of the last ``args.top`` entries
    returned by ``processor.ip_bandwidth(scale=..., sort_by_bandwidth=True)``,
    walking from the end of that sequence towards its start.  Each line
    holds the bandwidth and the entry's name, separated by a tab.  A
    final ``REMAINING`` line reports the total bandwidth minus the
    bandwidth of the printed entries.

    `stream` only needs a ``write()`` method.  `resolver` may be ``None``.
    Otherwise ``processor.resolve()`` is called first, and the addresses
    returned by ``resolver.ips()`` are appended to each line.  `args` is
    the parsed command-line namespace; ``args.top`` and ``args.scale``
    are read.
    """
    top = args.top
    scale = args.scale
    stream.write('# IP bandwidth ({})\n'.format(scale))
    if resolver is not None:
        processor.resolve(resolver=resolver, top=top)
    remaining = processor.bandwidth(scale=scale)
    ranked = processor.ip_bandwidth(scale=scale, sort_by_bandwidth=True)
    # Reverse first, then take exactly `top` entries.  The previous slice
    # [-1:-top:-1] stopped one entry early, and for top == 0 it selected
    # every entry except the first.
    for ip, bw in ranked[::-1][:max(top, 0)]:
        remaining -= bw
        fields = [str(bw), ip]
        if resolver is not None:  # also print the raw IPs
            # NOTE(review): resolver.ips() is assumed to return an
            # iterable of address strings -- confirm against Resolver.
            ips = resolver.ips(ip)
            fields.append(' '.join(sorted(ips)))
        stream.write('\t'.join(fields))
        stream.write('\n')
    stream.write('\t'.join([str(remaining), 'REMAINING']))
    stream.write('\n')
def display_set(stream, processor, **kwargs):
    """Write the value sets collected by `processor` to `stream`.

    ``processor.values`` is read as a mapping from key to a collection
    of values.  Keys are written in sorted order, one per line, and each
    key is followed by its values, sorted and indented with a tab.

    `stream` only needs a ``write()`` method.  Additional keyword
    arguments are accepted and ignored so that every ``display_*``
    function can be called with the same arguments.
    """
    stream.write('# Value sets\n')
    for key, values in sorted(processor.values.items()):
        stream.write('{}\n'.format(key))
        for value in sorted(values):
            stream.write('\t{}\n'.format(value))
def display_status(stream, processor, **kwargs):
    """Write the request/status cross-reference of `processor` to `stream`.

    Two listings are written.  First, for every key of
    ``processor.request`` in sorted order, one line holding the request,
    a tab, and its statuses sorted and joined with ``', '``.  Second,
    for every key of ``processor.status`` in sorted order, the status on
    its own line followed by its requests, sorted and indented with a
    tab.

    `stream` only needs a ``write()`` method.  Additional keyword
    arguments are accepted and ignored so that every ``display_*``
    function can be called with the same arguments.
    """
    stream.write('# Status\n')
    for request, statuses in sorted(processor.request.items()):
        line = '\t'.join([request, ', '.join(sorted(statuses))])
        # Terminate the line explicitly; otherwise consecutive requests
        # and the following status listing run together.
        stream.write(line + '\n')
    for status, requests in sorted(processor.status.items()):
        stream.write('{}\n'.format(status))
        for request in sorted(requests):
            stream.write('\t{}\n'.format(request))
if __name__ == '__main__':
    # Command-line entry point: build the log parser and the requested
    # processors from the options, feed every listed log file through
    # them, then print each processor's results to stdout.
    import argparse
    import sys

    arg_parser = argparse.ArgumentParser(description=__doc__)
    # ArgumentParser(version=...) is not accepted by Python 3's argparse;
    # an explicit 'version' action provides the option on 2.7 and 3.
    arg_parser.add_argument(
        '-v', '--version', action='version', version=__version__)
    arg_parser.add_argument(
        '-f', '--format', default='common',
        help='Log format string, or one of the predefined formats: {}'.format(
            ', '.join(sorted(_FORMATS.keys()))))
    # One boolean switch per registered processor (e.g. --ip-bandwidth).
    for processor in sorted(PROCESSORS.keys()):
        arg_parser.add_argument(
            '--{}'.format(processor), default=False, action='store_const',
            const=True,
            help='Use the {} processor'.format(processor))
    arg_parser.add_argument(
        '-r', '--resolve', default=False, action='store_const', const=True,
        help='Resolve IP addresses for bandwidth measurements')
    arg_parser.add_argument(
        '-t', '--top', default=10, type=int,
        help='Number of IPs to print for ip-bandwidth measurements')
    arg_parser.add_argument(
        '-s', '--scale', default='MB/month',
        choices=sorted(_BandwidthProcessor._scales.keys()),
        help='Scale for the bandwidth processors')
    arg_parser.add_argument(
        '-k', '--key', action='append', help='Add a key to the set processor')
    arg_parser.add_argument(
        'file', nargs='+', help='Path to log file')

    args = arg_parser.parse_args()

    if hasattr(_socket, 'setdefaulttimeout'):
        _socket.setdefaulttimeout(5)  # set 5 second timeout

    # --format is either the name of a predefined format or a literal
    # format string, so fall back to the raw value when it is not a key.
    fmt = _FORMATS.get(args.format, args.format)
    log_parser = _Parser(fmt)

    if args.resolve:
        resolver = _Resolver(smart=True)
    else:
        resolver = None

    processors = []
    log_time_processor = None
    for processor in sorted(PROCESSORS.keys()):
        # argparse stores '--ip-bandwidth' as the attribute 'ip_bandwidth'.
        pattr = processor.replace('-', '_')
        if not getattr(args, pattr):
            continue
        kwargs = {}
        if processor == 'set':
            kwargs['keys'] = args.key
        # Every LogTimeProcessor after the first is handed the first one
        # through 'previous_log_time_processor'.
        if (log_time_processor is not None and
                issubclass(PROCESSORS[processor], _LogTimeProcessor)):
            kwargs['previous_log_time_processor'] = log_time_processor
        p = PROCESSORS[processor](**kwargs)
        if log_time_processor is None and isinstance(p, _LogTimeProcessor):
            log_time_processor = p
        processors.append(p)

    for filename in args.file:
        with _open(filename) as f:
            _process(stream=f, parser=log_parser, processors=processors)

    for index, processor in enumerate(processors):
        if index:
            sys.stdout.write('\n')  # blank line between output blocks
        display_processor(
            stream=sys.stdout, processor=processor, resolver=resolver,
            args=args)