Add my Processor framework as apachelog.processor.Processor.
author W. Trevor King <wking@drexel.edu>
Sat, 18 Feb 2012 19:18:51 +0000 (14:18 -0500)
committer W. Trevor King <wking@drexel.edu>
Sat, 18 Feb 2012 19:18:51 +0000 (14:18 -0500)
apachelog/processor/__init__.py [new file with mode: 0644]

diff --git a/apachelog/processor/__init__.py b/apachelog/processor/__init__.py
new file mode 100644 (file)
index 0000000..29f2ba1
--- /dev/null
@@ -0,0 +1,38 @@
+"""Define ``Processor`` classes for aggregating data across log files.
+"""
+
+class Processor (object):
+    """Base class for objects that consume parsed log data.
+
+    Subclasses override ``process`` to act on the parsed data for one
+    log line.  The implementation here does nothing.
+    """
+    def process(self, data):
+        """Handle the parsed ``data`` for a single log line (no-op here)."""
+        pass
+
+
+def process(stream, parser, processors):
+    r"""Process a log with a list of processors.
+
+    For each line read from ``stream``, parse the line using
+    ``parser`` and analyze it with each of the ``Processor``
+    instances in the list ``processors``.
+
+    ``stream`` is iterated directly, so any iterable yielding log
+    lines will do.  ``parser`` must provide a ``parse(line)`` method;
+    whatever it returns is handed unchanged to every processor's
+    ``process`` method.  ``processors`` is iterated once per line, so
+    it must support repeated iteration (e.g. a list).
+
+    >>> import StringIO
+    >>> from apachelog.parser import Parser, FORMATS
+    >>> class PrinthostProcessor (Processor):
+    ...     def __init__(self, name):
+    ...         self.name = name
+    ...     def process(self, data):
+    ...         print('{}: {}'.format(self.name, data['%h']))
+    >>> stream = StringIO.StringIO('\n'.join([
+    ...         '192.168.0.1 - - [18/Feb/2012:10:25:43 -0500] "GET / HTTP/1.1" 200 561 "-" "Mozilla/5.0 (...)"',
+    ...         '192.168.0.2 - - [18/Feb/2012:10:25:58 -0500] "GET / HTTP/1.1" 200 561 "-" "Mozilla/5.0 (...)"',
+    ...         ]))
+    >>> parser = Parser(FORMATS['extended'])
+    >>> processors = [PrinthostProcessor('a'), PrinthostProcessor('b')]
+    >>> process(stream, parser, processors)
+    a: 192.168.0.1
+    b: 192.168.0.1
+    a: 192.168.0.2
+    b: 192.168.0.2
+    """
+    for line in stream:
+        # Parse each line once; every processor receives the same object.
+        data = parser.parse(line)
+        # Processors run in list order for each line before the next
+        # line is read.
+        for processor in processors:
+            processor.process(data)