From ad60a7dfc25ea519a36ebf2fd8fb70288fd4361a Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sat, 18 Feb 2012 12:31:47 -0500 Subject: [PATCH] Move parse_date to the apachelog.date module. I also consolidated the doc-string example and duplicate unittest cases into a single doctest, so now you need to run nosetests --with-doctest apachelog to run the full suite. --- apachelog/date.py | 50 +++++++++++++++++++++++++++++++++++ apachelog/parser.py | 47 -------------------------------- apachelog/test/test_parser.py | 10 +------ 3 files changed, 51 insertions(+), 56 deletions(-) create mode 100644 apachelog/date.py diff --git a/apachelog/date.py b/apachelog/date.py new file mode 100644 index 0000000..570eb47 --- /dev/null +++ b/apachelog/date.py @@ -0,0 +1,50 @@ +""" +The `parse_date` function is intended as a fast way to convert a log +date into something useful, without incurring a significant date +parsing overhead---good enough for basic stuff but will be a problem +if you need to deal with log from multiple servers in different +timezones. +""" + + +MONTHS = { + 'Jan':'01', + 'Feb':'02', + 'Mar':'03', + 'Apr':'04', + 'May':'05', + 'Jun':'06', + 'Jul':'07', + 'Aug':'08', + 'Sep':'09', + 'Oct':'10', + 'Nov':'11', + 'Dec':'12' + } + + +def parse_date(date): + """Convert a date to a (`timestamp`, `offset`) tuple. + + Takes a date in the format: [05/Dec/2006:10:51:44 +0000] + (including square brackets) and returns a two element + tuple containing first a timestamp of the form + YYYYMMDDHH24IISS e.g. 20061205105144 and second the + timezone offset as is e.g.; + + >>> parse_date('[05/Dec/2006:10:51:44 +0000]') + ('20061205105144', '+0000') + + It does not attempt to adjust the timestamp according + to the timezone---this is your problem. 
+ """ + date = date[1:-1] + elems = [ + date[7:11], + MONTHS[date[3:6]], + date[0:2], + date[12:14], + date[15:17], + date[18:20], + ] + return (''.join(elems),date[21:]) diff --git a/apachelog/parser.py b/apachelog/parser.py index 925b128..6c7e3ae 100644 --- a/apachelog/parser.py +++ b/apachelog/parser.py @@ -76,12 +76,6 @@ On my system, using a loop like; ...was able to parse ~60,000 lines / second. Adding psyco to the mix, up that to ~75,000 lines / second. - -The parse_date function is intended as a fast way to convert a log -date into something useful, without incurring a significant date -parsing overhead - good enough for basic stuff but will be a problem -if you need to deal with log from multiple servers in different -timezones. """ __version__ = "1.1" @@ -345,47 +339,6 @@ class parser: """ return self._names -months = { - 'Jan':'01', - 'Feb':'02', - 'Mar':'03', - 'Apr':'04', - 'May':'05', - 'Jun':'06', - 'Jul':'07', - 'Aug':'08', - 'Sep':'09', - 'Oct':'10', - 'Nov':'11', - 'Dec':'12' - } - -def parse_date(date): - """ - Takes a date in the format: [05/Dec/2006:10:51:44 +0000] - (including square brackets) and returns a two element - tuple containing first a timestamp of the form - YYYYMMDDHH24IISS e.g. 20061205105144 and second the - timezone offset as is e.g.; - - parse_date('[05/Dec/2006:10:51:44 +0000]') - >> ('20061205105144', '+0000') - - It does not attempt to adjust the timestamp according - to the timezone - this is your problem. 
- """ - date = date[1:-1] - elems = [ - date[7:11], - months[date[3:6]], - date[0:2], - date[12:14], - date[15:17], - date[18:20], - ] - return (''.join(elems),date[21:]) - - """ Frequenty used log formats stored here """ diff --git a/apachelog/test/test_parser.py b/apachelog/test/test_parser.py index 2af9896..2faa76a 100644 --- a/apachelog/test/test_parser.py +++ b/apachelog/test/test_parser.py @@ -1,6 +1,6 @@ import unittest -from ..parser import ApacheLogParserError, parser, months, parse_date, formats +from ..parser import ApacheLogParserError, parser, formats class TestApacheLogParser(unittest.TestCase): @@ -140,10 +140,6 @@ class TestApacheLogParser(unittest.TestCase): self.assertEqual(data['%b'],'xyz', '%c') self.assertEqual(data['%c'],'bar', '%c') - def testparsedate(self): - date = '[05/Dec/2006:10:51:44 +0000]' - self.assertEqual(('20061205105144','+0000'),parse_date(date)) - class TestApacheLogParserFriendlyNames(unittest.TestCase): def setUp(self): @@ -282,10 +278,6 @@ class TestApacheLogParserFriendlyNames(unittest.TestCase): self.assertEqual(data['%b'],'xyz', '%c') self.assertEqual(data['%c'],'bar', '%c') - def testparsedate(self): - date = '[05/Dec/2006:10:51:44 +0000]' - self.assertEqual(('20061205105144','+0000'),parse_date(date)) - if __name__ is '__main__': unittest.main() -- 2.26.2