2faa76a9fe07e6126c1bf075d5e638fd7de1cce7
[apachelog.git] / apachelog / test / test_parser.py
1 import unittest
2
3 from ..parser import ApacheLogParserError, parser, formats
4
5
class TestApacheLogParser(unittest.TestCase):
    """Exercise ``parser`` with raw Apache directives as field names.

    Parsed lines are accessed by directive, e.g. ``data['%h']``.
    """

    def setUp(self):
        """Create the log format, expected names/pattern, sample lines and parser."""
        # Expected regex fragment for a double-quoted field whose content may
        # contain backslash-escaped characters (such as \").
        quoted = r'\"([^"\\]*(?:\\.[^"\\]*)*)\"'

        self.format = (r'%h %l %u %t \"%r\" %>s '
                       r'%b \"%{Referer}i\" \"%{User-Agent}i\"')
        self.fields = ['%h', '%l', '%u', '%t', '%r', '%>s', '%b',
                       '%{Referer}i', '%{User-Agent}i']
        self.pattern = (r'^(\S*) (\S*) (\S*) (\[[^\]]+\]) ' + quoted +
                        r' (\S*) (\S*) ' + quoted + ' ' + quoted + '$')

        # line1: plain entry without any escaped characters.
        self.line1 = (r'212.74.15.68 - - [23/Jan/2004:11:36:20 +0000] '
                      r'"GET /images/previous.png HTTP/1.1" 200 2607 '
                      r'"http://peterhi.dyndns.org/bandwidth/index.html" '
                      r'"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) '
                      r'Gecko/20021202"')
        # line2: the request field contains an escaped double quote.
        self.line2 = (r'212.74.15.68 - - [23/Jan/2004:11:36:20 +0000] '
                      r'"GET /images/previous.png=\" HTTP/1.1" 200 2607 '
                      r'"http://peterhi.dyndns.org/bandwidth/index.html" '
                      r'"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) '
                      r'Gecko/20021202"')
        # line3: escaped double quotes inside both the Referer and the
        # User-Agent fields.
        self.line3 = (r'4.224.234.46 - - [20/Jul/2004:13:18:55 -0700] '
                      r'"GET /core/listing/pl_boat_detail.jsp?&units=Feet&checked'
                      r'_boats=1176818&slim=broker&&hosturl=giffordmarine&&ywo='
                      r'giffordmarine& HTTP/1.1" 200 2888 "http://search.yahoo.com/'
                      r'bin/search?p=\"grady%20white%20306%20bimini\"" '
                      r'"\"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; '
                      r'YPC 3.0.3; yplus 4.0.00d)\""')

        self.p = parser(self.format)

    def _assert_fields(self, data, expected, label):
        """Assert each ``(directive, value)`` pair of *expected* against *data*.

        The failure message is ``'<label> <directive>'`` so a failing field
        is identified directly in the test output.
        """
        for name, value in expected:
            self.assertEqual(data[name], value, msg='%s %s' % (label, name))

    def testpattern(self):
        """The parser exposes the expected regular expression."""
        self.assertEqual(self.pattern, self.p.pattern())

    def testnames(self):
        """The parser reports the directives in format order."""
        self.assertEqual(self.fields, self.p.names())

    def testline1(self):
        """A plain log line is split into the expected fields."""
        data = self.p.parse(self.line1)
        self._assert_fields(data, [
            ('%h', '212.74.15.68'),
            ('%l', '-'),
            ('%u', '-'),
            ('%t', '[23/Jan/2004:11:36:20 +0000]'),
            ('%r', 'GET /images/previous.png HTTP/1.1'),
            ('%>s', '200'),
            ('%b', '2607'),
            ('%{Referer}i',
             'http://peterhi.dyndns.org/bandwidth/index.html'),
            ('%{User-Agent}i',
             'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'),
        ], 'Line 1')

    def testline2(self):
        """An escaped quote in the request is kept verbatim in ``%r``."""
        data = self.p.parse(self.line2)
        self._assert_fields(data, [
            ('%h', '212.74.15.68'),
            ('%l', '-'),
            ('%u', '-'),
            ('%t', '[23/Jan/2004:11:36:20 +0000]'),
            ('%r', r'GET /images/previous.png=\" HTTP/1.1'),
            ('%>s', '200'),
            ('%b', '2607'),
            ('%{Referer}i',
             'http://peterhi.dyndns.org/bandwidth/index.html'),
            ('%{User-Agent}i',
             'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'),
        ], 'Line 2')

    def testline3(self):
        """Escaped quotes in Referer and User-Agent are kept verbatim."""
        data = self.p.parse(self.line3)
        self._assert_fields(data, [
            ('%h', '4.224.234.46'),
            ('%l', '-'),
            ('%u', '-'),
            ('%t', '[20/Jul/2004:13:18:55 -0700]'),
            ('%r',
             r'GET /core/listing/pl_boat_detail.jsp?&units=Feet&checked_boats='
             r'1176818&slim=broker&&hosturl=giffordmarine&&ywo=giffordmarine& '
             r'HTTP/1.1'),
            ('%>s', '200'),
            ('%b', '2888'),
            ('%{Referer}i',
             r'http://search.yahoo.com/bin/search?p=\"grady%20white%20306'
             r'%20bimini\"'),
            ('%{User-Agent}i',
             r'\"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; YPC 3.0.3; '
             r'yplus 4.0.00d)\"'),
        ], 'Line 3')

    def testjunkline(self):
        """A line that does not match the format raises ApacheLogParserError."""
        self.assertRaises(ApacheLogParserError, self.p.parse, 'foobar')

    def testhasquotesaltn(self):
        """A quoted field between two bare fields is parsed without its quotes."""
        p = parser(r'%a \"%b\" %c')
        data = p.parse(r'foo "xyz" bar')
        self.assertEqual(data['%a'], 'foo', '%a')
        # The message used to read '%c', mislabelling a failure of this field.
        self.assertEqual(data['%b'], 'xyz', '%b')
        self.assertEqual(data['%c'], 'bar', '%c')
142
class TestApacheLogParserFriendlyNames(unittest.TestCase):
    """Exercise ``parser`` created with its second argument set to ``True``.

    In this mode the expected field names are friendly identifiers
    (``remote_host``, ``header_Referer``, ...) and parsed values are read as
    attributes, e.g. ``data.remote_host``.
    """

    def setUp(self):
        """Create the log format, expected names/pattern, sample lines and parser."""
        # Expected regex fragment for a double-quoted field whose content may
        # contain backslash-escaped characters (such as \").
        quoted = r'\"([^"\\]*(?:\\.[^"\\]*)*)\"'

        self.format = (r'%h %l %u %t \"%r\" %>s '
                       r'%b \"%{Referer}i\" \"%{User-Agent}i\"')
        self.fields = ['remote_host', 'remote_logname', 'remote_user', 'time',
                       'first_line', 'last_status', 'response_bytes_clf',
                       'header_Referer', 'header_User_Agent']
        self.pattern = (r'^(\S*) (\S*) (\S*) (\[[^\]]+\]) ' + quoted +
                        r' (\S*) (\S*) ' + quoted + ' ' + quoted + '$')

        # line1: plain entry without any escaped characters.
        self.line1 = (r'212.74.15.68 - - [23/Jan/2004:11:36:20 +0000] '
                      r'"GET /images/previous.png HTTP/1.1" 200 2607 '
                      r'"http://peterhi.dyndns.org/bandwidth/index.html" '
                      r'"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) '
                      r'Gecko/20021202"')
        # line2: the request field contains an escaped double quote.
        self.line2 = (r'212.74.15.68 - - [23/Jan/2004:11:36:20 +0000] '
                      r'"GET /images/previous.png=\" HTTP/1.1" 200 2607 '
                      r'"http://peterhi.dyndns.org/bandwidth/index.html" '
                      r'"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) '
                      r'Gecko/20021202"')
        # line3: escaped double quotes inside both the Referer and the
        # User-Agent fields.
        self.line3 = (r'4.224.234.46 - - [20/Jul/2004:13:18:55 -0700] '
                      r'"GET /core/listing/pl_boat_detail.jsp?&units=Feet&checked'
                      r'_boats=1176818&slim=broker&&hosturl=giffordmarine&&ywo='
                      r'giffordmarine& HTTP/1.1" 200 2888 "http://search.yahoo.com/'
                      r'bin/search?p=\"grady%20white%20306%20bimini\"" '
                      r'"\"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; '
                      r'YPC 3.0.3; yplus 4.0.00d)\""')

        self.p = parser(self.format, True)

    def _assert_fields(self, data, expected, label):
        """Assert each ``(attribute, value)`` pair of *expected* against *data*.

        Values are read with ``getattr`` and the failure message is
        ``'<label> <attribute>'`` so a failing field is identified directly.
        """
        for name, value in expected:
            self.assertEqual(getattr(data, name), value,
                             msg='%s %s' % (label, name))

    def testpattern(self):
        """The parser exposes the expected regular expression."""
        self.assertEqual(self.pattern, self.p.pattern())

    def testnames(self):
        """The parser reports the friendly field names in format order."""
        self.assertEqual(self.fields, self.p.names())

    def testline1(self):
        """A plain log line is split into the expected attributes."""
        data = self.p.parse(self.line1)
        self._assert_fields(data, [
            ('remote_host', '212.74.15.68'),
            ('remote_logname', '-'),
            ('remote_user', '-'),
            ('time', '[23/Jan/2004:11:36:20 +0000]'),
            ('first_line', 'GET /images/previous.png HTTP/1.1'),
            ('last_status', '200'),
            ('response_bytes_clf', '2607'),
            ('header_Referer',
             'http://peterhi.dyndns.org/bandwidth/index.html'),
            ('header_User_Agent',
             'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'),
        ], 'Line 1')

    def testline2(self):
        """An escaped quote in the request is kept verbatim in ``first_line``."""
        data = self.p.parse(self.line2)
        self._assert_fields(data, [
            ('remote_host', '212.74.15.68'),
            ('remote_logname', '-'),
            ('remote_user', '-'),
            ('time', '[23/Jan/2004:11:36:20 +0000]'),
            ('first_line', r'GET /images/previous.png=\" HTTP/1.1'),
            ('last_status', '200'),
            ('response_bytes_clf', '2607'),
            ('header_Referer',
             'http://peterhi.dyndns.org/bandwidth/index.html'),
            ('header_User_Agent',
             'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'),
        ], 'Line 2')

    def testline3(self):
        """Escaped quotes in Referer and User-Agent are kept verbatim."""
        data = self.p.parse(self.line3)
        self._assert_fields(data, [
            ('remote_host', '4.224.234.46'),
            ('remote_logname', '-'),
            ('remote_user', '-'),
            ('time', '[20/Jul/2004:13:18:55 -0700]'),
            ('first_line',
             r'GET /core/listing/pl_boat_detail.jsp?&units=Feet&checked_boats='
             r'1176818&slim=broker&&hosturl=giffordmarine&&ywo=giffordmarine& '
             r'HTTP/1.1'),
            ('last_status', '200'),
            ('response_bytes_clf', '2888'),
            ('header_Referer',
             r'http://search.yahoo.com/bin/search?p=\"grady%20white%20306'
             r'%20bimini\"'),
            ('header_User_Agent',
             r'\"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; YPC 3.0.3; '
             r'yplus 4.0.00d)\"'),
        ], 'Line 3')

    def testjunkline(self):
        """A line that does not match the format raises ApacheLogParserError."""
        self.assertRaises(ApacheLogParserError, self.p.parse, 'foobar')

    def testhasquotesaltn(self):
        """A quoted field between two bare fields is parsed without its quotes."""
        # NOTE(review): this parser is built without the friendly-names flag,
        # so the test repeats the one in TestApacheLogParser -- confirm
        # whether a friendly-names variant was intended here.
        p = parser(r'%a \"%b\" %c')
        data = p.parse(r'foo "xyz" bar')
        self.assertEqual(data['%a'], 'foo', '%a')
        # The message used to read '%c', mislabelling a failure of this field.
        self.assertEqual(data['%b'], 'xyz', '%b')
        self.assertEqual(data['%c'], 'bar', '%c')
280
281
# Run the test suite when this file is executed as a script.  Strings must be
# compared with ``==``: ``is`` tests object identity, which only works by
# accident of string interning and triggers a SyntaxWarning on Python 3.8+.
if __name__ == '__main__':
    unittest.main()