server: Don't use a keyword for the response_headers argument to start_response
[package-cache.git] / package_cache / server.py
1 # Copyright
2
3 import email.utils as _email_utils
4 import mimetypes as _mimetypes
5 import os as _os
6 import urllib.parse as _urllib_parse
7
8
class InvalidFile (ValueError):
    """Error raised for a URL that cannot be served as a file.

    The rejected URL is stored on the ``url`` attribute and embedded
    (via ``repr``) in the exception message.
    """

    def __init__(self, url):
        message = 'invalid file {!r}'.format(url)
        super().__init__(message)
        self.url = url
13
14
class Server (object):
    """WSGI application that serves files from a local cache directory.

    Requests are mapped from ``PATH_INFO`` onto a path below ``cache``.
    When the path is not yet present, ``_get_file`` is asked to create
    it; this base class leaves ``_get_file`` unimplemented.

    Attributes:
      sources: stored as given; not otherwise used by this class.
      cache: directory holding the cached files (created on demand).
      block_size: number of bytes read per chunk when the WSGI server
        does not provide ``wsgi.file_wrapper``.
    """

    block_size = 8192

    # Media types reported for files that ``mimetypes`` identifies as
    # encoded (compressed).  The cache serves the stored bytes as-is,
    # so the compressed container is the actual payload type.
    _encoding_types = {
        'gzip': 'application/gzip',
        'bzip2': 'application/x-bzip2',
        'xz': 'application/x-xz',
        }

    def __init__(self, sources, cache):
        self.sources = sources
        self.cache = cache
        # exist_ok=True already makes this a no-op for an existing
        # directory, so no separate isdir() check is needed.
        _os.makedirs(self.cache, exist_ok=True)

    def __call__(self, environ, start_response):
        """WSGI entry point.

        Returns the iterable produced by ``_serve_request``, or an empty
        body with a ``404 Not Found`` status when the request raises
        ``InvalidFile``.
        """
        try:
            return self._serve_request(
                environ=environ, start_response=start_response)
        except InvalidFile:
            start_response('404 Not Found', [])
            # WSGI applications must always return an iterable.
            return []

    def _serve_request(self, environ, start_response):
        """Resolve ``PATH_INFO`` to a cached file and serve it.

        Raises:
          InvalidFile: if ``PATH_INFO`` is missing, points outside the
            cache directory, or does not name a regular file (after
            giving ``_get_file`` a chance to create it).
        """
        url = environ.get('PATH_INFO', None)
        if url is None:
            raise InvalidFile(url=url)
        parsed_url = _urllib_parse.urlparse(url)
        relative_path = parsed_url.path.lstrip('/').replace('/', _os.path.sep)
        cache_path = _os.path.join(self.cache, relative_path)
        # The URL is untrusted input: refuse anything (e.g. '..'
        # segments) that would resolve outside the cache directory.
        if not self._is_inside_cache(path=cache_path):
            raise InvalidFile(url=url)
        if not _os.path.exists(cache_path):
            self._get_file(url=url, path=cache_path)
        if not _os.path.isfile(cache_path):
            raise InvalidFile(url=url)
        return self._serve_file(
            path=cache_path, environ=environ, start_response=start_response)

    def _is_inside_cache(self, path):
        """Return True if ``path`` resolves to the cache directory or below."""
        root = _os.path.abspath(self.cache)
        target = _os.path.abspath(path)
        prefix = root.rstrip(_os.path.sep) + _os.path.sep
        return target == root or target.startswith(prefix)

    def _get_file(self, url, path):
        """Populate ``path`` in the cache for ``url``.

        Not implemented in this base class.
        """
        raise NotImplementedError()

    def _serve_file(self, path, environ, start_response):
        """Start a ``200 OK`` response and return an iterable over ``path``.

        Uses ``environ['wsgi.file_wrapper']`` when the server offers it,
        otherwise streams the file in ``block_size`` chunks.
        """
        headers = [
            ('Content-Length', self._get_content_length(path=path)),
            ('Content-Type', self._get_content_type(path=path)),
            ('Last-Modified', self._get_last_modified(path=path)),
            ]
        f = open(path, 'rb')
        try:
            if 'wsgi.file_wrapper' in environ:
                file_iterator = environ['wsgi.file_wrapper'](f)
            else:
                file_iterator = self._iter_file(f)
            start_response('200 OK', headers)
        except Exception:
            # Nobody will consume the iterator, so release the handle.
            f.close()
            raise
        return file_iterator

    def _iter_file(self, f):
        """Yield ``block_size`` byte chunks from the binary file ``f``.

        The file is closed when iteration finishes or when the
        generator is closed mid-stream.
        """
        try:
            while True:
                chunk = f.read(self.block_size)
                if not chunk:  # b'' marks end-of-file in binary mode
                    break
                yield chunk
        finally:
            f.close()

    def _get_content_length(self, path):
        """Content-Length value per RFC 2616

        Content-Length:
          https://tools.ietf.org/html/rfc2616#section-14.13
        """
        return str(_os.path.getsize(path))

    def _get_content_type(self, path):
        """Content-Type value per RFC 2616

        Always returns a string: ``application/octet-stream`` is used
        when the type cannot be guessed.  The second value returned by
        ``mimetypes.guess_type`` is a content *encoding* (e.g. ``gzip``),
        not a charset; encoded files are reported with the media type of
        their container because the bytes are served unmodified.

        Content-Type:
          https://tools.ietf.org/html/rfc2616#section-14.17
        Media types:
          https://tools.ietf.org/html/rfc2616#section-3.7
        """
        mimetype, encoding = _mimetypes.guess_type(path)
        if encoding:
            return self._encoding_types.get(
                encoding, 'application/octet-stream')
        return mimetype or 'application/octet-stream'

    def _get_last_modified(self, path):
        """Last-Modified value per RFC 2616

        Last-Modified:
          https://tools.ietf.org/html/rfc2616#section-14.29
        Date formats:
          https://tools.ietf.org/html/rfc2616#section-3.3.1
          https://tools.ietf.org/html/rfc1123#page-55
          https://tools.ietf.org/html/rfc822#section-5
        """
        mtime = _os.path.getmtime(path)
        return _email_utils.formatdate(
            timeval=mtime, localtime=False, usegmt=True)