Run update-copyright.py
[package-cache.git] / package_cache / server.py
1 # Copyright (C) 2014 W. Trevor King <wking@tremily.us>
2 #
3 # This file is part of package-cache.
4 #
5 # package-cache is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option) any
8 # later version.
9 #
10 # package-cache is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU General Public License along with
16 # package-cache.  If not, see <http://www.gnu.org/licenses/>.
17
18 import calendar as _calendar
19 import email.utils as _email_utils
20 import mimetypes as _mimetypes
21 import os as _os
22 import urllib.error as _urllib_error
23 import urllib.request as _urllib_request
24
25 from . import __version__
26
27
class InvalidFile (ValueError):
    """Raised when a requested URL does not map to a servable file.

    The offending URL is kept on the ``url`` attribute so callers can
    report it.
    """

    def __init__(self, url):
        message = 'invalid file {!r}'.format(url)
        super().__init__(message)
        self.url = url
32
33
34 class Server (object):
35     def __init__(self, sources, cache):
36         self.sources = sources
37         self.cache = cache
38         self.opener = _urllib_request.build_opener()
39         self.opener.addheaders = [
40             ('User-agent', 'Package-cache/{}'.format(__version__)),
41             ]
42
43     def __call__(self, environ, start_response):
44         try:
45             return self._serve_request(
46                 environ=environ, start_response=start_response)
47         except InvalidFile:
48             start_response('404 Not Found', [])
49         except _urllib_error.HTTPError as e:
50             print('{} {}'.format(e.code, e.reason))
51             start_response('{} {}'.format(e.code, e.reason), [])
52         return [b'']
53
54     def _serve_request(self, environ, start_response):
55         method = environ['REQUEST_METHOD']
56         url = environ.get('PATH_INFO', None)
57         if url is None:
58             raise InvalidFile(url=url)
59         cache_path = self._get_cache_path(url=url)
60         if not _os.path.exists(path=cache_path):
61             self._get_file_from_sources(url=url, path=cache_path)
62         if not _os.path.isfile(path=cache_path):
63             raise InvalidFile(url=url)
64         return self._serve_file(
65             path=cache_path, environ=environ, start_response=start_response)
66
67     def _get_cache_path(self, url):
68         relative_path = url.lstrip('/').replace('/', _os.path.sep)
69         cache_path = _os.path.abspath(_os.path.join(self.cache, relative_path))
70         check_relative_path = _os.path.relpath(
71             path=cache_path, start=self.cache)
72         if check_relative_path.startswith(_os.pardir + _os.path.sep):
73             raise InvalidFile(url=url)
74         return cache_path
75
76     def _get_file_from_sources(self, url, path):
77         dirname = _os.path.dirname(path)
78         if not _os.path.isdir(dirname):
79             _os.makedirs(dirname, exist_ok=True)
80         for i, source in enumerate(self.sources):
81             source_url = source.rstrip('/') + url
82             try:
83                 self._get_file(url=source_url, path=path)
84             except _urllib_error.HTTPError:
85                 if i == len(self.sources) - 1:
86                     raise
87             else:
88                 return
89
90     def _get_file(self, url, path):
91         with self.opener.open(url) as response:
92             last_modified = response.getheader('Last-Modified', None)
93             content_length = int(response.getheader('Content-Length'))
94             with open(path, 'wb') as f:
95                 block_size = 8192
96                 while True:
97                     data = response.read(block_size)
98                     f.write(data)
99                     if len(data) < block_size:
100                         break
101         if last_modified:
102             mtime = _calendar.timegm(_email_utils.parsedate(last_modified))
103             _os.utime(path=path, times=(mtime, mtime))
104
105     def _serve_file(self, path, environ, start_response):
106         headers = {
107             'Content-Length': self._get_content_length(path=path),
108             'Content-Type': self._get_content_type(path=path),
109             'Last-Modified': self._get_last_modified(path=path),
110             }
111         f = open(path, 'rb')
112         if 'wsgi.file_wrapper' in environ:
113             file_iterator = environ['wsgi.file_wrapper'](f)
114         else:
115             file_iterator = iter(lambda: f.read(block_size), '')
116         start_response('200 OK', list(headers.items()))
117         return file_iterator
118
119     def _get_content_length(self, path):
120         """Content-Length value per RFC 2616
121
122         Content-Length:
123           https://tools.ietf.org/html/rfc2616#section-14.13
124         """
125         return str(_os.path.getsize(path))
126
127     def _get_content_type(self, path):
128         """Content-Type value per RFC 2616
129
130         Content-Type:
131           https://tools.ietf.org/html/rfc2616#section-14.17
132         Media types:
133           https://tools.ietf.org/html/rfc2616#section-3.7
134         """
135         mimetype, charset = _mimetypes.guess_type(url=path)
136         if charset:
137             return '{}; charset={}'.format(mimetype, charset)
138         else:
139             return mimetype
140
141     def _get_last_modified(self, path):
142         """Last-Modified value per RFC 2616
143
144         Last-Modified:
145           https://tools.ietf.org/html/rfc2616#section-14.29
146         Date formats:
147           https://tools.ietf.org/html/rfc2616#section-3.3.1
148           https://tools.ietf.org/html/rfc1123#page-55
149           https://tools.ietf.org/html/rfc822#section-5
150         """
151         mtime = _os.path.getmtime(path)
152         return _email_utils.formatdate(
153             timeval=mtime, localtime=False, usegmt=True)