2dfbc1e613c5675ef752b4f0d24a16013120bd8f
[package-cache.git] / package_cache / server.py
1 # Copyright (C) 2014 W. Trevor King <wking@tremily.us>
2 #
3 # This file is part of package-cache.
4 #
5 # package-cache is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option) any
8 # later version.
9 #
10 # package-cache is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU General Public License along with
16 # package-cache.  If not, see <http://www.gnu.org/licenses/>.
17
18 import calendar as _calendar
19 import email.utils as _email_utils
20 import logging as _logging
21 import mimetypes as _mimetypes
22 import os as _os
23 import urllib.error as _urllib_error
24 import urllib.request as _urllib_request
25
26 from . import __version__
27
28
29 LOG = _logging.getLogger(__name__)
30
31
class InvalidFile (ValueError):
    """Error raised for a URL that cannot be served as a file.

    The offending URL is stored on the ``url`` attribute and quoted
    (via ``repr``) in the exception message.
    """

    def __init__(self, url):
        message = 'invalid file {!r}'.format(url)
        super().__init__(message)
        self.url = url
36
37
class Server (object):
    """WSGI application that serves files out of a local cache.

    A request path that is not yet present in the cache directory is
    downloaded from the upstream sources (tried in order) and stored
    on disk before being served.

    Attributes:
      sources: base URLs of the upstream servers, tried in order.
      cache: path of the directory holding the cached files.
      opener: urllib opener used for the upstream requests.
      block_size: number of bytes read per chunk when downloading or
        streaming file content.
      default_content_type: Content-Type used when the media type
        cannot be guessed from the file name.
    """

    block_size = 8192
    default_content_type = 'application/octet-stream'

    # mimetypes.guess_type() reports compression as an *encoding* and
    # returns the media type of the decoded payload.  The cache serves
    # the stored bytes untouched, so map each encoding to the media
    # type of the encoded file itself.
    _encoded_content_types = {
        'bzip2': 'application/x-bzip2',
        'compress': 'application/x-compress',
        'gzip': 'application/gzip',
        'xz': 'application/x-xz',
        }

    def __init__(self, sources, cache):
        self.sources = sources
        self.cache = cache
        self.opener = _urllib_request.build_opener()
        self.opener.addheaders = [
            ('User-agent', 'Package-cache/{}'.format(__version__)),
            ]

    def __call__(self, environ, start_response):
        """WSGI entry point

        Invalid paths become a 404 response and upstream HTTP errors
        are relayed with the upstream status; both have an empty body.
        """
        try:
            return self._serve_request(
                environ=environ, start_response=start_response)
        except InvalidFile:
            start_response('404 Not Found', [])
        except _urllib_error.HTTPError as e:
            LOG.warning('%s %s', e.code, e.reason)
            start_response('{} {}'.format(e.code, e.reason), [])
        return [b'']

    def _serve_request(self, environ, start_response):
        """Serve the file named by PATH_INFO, caching it first if needed

        Raises InvalidFile when PATH_INFO is missing or does not name
        a regular file inside the cache once fetching has been tried.
        """
        url = environ.get('PATH_INFO', None)
        if url is None:
            raise InvalidFile(url=url)
        cache_path = self._get_cache_path(url=url)
        if not _os.path.exists(cache_path):
            self._get_file_from_sources(url=url, path=cache_path)
        if not _os.path.isfile(cache_path):
            raise InvalidFile(url=url)
        return self._serve_file(
            path=cache_path, environ=environ, start_response=start_response)

    def _get_cache_path(self, url):
        """Return the absolute cache path for a request URL path

        Raises InvalidFile when the URL would resolve to a location
        outside of the cache directory.
        """
        relative_path = url.lstrip('/').replace('/', _os.path.sep)
        cache_path = _os.path.abspath(_os.path.join(self.cache, relative_path))
        check_relative_path = _os.path.relpath(
            path=cache_path, start=self.cache)
        # A bare '..' (the parent of the cache directory) escapes the
        # cache just as much as '../something' does.
        if (check_relative_path == _os.pardir or
                check_relative_path.startswith(_os.pardir + _os.path.sep)):
            raise InvalidFile(url=url)
        return cache_path

    def _get_file_from_sources(self, url, path):
        """Download url from the first source that provides it

        Each source is tried in order.  HTTP errors are logged and the
        next source is tried; the error from the last source is
        re-raised when every source failed.
        """
        _os.makedirs(_os.path.dirname(path), exist_ok=True)
        last_error = None
        for source in self.sources:
            source_url = source.rstrip('/') + url
            try:
                self._get_file(url=source_url, path=path)
            except _urllib_error.HTTPError as e:
                LOG.warning(
                    'error getting %s: %s %s', source_url, e.code, e.reason)
                last_error = e
            else:
                return
        if last_error is not None:
            raise last_error

    def _get_file(self, url, path):
        """Download url into path

        The content is written to a temporary sibling file which is
        moved into place only after the download completed, so an
        interrupted transfer never leaves a truncated file in the
        cache.  When the response carries a parsable Last-Modified
        header, the file's access and modification times are set to
        that date.
        """
        LOG.info('GET %s', url)
        partial_path = '{}.part-{}'.format(path, _os.getpid())
        try:
            with self.opener.open(url) as response:
                last_modified = response.getheader('Last-Modified', None)
                with open(partial_path, 'wb') as f:
                    while True:
                        data = response.read(self.block_size)
                        if not data:
                            # Only an empty read signals the end of
                            # the body; short reads are not EOF.
                            break
                        f.write(data)
            parsed_date = None
            if last_modified:
                # parsedate() returns None for unparsable dates.
                parsed_date = _email_utils.parsedate(last_modified)
            if parsed_date is not None:
                mtime = _calendar.timegm(parsed_date)
                _os.utime(partial_path, times=(mtime, mtime))
            _os.replace(partial_path, path)
        except BaseException:
            if _os.path.exists(partial_path):
                _os.remove(partial_path)
            raise
        LOG.info('got %s', url)

    def _serve_file(self, path, environ, start_response):
        """Start a 200 response and return an iterable over the file

        The server's wsgi.file_wrapper is used when available;
        otherwise the file is streamed in block_size chunks.
        """
        headers = {
            'Content-Length': self._get_content_length(path=path),
            'Content-Type': self._get_content_type(path=path),
            'Last-Modified': self._get_last_modified(path=path),
            }
        f = open(path, 'rb')
        if 'wsgi.file_wrapper' in environ:
            file_iterator = environ['wsgi.file_wrapper'](f)
        else:
            file_iterator = self._iter_file(f)
        start_response('200 OK', list(headers.items()))
        return file_iterator

    def _iter_file(self, f):
        """Yield the content of binary file f in chunks, then close it"""
        try:
            while True:
                data = f.read(self.block_size)
                if not data:
                    break
                yield data
        finally:
            # Also runs when the server calls close() on the iterable
            # before it has been exhausted.
            f.close()

    def _get_content_length(self, path):
        """Content-Length value per RFC 2616

        Content-Length:
          https://tools.ietf.org/html/rfc2616#section-14.13
        """
        return str(_os.path.getsize(path))

    def _get_content_type(self, path):
        """Content-Type value per RFC 2616

        Compressed files are reported with the media type of the
        compressed file, because they are served without decoding.
        Files whose type cannot be guessed get default_content_type.

        Content-Type:
          https://tools.ietf.org/html/rfc2616#section-14.17
        Media types:
          https://tools.ietf.org/html/rfc2616#section-3.7
        """
        mimetype, encoding = _mimetypes.guess_type(path)
        if encoding:
            return self._encoded_content_types.get(
                encoding, self.default_content_type)
        return mimetype or self.default_content_type

    def _get_last_modified(self, path):
        """Last-Modified value per RFC 2616

        Last-Modified:
          https://tools.ietf.org/html/rfc2616#section-14.29
        Date formats:
          https://tools.ietf.org/html/rfc2616#section-3.3.1
          https://tools.ietf.org/html/rfc1123#page-55
          https://tools.ietf.org/html/rfc822#section-5
        """
        mtime = _os.path.getmtime(path)
        return _email_utils.formatdate(
            timeval=mtime, localtime=False, usegmt=True)