1 # Copyright (C) 2014 W. Trevor King <wking@tremily.us>
3 # This file is part of package-cache.
5 # package-cache is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
10 # package-cache is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
15 # You should have received a copy of the GNU General Public License along with
16 # package-cache. If not, see <http://www.gnu.org/licenses/>.
import calendar as _calendar
import email.utils as _email_utils
import mimetypes as _mimetypes
import os as _os
import urllib.error as _urllib_error
import urllib.request as _urllib_request

from . import __version__
class InvalidFile (ValueError):
    """Raised when a requested URL does not map to a servable file.

    The message has the form ``invalid file '<url>'``.  The offending
    URL is also kept on the ``url`` attribute so handlers can inspect
    it without parsing the message.
    """

    def __init__(self, url):
        super().__init__('invalid file {!r}'.format(url))
        self.url = url
class Server (object):
    """WSGI application serving files through a local download cache.

    The request's ``PATH_INFO`` is mapped to a file below ``cache``.
    When that file is not cached yet, it is downloaded from the first
    entry of ``sources`` that provides it, stored in the cache, and
    then served from there.

    Parameters:
      sources: sequence of base URLs; the request path is appended to
        each of them, in order, until a download succeeds.
      cache: path of the directory that holds the cached files.
    """

    # Number of bytes moved per read while downloading or streaming.
    block_size = 8192

    # mimetypes.guess_type() reports compression as a content *encoding*.
    # Cached files are served verbatim (no Content-Encoding header), so
    # the Content-Type has to describe the compressed container itself.
    _encoding_types = {
        'gzip': 'application/gzip',
        'bzip2': 'application/x-bzip2',
        'xz': 'application/x-xz',
        'compress': 'application/x-compress',
        }

    def __init__(self, sources, cache):
        self.sources = sources
        self.cache = cache
        self.opener = _urllib_request.build_opener()
        self.opener.addheaders = [
            ('User-agent', 'Package-cache/{}'.format(__version__)),
            ]

    def __call__(self, environ, start_response):
        """WSGI entry point.

        Returns the iterable of body bytes.  An ``InvalidFile`` is
        answered with ``404 Not Found``; an upstream ``HTTPError`` is
        relayed with the upstream status code and reason.
        """
        try:
            return self._serve_request(
                environ=environ, start_response=start_response)
        except InvalidFile:
            start_response('404 Not Found', [])
        except _urllib_error.HTTPError as e:
            print('{} {}'.format(e.code, e.reason))
            start_response('{} {}'.format(e.code, e.reason), [])
        # Error responses carry an empty body.
        return [b'']

    def _serve_request(self, environ, start_response):
        """Serve ``PATH_INFO`` from the cache, filling the cache first.

        Raises ``InvalidFile`` when the request has no path or when the
        resulting cache entry is not a regular file.
        """
        url = environ.get('PATH_INFO', None)
        if not url:
            raise InvalidFile(url=url)
        cache_path = self._get_cache_path(url=url)
        if not _os.path.exists(cache_path):
            self._get_file_from_sources(url=url, path=cache_path)
        if not _os.path.isfile(cache_path):
            raise InvalidFile(url=url)
        return self._serve_file(
            path=cache_path, environ=environ, start_response=start_response)

    def _get_cache_path(self, url):
        """Return the absolute cache path for a request URL.

        Raises ``InvalidFile`` when the URL would resolve to a location
        outside the cache directory (for example via ``..`` segments).
        """
        relative_path = url.lstrip('/').replace('/', _os.path.sep)
        cache_path = _os.path.abspath(_os.path.join(self.cache, relative_path))
        try:
            check_relative_path = _os.path.relpath(
                path=cache_path, start=self.cache)
        except ValueError:
            # relpath() cannot relate paths on different Windows drives,
            # which also means the target is outside the cache.
            raise InvalidFile(url=url)
        # A bare '..' escapes the cache just like anything below '../'.
        if (check_relative_path == _os.pardir or
                check_relative_path.startswith(_os.pardir + _os.path.sep)):
            raise InvalidFile(url=url)
        return cache_path

    def _get_file_from_sources(self, url, path):
        """Download ``url`` into ``path`` from the first source having it.

        An ``HTTPError`` from a source moves on to the next source; the
        error of the last source is re-raised to the caller.
        """
        _os.makedirs(_os.path.dirname(path), exist_ok=True)
        last_index = len(self.sources) - 1
        for i, source in enumerate(self.sources):
            source_url = source.rstrip('/') + url
            try:
                self._get_file(url=source_url, path=path)
            except _urllib_error.HTTPError:
                if i == last_index:
                    raise
            else:
                return

    def _get_file(self, url, path):
        """Download ``url`` to ``path``, keeping the upstream mtime.

        The body is written to a temporary sibling file and moved into
        place only once it has been read to the end, so an interrupted
        download never leaves a truncated file under ``path``.
        """
        with self.opener.open(url) as response:
            last_modified = response.getheader('Last-Modified', None)
            # Process id plus object id keeps concurrent downloads of the
            # same file from writing to the same temporary name.
            temp_path = '{}.part-{}-{}'.format(
                path, _os.getpid(), id(response))
            try:
                with open(temp_path, 'wb') as f:
                    while True:
                        data = response.read(self.block_size)
                        if not data:  # only an empty read signals EOF
                            break
                        f.write(data)
                _os.replace(temp_path, path)
            finally:
                # Only still present when the download did not complete.
                if _os.path.exists(temp_path):
                    _os.remove(temp_path)
        if last_modified:
            parsed = _email_utils.parsedate(last_modified)
            if parsed is not None:  # ignore unparseable dates
                mtime = _calendar.timegm(parsed)
                _os.utime(path=path, times=(mtime, mtime))

    def _serve_file(self, path, environ, start_response):
        """Start a ``200 OK`` response and return the body iterable.

        Uses the server's ``wsgi.file_wrapper`` when the environment
        offers one, and a chunked reader otherwise.
        """
        headers = [
            ('Content-Length', self._get_content_length(path=path)),
            ('Content-Type', self._get_content_type(path=path)),
            ('Last-Modified', self._get_last_modified(path=path)),
            ]
        f = open(path, 'rb')
        try:
            if 'wsgi.file_wrapper' in environ:
                file_iterator = environ['wsgi.file_wrapper'](f)
            else:
                file_iterator = self._iter_file(f)
            start_response('200 OK', headers)
        except BaseException:
            f.close()
            raise
        return file_iterator

    def _iter_file(self, f):
        """Yield the content of the binary file ``f`` in chunks, then close it."""
        with f:
            while True:
                data = f.read(self.block_size)
                if not data:
                    break
                yield data

    def _get_content_length(self, path):
        """Content-Length value per RFC 2616

        Content-Length:
          https://tools.ietf.org/html/rfc2616#section-14.13
        """
        return str(_os.path.getsize(path))

    def _get_content_type(self, path):
        """Content-Type value per RFC 2616

        Content-Type:
          https://tools.ietf.org/html/rfc2616#section-14.17
        Media types:
          https://tools.ietf.org/html/rfc2616#section-3.7

        Compressed files are labelled with the type of the compressed
        container, and unknown files with ``application/octet-stream``.
        """
        mimetype, encoding = _mimetypes.guess_type(path)
        if encoding:
            # The second value is a content encoding (e.g. 'gzip'), not
            # a charset, and must not be sent as a charset parameter.
            return self._encoding_types.get(
                encoding, 'application/octet-stream')
        return mimetype or 'application/octet-stream'

    def _get_last_modified(self, path):
        """Last-Modified value per RFC 2616

        Last-Modified:
          https://tools.ietf.org/html/rfc2616#section-14.29
        Date formats:
          https://tools.ietf.org/html/rfc2616#section-3.3.1
          https://tools.ietf.org/html/rfc1123#page-55
          https://tools.ietf.org/html/rfc822#section-5
        """
        mtime = _os.path.getmtime(path)
        return _email_utils.formatdate(
            timeval=mtime, localtime=False, usegmt=True)