# Copyright (C) 2014 W. Trevor King <wking@tremily.us>
#
# This file is part of package-cache.
#
# package-cache is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# package-cache is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# package-cache. If not, see <http://www.gnu.org/licenses/>.
import calendar as _calendar
import email.utils as _email_utils
import logging as _logging
import mimetypes as _mimetypes
import os as _os
import urllib.error as _urllib_error
import urllib.request as _urllib_request

from . import __version__
# Module-level logger, named after this module.
LOG = _logging.getLogger(__name__)
class InvalidFile (ValueError):
    """Raised when a request path cannot be mapped to a servable file.

    Attributes:
        url: the request path that was rejected (may be ``None`` when the
            request carried no path at all).
    """

    def __init__(self, url):
        super(InvalidFile, self).__init__('invalid file {!r}'.format(url))
        # Keep the offending path so handlers can inspect it without
        # parsing the message.
        self.url = url
class Server (object):
    """WSGI application that serves files out of a local cache directory.

    A requested file that is not yet present in the cache is first
    downloaded from the configured sources (tried in order) and then served
    from disk.

    Attributes:
        sources: iterable of upstream base URLs, tried in order.
        cache: path of the directory that holds the cached files.
        opener: ``urllib.request`` opener used for upstream downloads.
    """

    # Chunk size, in bytes, used both for downloading and for serving.
    _BLOCK_SIZE = 8192

    # Media types for the stream encodings reported by
    # ``mimetypes.guess_type``.  The cache serves compressed files as-is,
    # so the media type has to describe the compressed container.
    _ENCODING_TYPES = {
        'gzip': 'application/gzip',
        'bzip2': 'application/x-bzip2',
        'xz': 'application/x-xz',
        'compress': 'application/x-compress',
        }

    # Media type used when nothing better can be guessed.
    _DEFAULT_TYPE = 'application/octet-stream'

    def __init__(self, sources, cache):
        """Store the configuration and build the upstream URL opener.

        Args:
            sources: iterable of upstream base URLs.
            cache: path of the cache directory.
        """
        self.sources = sources
        self.cache = cache
        self.opener = _urllib_request.build_opener()
        self.opener.addheaders = [
            ('User-agent', 'Package-cache/{}'.format(__version__)),
            ]

    def __call__(self, environ, start_response):
        """WSGI entry point.

        Invalid paths are answered with ``404 Not Found``; an upstream
        ``HTTPError`` is relayed to the client with its own status code.

        Returns:
            An iterable of bytes chunks (empty for error responses).
        """
        try:
            return self._serve_request(
                environ=environ, start_response=start_response)
        except InvalidFile:
            start_response('404 Not Found', [])
        except _urllib_error.HTTPError as e:
            LOG.warning('{} {}'.format(e.code, e.reason))
            start_response('{} {}'.format(e.code, e.reason), [])
        return []

    def _serve_request(self, environ, start_response):
        """Serve the file named by ``PATH_INFO``, fetching it if needed.

        Raises:
            InvalidFile: if the request has no path, the path escapes the
                cache, or the path does not resolve to a regular file.
            urllib.error.HTTPError: if the download fails for every source.
        """
        url = environ.get('PATH_INFO', None)
        if url is None:
            raise InvalidFile(url=url)
        cache_path = self._get_cache_path(url=url)
        if not _os.path.exists(cache_path):
            self._get_file_from_sources(url=url, path=cache_path)
        if not _os.path.isfile(cache_path):
            raise InvalidFile(url=url)
        return self._serve_file(
            path=cache_path, environ=environ, start_response=start_response)

    def _get_cache_path(self, url):
        """Map a request path onto an absolute path inside the cache.

        Raises:
            InvalidFile: if the resulting path lies outside the cache.
        """
        relative_path = url.lstrip('/').replace('/', _os.path.sep)
        cache_path = _os.path.abspath(_os.path.join(self.cache, relative_path))
        check_relative_path = _os.path.relpath(cache_path, start=self.cache)
        # A result of exactly '..' (the cache's parent) escapes the cache
        # just like anything below '../'.
        if (check_relative_path == _os.pardir or
                check_relative_path.startswith(_os.pardir + _os.path.sep)):
            raise InvalidFile(url=url)
        return cache_path

    def _get_file_from_sources(self, url, path):
        """Download ``url`` into ``path`` from the first source that has it.

        Raises:
            urllib.error.HTTPError: the error from the last source, when
                every source failed.
        """
        _os.makedirs(_os.path.dirname(path), exist_ok=True)
        last_error = None
        for source in self.sources:
            source_url = source.rstrip('/') + url
            try:
                self._get_file(url=source_url, path=path)
            except _urllib_error.HTTPError as e:
                LOG.warning('error getting {}: {} {}'.format(
                    source_url, e.code, e.reason))
                last_error = e
            else:
                return
        if last_error is not None:
            raise last_error

    def _get_file(self, url, path):
        """Download ``url`` to ``path``, keeping the upstream mtime.

        The body is written to a temporary sibling file and moved into
        place only after a complete download, so an interrupted transfer
        never leaves a truncated file in the cache.
        """
        LOG.info('GET {}'.format(url))
        partial_path = path + '.part'
        try:
            with self.opener.open(url) as response:
                last_modified = response.headers.get('Last-Modified')
                with open(partial_path, 'wb') as f:
                    # Read until EOF; a short read does not imply the end
                    # of the body.
                    for chunk in iter(
                            lambda: response.read(self._BLOCK_SIZE), b''):
                        f.write(chunk)
            if last_modified:
                parsed = _email_utils.parsedate(last_modified)
                # parsedate returns None for an unparsable date.
                if parsed is not None:
                    mtime = _calendar.timegm(parsed)
                    _os.utime(partial_path, times=(mtime, mtime))
            _os.replace(partial_path, path)
        finally:
            # Only present here if something above failed.
            if _os.path.exists(partial_path):
                _os.remove(partial_path)
        LOG.info('got {}'.format(url))

    def _serve_file(self, path, environ, start_response):
        """Start a ``200 OK`` response and return an iterable over ``path``.

        Uses the server's ``wsgi.file_wrapper`` when available and falls
        back to a chunked read otherwise.
        """
        headers = [
            ('Content-Length', self._get_content_length(path=path)),
            ('Content-Type', self._get_content_type(path=path)),
            ('Last-Modified', self._get_last_modified(path=path)),
            ]
        f = open(path, 'rb')
        try:
            if 'wsgi.file_wrapper' in environ:
                file_iterator = environ['wsgi.file_wrapper'](f)
            else:
                file_iterator = self._iter_file(f, self._BLOCK_SIZE)
            start_response('200 OK', headers)
        except Exception:
            # Nobody will consume the iterator, so release the file here.
            f.close()
            raise
        return file_iterator

    @staticmethod
    def _iter_file(f, block_size):
        """Yield the binary file ``f`` in chunks, closing it afterwards."""
        try:
            # The file is opened in binary mode, so EOF is b'' (not '').
            for chunk in iter(lambda: f.read(block_size), b''):
                yield chunk
        finally:
            f.close()

    def _get_content_length(self, path):
        """Content-Length value per RFC 2616

        Content-Length:
          https://tools.ietf.org/html/rfc2616#section-14.13
        """
        return str(_os.path.getsize(path))

    def _get_content_type(self, path):
        """Content-Type value per RFC 2616

        Content-Type:
          https://tools.ietf.org/html/rfc2616#section-14.17
        Media types:
          https://tools.ietf.org/html/rfc2616#section-3.7

        ``mimetypes.guess_type`` returns ``(type, encoding)`` where
        ``encoding`` names a compression program (e.g. ``gzip``), not a
        character set.  Compressed files are served unmodified, so they are
        labelled with the media type of the compressed container.
        """
        mimetype, encoding = _mimetypes.guess_type(path)
        if encoding:
            return self._ENCODING_TYPES.get(encoding, self._DEFAULT_TYPE)
        # Header values must be strings, so never hand back None.
        return mimetype or self._DEFAULT_TYPE

    def _get_last_modified(self, path):
        """Last-Modified value per RFC 2616

        Last-Modified:
          https://tools.ietf.org/html/rfc2616#section-14.29
        Date formats:
          https://tools.ietf.org/html/rfc2616#section-3.3.1
          https://tools.ietf.org/html/rfc1123#page-55
          https://tools.ietf.org/html/rfc822#section-5
        """
        mtime = _os.path.getmtime(path)
        return _email_utils.formatdate(
            timeval=mtime, localtime=False, usegmt=True)