From: W. Trevor King Date: Thu, 20 Feb 2014 20:16:47 +0000 (-0800) Subject: server: Implement Server._get_file X-Git-Tag: v0.1~23 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=1d56acc9a6cda51549f2fcbdc05d9b2d7edbb2cd;p=package-cache.git server: Implement Server._get_file It would be nice to use sendfile to copy between the HTTPResponse object [1] and the cache file. Linux supports arbitrary files (not just sockets) for out_fd since 2.6.33, so the "to the cache file" side works. However, from sendfile(2) [2]: The in_fd argument must correspond to a file which supports mmap(2)-like operations (i.e., it cannot be a socket). So reading from the HTTPResponse is not going to happen (yet). Once Linux gains support for socket in_fd, we could use something like: _os.sendfile( f.fileno(), response.fileno(), offset=None, count=content_length) [1]: http://docs.python.org/3/library/http.client.html#httpresponse-objects [2]: http://man7.org/linux/man-pages/man2/sendfile.2.html --- diff --git a/package_cache/server.py b/package_cache/server.py index f31ddce..97deb52 100644 --- a/package_cache/server.py +++ b/package_cache/server.py @@ -3,7 +3,11 @@ import email.utils as _email_utils import mimetypes as _mimetypes import os as _os +import urllib.error as _urllib_error import urllib.parse as _urllib_parse +import urllib.request as _urllib_request + +from . import __version__ class InvalidFile (ValueError): @@ -16,6 +20,10 @@ class Server (object): def __init__(self, sources, cache): self.sources = sources self.cache = cache + self.opener = _urllib_request.build_opener() + self.opener.addheaders = [ + ('User-agent', 'Package-cache/{}'.format(__version__)), + ] if not _os.path.isdir(self.cache): _os.makedirs(self.cache, exist_ok=True) @@ -25,6 +33,10 @@ class Server (object): environ=environ, start_response=start_response) except InvalidFile: start_response('404 Not Found', []) + except _urllib_error.HTTPError as e: + print('{} {}'.format(e.code, e.reason)) + start_response('{} {}'.format(e.code, e.reason), []) + return [b''] def _serve_request(self, environ, start_response): method = environ['REQUEST_METHOD'] @@ -35,14 +47,33 @@ class Server (object): relative_path = parsed_url.path.lstrip('/').replace('/', _os.path.sep) cache_path = _os.path.join(self.cache, relative_path) if not _os.path.exists(path=cache_path): - self._get_file(url=url, path=cache_path) + self._get_file_from_sources(url=url, path=cache_path) if not _os.path.isfile(path=cache_path): raise InvalidFile(url=url) return self._serve_file( path=cache_path, environ=environ, start_response=start_response) + def _get_file_from_sources(self, url, path): + for i, source in enumerate(self.sources): + source_url = source.rstrip('/') + url + try: + self._get_file(url=source_url, path=path) + except _urllib_error.HTTPError: + if i == len(self.sources) - 1: + raise + else: + return + def _get_file(self, url, path): - raise NotImplementedError() + with self.opener.open(url) as response: + content_length = int(response.getheader('Content-Length')) + with open(path, 'wb') as f: + block_size = 8192 + while True: + data = response.read(block_size) + f.write(data) + if len(data) < block_size: + break def _serve_file(self, path, environ, start_response): headers = {