Use If-Modified-Since HTTP-header and avoid downloading a remote index if the local...
authorW-Mark Kubacki <wmark@hurrikane.de>
Wed, 1 Aug 2012 17:49:34 +0000 (19:49 +0200)
committerZac Medico <zmedico@gentoo.org>
Thu, 2 Aug 2012 00:51:41 +0000 (17:51 -0700)
pym/portage/dbapi/bintree.py
pym/portage/util/_urlopen.py

index 9527b0766d94e4298eca91b73a999c225e47802e..16ae8eccf8551556b6f04a870b865f9beaf83b0c 100644 (file)
@@ -54,6 +54,11 @@ if sys.hexversion >= 0x3000000:
 else:
        _unicode = unicode
 
+class UseCachedCopyOfRemoteIndex(Exception):
+       # If the local copy is recent enough
+       # then fetching the remote index can be skipped.
+       pass
+
 class bindbapi(fakedbapi):
        _known_keys = frozenset(list(fakedbapi._known_keys) + \
                ["CHOST", "repository", "USE"])
@@ -852,6 +857,7 @@ class binarytree(object):
                                if e.errno != errno.ENOENT:
                                        raise
                        local_timestamp = pkgindex.header.get("TIMESTAMP", None)
+                       remote_timestamp = None
                        rmt_idx = self._new_pkgindex()
                        proc = None
                        tmp_filename = None
@@ -861,8 +867,13 @@ class binarytree(object):
                                # slash, so join manually...
                                url = base_url.rstrip("/") + "/Packages"
                                try:
-                                       f = _urlopen(url)
-                               except IOError:
+                                       f = _urlopen(url, if_modified_since=local_timestamp)
+                                       if hasattr(f, 'headers') and f.headers.get('timestamp', ''):
+                                               remote_timestamp = f.headers.get('timestamp')
+                               except IOError as err:
+                                       if hasattr(err, 'code') and err.code == 304: # not modified (since local_timestamp)
+                                               raise UseCachedCopyOfRemoteIndex()
+
                                        path = parsed_url.path.rstrip("/") + "/Packages"
 
                                        if parsed_url.scheme == 'sftp':
@@ -903,7 +914,8 @@ class binarytree(object):
                                        _encodings['repo.content'], errors='replace')
                                try:
                                        rmt_idx.readHeader(f_dec)
-                                       remote_timestamp = rmt_idx.header.get("TIMESTAMP", None)
+                                       if not remote_timestamp: # in case it had not been read from HTTP header
+                                               remote_timestamp = rmt_idx.header.get("TIMESTAMP", None)
                                        if not remote_timestamp:
                                                # no timestamp in the header, something's wrong
                                                pkgindex = None
@@ -931,6 +943,12 @@ class binarytree(object):
                                                writemsg("\n\n!!! %s\n" % \
                                                        _("Timed out while closing connection to binhost"),
                                                        noiselevel=-1)
+                       except UseCachedCopyOfRemoteIndex:
+                               writemsg_stdout("\n")
+                               writemsg_stdout(
+                                       colorize("GOOD", _("Local copy of remote index is up-to-date and will be used.")) + \
+                                       "\n")
+                               rmt_idx = pkgindex
                        except EnvironmentError as e:
                                writemsg(_("\n\n!!! Error fetching binhost package" \
                                        " info from '%s'\n") % _hide_url_passwd(base_url))
index 307624bc474492ee8877bd191110a7cb4050a4ba..42961883de3a1ea8835c646ab0c013687f7403e6 100644 (file)
@@ -2,6 +2,9 @@
 # Distributed under the terms of the GNU General Public License v2
 
 import sys
+from datetime import datetime
+from time import mktime
+from email.utils import formatdate, parsedate
 
 try:
        from urllib.request import urlopen as _urlopen
@@ -14,15 +17,39 @@ except ImportError:
        import urllib2 as urllib_request
        from urllib import splituser as urllib_parse_splituser
 
-def urlopen(url):
+if sys.hexversion >= 0x3000000:
+       long = int
+
+# to account for the difference between TIMESTAMP of the index' contents
+#  and the file-'mtime'
+TIMESTAMP_TOLERANCE=5
+
+def urlopen(url, if_modified_since=None):
+       parse_result = urllib_parse.urlparse(url)
        try:
-               return _urlopen(url)
+               if parse_result.scheme not in ("http", "https"):
+                       return _urlopen(url)
+               request = urllib_request.Request(url)
+               request.add_header('User-Agent', 'Gentoo Portage')
+               if if_modified_since:
+                       request.add_header('If-Modified-Since', _timestamp_to_http(if_modified_since))
+               opener = urllib_request.build_opener()
+               hdl = opener.open(request)
+               if hdl.headers.get('last-modified', ''):
+                       try:
+                               add_header = hdl.headers.add_header
+                       except AttributeError:
+                               # Python 2
+                               add_header = hdl.headers.addheader
+                       add_header('timestamp', _http_to_timestamp(hdl.headers.get('last-modified')))
+               return hdl
        except SystemExit:
                raise
-       except Exception:
+       except Exception as e:
+               if hasattr(e, 'code') and e.code == 304: # HTTPError 304: not modified
+                       raise
                if sys.hexversion < 0x3000000:
                        raise
-               parse_result = urllib_parse.urlparse(url)
                if parse_result.scheme not in ("http", "https") or \
                        not parse_result.username:
                        raise
@@ -40,3 +67,13 @@ def _new_urlopen(url):
        auth_handler = urllib_request.HTTPBasicAuthHandler(password_manager)
        opener = urllib_request.build_opener(auth_handler)
        return opener.open(url)
+
+def _timestamp_to_http(timestamp):
+       dt = datetime.fromtimestamp(float(long(timestamp)+TIMESTAMP_TOLERANCE))
+       stamp = mktime(dt.timetuple())
+       return formatdate(timeval=stamp, localtime=False, usegmt=True)
+
+def _http_to_timestamp(http_datetime_string):
+       tuple = parsedate(http_datetime_string)
+       timestamp = mktime(tuple)
+       return str(long(timestamp))