Add a _pkg_str class to cache catpkgsplit results
[portage.git] / pym / portage / dbapi / bintree.py
index d8c40d4efca09c4989a933d7e786754f3d79f08c..52b85b88e13610ae27069d280cbc50b436af2eeb 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright 1998-2010 Gentoo Foundation
+# Copyright 1998-2012 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
 __all__ = ["bindbapi", "binarytree"]
@@ -16,16 +16,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.util:atomic_ofstream,ensure_dirs,normalize_path,' + \
                'writemsg,writemsg_stdout',
        'portage.util.listdir:listdir',
-       'portage.versions:best,catpkgsplit,catsplit',
+       'portage.versions:best,catpkgsplit,catsplit,_pkg_str',
 )
 
 from portage.cache.mappings import slot_dict_class
 from portage.const import CACHE_PATH
 from portage.dbapi.virtual import fakedbapi
 from portage.dep import Atom, use_reduce, paren_enclose
-from portage.exception import InvalidPackageName, \
+from portage.exception import AlarmSignal, InvalidPackageName, \
        PermissionDenied, PortageException
 from portage.localization import _
+from portage.util import urlopen
 from portage import _movefile
 from portage import os
 from portage import _encodings
@@ -34,17 +35,19 @@ from portage import _unicode_encode
 
 import codecs
 import errno
+import io
 import re
 import stat
+import subprocess
 import sys
+import tempfile
 import textwrap
+import warnings
 from itertools import chain
 try:
        from urllib.parse import urlparse
-       from urllib.request import urlopen as urllib_request_urlopen
 except ImportError:
        from urlparse import urlparse
-       from urllib import urlopen as urllib_request_urlopen
 
 if sys.hexversion >= 0x3000000:
        basestring = str
@@ -64,7 +67,7 @@ class bindbapi(fakedbapi):
                        ["BUILD_TIME", "CHOST", "DEPEND", "EAPI", "IUSE", "KEYWORDS",
                        "LICENSE", "PDEPEND", "PROPERTIES", "PROVIDE",
                        "RDEPEND", "repository", "RESTRICT", "SLOT", "USE", "DEFINED_PHASES",
-                       "REQUIRED_USE"])
+                       ])
                self._aux_cache_slot_dict = slot_dict_class(self._aux_cache_keys)
                self._aux_cache = {}
 
@@ -73,7 +76,7 @@ class bindbapi(fakedbapi):
                        self.bintree.populate()
                return fakedbapi.match(self, *pargs, **kwargs)
 
-       def cpv_exists(self, cpv):
+       def cpv_exists(self, cpv, myrepo=None):
                if self.bintree and not self.bintree.populated:
                        self.bintree.populate()
                return fakedbapi.cpv_exists(self, cpv)
@@ -86,7 +89,7 @@ class bindbapi(fakedbapi):
                self._aux_cache.pop(cpv, None)
                fakedbapi.cpv_remove(self, cpv)
 
-       def aux_get(self, mycpv, wants):
+       def aux_get(self, mycpv, wants, myrepo=None):
                if self.bintree and not self.bintree.populated:
                        self.bintree.populate()
                cache_me = False
@@ -174,6 +177,34 @@ class bindbapi(fakedbapi):
                        self.bintree.populate()
                return fakedbapi.cpv_all(self)
 
+       def getfetchsizes(self, pkg):
+               """Return a dict mapping the basename of bintree.getname(pkg) to the
+               package's SIZE, or an empty dict if the package is not remote.
+               Raises MissingSignature if the SIZE metadata is not available, or
+               InvalidSignature if it cannot be converted to an integer."""
+
+               if not self.bintree.populated:
+                       self.bintree.populate()
+
+               pkg = getattr(pkg, 'cpv', pkg) # use pkg.cpv when present, else pkg itself
+
+               filesdict = {}
+               if not self.bintree.isremote(pkg):
+                       pass # nothing is recorded for a package that is not remote
+               else:
+                       metadata = self.bintree._remotepkgs[pkg]
+                       try:
+                               size = int(metadata["SIZE"])
+                       except KeyError:
+                               raise portage.exception.MissingSignature("SIZE")
+                       except ValueError:
+                               raise portage.exception.InvalidSignature(
+                                       "SIZE: %s" % metadata["SIZE"])
+                       else:
+                               filesdict[os.path.basename(self.bintree.getname(pkg))] = size
+
+               return filesdict
+
 def _pkgindex_cpv_map_latest_build(pkgindex):
        """
        Given a PackageIndex instance, create a dict of cpv -> metadata map.
@@ -182,7 +213,7 @@ def _pkgindex_cpv_map_latest_build(pkgindex):
        @param pkgindex: A PackageIndex instance.
        @type pkgindex: PackageIndex
        @rtype: dict
-       @returns: a dict containing entry for the give cpv.
+       @return: a dict mapping cpv to metadata.
        """
        cpv_map = {}
 
@@ -205,16 +236,35 @@ def _pkgindex_cpv_map_latest_build(pkgindex):
                        if other_btime and (not btime or other_btime > btime):
                                continue
 
-               cpv_map[cpv] = d
+               cpv_map[_pkg_str(cpv)] = d
 
        return cpv_map
 
 class binarytree(object):
        "this tree scans for a list of all packages available in PKGDIR"
-       def __init__(self, root, pkgdir, virtual=None, settings=None):
+       def __init__(self, _unused=None, pkgdir=None,
+               virtual=DeprecationWarning, settings=None):
+
+               if pkgdir is None:
+                       raise TypeError("pkgdir parameter is required")
+
+               if settings is None:
+                       raise TypeError("settings parameter is required")
+
+               if _unused is not None and _unused != settings['ROOT']:
+                       warnings.warn("The root parameter of the "
+                               "portage.dbapi.bintree.binarytree"
+                               " constructor is now unused. Use "
+                               "settings['ROOT'] instead.",
+                               DeprecationWarning, stacklevel=2)
+
+               if virtual is not DeprecationWarning:
+                       warnings.warn("The 'virtual' parameter of the "
+                               "portage.dbapi.bintree.binarytree"
+                               " constructor is unused",
+                               DeprecationWarning, stacklevel=2)
+
                if True:
-                       self.root = root
-                       #self.pkgdir=settings["PKGDIR"]
                        self.pkgdir = normalize_path(pkgdir)
                        self.dbapi = bindbapi(self, settings=settings)
                        self.update_ents = self.dbapi.update_ents
@@ -222,12 +272,11 @@ class binarytree(object):
                        self.populated = 0
                        self.tree = {}
                        self._remote_has_index = False
-                       self._remote_base_uri = None
                        self._remotepkgs = None # remote metadata indexed by cpv
-                       self.__remotepkgs = {}  # indexed by tbz2 name (deprecated)
                        self.invalids = []
                        self.settings = settings
                        self._pkg_paths = {}
+                       self._pkgindex_uri = {}
                        self._populating = False
                        self._all_directory = os.path.isdir(
                                os.path.join(self.pkgdir, "All"))
@@ -240,7 +289,7 @@ class binarytree(object):
                                ["BUILD_TIME", "CHOST", "DEPEND", "DESCRIPTION", "EAPI",
                                "IUSE", "KEYWORDS", "LICENSE", "PDEPEND", "PROPERTIES",
                                "PROVIDE", "RDEPEND", "repository", "SLOT", "USE", "DEFINED_PHASES",
-                               "REQUIRED_USE"]
+                               "BASE_URI"]
                        self._pkgindex_aux_keys = list(self._pkgindex_aux_keys)
                        self._pkgindex_use_evaluated_keys = \
                                ("LICENSE", "RDEPEND", "DEPEND",
@@ -248,7 +297,7 @@ class binarytree(object):
                        self._pkgindex_header_keys = set([
                                "ACCEPT_KEYWORDS", "ACCEPT_LICENSE",
                                "ACCEPT_PROPERTIES", "CBUILD",
-                               "CHOST", "CONFIG_PROTECT", "CONFIG_PROTECT_MASK", "FEATURES",
+                               "CONFIG_PROTECT", "CONFIG_PROTECT_MASK", "FEATURES",
                                "GENTOO_MIRRORS", "INSTALL_MASK", "SYNC", "USE"])
                        self._pkgindex_default_pkg_data = {
                                "BUILD_TIME"         : "",
@@ -266,12 +315,25 @@ class binarytree(object):
                                "SLOT"    : "0",
                                "USE"     : "",
                                "DEFINED_PHASES" : "",
-                               "REQUIRED_USE" : ""
                        }
                        self._pkgindex_inherited_keys = ["CHOST", "repository"]
+
+                       # Populate the header with appropriate defaults.
                        self._pkgindex_default_header_data = {
-                               "repository":""
+                               "CHOST"        : self.settings.get("CHOST", ""),
+                               "repository"   : "",
                        }
+
+                       # It is especially important to populate keys like
+                       # "repository" that save space when entries can
+                       # inherit them from the header. If an existing
+                       # pkgindex header already defines these keys, then
+                       # they will appropriately override our defaults.
+                       main_repo = self.settings.repositories.mainRepo()
+                       if main_repo is not None and not main_repo.missing_repo_name:
+                               self._pkgindex_default_header_data["repository"] = \
+                                       main_repo.name
+
                        self._pkgindex_translated_keys = (
                                ("DESCRIPTION"   ,   "DESC"),
                                ("repository"    ,   "REPO"),
@@ -283,10 +345,18 @@ class binarytree(object):
                                self._pkgindex_hashes,
                                self._pkgindex_default_pkg_data,
                                self._pkgindex_inherited_keys,
-                               self._pkgindex_default_header_data,
                                chain(*self._pkgindex_translated_keys)
                        ))
 
+       @property
+       def root(self): # deprecated: emits a DeprecationWarning, then returns settings['ROOT']
+               warnings.warn("The root attribute of "
+                       "portage.dbapi.bintree.binarytree"
+                       " is deprecated. Use "
+                       "settings['ROOT'] instead.",
+                       DeprecationWarning, stacklevel=3)
+               return self.settings['ROOT']
+
        def move_ent(self, mylist, repo_match=None):
                if not self.populated:
                        self.populate()
@@ -331,12 +401,10 @@ class binarytree(object):
                        mydata = mytbz2.get_data()
                        updated_items = update_dbentries([mylist], mydata)
                        mydata.update(updated_items)
-                       mydata[_unicode_encode('PF',
-                               encoding=_encodings['repo.content'])] = \
+                       mydata[b'PF'] = \
                                _unicode_encode(mynewpkg + "\n",
                                encoding=_encodings['repo.content'])
-                       mydata[_unicode_encode('CATEGORY',
-                               encoding=_encodings['repo.content'])] = \
+                       mydata[b'CATEGORY'] = \
                                _unicode_encode(mynewcat + "\n",
                                encoding=_encodings['repo.content'])
                        if mynewpkg != myoldpkg:
@@ -437,9 +505,7 @@ class binarytree(object):
 
                if st is not None:
                        # For invalid packages, other_cat could be None.
-                       other_cat = portage.xpak.tbz2(dest_path).getfile(
-                               _unicode_encode("CATEGORY",
-                               encoding=_encodings['repo.content']))
+                       other_cat = portage.xpak.tbz2(dest_path).getfile(b"CATEGORY")
                        if other_cat:
                                other_cat = _unicode_decode(other_cat,
                                        encoding=_encodings['repo.content'], errors='replace')
@@ -592,6 +658,7 @@ class binarytree(object):
                                                        if mycpv in pkg_paths:
                                                                # discard duplicates (All/ is preferred)
                                                                continue
+                                                       mycpv = _pkg_str(mycpv)
                                                        pkg_paths[mycpv] = mypath
                                                        # update the path if the package has been moved
                                                        oldpath = d.get("PATH")
@@ -619,17 +686,11 @@ class binarytree(object):
                                                self.invalids.append(myfile[:-5])
                                                continue
                                        metadata_bytes = portage.xpak.tbz2(full_path).get_data()
-                                       mycat = _unicode_decode(metadata_bytes.get(
-                                               _unicode_encode("CATEGORY",
-                                               encoding=_encodings['repo.content']), ""),
+                                       mycat = _unicode_decode(metadata_bytes.get(b"CATEGORY", ""),
                                                encoding=_encodings['repo.content'], errors='replace')
-                                       mypf = _unicode_decode(metadata_bytes.get(
-                                               _unicode_encode("PF",
-                                               encoding=_encodings['repo.content']), ""),
+                                       mypf = _unicode_decode(metadata_bytes.get(b"PF", ""),
                                                encoding=_encodings['repo.content'], errors='replace')
-                                       slot = _unicode_decode(metadata_bytes.get(
-                                               _unicode_encode("SLOT",
-                                               encoding=_encodings['repo.content']), ""),
+                                       slot = _unicode_decode(metadata_bytes.get(b"SLOT", ""),
                                                encoding=_encodings['repo.content'], errors='replace')
                                        mypkg = myfile[:-5]
                                        if not mycat or not mypf or not slot:
@@ -673,6 +734,7 @@ class binarytree(object):
                                                        (mycpv, self.settings["PORTAGE_CONFIGROOT"]),
                                                        noiselevel=-1)
                                                continue
+                                       mycpv = _pkg_str(mycpv)
                                        pkg_paths[mycpv] = mypath
                                        self.dbapi.cpv_inject(mycpv)
                                        update_pkgindex = True
@@ -736,14 +798,32 @@ class binarytree(object):
                        writemsg(_("!!! PORTAGE_BINHOST unset, but use is requested.\n"),
                                noiselevel=-1)
 
-               if getbinpkgs and 'PORTAGE_BINHOST' in self.settings:
-                       base_url = self.settings["PORTAGE_BINHOST"]
-                       urldata = urlparse(base_url)
+               if not getbinpkgs or 'PORTAGE_BINHOST' not in self.settings:
+                       self.populated=1
+                       return
+               self._remotepkgs = {}
+               for base_url in self.settings["PORTAGE_BINHOST"].split():
+                       parsed_url = urlparse(base_url)
+                       host = parsed_url.netloc
+                       port = parsed_url.port
+                       user = None
+                       passwd = None
+                       user_passwd = ""
+                       if "@" in host:
+                               user, host = host.split("@", 1)
+                               user_passwd = user + "@"
+                               if ":" in user:
+                                       user, passwd = user.split(":", 1)
+                       port_args = []
+                       if port is not None:
+                               port_str = ":%s" % (port,)
+                               if host.endswith(port_str):
+                                       host = host[:-len(port_str)]
                        pkgindex_file = os.path.join(self.settings["EROOT"], CACHE_PATH, "binhost",
-                               urldata[1] + urldata[2], "Packages")
+                               host, parsed_url.path.lstrip("/"), "Packages")
                        pkgindex = self._new_pkgindex()
                        try:
-                               f = codecs.open(_unicode_encode(pkgindex_file,
+                               f = io.open(_unicode_encode(pkgindex_file,
                                        encoding=_encodings['fs'], errors='strict'),
                                        mode='r', encoding=_encodings['repo.content'],
                                        errors='replace')
@@ -756,11 +836,52 @@ class binarytree(object):
                                        raise
                        local_timestamp = pkgindex.header.get("TIMESTAMP", None)
                        rmt_idx = self._new_pkgindex()
+                       proc = None
+                       tmp_filename = None
                        try:
                                # urlparse.urljoin() only works correctly with recognized
                                # protocols and requires the base url to have a trailing
                                # slash, so join manually...
-                               f = urllib_request_urlopen(base_url.rstrip("/") + "/Packages")
+                               url = base_url.rstrip("/") + "/Packages"
+                               try:
+                                       f = urlopen(url)
+                               except IOError:
+                                       path = parsed_url.path.rstrip("/") + "/Packages"
+
+                                       if parsed_url.scheme == 'sftp':
+                                               # The sftp command complains about 'Illegal seek' if
+                                               # we try to make it write to /dev/stdout, so use a
+                                               # temp file instead.
+                                               fd, tmp_filename = tempfile.mkstemp()
+                                               os.close(fd)
+                                               if port is not None:
+                                                       port_args = ['-P', "%s" % (port,)]
+                                               proc = subprocess.Popen(['sftp'] + port_args + \
+                                                       [user_passwd + host + ":" + path, tmp_filename])
+                                               if proc.wait() != os.EX_OK:
+                                                       raise
+                                               f = open(tmp_filename, 'rb')
+                                       elif parsed_url.scheme == 'ssh':
+                                               if port is not None:
+                                                       port_args = ['-p', "%s" % (port,)]
+                                               proc = subprocess.Popen(['ssh'] + port_args + \
+                                                       [user_passwd + host, '--', 'cat', path],
+                                                       stdout=subprocess.PIPE)
+                                               f = proc.stdout
+                                       else:
+                                               setting = 'FETCHCOMMAND_' + parsed_url.scheme.upper()
+                                               fcmd = self.settings.get(setting)
+                                               if not fcmd:
+                                                       raise
+                                               fd, tmp_filename = tempfile.mkstemp()
+                                               tmp_dirname, tmp_basename = os.path.split(tmp_filename)
+                                               os.close(fd)
+                                               success = portage.getbinpkg.file_get(url,
+                                                    tmp_dirname, fcmd=fcmd, filename=tmp_basename)
+                                               if not success:
+                                                       raise EnvironmentError("%s failed" % (setting,))
+                                               f = open(tmp_filename, 'rb')
+
                                f_dec = codecs.iterdecode(f,
                                        _encodings['repo.content'], errors='replace')
                                try:
@@ -769,6 +890,8 @@ class binarytree(object):
                                        if not remote_timestamp:
                                                # no timestamp in the header, something's wrong
                                                pkgindex = None
+                                               writemsg(_("\n\n!!! Binhost package index " \
+                                               " has no TIMESTAMP field.\n"), noiselevel=-1)
                                        else:
                                                if not self._pkgindex_version_supported(rmt_idx):
                                                        writemsg(_("\n\n!!! Binhost package index version" \
@@ -779,13 +902,34 @@ class binarytree(object):
                                                        rmt_idx.readBody(f_dec)
                                                        pkgindex = rmt_idx
                                finally:
-                                       f.close()
+                                       # Timeout after 5 seconds, in case close() blocks
+                                       # indefinitely (see bug #350139).
+                                       try:
+                                               try:
+                                                       AlarmSignal.register(5)
+                                                       f.close()
+                                               finally:
+                                                       AlarmSignal.unregister()
+                                       except AlarmSignal:
+                                               writemsg("\n\n!!! %s\n" % \
+                                                       _("Timed out while closing connection to binhost"),
+                                                       noiselevel=-1)
                        except EnvironmentError as e:
                                writemsg(_("\n\n!!! Error fetching binhost package" \
                                        " info from '%s'\n") % base_url)
                                writemsg("!!! %s\n\n" % str(e))
                                del e
                                pkgindex = None
+                       if proc is not None:
+                               if proc.poll() is None:
+                                       proc.kill()
+                                       proc.wait()
+                               proc = None
+                       if tmp_filename is not None:
+                               try:
+                                       os.unlink(tmp_filename)
+                               except OSError:
+                                       pass
                        if pkgindex is rmt_idx:
                                pkgindex.modified = False # don't update the header
                                try:
@@ -800,13 +944,15 @@ class binarytree(object):
                                        # file, but that's alright.
                        if pkgindex:
                                # Organize remote package list as a cpv -> metadata map.
-                               self._remotepkgs = _pkgindex_cpv_map_latest_build(pkgindex)
+                               remotepkgs = _pkgindex_cpv_map_latest_build(pkgindex)
+                               remote_base_uri = pkgindex.header.get("URI", base_url)
+                               for cpv, remote_metadata in remotepkgs.items():
+                                       remote_metadata["BASE_URI"] = remote_base_uri
+                                       self._pkgindex_uri[cpv] = url
+                               self._remotepkgs.update(remotepkgs)
                                self._remote_has_index = True
-                               self._remote_base_uri = pkgindex.header.get("URI", base_url)
-                               self.__remotepkgs = {}
-                               for cpv in self._remotepkgs:
+                               for cpv in remotepkgs:
                                        self.dbapi.cpv_inject(cpv)
-                               self.populated = 1
                                if True:
                                        # Remote package instances override local package
                                        # if they are not identical.
@@ -836,8 +982,7 @@ class binarytree(object):
                                        # Local package instances override remote instances.
                                        for cpv in metadata:
                                                self._remotepkgs.pop(cpv, None)
-                               return
-                       self._remotepkgs = {}
+                               continue
                        try:
                                chunk_size = long(self.settings["PORTAGE_BINHOST_CHUNKSIZE"])
                                if chunk_size < 8:
@@ -848,18 +993,17 @@ class binarytree(object):
                        writemsg_stdout(
                                colorize("GOOD", _("Fetching bininfo from ")) + \
                                re.sub(r'//(.+):.+@(.+)/', r'//\1:*password*@\2/', base_url) + "\n")
-                       self.__remotepkgs = portage.getbinpkg.dir_get_metadata(
-                               self.settings["PORTAGE_BINHOST"], chunk_size=chunk_size)
-                       #writemsg(green("  -- DONE!\n\n"))
+                       remotepkgs = portage.getbinpkg.dir_get_metadata(
+                               base_url, chunk_size=chunk_size)
 
-                       for mypkg in list(self.__remotepkgs):
-                               if "CATEGORY" not in self.__remotepkgs[mypkg]:
+                       for mypkg, remote_metadata in remotepkgs.items():
+                               mycat = remote_metadata.get("CATEGORY")
+                               if mycat is None:
                                        #old-style or corrupt package
                                        writemsg(_("!!! Invalid remote binary package: %s\n") % mypkg,
                                                noiselevel=-1)
-                                       del self.__remotepkgs[mypkg]
                                        continue
-                               mycat = self.__remotepkgs[mypkg]["CATEGORY"].strip()
+                               mycat = mycat.strip()
                                fullpkg = mycat+"/"+mypkg[:-5]
 
                                if fullpkg in metadata:
@@ -881,9 +1025,9 @@ class binarytree(object):
                                try:
                                        # invalid tbz2's can hurt things.
                                        self.dbapi.cpv_inject(fullpkg)
-                                       remote_metadata = self.__remotepkgs[mypkg]
                                        for k, v in remote_metadata.items():
                                                remote_metadata[k] = v.strip()
+                                       remote_metadata["BASE_URI"] = base_url
 
                                        # Eliminate metadata values with names that digestCheck
                                        # uses, since they are not valid when using the old
@@ -901,7 +1045,6 @@ class binarytree(object):
                                except:
                                        writemsg(_("!!! Failed to inject remote binary package: %s\n") % fullpkg,
                                                noiselevel=-1)
-                                       del self.__remotepkgs[mypkg]
                                        continue
                self.populated=1
 
@@ -1015,7 +1158,7 @@ class binarytree(object):
                Performs checksums and evaluates USE flag conditionals.
                Raises InvalidDependString if necessary.
                @rtype: dict
-               @returns: a dict containing entry for the give cpv.
+               @return: a dict containing the entry for the given cpv.
                """
 
                pkg_path = self.getname(cpv)
@@ -1155,6 +1298,10 @@ class binarytree(object):
                # package is downloaded, state is updated by self.inject().
                return True
 
+       def get_pkgindex_uri(self, pkgname):
+               """Returns the URI of the Packages file recorded for the given package, or None if there is none."""
+               return self._pkgindex_uri.get(pkgname)
+
        def gettbz2(self, pkgname):
                """Fetches the package from a remote site, if necessary.  Attempts to
                resume if the file appears to be partially downloaded."""
@@ -1176,7 +1323,8 @@ class binarytree(object):
                        rel_url = self._remotepkgs[pkgname].get("PATH")
                        if not rel_url:
                                rel_url = pkgname+".tbz2"
-                       url = self._remote_base_uri.rstrip("/") + "/" + rel_url.lstrip("/")
+                       remote_base_uri = self._remotepkgs[pkgname]["BASE_URI"]
+                       url = remote_base_uri.rstrip("/") + "/" + rel_url.lstrip("/")
                else:
                        url = self.settings["PORTAGE_BINHOST"].rstrip("/") + "/" + tbz2name
                protocol = urlparse(url)[0]
@@ -1198,7 +1346,7 @@ class binarytree(object):
        def _load_pkgindex(self):
                pkgindex = self._new_pkgindex()
                try:
-                       f = codecs.open(_unicode_encode(self._pkgindex_file,
+                       f = io.open(_unicode_encode(self._pkgindex_file,
                                encoding=_encodings['fs'], errors='strict'),
                                mode='r', encoding=_encodings['repo.content'],
                                errors='replace')
@@ -1216,7 +1364,7 @@ class binarytree(object):
                Verify digests for the given package and raise DigestException
                if verification fails.
                @rtype: bool
-               @returns: True if digests could be located, False otherwise.
+               @return: True if digests could be located, False otherwise.
                """
                cpv = pkg
                if not isinstance(cpv, basestring):