* Optimize metadata caching in portdbapi and bindbapi so that cache is
author Zac Medico <zmedico@gentoo.org>
Mon, 7 Apr 2008 22:12:04 +0000 (22:12 -0000)
committer Zac Medico <zmedico@gentoo.org>
Mon, 7 Apr 2008 22:12:04 +0000 (22:12 -0000)
  never pulled for the same package twice.
* Cache Package instances constructed during package selection
  and reuse them during later package selections.

svn path=/main/trunk/; revision=9741

pym/_emerge/__init__.py
pym/portage/dbapi/__init__.py
pym/portage/dbapi/bintree.py
pym/portage/dbapi/porttree.py
pym/portage/getbinpkg.py

index 173404378395a8417210af33dc5e3baf32d0bc79..4397545dd2983037d2b31ff0e595dda215d94dc0 100644 (file)
@@ -1590,6 +1590,8 @@ class depgraph(object):
                self._select_package = self._select_pkg_highest_available
                self._highest_pkg_cache = {}
                self._installed_pkg_cache = {}
+               # All Package instances
+               self._pkg_cache = {}
 
        def _show_slot_collision_notice(self):
                """Show an informational message advising the user to mask one of the
@@ -2531,6 +2533,11 @@ class depgraph(object):
                                        cpv_list = db.xmatch("match-all", atom)
                                else:
                                        cpv_list = db.match(atom)
+                               if not cpv_list:
+                                       continue
+                               pkg_status = "merge"
+                               if installed or onlydeps:
+                                       pkg_status = "nomerge"
                                # descending order
                                cpv_list.reverse()
                                for cpv in cpv_list:
@@ -2539,36 +2546,48 @@ class depgraph(object):
                                                cpv in vardb.match(atom):
                                                break
                                        reinstall_for_flags = None
-                                       try:
-                                               metadata = dict(izip(db_keys,
-                                                       db.aux_get(cpv, db_keys)))
-                                       except KeyError:
-                                               continue
-                                       if not built:
-                                               if "?" in metadata["LICENSE"]:
+                                       cache_key = (pkg_type, root, cpv, pkg_status)
+                                       calculated_use = True
+                                       pkg = self._pkg_cache.get(cache_key)
+                                       if pkg is None:
+                                               calculated_use = False
+                                               try:
+                                                       metadata = dict(izip(self._mydbapi_keys,
+                                                               db.aux_get(cpv, self._mydbapi_keys)))
+                                               except KeyError:
+                                                       continue
+                                               if not built and ("?" in metadata["LICENSE"] or \
+                                                       "?" in metadata["PROVIDE"]):
+                                                       # This is avoided whenever possible because
+                                                       # it's expensive. It only needs to be done here
+                                                       # if it has an effect on visibility.
                                                        pkgsettings.setcpv(cpv, mydb=metadata)
                                                        metadata["USE"] = pkgsettings["PORTAGE_USE"]
-                                               else:
-                                                       metadata["USE"] = ""
+                                                       calculated_use = True
+                                               pkg = Package(built=built, cpv=cpv,
+                                                       installed=installed, metadata=metadata,
+                                                       onlydeps=onlydeps, root=root, type_name=pkg_type)
+                                               self._pkg_cache[pkg] = pkg
                                        myarg = None
                                        if root == self.target_root:
                                                try:
-                                                       myarg = self._get_arg_for_pkg(
-                                                               Package(type_name=pkg_type, root=root,
-                                                                       cpv=cpv, metadata=metadata,
-                                                                       built=built, installed=installed,
-                                                                       onlydeps=onlydeps))
+                                                       myarg = self._iter_atoms_for_pkg(pkg).next()
+                                               except StopIteration:
+                                                       pass
                                                except portage.exception.InvalidDependString:
                                                        if not installed:
                                                                # masked by corruption
                                                                continue
-                                       pkg = Package(built=built, cpv=cpv, installed=installed,
-                                               metadata=metadata, type_name=pkg_type)
                                        if not installed:
                                                if myarg:
                                                        found_available_arg = True
                                                if not visible(pkgsettings, pkg):
                                                        continue
+                                       if not built and not calculated_use:
+                                               # This is avoided whenever possible because
+                                               # it's expensive.
+                                               pkgsettings.setcpv(cpv, mydb=pkg.metadata)
+                                               pkg.metadata["USE"] = pkgsettings["PORTAGE_USE"]
                                        if pkg.cp == atom_cp:
                                                if highest_version is None:
                                                        highest_version = pkg
@@ -2580,9 +2599,7 @@ class depgraph(object):
                                        # will always end with a break statement below
                                        # this point.
                                        if find_existing_node:
-                                               slot_atom = "%s:%s" % (
-                                                       portage.cpv_getkey(cpv), metadata["SLOT"])
-                                               e_pkg = self._slot_pkg_map[root].get(slot_atom)
+                                               e_pkg = self._slot_pkg_map[root].get(pkg.slot_atom)
                                                if not e_pkg:
                                                        break
                                                cpv_slot = "%s:%s" % \
@@ -2606,9 +2623,9 @@ class depgraph(object):
                                                ("--newuse" in self.myopts or \
                                                "--reinstall" in self.myopts):
                                                iuses = set(filter_iuse_defaults(
-                                                       metadata["IUSE"].split()))
-                                               old_use = metadata["USE"].split()
-                                               mydb = metadata
+                                                       pkg.metadata["IUSE"].split()))
+                                               old_use = pkg.metadata["USE"].split()
+                                               mydb = pkg.metadata
                                                if myeb and not usepkgonly:
                                                        mydb = portdb
                                                if myeb:
@@ -2634,7 +2651,7 @@ class depgraph(object):
                                                ("--newuse" in self.myopts or \
                                                "--reinstall" in self.myopts) and \
                                                cpv in vardb.match(atom):
-                                               pkgsettings.setcpv(cpv, mydb=metadata)
+                                               pkgsettings.setcpv(cpv, mydb=pkg.metadata)
                                                forced_flags = set()
                                                forced_flags.update(pkgsettings.useforce)
                                                forced_flags.update(pkgsettings.usemask)
@@ -2643,7 +2660,7 @@ class depgraph(object):
                                                        vardb.aux_get(cpv, ["IUSE"])[0].split()))
                                                cur_use = pkgsettings["PORTAGE_USE"].split()
                                                cur_iuse = set(filter_iuse_defaults(
-                                                       metadata["IUSE"].split()))
+                                                       pkg.metadata["IUSE"].split()))
                                                reinstall_for_flags = \
                                                        self._reinstall_for_flags(
                                                        forced_flags, old_use, old_iuse,
@@ -2657,22 +2674,8 @@ class depgraph(object):
                                                        not must_reinstall and \
                                                        cpv in vardb.match(atom):
                                                        break
-                                       # Metadata accessed above is cached internally by
-                                       # each db in order to optimize visibility checks.
-                                       # Now that all possible checks visibility checks
-                                       # are complete, it's time to pull the rest of the
-                                       # metadata (including *DEPEND). This part is more
-                                       # expensive, so avoid it whenever possible.
-                                       metadata.update(izip(self._mydbapi_keys,
-                                               db.aux_get(cpv, self._mydbapi_keys)))
                                        if not built:
-                                               pkgsettings.setcpv(cpv, mydb=metadata)
-                                               metadata["USE"] = pkgsettings["PORTAGE_USE"]
                                                myeb = cpv
-                                       pkg = Package(type_name=pkg_type, root=root,
-                                               cpv=cpv, metadata=metadata,
-                                               built=built, installed=installed,
-                                               onlydeps=onlydeps)
                                        matched_packages.append(pkg)
                                        if reinstall_for_flags:
                                                self._reinstall_nodes[pkg] = \
index cd3208d504e3a5668c7dab784076c60b57a53f94..1431c1074e5a97f06aabc80421030428e9fa136a 100644 (file)
@@ -8,7 +8,7 @@ from portage.dep import dep_getslot, dep_getkey, match_from_list
 from portage.locks import unlockfile
 from portage.output import red
 from portage.util import writemsg
-from portage import dep_expand
+from portage import auxdbkeys, dep_expand
 from portage.versions import catpkgsplit, catsplit, pkgcmp
 
 
@@ -16,6 +16,8 @@ class dbapi(object):
        _category_re = re.compile(r'^\w[-.+\w]*$')
        _pkg_dir_name_re = re.compile(r'^\w[-+\w]*$')
        _categories = None
+       _known_keys = frozenset(x for x in auxdbkeys
+               if not x.startswith("UNUSED_0"))
        def __init__(self):
                pass
 
index 6b7e346d8464ef10aacd5461f93a887820557ae2..d2b499ba102e17fd83ddc1905bc397568dff938b 100644 (file)
@@ -19,6 +19,8 @@ import re
 from itertools import izip
 
 class bindbapi(fakedbapi):
+       _known_keys = frozenset(list(fakedbapi._known_keys) + \
+               ["CHOST", "repository", "USE"])
        def __init__(self, mybintree=None, **kwargs):
                fakedbapi.__init__(self, **kwargs)
                self.bintree = mybintree
@@ -27,8 +29,9 @@ class bindbapi(fakedbapi):
                self.cpdict={}
                # Selectively cache metadata in order to optimize dep matching.
                self._aux_cache_keys = set(
-                       ["CHOST", "EAPI", "IUSE", "KEYWORDS",
-                       "LICENSE", "PROVIDE", "SLOT", "USE"])
+                       ["CHOST", "DEPEND", "EAPI", "IUSE", "KEYWORDS",
+                       "LICENSE", "PDEPEND", "PROVIDE",
+                       "RDEPEND", "repository", "RESTRICT", "SLOT", "USE"])
                self._aux_cache = {}
 
        def match(self, *pargs, **kwargs):
@@ -40,10 +43,11 @@ class bindbapi(fakedbapi):
                if self.bintree and not self.bintree.populated:
                        self.bintree.populate()
                cache_me = False
-               if not set(wants).difference(self._aux_cache_keys):
+               if not self._known_keys.intersection(
+                       wants).difference(self._aux_cache_keys):
                        aux_cache = self._aux_cache.get(mycpv)
                        if aux_cache is not None:
-                               return [aux_cache[x] for x in wants]
+                               return [aux_cache.get(x, "") for x in wants]
                        cache_me = True
                mysplit = mycpv.split("/")
                mylist = []
@@ -148,21 +152,28 @@ class binarytree(object):
                        self._pkgindex_aux_keys = \
                                ["CHOST", "DEPEND", "DESCRIPTION", "EAPI",
                                "IUSE", "KEYWORDS", "LICENSE", "PDEPEND",
-                               "PROVIDE", "RDEPEND", "SLOT", "USE"]
+                               "PROVIDE", "RDEPEND", "repository", "SLOT", "USE"]
                        self._pkgindex_aux_keys = list(self._pkgindex_aux_keys)
                        self._pkgindex_header_keys = set(["ACCEPT_KEYWORDS", "CBUILD",
                                "CHOST", "CONFIG_PROTECT", "CONFIG_PROTECT_MASK", "FEATURES",
                                "GENTOO_MIRRORS", "INSTALL_MASK", "SYNC", "USE"])
                        self._pkgindex_default_pkg_data = {
+                               "DEPEND"  : "",
                                "EAPI"    : "0",
                                "IUSE"    : "",
                                "KEYWORDS": "",
                                "LICENSE" : "",
+                               "PDEPEND" : "",
                                "PROVIDE" : "",
+                               "RDEPEND" : "",
+                               "RESTRICT": "",
                                "SLOT"    : "0",
                                "USE"     : ""
                        }
-                       self._pkgindex_inherited_keys = ["CHOST"]
+                       self._pkgindex_inherited_keys = ["CHOST", "repository"]
+                       self._pkgindex_default_header_data = {
+                               "repository":""
+                       }
 
        def move_ent(self, mylist):
                if not self.populated:
@@ -798,6 +809,7 @@ class binarytree(object):
 
        def _new_pkgindex(self):
                return portage.getbinpkg.PackageIndex(
+                       default_header_data=self._pkgindex_default_header_data,
                        default_pkg_data=self._pkgindex_default_pkg_data,
                        inherited_keys=self._pkgindex_inherited_keys)
 
index de5a03bfe33e12181f1e4937356ddf0b73aee52b..532530131a1b17d0cb607417efd435c4b31088cb 100644 (file)
@@ -83,11 +83,13 @@ class portdbapi(dbapi):
                self.porttrees = [self.porttree_root] + \
                        [os.path.realpath(t) for t in self.mysettings["PORTDIR_OVERLAY"].split()]
                self.treemap = {}
+               self._repository_map = {}
                for path in self.porttrees:
                        repo_name_path = os.path.join(path, REPO_NAME_LOC)
                        try:
                                repo_name = open(repo_name_path, 'r').readline().strip()
                                self.treemap[repo_name] = path
+                               self._repository_map[path] = repo_name
                        except (OSError,IOError):
                                # warn about missing repo_name at some other time, since we
                                # don't want to see a warning every time the portage module is
@@ -116,7 +118,9 @@ class portdbapi(dbapi):
                                        self.depcachedir, x, filtered_auxdbkeys, gid=portage_gid)
                # Selectively cache metadata in order to optimize dep matching.
                self._aux_cache_keys = set(
-                       ["EAPI", "IUSE", "KEYWORDS", "LICENSE", "PROVIDE", "SLOT"])
+                       ["DEPEND", "EAPI", "IUSE", "KEYWORDS", "LICENSE",
+                       "PDEPEND", "PROVIDE", "RDEPEND", "repository",
+                       "RESTRICT", "SLOT"])
                self._aux_cache = {}
                self._broken_ebuilds = set()
 
@@ -208,10 +212,11 @@ class portdbapi(dbapi):
                cache_me = False
                if not mytree:
                        cache_me = True
-               if not mytree and not set(mylist).difference(self._aux_cache_keys):
+               if not mytree and not self._known_keys.intersection(
+                       mylist).difference(self._aux_cache_keys):
                        aux_cache = self._aux_cache.get(mycpv)
                        if aux_cache is not None:
-                               return [aux_cache[x] for x in mylist]
+                               return [aux_cache.get(x, "") for x in mylist]
                        cache_me = True
                global auxdbkeys, auxdbkeylen
                cat,pkg = mycpv.split("/", 1)
@@ -335,6 +340,10 @@ class portdbapi(dbapi):
                if not mydata.setdefault("EAPI", "0"):
                        mydata["EAPI"] = "0"
 
+               # do we have a origin repository name for the current package
+               mydata["repository"] = self._repository_map.get(
+                       os.path.sep.join(myebuild.split(os.path.sep)[:-3]), "")
+
                #finally, we look at our internal cache entry and return the requested data.
                returnme = []
                for x in mylist:
index 1470076592a92dd99b53fba60a86fcab61803e98..3767679574eadbfa4727e9a9717d0c105bea4c52 100644 (file)
@@ -667,10 +667,14 @@ def _cmp_cpv(d1, d2):
 
 class PackageIndex(object):
 
-       def __init__(self, default_pkg_data=None, inherited_keys=None):
+       def __init__(self, default_header_data=None, default_pkg_data=None,
+               inherited_keys=None):
+               self._default_header_data = default_header_data
                self._default_pkg_data = default_pkg_data
                self._inherited_keys = inherited_keys
                self.header = {}
+               if self._default_header_data:
+                       self.header.update(self._default_header_data)
                self.packages = []
                self.modified = True
 
@@ -725,7 +729,8 @@ class PackageIndex(object):
                        self.header["PACKAGES"] = str(len(self.packages))
                keys = self.header.keys()
                keys.sort()
-               self._writepkgindex(pkgfile, [(k, self.header[k]) for k in keys])
+               self._writepkgindex(pkgfile, [(k, self.header[k]) \
+                       for k in keys if self.header[k]])
                for metadata in sorted(self.packages, _cmp_cpv):
                        metadata = metadata.copy()
                        cpv = metadata["CPV"]