From: Zac Medico Date: Mon, 7 Apr 2008 22:12:04 +0000 (-0000) Subject: * Optimize metadata caching in portdbapi and bindbapi so that cache is X-Git-Tag: v2.2_pre6~228 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=2527a0bc8f3fc9be217e0ab5b31468d2b36718f6;p=portage.git * Optimize metadata caching in portdbapi and bindbapi so that cache is never pulled for the same package twice. * Cache Package instances constructed during package selection and reuse them during later package selections. svn path=/main/trunk/; revision=9741 --- diff --git a/pym/_emerge/__init__.py b/pym/_emerge/__init__.py index 173404378..4397545dd 100644 --- a/pym/_emerge/__init__.py +++ b/pym/_emerge/__init__.py @@ -1590,6 +1590,8 @@ class depgraph(object): self._select_package = self._select_pkg_highest_available self._highest_pkg_cache = {} self._installed_pkg_cache = {} + # All Package instances + self._pkg_cache = {} def _show_slot_collision_notice(self): """Show an informational message advising the user to mask one of the @@ -2531,6 +2533,11 @@ class depgraph(object): cpv_list = db.xmatch("match-all", atom) else: cpv_list = db.match(atom) + if not cpv_list: + continue + pkg_status = "merge" + if installed or onlydeps: + pkg_status = "nomerge" # descending order cpv_list.reverse() for cpv in cpv_list: @@ -2539,36 +2546,48 @@ class depgraph(object): cpv in vardb.match(atom): break reinstall_for_flags = None - try: - metadata = dict(izip(db_keys, - db.aux_get(cpv, db_keys))) - except KeyError: - continue - if not built: - if "?" in metadata["LICENSE"]: + cache_key = (pkg_type, root, cpv, pkg_status) + calculated_use = True + pkg = self._pkg_cache.get(cache_key) + if pkg is None: + calculated_use = False + try: + metadata = dict(izip(self._mydbapi_keys, + db.aux_get(cpv, self._mydbapi_keys))) + except KeyError: + continue + if not built and ("?" in metadata["LICENSE"] or \ + "?" in metadata["PROVIDE"]): + # This is avoided whenever possible because + # it's expensive.
It only needs to be done here + # if it has an effect on visibility. pkgsettings.setcpv(cpv, mydb=metadata) metadata["USE"] = pkgsettings["PORTAGE_USE"] - else: - metadata["USE"] = "" + calculated_use = True + pkg = Package(built=built, cpv=cpv, + installed=installed, metadata=metadata, + onlydeps=onlydeps, root=root, type_name=pkg_type) + self._pkg_cache[pkg] = pkg myarg = None if root == self.target_root: try: - myarg = self._get_arg_for_pkg( - Package(type_name=pkg_type, root=root, - cpv=cpv, metadata=metadata, - built=built, installed=installed, - onlydeps=onlydeps)) + myarg = self._iter_atoms_for_pkg(pkg).next() + except StopIteration: + pass except portage.exception.InvalidDependString: if not installed: # masked by corruption continue - pkg = Package(built=built, cpv=cpv, installed=installed, - metadata=metadata, type_name=pkg_type) if not installed: if myarg: found_available_arg = True if not visible(pkgsettings, pkg): continue + if not built and not calculated_use: + # This is avoided whenever possible because + # it's expensive. + pkgsettings.setcpv(cpv, mydb=pkg.metadata) + pkg.metadata["USE"] = pkgsettings["PORTAGE_USE"] if pkg.cp == atom_cp: if highest_version is None: highest_version = pkg @@ -2580,9 +2599,7 @@ class depgraph(object): # will always end with a break statement below # this point. 
if find_existing_node: - slot_atom = "%s:%s" % ( - portage.cpv_getkey(cpv), metadata["SLOT"]) - e_pkg = self._slot_pkg_map[root].get(slot_atom) + e_pkg = self._slot_pkg_map[root].get(pkg.slot_atom) if not e_pkg: break cpv_slot = "%s:%s" % \ @@ -2606,9 +2623,9 @@ class depgraph(object): ("--newuse" in self.myopts or \ "--reinstall" in self.myopts): iuses = set(filter_iuse_defaults( - metadata["IUSE"].split())) - old_use = metadata["USE"].split() - mydb = metadata + pkg.metadata["IUSE"].split())) + old_use = pkg.metadata["USE"].split() + mydb = pkg.metadata if myeb and not usepkgonly: mydb = portdb if myeb: @@ -2634,7 +2651,7 @@ class depgraph(object): ("--newuse" in self.myopts or \ "--reinstall" in self.myopts) and \ cpv in vardb.match(atom): - pkgsettings.setcpv(cpv, mydb=metadata) + pkgsettings.setcpv(cpv, mydb=pkg.metadata) forced_flags = set() forced_flags.update(pkgsettings.useforce) forced_flags.update(pkgsettings.usemask) @@ -2643,7 +2660,7 @@ class depgraph(object): vardb.aux_get(cpv, ["IUSE"])[0].split())) cur_use = pkgsettings["PORTAGE_USE"].split() cur_iuse = set(filter_iuse_defaults( - metadata["IUSE"].split())) + pkg.metadata["IUSE"].split())) reinstall_for_flags = \ self._reinstall_for_flags( forced_flags, old_use, old_iuse, @@ -2657,22 +2674,8 @@ class depgraph(object): not must_reinstall and \ cpv in vardb.match(atom): break - # Metadata accessed above is cached internally by - # each db in order to optimize visibility checks. - # Now that all possible checks visibility checks - # are complete, it's time to pull the rest of the - # metadata (including *DEPEND). This part is more - # expensive, so avoid it whenever possible. 
- metadata.update(izip(self._mydbapi_keys, - db.aux_get(cpv, self._mydbapi_keys))) if not built: - pkgsettings.setcpv(cpv, mydb=metadata) - metadata["USE"] = pkgsettings["PORTAGE_USE"] myeb = cpv - pkg = Package(type_name=pkg_type, root=root, - cpv=cpv, metadata=metadata, - built=built, installed=installed, - onlydeps=onlydeps) matched_packages.append(pkg) if reinstall_for_flags: self._reinstall_nodes[pkg] = \ diff --git a/pym/portage/dbapi/__init__.py b/pym/portage/dbapi/__init__.py index cd3208d50..1431c1074 100644 --- a/pym/portage/dbapi/__init__.py +++ b/pym/portage/dbapi/__init__.py @@ -8,7 +8,7 @@ from portage.dep import dep_getslot, dep_getkey, match_from_list from portage.locks import unlockfile from portage.output import red from portage.util import writemsg -from portage import dep_expand +from portage import auxdbkeys, dep_expand from portage.versions import catpkgsplit, catsplit, pkgcmp @@ -16,6 +16,8 @@ class dbapi(object): _category_re = re.compile(r'^\w[-.+\w]*$') _pkg_dir_name_re = re.compile(r'^\w[-+\w]*$') _categories = None + _known_keys = frozenset(x for x in auxdbkeys + if not x.startswith("UNUSED_0")) def __init__(self): pass diff --git a/pym/portage/dbapi/bintree.py b/pym/portage/dbapi/bintree.py index 6b7e346d8..d2b499ba1 100644 --- a/pym/portage/dbapi/bintree.py +++ b/pym/portage/dbapi/bintree.py @@ -19,6 +19,8 @@ import re from itertools import izip class bindbapi(fakedbapi): + _known_keys = frozenset(list(fakedbapi._known_keys) + \ + ["CHOST", "repository", "USE"]) def __init__(self, mybintree=None, **kwargs): fakedbapi.__init__(self, **kwargs) self.bintree = mybintree @@ -27,8 +29,9 @@ class bindbapi(fakedbapi): self.cpdict={} # Selectively cache metadata in order to optimize dep matching. 
self._aux_cache_keys = set( - ["CHOST", "EAPI", "IUSE", "KEYWORDS", - "LICENSE", "PROVIDE", "SLOT", "USE"]) + ["CHOST", "DEPEND", "EAPI", "IUSE", "KEYWORDS", + "LICENSE", "PDEPEND", "PROVIDE", + "RDEPEND", "repository", "RESTRICT", "SLOT", "USE"]) self._aux_cache = {} def match(self, *pargs, **kwargs): @@ -40,10 +43,11 @@ class bindbapi(fakedbapi): if self.bintree and not self.bintree.populated: self.bintree.populate() cache_me = False - if not set(wants).difference(self._aux_cache_keys): + if not self._known_keys.intersection( + wants).difference(self._aux_cache_keys): aux_cache = self._aux_cache.get(mycpv) if aux_cache is not None: - return [aux_cache[x] for x in wants] + return [aux_cache.get(x, "") for x in wants] cache_me = True mysplit = mycpv.split("/") mylist = [] @@ -148,21 +152,28 @@ class binarytree(object): self._pkgindex_aux_keys = \ ["CHOST", "DEPEND", "DESCRIPTION", "EAPI", "IUSE", "KEYWORDS", "LICENSE", "PDEPEND", - "PROVIDE", "RDEPEND", "SLOT", "USE"] + "PROVIDE", "RDEPEND", "repository", "SLOT", "USE"] self._pkgindex_aux_keys = list(self._pkgindex_aux_keys) self._pkgindex_header_keys = set(["ACCEPT_KEYWORDS", "CBUILD", "CHOST", "CONFIG_PROTECT", "CONFIG_PROTECT_MASK", "FEATURES", "GENTOO_MIRRORS", "INSTALL_MASK", "SYNC", "USE"]) self._pkgindex_default_pkg_data = { + "DEPEND" : "", "EAPI" : "0", "IUSE" : "", "KEYWORDS": "", "LICENSE" : "", + "PDEPEND" : "", "PROVIDE" : "", + "RDEPEND" : "", + "RESTRICT": "", "SLOT" : "0", "USE" : "" } - self._pkgindex_inherited_keys = ["CHOST"] + self._pkgindex_inherited_keys = ["CHOST", "repository"] + self._pkgindex_default_header_data = { + "repository":"" + } def move_ent(self, mylist): if not self.populated: @@ -798,6 +809,7 @@ class binarytree(object): def _new_pkgindex(self): return portage.getbinpkg.PackageIndex( + default_header_data=self._pkgindex_default_header_data, default_pkg_data=self._pkgindex_default_pkg_data, inherited_keys=self._pkgindex_inherited_keys) diff --git a/pym/portage/dbapi/porttree.py 
b/pym/portage/dbapi/porttree.py index de5a03bfe..532530131 100644 --- a/pym/portage/dbapi/porttree.py +++ b/pym/portage/dbapi/porttree.py @@ -83,11 +83,13 @@ class portdbapi(dbapi): self.porttrees = [self.porttree_root] + \ [os.path.realpath(t) for t in self.mysettings["PORTDIR_OVERLAY"].split()] self.treemap = {} + self._repository_map = {} for path in self.porttrees: repo_name_path = os.path.join(path, REPO_NAME_LOC) try: repo_name = open(repo_name_path, 'r').readline().strip() self.treemap[repo_name] = path + self._repository_map[path] = repo_name except (OSError,IOError): # warn about missing repo_name at some other time, since we # don't want to see a warning every time the portage module is @@ -116,7 +118,9 @@ class portdbapi(dbapi): self.depcachedir, x, filtered_auxdbkeys, gid=portage_gid) # Selectively cache metadata in order to optimize dep matching. self._aux_cache_keys = set( - ["EAPI", "IUSE", "KEYWORDS", "LICENSE", "PROVIDE", "SLOT"]) + ["DEPEND", "EAPI", "IUSE", "KEYWORDS", "LICENSE", + "PDEPEND", "PROVIDE", "RDEPEND", "repository", + "RESTRICT", "SLOT"]) self._aux_cache = {} self._broken_ebuilds = set() @@ -208,10 +212,11 @@ class portdbapi(dbapi): cache_me = False if not mytree: cache_me = True - if not mytree and not set(mylist).difference(self._aux_cache_keys): + if not mytree and not self._known_keys.intersection( + mylist).difference(self._aux_cache_keys): aux_cache = self._aux_cache.get(mycpv) if aux_cache is not None: - return [aux_cache[x] for x in mylist] + return [aux_cache.get(x, "") for x in mylist] cache_me = True global auxdbkeys, auxdbkeylen cat,pkg = mycpv.split("/", 1) @@ -335,6 +340,10 @@ class portdbapi(dbapi): if not mydata.setdefault("EAPI", "0"): mydata["EAPI"] = "0" + # do we have a origin repository name for the current package + mydata["repository"] = self._repository_map.get( + os.path.sep.join(myebuild.split(os.path.sep)[:-3]), "") + #finally, we look at our internal cache entry and return the requested data. 
returnme = [] for x in mylist: diff --git a/pym/portage/getbinpkg.py b/pym/portage/getbinpkg.py index 147007659..376767957 100644 --- a/pym/portage/getbinpkg.py +++ b/pym/portage/getbinpkg.py @@ -667,10 +667,14 @@ def _cmp_cpv(d1, d2): class PackageIndex(object): - def __init__(self, default_pkg_data=None, inherited_keys=None): + def __init__(self, default_header_data=None, default_pkg_data=None, + inherited_keys=None): + self._default_header_data = default_header_data self._default_pkg_data = default_pkg_data self._inherited_keys = inherited_keys self.header = {} + if self._default_header_data: + self.header.update(self._default_header_data) self.packages = [] self.modified = True @@ -725,7 +729,8 @@ class PackageIndex(object): self.header["PACKAGES"] = str(len(self.packages)) keys = self.header.keys() keys.sort() - self._writepkgindex(pkgfile, [(k, self.header[k]) for k in keys]) + self._writepkgindex(pkgfile, [(k, self.header[k]) \ + for k in keys if self.header[k]]) for metadata in sorted(self.packages, _cmp_cpv): metadata = metadata.copy() cpv = metadata["CPV"]