From: Zac Medico Date: Tue, 8 Apr 2008 05:38:33 +0000 (-0000) Subject: * Optimize metadata caching in portdbapi and bindbapi so that cache is X-Git-Tag: v2.1.5~235 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=f52995bf6e8f10f033001058beab422bd1e27b28;p=portage.git * Optimize metadata caching in portdbapi and bindbapi so that cache is never pulled for the same package twice. * Cache Package instances constructed during package selection and reuse them during later package selections. (trunk r9741) svn path=/main/branches/2.1.2/; revision=9752 --- diff --git a/bin/emerge b/bin/emerge index a4865d1ab..1eeae340c 100755 --- a/bin/emerge +++ b/bin/emerge @@ -1485,7 +1485,7 @@ class CompositeDbapi(object): except portage_exception.InvalidDependString: arg = None arg_cp = None - else: + if arg: arg_cp = portage.dep_getkey(arg) if arg and arg_cp != pkg.cp: # If this argument matches via PROVIDE but there is a @@ -1619,6 +1619,8 @@ class depgraph: self._world_problems = False self._select_package = self._select_pkg_highest_available self._highest_pkg_cache = {} + # All Package instances + self._pkg_cache = {} def _show_slot_collision_notice(self): """Show an informational message advising the user to mask one of the @@ -2316,6 +2318,11 @@ class depgraph: cpv_list = db.xmatch("match-all", atom) else: cpv_list = db.match(atom) + if not cpv_list: + continue + pkg_status = "merge" + if installed or onlydeps: + pkg_status = "nomerge" # descending order cpv_list.reverse() for cpv in cpv_list: @@ -2324,33 +2331,47 @@ class depgraph: cpv in vardb.match(atom): break reinstall_for_flags = None - try: - metadata = dict(izip(db_keys, - db.aux_get(cpv, db_keys))) - except KeyError: - continue - if not built: - if "?" 
in metadata["LICENSE"]: + cache_key = (pkg_type, root, cpv, pkg_status) + calculated_use = True + pkg = self._pkg_cache.get(cache_key) + if pkg is None: + calculated_use = False + try: + metadata = dict(izip(self._mydbapi_keys, + db.aux_get(cpv, self._mydbapi_keys))) + except KeyError: + continue + if not built and ("?" in metadata["LICENSE"] or \ + "?" in metadata["PROVIDE"]): + # This is avoided whenever possible because + # it's expensive. It only needs to be done here + # if it has an effect on visibility. pkgsettings.setcpv(cpv, mydb=metadata) metadata["USE"] = pkgsettings["PORTAGE_USE"] - else: - metadata["USE"] = "" + calculated_use = True + pkg = Package(built=built, cpv=cpv, + installed=installed, metadata=metadata, + onlydeps=onlydeps, root=root, type_name=pkg_type) + self._pkg_cache[pkg] = pkg myarg = None if root == self.target_root: try: myarg = self._set_atoms.findAtomForPackage( - cpv, metadata) + pkg.cpv, pkg.metadata) except portage_exception.InvalidDependString: if not installed: # masked by corruption continue - pkg = Package(built=built, cpv=cpv, installed=installed, - metadata=metadata, type_name=pkg_type) if not installed: if myarg: found_available_arg = True if not visible(pkgsettings, pkg): continue + if not built and not calculated_use: + # This is avoided whenever possible because + # it's expensive. + pkgsettings.setcpv(cpv, mydb=pkg.metadata) + pkg.metadata["USE"] = pkgsettings["PORTAGE_USE"] if pkg.cp == atom_cp: if highest_version is None: highest_version = pkg @@ -2362,9 +2383,7 @@ class depgraph: # will always end with a break statement below # this point. 
if find_existing_node: - slot_atom = "%s:%s" % ( - portage.cpv_getkey(cpv), metadata["SLOT"]) - e_pkg = self._slot_pkg_map[root].get(slot_atom) + e_pkg = self._slot_pkg_map[root].get(pkg.slot_atom) if not e_pkg: break cpv_slot = "%s:%s" % \ @@ -2388,9 +2407,9 @@ class depgraph: ("--newuse" in self.myopts or \ "--reinstall" in self.myopts): iuses = set(filter_iuse_defaults( - metadata["IUSE"].split())) - old_use = metadata["USE"].split() - mydb = metadata + pkg.metadata["IUSE"].split())) + old_use = pkg.metadata["USE"].split() + mydb = pkg.metadata if myeb and not usepkgonly: mydb = portdb if myeb: @@ -2416,7 +2435,7 @@ class depgraph: ("--newuse" in self.myopts or \ "--reinstall" in self.myopts) and \ cpv in vardb.match(atom): - pkgsettings.setcpv(cpv, mydb=metadata) + pkgsettings.setcpv(cpv, mydb=pkg.metadata) forced_flags = set() forced_flags.update(pkgsettings.useforce) forced_flags.update(pkgsettings.usemask) @@ -2425,7 +2444,7 @@ class depgraph: vardb.aux_get(cpv, ["IUSE"])[0].split())) cur_use = pkgsettings["PORTAGE_USE"].split() cur_iuse = set(filter_iuse_defaults( - metadata["IUSE"].split())) + pkg.metadata["IUSE"].split())) reinstall_for_flags = \ self._reinstall_for_flags( forced_flags, old_use, old_iuse, @@ -2439,22 +2458,8 @@ class depgraph: not must_reinstall and \ cpv in vardb.match(atom): break - # Metadata accessed above is cached internally by - # each db in order to optimize visibility checks. - # Now that all possible checks visibility checks - # are complete, it's time to pull the rest of the - # metadata (including *DEPEND). This part is more - # expensive, so avoid it whenever possible. 
- metadata.update(izip(self._mydbapi_keys, - db.aux_get(cpv, self._mydbapi_keys))) if not built: - pkgsettings.setcpv(cpv, mydb=metadata) - metadata["USE"] = pkgsettings["PORTAGE_USE"] myeb = cpv - pkg = Package(type_name=pkg_type, root=root, - cpv=cpv, metadata=metadata, - built=built, installed=installed, - onlydeps=onlydeps) matched_packages.append(pkg) if reinstall_for_flags: self._reinstall_nodes[pkg] = \ diff --git a/pym/portage.py b/pym/portage.py index 0cc187da1..4fee4e47a 100644 --- a/pym/portage.py +++ b/pym/portage.py @@ -5945,6 +5945,17 @@ def getmaskingstatus(mycpv, settings=None, portdb=None): rValue.append(kmask+" keyword") return rValue + +auxdbkeys=[ + 'DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI', + 'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION', + 'KEYWORDS', 'INHERITED', 'IUSE', 'CDEPEND', + 'PDEPEND', 'PROVIDE', 'EAPI', + 'UNUSED_01', 'UNUSED_02', 'UNUSED_03', 'UNUSED_04', + 'UNUSED_05', 'UNUSED_06', 'UNUSED_07', + ] +auxdbkeylen=len(auxdbkeys) + class portagetree: def __init__(self, root="/", virtual=None, clone=None, settings=None): @@ -6020,10 +6031,12 @@ class portagetree: return myslot -class dbapi: +class dbapi(object): _category_re = re.compile(r'^\w[-.+\w]*$') _pkg_dir_name_re = re.compile(r'^\w[-+\w]*$') _categories = None + _known_keys = frozenset(x for x in auxdbkeys + if not x.startswith("UNUSED_0")) def __init__(self): pass @@ -6243,6 +6256,8 @@ class fakedbapi(dbapi): self.cpvdict[cpv].update(values) class bindbapi(fakedbapi): + _known_keys = frozenset(list(fakedbapi._known_keys) + \ + ["CHOST", "repository", "USE"]) def __init__(self, mybintree=None, settings=None): self.bintree = mybintree self.move_ent = mybintree.move_ent @@ -6254,8 +6269,9 @@ class bindbapi(fakedbapi): self._match_cache = {} # Selectively cache metadata in order to optimize dep matching. 
self._aux_cache_keys = set( - ["CHOST", "EAPI", "IUSE", "KEYWORDS", - "LICENSE", "PROVIDE", "SLOT", "USE"]) + ["CHOST", "DEPEND", "EAPI", "IUSE", "KEYWORDS", + "LICENSE", "PDEPEND", "PROVIDE", + "RDEPEND", "repository", "RESTRICT", "SLOT", "USE"]) self._aux_cache = {} def match(self, *pargs, **kwargs): @@ -6267,10 +6283,11 @@ class bindbapi(fakedbapi): if self.bintree and not self.bintree.populated: self.bintree.populate() cache_me = False - if not set(wants).difference(self._aux_cache_keys): + if not self._known_keys.intersection( + wants).difference(self._aux_cache_keys): aux_cache = self._aux_cache.get(mycpv) if aux_cache is not None: - return [aux_cache[x] for x in wants] + return [aux_cache.get(x, "") for x in wants] cache_me = True mysplit = mycpv.split("/") mylist = [] @@ -6970,16 +6987,6 @@ class vartree(object): def populate(self): self.populated=1 -auxdbkeys=[ - 'DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI', - 'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION', - 'KEYWORDS', 'INHERITED', 'IUSE', 'CDEPEND', - 'PDEPEND', 'PROVIDE', 'EAPI', - 'UNUSED_01', 'UNUSED_02', 'UNUSED_03', 'UNUSED_04', - 'UNUSED_05', 'UNUSED_06', 'UNUSED_07', - ] -auxdbkeylen=len(auxdbkeys) - def close_portdbapi_caches(): for i in portdbapi.portdbapi_instances: i.close_caches() @@ -7051,11 +7058,13 @@ class portdbapi(dbapi): self.porttrees = [self.porttree_root] + \ [os.path.realpath(t) for t in self.mysettings["PORTDIR_OVERLAY"].split()] self.treemap = {} + self._repository_map = {} for path in self.porttrees: repo_name_path = os.path.join(path, portage_const.REPO_NAME_LOC) try: repo_name = open(repo_name_path, 'r').readline().strip() self.treemap[repo_name] = path + self._repository_map[path] = repo_name except (OSError,IOError): # warn about missing repo_name at some other time, since we # don't want to see a warning every time the portage module is @@ -7084,7 +7093,9 @@ class portdbapi(dbapi): self.depcachedir, x, filtered_auxdbkeys, gid=portage_gid) # Selectively cache metadata in order 
to optimize dep matching. self._aux_cache_keys = set( - ["EAPI", "IUSE", "KEYWORDS", "LICENSE", "PROVIDE", "SLOT"]) + ["DEPEND", "EAPI", "IUSE", "KEYWORDS", "LICENSE", + "PDEPEND", "PROVIDE", "RDEPEND", "repository", + "RESTRICT", "SLOT"]) self._aux_cache = {} self._broken_ebuilds = set() @@ -7179,10 +7190,11 @@ class portdbapi(dbapi): cache_me = False if not mytree: cache_me = True - if not mytree and not set(mylist).difference(self._aux_cache_keys): + if not mytree and not self._known_keys.intersection( + mylist).difference(self._aux_cache_keys): aux_cache = self._aux_cache.get(mycpv) if aux_cache is not None: - return [aux_cache[x] for x in mylist] + return [aux_cache.get(x, "") for x in mylist] cache_me = True global auxdbkeys,auxdbkeylen cat,pkg = mycpv.split("/", 1) @@ -7304,6 +7316,10 @@ class portdbapi(dbapi): if not mydata.setdefault("EAPI", "0"): mydata["EAPI"] = "0" + # do we have a origin repository name for the current package + mydata["repository"] = self._repository_map.get( + os.path.sep.join(myebuild.split(os.path.sep)[:-3]), "") + #finally, we look at our internal cache entry and return the requested data. returnme = [] for x in mylist: