From: Zac Medico Date: Thu, 28 Dec 2006 13:07:03 +0000 (-0000) Subject: Use a pickle to cache vdb metadata. This greatly improves the performance for loadin... X-Git-Tag: v2.1.2~210 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=a155a23e88c446294ea9787e10ab7c04095501a1;p=portage.git Use a pickle to cache vdb metadata. This greatly improves the performance for loading the vdb when the buffer cache is cold. svn path=/main/trunk/; revision=5402 --- diff --git a/bin/emerge b/bin/emerge index 1b07d77bf..5d7406d1f 100755 --- a/bin/emerge +++ b/bin/emerge @@ -744,6 +744,7 @@ class FakeVartree(portage.vartree): continue slot_counters[myslot_atom] = mycounter self.dbapi.cpv_inject(cpv, metadata=metadata) + real_dbapi.flush_cache() finally: if vdb_lock: portage_locks.unlockdir(vdb_lock) diff --git a/pym/portage.py b/pym/portage.py index 9d2526f21..d7dc3b561 100644 --- a/pym/portage.py +++ b/pym/portage.py @@ -4706,6 +4706,12 @@ class vardbapi(dbapi): if vartree is None: vartree = globals()["db"][root]["vartree"] self.vartree = vartree + self._aux_cache_keys = set(["SLOT", "COUNTER", "PROVIDE", "USE", + "IUSE", "DEPEND", "RDEPEND", "PDEPEND"]) + self._aux_cache = None + self._aux_cache_version = "1" + self._aux_cache_filename = os.path.join(self.root, + CACHE_PATH.lstrip(os.path.sep), "vdb_metadata.pickle") def cpv_exists(self,mykey): "Tells us whether an actual ebuild exists on disk (no masking)" @@ -4969,7 +4975,96 @@ class vardbapi(dbapi): def findname(self, mycpv): return self.root+VDB_PATH+"/"+str(mycpv)+"/"+mycpv.split("/")[1]+".ebuild" + def flush_cache(self): + """If the current user has permission and the internal aux_get cache has + been updated, save it to disk and mark it unmodified. This is called + by emerge after it has loaded the full vdb for use in dependency + calculations. 
Currently, the cache is only written if the user has + superuser privileges (since that's required to obtain a lock), but all + users have read access and benefit from faster metadata lookups (as + long as at least part of the cache is still valid).""" + if self._aux_cache is not None and \ + self._aux_cache["modified"] and \ + secpass >= 2: + valid_nodes = set(self.cpv_all()) + for cpv in self._aux_cache["packages"].keys(): + if cpv not in valid_nodes: + del self._aux_cache["packages"][cpv] + del self._aux_cache["modified"] + try: + f = atomic_ofstream(self._aux_cache_filename) + cPickle.dump(self._aux_cache, f, -1) + f.close() + portage_util.apply_secpass_permissions( + self._aux_cache_filename, gid=portage_gid, mode=0644) + except (IOError, OSError), e: + pass + self._aux_cache["modified"] = False + def aux_get(self, mycpv, wants): + """This automatically caches selected keys that are frequently needed + by emerge for dependency calculations. The cached metadata is + considered valid if the mtime of the package directory has not changed + since the data was cached. The cache is stored in a pickled dict + object with the following format: + + {"version":"1", "packages":{cpv1:(mtime,{k1:v1, k2:v2, ...}), cpv2...}} + + If an error occurs while loading the cache pickle or the version is + unrecognized, the cache will simply be recreated from scratch (it is + completely disposable). 
+ """ + if not self._aux_cache_keys.intersection(wants): + return self._aux_get(mycpv, wants) + if self._aux_cache is None: + try: + f = open(self._aux_cache_filename) + mypickle = cPickle.Unpickler(f) + mypickle.find_global = None + self._aux_cache = mypickle.load() + f.close() + del f + except (IOError, OSError, EOFError, cPickle.UnpicklingError): + pass + if not self._aux_cache or \ + not isinstance(self._aux_cache, dict) or \ + self._aux_cache.get("version") != self._aux_cache_version: + self._aux_cache = {"version":self._aux_cache_version} + self._aux_cache["packages"] = {} + self._aux_cache["modified"] = False + mydir = os.path.join(self.root, VDB_PATH, mycpv) + mydir_stat = None + try: + mydir_stat = os.stat(mydir) + except OSError, e: + if e.errno != errno.ENOENT: + raise + raise KeyError(mycpv) + mydir_mtime = long(mydir_stat.st_mtime) + pkg_data = self._aux_cache["packages"].get(mycpv) + mydata = {} + cache_valid = False + if pkg_data: + cache_mtime, metadata = pkg_data + cache_valid = cache_mtime == mydir_mtime + if cache_valid: + mydata.update(metadata) + pull_me = set(wants).difference(self._aux_cache_keys) + else: + pull_me = self._aux_cache_keys.union(wants) + if pull_me: + # pull any needed data and cache it + aux_keys = list(pull_me) + mydata.update(izip(aux_keys, self._aux_get(mycpv, aux_keys))) + if not cache_valid: + cache_data = {} + for aux_key in self._aux_cache_keys: + cache_data[aux_key] = mydata[aux_key] + self._aux_cache["packages"][mycpv] = (mydir_mtime, cache_data) + self._aux_cache["modified"] = True + return [mydata[x] for x in wants] + + def _aux_get(self, mycpv, wants): mydir = os.path.join(self.root, VDB_PATH, mycpv) if not os.path.isdir(mydir): raise KeyError(mycpv)