Use a pickle to cache vdb metadata. This greatly improves the performance for loading the vdb.
author	Zac Medico <zmedico@gentoo.org>
	Thu, 28 Dec 2006 13:07:03 +0000 (13:07 -0000)
committer	Zac Medico <zmedico@gentoo.org>
	Thu, 28 Dec 2006 13:07:03 +0000 (13:07 -0000)
svn path=/main/trunk/; revision=5402

bin/emerge
pym/portage.py
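
The scheme, in brief: metadata for every installed package lives in one pickle keyed by cpv, and each entry carries the mtime of that package's vdb directory, so stale entries are transparently re-read. A minimal standalone sketch of the pattern, with illustrative names (load_cache, lookup, and read_metadata are not portage API):

import os
import pickle

CACHE_VERSION = "1"

def load_cache(path):
    # The cache is disposable: on any read or format problem, start fresh.
    try:
        with open(path, "rb") as f:
            cache = pickle.load(f)
        if isinstance(cache, dict) and cache.get("version") == CACHE_VERSION:
            return cache
    except (OSError, EOFError, pickle.UnpicklingError):
        pass
    return {"version": CACHE_VERSION, "packages": {}}

def lookup(cache, vdb_root, cpv, read_metadata):
    # read_metadata(cpv) stands in for the expensive per-package read.
    mtime = int(os.stat(os.path.join(vdb_root, cpv)).st_mtime)
    entry = cache["packages"].get(cpv)
    if entry and entry[0] == mtime:
        return entry[1]  # valid hit: the package directory is unchanged
    metadata = read_metadata(cpv)
    cache["packages"][cpv] = (mtime, metadata)
    return metadata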

diff --git a/bin/emerge b/bin/emerge
index 1b07d77bf2a7064cac6f44e03efcb3f70b7b505d..5d7406d1f3fe07c8a83219971e005030e72e976d 100755 (executable)
@@ -744,6 +744,7 @@ class FakeVartree(portage.vartree):
                                                continue
                                slot_counters[myslot_atom] = mycounter
                                self.dbapi.cpv_inject(cpv, metadata=metadata)
+                       real_dbapi.flush_cache()
                finally:
                        if vdb_lock:
                                portage_locks.unlockdir(vdb_lock)
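
The one-line change above is the write half of the scheme: by this point FakeVartree has pulled metadata for every installed package while building its snapshot, so the in-memory cache is fully warmed and a single flush persists it. Schematically (a sketch reusing only the methods added by this commit; real_dbapi is the live vardbapi, as in the hunk above):

# Warm the cache with the keys emerge needs, then write it out once.
# flush_cache() silently does nothing without superuser privileges
# (secpass < 2), so unprivileged runs still get the read-only benefit.
for cpv in real_dbapi.cpv_all():
    real_dbapi.aux_get(cpv, ["SLOT", "COUNTER", "PROVIDE", "USE",
        "IUSE", "DEPEND", "RDEPEND", "PDEPEND"])
real_dbapi.flush_cache()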
diff --git a/pym/portage.py b/pym/portage.py
index 9d2526f21995cd3d1a578005105a1affa0450897..d7dc3b5613d305380ad835e544e2f84964656366 100644 (file)
@@ -4706,6 +4706,12 @@ class vardbapi(dbapi):
                if vartree is None:
                        vartree = globals()["db"][root]["vartree"]
                self.vartree = vartree
+               self._aux_cache_keys = set(["SLOT", "COUNTER", "PROVIDE", "USE",
+                       "IUSE", "DEPEND", "RDEPEND", "PDEPEND"])
+               self._aux_cache = None
+               self._aux_cache_version = "1"
+               self._aux_cache_filename = os.path.join(self.root,
+                       CACHE_PATH.lstrip(os.path.sep), "vdb_metadata.pickle")
 
        def cpv_exists(self,mykey):
                "Tells us whether an actual ebuild exists on disk (no masking)"
@@ -4969,7 +4975,96 @@ class vardbapi(dbapi):
        def findname(self, mycpv):
                return self.root+VDB_PATH+"/"+str(mycpv)+"/"+mycpv.split("/")[1]+".ebuild"
 
+       def flush_cache(self):
+               """If the current user has permission and the internal aux_get cache has
+               been updated, save it to disk and mark it unmodified.  This is called
+               by emerge after it has loaded the full vdb for use in dependency
+               calculations.  Currently, the cache is only written if the user has
+               superuser privileges (since that's required to obtain a lock), but all
+               users have read access and benefit from faster metadata lookups (as
+               long as at least part of the cache is still valid)."""
+               if self._aux_cache is not None and \
+                       self._aux_cache["modified"] and \
+                       secpass >= 2:
+                       valid_nodes = set(self.cpv_all())
+                       for cpv in self._aux_cache["packages"].keys():
+                               if cpv not in valid_nodes:
+                                       del self._aux_cache["packages"][cpv]
+                       del self._aux_cache["modified"]
+                       try:
+                               f = atomic_ofstream(self._aux_cache_filename)
+                               cPickle.dump(self._aux_cache, f, -1)
+                               f.close()
+                               portage_util.apply_secpass_permissions(
+                                       self._aux_cache_filename, gid=portage_gid, mode=0644)
+                       except (IOError, OSError), e:
+                               pass
+                       self._aux_cache["modified"] = False
+
        def aux_get(self, mycpv, wants):
+               """This automatically caches selected keys that are frequently needed
+               by emerge for dependency calculations.  The cached metadata is
+               considered valid if the mtime of the package directory has not changed
+               since the data was cached.  The cache is stored in a pickled dict
+               object with the following format:
+
+               {"version":"1", "packages":{cpv1:(mtime,{k1:v1, k2:v2, ...}), cpv2:(...), ...}}
+
+               If an error occurs while loading the cache pickle or the version is
+               unrecognized, the cache will simply be recreated from scratch (it is
+               completely disposable).
+               """
+               if not self._aux_cache_keys.intersection(wants):
+                       return self._aux_get(mycpv, wants)
+               if self._aux_cache is None:
+                       try:
+                               f = open(self._aux_cache_filename)
+                               mypickle = cPickle.Unpickler(f)
+                               mypickle.find_global = None
+                               self._aux_cache = mypickle.load()
+                               f.close()
+                               del f
+                       except (IOError, OSError, EOFError, cPickle.UnpicklingError):
+                               pass
+                       if not self._aux_cache or \
+                               not isinstance(self._aux_cache, dict) or \
+                               self._aux_cache.get("version") != self._aux_cache_version:
+                               self._aux_cache = {"version":self._aux_cache_version}
+                               self._aux_cache["packages"] = {}
+                       self._aux_cache["modified"] = False
+               mydir = os.path.join(self.root, VDB_PATH, mycpv)
+               mydir_stat = None
+               try:
+                       mydir_stat = os.stat(mydir)
+               except OSError, e:
+                       if e.errno != errno.ENOENT:
+                               raise
+                       raise KeyError(mycpv)
+               mydir_mtime = long(mydir_stat.st_mtime)
+               pkg_data = self._aux_cache["packages"].get(mycpv)
+               mydata = {}
+               cache_valid = False
+               if pkg_data:
+                       cache_mtime, metadata = pkg_data
+                       cache_valid = cache_mtime == mydir_mtime
+               if cache_valid:
+                       mydata.update(metadata)
+                       pull_me = set(wants).difference(self._aux_cache_keys)
+               else:
+                       pull_me = self._aux_cache_keys.union(wants)
+               if pull_me:
+                       # pull any needed data and cache it
+                       aux_keys = list(pull_me)
+                       mydata.update(izip(aux_keys, self._aux_get(mycpv, aux_keys)))
+                       if not cache_valid:
+                               cache_data = {}
+                               for aux_key in self._aux_cache_keys:
+                                       cache_data[aux_key] = mydata[aux_key]
+                               self._aux_cache["packages"][mycpv] = (mydir_mtime, cache_data)
+                               self._aux_cache["modified"] = True
+               return [mydata[x] for x in wants]
+
+       def _aux_get(self, mycpv, wants):
                mydir = os.path.join(self.root, VDB_PATH, mycpv)
                if not os.path.isdir(mydir):
                        raise KeyError(mycpv)
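
One hardening detail in the hunk above deserves a remark: setting find_global = None on a Python 2 cPickle.Unpickler prevents it from resolving any global name, so a corrupt or tampered cache file can only produce builtin types and can never instantiate arbitrary classes. The same idea in Python 3 spelling, shown only for illustration, overrides Unpickler.find_class:

import pickle

class SafeUnpickler(pickle.Unpickler):
    # The vdb cache holds only builtin containers and strings, so no
    # class reference should ever need to be resolved while loading it.
    def find_class(self, module, name):
        raise pickle.UnpicklingError(
            "global '%s.%s' is forbidden" % (module, name))

def load_builtin_only(path):
    with open(path, "rb") as f:
        return SafeUnpickler(f).load()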