From: Zac Medico Date: Tue, 22 May 2007 11:16:50 +0000 (-0000) Subject: Cache xpak metadata in ${PKGDIR}/Packages in order to minimize disk load when binaryt... X-Git-Tag: v2.2_pre1~1396 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=f45516f92d9cfa12736783ec62273e78f3805120;p=portage.git Cache xpak metadata in ${PKGDIR}/Packages in order to minimize disk load when binarytree.populate() is called. This does not do all the keys that genpkgindex does but it will copy metadata from the existing Packages file if it appears valid. The binarytree.populate() code is performance critical, so the MD5 will certainly have to be done elsewhere (like just after a package is built). svn path=/main/trunk/; revision=6569 --- diff --git a/pym/portage/dbapi/bintree.py b/pym/portage/dbapi/bintree.py index 2a9651df5..d15f00b8b 100644 --- a/pym/portage/dbapi/bintree.py +++ b/pym/portage/dbapi/bintree.py @@ -115,6 +115,7 @@ class binarytree(object): self._pkg_paths = {} self._all_directory = os.path.isdir( os.path.join(self.pkgdir, "All")) + self._pkgindex_keys = set(["CPV", "SLOT", "MTIME", "SIZE"]) def move_ent(self, mylist): if not self.populated: @@ -371,20 +372,66 @@ class binarytree(object): dirs.remove("All") dirs.sort() dirs.insert(0, "All") + pkgfile = os.path.join(self.pkgdir, "Packages") + metadata = {} + header = {} + try: + f = open(pkgfile) + except EnvironmentError: + pass + else: + try: + header = portage.getbinpkg.readpkgindex(f) + while True: + d = portage.getbinpkg.readpkgindex(f) + if not d: + break + mycpv = d.get("CPV") + if not mycpv: + continue + d.setdefault("SLOT", "0") + metadata[mycpv] = d + finally: + f.close() + del f + update_pkgindex = False for mydir in dirs: for myfile in listdir(os.path.join(self.pkgdir, mydir)): if not myfile.endswith(".tbz2"): continue mypath = os.path.join(mydir, myfile) full_path = os.path.join(self.pkgdir, mypath) - if os.path.islink(full_path): + s = os.lstat(full_path) + if stat.S_ISLNK(s.st_mode): continue + if mydir != "All": + # Validate data from the package index and try to avoid + # reading the xpak if possible. + mycpv = mydir + "/" + myfile[:-5] + d = metadata.get(mycpv) + skip = False + if d: + try: + if long(d.get("MTIME")) == long(s.st_mtime): + skip = True + except ValueError: + pass + if skip and not self._pkgindex_keys.difference(d): + pkg_paths[mycpv] = mypath + self.dbapi.cpv_inject(mycpv) + if not self.dbapi._aux_cache_keys.difference(d): + aux_cache = {} + for k in self.dbapi._aux_cache_keys: + aux_cache[k] = d[k] + self.dbapi._aux_cache[mycpv] = aux_cache + continue mytbz2 = portage.xpak.tbz2(full_path) # For invalid packages, mycat could be None. mycat = mytbz2.getfile("CATEGORY") mypf = mytbz2.getfile("PF") + slot = mytbz2.getfile("SLOT") mypkg = myfile[:-5] - if not mycat or not mypf: + if not mycat or not mypf or not slot: #old-style or corrupt package writemsg("!!! Invalid binary package: '%s'\n" % full_path, noiselevel=-1) @@ -394,6 +441,7 @@ class binarytree(object): self.invalids.append(mypkg) continue mycat = mycat.strip() + slot = slot.strip() if mycat != mydir and mydir != "All": continue if mypkg != mypf.strip(): @@ -412,7 +460,59 @@ class binarytree(object): continue pkg_paths[mycpv] = mypath self.dbapi.cpv_inject(mycpv) + update_pkgindex = True + d = metadata.get(mycpv, {}) + if d: + # Reuse metadata such as MD5, since we won't calculate + # MD5 here due to the performance hit. + mtime = d.get("MTIME") + if mtime: + # genpgkindex really should include the mtime and + # then this mtime check should be forced. + try: + if long(mtime) != long(s.st_mtime): + d.clear() + except ValueError: + d.clear() + if d: + try: + if long(d.get("SIZE")) != long(s.st_size): + d.clear() + except ValueError: + d.clear() + + d["CPV"] = mycpv + d["SLOT"] = slot + d["MTIME"] = str(long(s.st_mtime)) + d["SIZE"] = str(s.st_size) + metadata[mycpv] = d + if not self.dbapi._aux_cache_keys.difference(d): + aux_cache = {} + for k in self.dbapi._aux_cache_keys: + aux_cache[k] = d[k] + self.dbapi._aux_cache[mycpv] = aux_cache + self._pkg_paths = pkg_paths + if update_pkgindex and os.access(self.pkgdir, os.W_OK): + cpv_all = self._pkg_paths.keys() + stale = set(metadata).difference(cpv_all) + for cpv in stale: + del metadata[cpv] + cpv_all.sort() + import time + from portage.util import atomic_ofstream + header["TIMESTAMP"] = str(long(time.time())) + header["PACKAGES"] = str(len(cpv_all)) + f = atomic_ofstream(pkgfile) + try: + portage.getbinpkg.writepkgindex(f, header.iteritems()) + for cpv in cpv_all: + d = metadata[cpv] + if d["SLOT"] == "0": + del d["SLOT"] + portage.getbinpkg.writepkgindex(f, d.iteritems()) + finally: + f.close() if getbinpkgs and not self.settings["PORTAGE_BINHOST"]: writemsg(red("!!! PORTAGE_BINHOST unset, but use is requested.\n"), diff --git a/pym/portage/getbinpkg.py b/pym/portage/getbinpkg.py index b0d5ca9eb..070b036b5 100644 --- a/pym/portage/getbinpkg.py +++ b/pym/portage/getbinpkg.py @@ -570,3 +570,23 @@ def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache= conn.close() return metadata[baseurl]["data"] + +def readpkgindex(pkgfile): + d = {} + for line in pkgfile: + line = line.rstrip("\n") + if not line: + break + line = line.split(":", 1) + if not len(line) == 2: + continue + k, v = line + if v: + v = v[1:] + d[k] = v + return d + +def writepkgindex(pkgfile, items): + for k, v in items: + pkgfile.write("%s: %s\n" % (k, v)) + pkgfile.write("\n")