From: Brian Harring
Date: Sat, 5 Nov 2005 06:09:45 +0000 (-0000)
Subject: replacement cache subsystem that's gestated in 2.1 and 3.0.
X-Git-Tag: v2.1_pre1~71
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=6972e5da0cbda080c1cfd2c07eedb2298fa9c0b9;p=portage.git

replacement cache subsystem that's gestated in 2.1 and 3.0.
it rocks your world, baby.

svn path=/main/branches/2.0/; revision=2257
---
diff --git a/pym/cache/__init__.py b/pym/cache/__init__.py
new file mode 100644
index 000000000..189885cb0
--- /dev/null
+++ b/pym/cache/__init__.py
@@ -0,0 +1,5 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: __init__.py 1911 2005-08-25 03:44:21Z ferringb $
+
diff --git a/pym/cache/anydbm.py b/pym/cache/anydbm.py
new file mode 100644
index 000000000..bc380ae27
--- /dev/null
+++ b/pym/cache/anydbm.py
@@ -0,0 +1,75 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: anydbm.py 1911 2005-08-25 03:44:21Z ferringb $
+
+anydbm_module = __import__("anydbm")
+try:
+	import cPickle as pickle
+except ImportError:
+	import pickle
+import os
+import fs_template
+import cache_errors
+
+
+class database(fs_template.FsBased):
+
+	autocommits = True
+	cleanse_keys = True
+
+	def __init__(self, *args, **config):
+		super(database, self).__init__(*args, **config)
+
+		default_db = config.get("dbtype", "anydbm")
+		if not default_db.startswith("."):
+			default_db = '.' + default_db
+
+		self._db_path = os.path.join(self.location, fs_template.gen_label(self.location, self.label) + default_db)
+		self.__db = None
+		try:
+			self.__db = anydbm_module.open(self._db_path, "w", self._perms)
+
+		except anydbm_module.error:
+			# XXX handle this at some point
+			try:
+				self._ensure_dirs()
+				self._ensure_dirs(self._db_path)
+				self._ensure_access(self._db_path)
+			except (OSError, IOError), e:
+				raise cache_errors.InitializationError(self.__class__, e)
+
+			# try again; the dirs/perms should be in place now
+			try:
+				if self.__db is None:
+					self.__db = anydbm_module.open(self._db_path, "c", self._perms)
+			except anydbm_module.error, e:
+				raise cache_errors.InitializationError(self.__class__, e)
+
+	def iteritems(self):
+		return self.__db.iteritems()
+
+	def __getitem__(self, cpv):
+		# we override __getitem__ because entries are stored as a pickle of the
+		# values dict handed in, so they only need unpickling on the way out.
+		return pickle.loads(self.__db[cpv])
+
+	def _setitem(self, cpv, values):
+		self.__db[cpv] = pickle.dumps(values, pickle.HIGHEST_PROTOCOL)
+
+	def _delitem(self, cpv):
+		del self.__db[cpv]
+
+	def iterkeys(self):
+		return iter(self.__db)
+
+	def has_key(self, cpv):
+		return cpv in self.__db
+
+	def __del__(self):
+		if "__db" in self.__dict__ and self.__db is not None:
+			self.__db.sync()
+			self.__db.close()
diff --git a/pym/cache/cache_errors.py b/pym/cache/cache_errors.py
new file mode 100644
index 000000000..2f5b831bf
--- /dev/null
+++ b/pym/cache/cache_errors.py
@@ -0,0 +1,41 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: cache_errors.py 1911 2005-08-25 03:44:21Z ferringb $
+
+class CacheError(Exception):	pass
+
+class InitializationError(CacheError):
+	def __init__(self, class_name, error):
+		self.error, self.class_name = error, class_name
+	def __str__(self):
+		return "Creation of instance %s failed due to %s" % \
+			(self.class_name, str(self.error))
+
+
+class CacheCorruption(CacheError):
+	def __init__(self, key, ex):
+		self.key, self.ex = key, ex
+	def __str__(self):
+		return "%s is corrupt: %s" % (self.key, str(self.ex))
+
+
+class GeneralCacheCorruption(CacheError):
+	def __init__(self, ex):	self.ex = ex
+	def __str__(self):	return "corruption detected: %s" % str(self.ex)
+
+
+class InvalidRestriction(CacheError):
+	def __init__(self, key, restriction, exception=None):
+		if exception is None:	exception = ''
+		self.key, self.restriction, self.ex = key, restriction, exception
+	def __str__(self):
+		return "%s:%s is not valid: %s" % \
+			(self.key, self.restriction, str(self.ex))
+
+
+class ReadOnlyRestriction(CacheError):
+	def __init__(self, info=''):
+		self.info = info
+	def __str__(self):
+		return "cache is non-modifiable " + str(self.info)
diff --git a/pym/cache/flat_hash.py b/pym/cache/flat_hash.py
new file mode 100644
index 000000000..6882afd7e
--- /dev/null
+++ b/pym/cache/flat_hash.py
@@ -0,0 +1,129 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: flat_list.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import fs_template
+import cache_errors
+import os, stat
+from mappings import LazyLoad, ProtectedDict
+from template import reconstruct_eclasses
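+# note: no fixed key ordering is needed here; unlike flat_list below, this
+# format is self-describing, storing each cpv as its own file of KEY=value
+# lines, with the entry's _mtime_ taken from the file itself.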
+class database(fs_template.FsBased):
+
+	autocommits = True
+
+	def __init__(self, *args, **config):
+		super(database, self).__init__(*args, **config)
+		self.location = os.path.join(self.location,
+			self.label.lstrip(os.path.sep).rstrip(os.path.sep))
+
+		if not os.path.exists(self.location):
+			self._ensure_dirs()
+
+	def __getitem__(self, cpv):
+		fp = os.path.join(self.location, cpv)
+		try:
+			# defer the actual read: stat() now for the mtime, pull the rest on demand.
+			def curry(*args):
+				def callit(*args2):
+					return args[0](*args[1:] + args2)
+				return callit
+			return ProtectedDict(LazyLoad(curry(self._pull, fp, cpv), initial_items=[("_mtime_", os.stat(fp).st_mtime)]))
+		except OSError:
+			raise KeyError(cpv)
+
+	def _pull(self, fp, cpv):
+		try:
+			myf = open(fp, "r")
+		except IOError:
+			raise KeyError(cpv)
+		except OSError, e:
+			raise cache_errors.CacheCorruption(cpv, e)
+		try:
+			d = self._parse_data(myf, cpv)
+		except (OSError, ValueError), e:
+			myf.close()
+			raise cache_errors.CacheCorruption(cpv, e)
+		myf.close()
+		return d
+
+
+	def _parse_data(self, data, cpv, mtime=0):
+		d = dict(map(lambda x: x.rstrip().split("=", 1), data))
+		if mtime != 0:
+			d["_mtime_"] = long(mtime)
+		if "_eclasses_" in d:
+			d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+		return d
+
+
+	def _setitem(self, cpv, values):
+		s = cpv.rfind("/")
+		fp = os.path.join(self.location, cpv[:s], ".update.%i.%s" % (os.getpid(), cpv[s + 1:]))
+		try:	myf = open(fp, "w")
+		except IOError, ie:
+			if ie.errno == 2:
+				# ENOENT; create the intermediate dirs and retry
+				try:
+					self._ensure_dirs(cpv)
+					myf = open(fp, "w")
+				except (OSError, IOError), e:
+					raise cache_errors.CacheCorruption(cpv, e)
+			else:
+				raise cache_errors.CacheCorruption(cpv, ie)
+		except OSError, e:
+			raise cache_errors.CacheCorruption(cpv, e)
+
+		for k, v in values.items():
+			if k != "_mtime_":
+				myf.write("%s=%s\n" % (k, v))
+
+		myf.close()
+		self._ensure_access(fp, mtime=values["_mtime_"])
+
+		# update written. now we move it.
+		new_fp = os.path.join(self.location, cpv)
+		try:	os.rename(fp, new_fp)
+		except (OSError, IOError), e:
+			os.remove(fp)
+			raise cache_errors.CacheCorruption(cpv, e)
+
+
+	def _delitem(self, cpv):
+		try:
+			os.remove(os.path.join(self.location, cpv))
+		except OSError, e:
+			if e.errno == 2:
+				# ENOENT; it was never there to begin with
+				raise KeyError(cpv)
+			else:
+				raise cache_errors.CacheCorruption(cpv, e)
+
+
+	def has_key(self, cpv):
+		return os.path.exists(os.path.join(self.location, cpv))
+
+
+	def iterkeys(self):
+		"""generator for walking the dir struct"""
+		dirs = [self.location]
+		len_base = len(self.location)
+		while len(dirs):
+			for l in os.listdir(dirs[0]):
+				if l.endswith(".cpickle"):
+					continue
+				p = os.path.join(dirs[0], l)
+				st = os.lstat(p)
+				if stat.S_ISDIR(st.st_mode):
+					dirs.append(p)
+					continue
+				yield p[len_base + 1:]
+			dirs.pop(0)

diff --git a/pym/cache/flat_list.py b/pym/cache/flat_list.py
new file mode 100644
index 000000000..944f7406d
--- /dev/null
+++ b/pym/cache/flat_list.py
@@ -0,0 +1,109 @@
+import fs_template
+import cache_errors
+import os, stat
+
+# store the current key order *here*.
+class database(fs_template.FsBased):
+
+	autocommits = True
+
+	# do not screw with this ordering; _eclasses_ needs to be last.
+	auxdbkey_order = ('DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
+		'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION',
+		'KEYWORDS', 'IUSE', 'CDEPEND',
+		'PDEPEND', 'PROVIDE', '_eclasses_')
+
+	def __init__(self, location, label, auxdbkeys, **config):
+		super(database, self).__init__(location, label, auxdbkeys, **config)
+		self.location = self._base = os.path.join(self.location,
+			self.label.lstrip(os.path.sep).rstrip(os.path.sep))
+
+		if len(self._known_keys) > len(self.auxdbkey_order) + 2:
+			raise Exception("more auxdbkeys were requested than flat_list has ordered slots for")
+		if not os.path.exists(self._base):
+			self._ensure_dirs()
+
+
+	def _getitem(self, cpv):
+		d = {}
+		try:
+			myf = open(os.path.join(self._base, cpv), "r")
+			for k, v in zip(self.auxdbkey_order, myf):
+				d[k] = v.rstrip("\n")
+		except (OSError, IOError), e:
+			if isinstance(e, IOError) and e.errno == 2:
+				# ENOENT; missing entry
+				raise KeyError(cpv)
+			raise cache_errors.CacheCorruption(cpv, e)
+
+		try:	d["_mtime_"] = os.fstat(myf.fileno()).st_mtime
+		except OSError, e:
+			myf.close()
+			raise cache_errors.CacheCorruption(cpv, e)
+		myf.close()
+		return d
+
+
+	def _setitem(self, cpv, values):
+		s = cpv.rfind("/")
+		fp = os.path.join(self._base, cpv[:s], ".update.%i.%s" % (os.getpid(), cpv[s + 1:]))
+		try:	myf = open(fp, "w")
+		except (OSError, IOError), e:
+			if e.errno == 2:
+				# ENOENT; create the intermediate dirs and retry
+				try:
+					self._ensure_dirs(cpv)
+					myf = open(fp, "w")
+				except (OSError, IOError), e:
+					raise cache_errors.CacheCorruption(cpv, e)
+			else:
+				raise cache_errors.CacheCorruption(cpv, e)
+
+
+		for x in self.auxdbkey_order:
+			myf.write(values.get(x, "") + "\n")
+
+		myf.close()
+		self._ensure_access(fp, mtime=values["_mtime_"])
+		# update written. now we move it.
+		new_fp = os.path.join(self._base, cpv)
+		try:	os.rename(fp, new_fp)
+		except (OSError, IOError), e:
+			os.remove(fp)
+			raise cache_errors.CacheCorruption(cpv, e)
+
+
+	def _delitem(self, cpv):
+		try:
+			os.remove(os.path.join(self._base, cpv))
+		except OSError, e:
+			if e.errno == 2:
+				# ENOENT; missing entry
+				raise KeyError(cpv)
+			else:
+				raise cache_errors.CacheCorruption(cpv, e)
+
+
+	def has_key(self, cpv):
+		return os.path.exists(os.path.join(self._base, cpv))
+
+
+	def iterkeys(self):
+		"""generator for walking the dir struct"""
+		dirs = [self._base]
+		len_base = len(self._base)
+		while len(dirs):
+			for l in os.listdir(dirs[0]):
+				if l.endswith(".cpickle"):
+					continue
+				p = os.path.join(dirs[0], l)
+				st = os.lstat(p)
+				if stat.S_ISDIR(st.st_mode):
+					dirs.append(p)
+					continue
+				yield p[len_base + 1:]
+			dirs.pop(0)
+
+
+	def commit(self):	pass
diff --git a/pym/cache/fs_template.py b/pym/cache/fs_template.py
new file mode 100644
index 000000000..49d5ca073
--- /dev/null
+++ b/pym/cache/fs_template.py
@@ -0,0 +1,74 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: fs_template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import os
+import template, cache_errors
+from portage_data import portage_gid
+
+class FsBased(template.database):
+	"""template wrapping fs needed options, and providing _ensure_access as a way to
+	attempt to ensure files have the specified owners/perms"""
+
+	def __init__(self, *args, **config):
+		"""throws InitializationError if needed args aren't specified.
+		gid and perms aren't listed as keywords due to an oddity of the python currying mechanism:
+		gid=portage_gid
+		perms=0664"""
+
+		for x, y in (("gid", portage_gid), ("perms", 0664)):
+			if x in config:
+				setattr(self, "_" + x, config[x])
+				del config[x]
+			else:
+				setattr(self, "_" + x, y)
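+		# gid/perms are stripped out of config at this point; the rest is handed to template.database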
+		super(FsBased, self).__init__(*args, **config)
+
+		if self.label.startswith(os.path.sep):
+			# normpath.
+			self.label = os.path.sep + os.path.normpath(self.label).lstrip(os.path.sep)
+
+
+	def _ensure_access(self, path, mtime=-1):
+		"""returns true or false, indicating whether it was able to ensure that path
+		is properly chmod'd and chowned.
+		if mtime is specified, attempts to ensure that it is correct also"""
+		try:
+			os.chown(path, -1, self._gid)
+			os.chmod(path, self._perms)
+			if mtime != -1:
+				mtime = long(mtime)
+				os.utime(path, (mtime, mtime))
+		except (OSError, IOError):
+			return False
+		return True
+
+	def _ensure_dirs(self, path=None):
+		"""with path != None, ensure beyond self.location. otherwise, ensure self.location"""
+		if path:
+			path = os.path.dirname(path)
+			base = self.location
+		else:
+			path = self.location
+			base = '/'
+
+		for dname in path.lstrip(os.path.sep).rstrip(os.path.sep).split(os.path.sep):
+			base = os.path.join(base, dname)
+			if not os.path.exists(base):
+				um = os.umask(0)
+				try:
+					os.mkdir(base, self._perms | 0111)
+					os.chown(base, -1, self._gid)
+				finally:
+					os.umask(um)
+
+
+def gen_label(base, label):
+	"""if the supplied label is a path, generate a unique label based upon label and the supplied base path"""
+	if label.find(os.path.sep) == -1:
+		return label
+	label = label.strip("\"").strip("'")
+	label = os.path.join(*(label.rstrip(os.path.sep).split(os.path.sep)))
+	tail = os.path.split(label)[1]
+	return "%s-%X" % (tail, abs(hash(label)))

diff --git a/pym/cache/mappings.py b/pym/cache/mappings.py
new file mode 100644
index 000000000..2251c4449
--- /dev/null
+++ b/pym/cache/mappings.py
@@ -0,0 +1,103 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: mappings.py 2015 2005-09-20 23:14:26Z ferringb $
+
+import UserDict
+
+class ProtectedDict(UserDict.DictMixin):
+	"""
+	given an initial dict, this wraps that dict, storing changes in a secondary dict
+	and protecting the underlying dict from modification
+	"""
+	__slots__ = ("orig", "new", "blacklist")
+
+	def __init__(self, orig):
+		self.orig = orig
+		self.new = {}
+		self.blacklist = {}
+
+
+	def __setitem__(self, key, val):
+		self.new[key] = val
+		if key in self.blacklist:
+			del self.blacklist[key]
+
+
+	def __getitem__(self, key):
+		if key in self.new:
+			return self.new[key]
+		if key in self.blacklist:
+			raise KeyError(key)
+		return self.orig[key]
+
+
+	def __delitem__(self, key):
+		if key in self.new:
+			del self.new[key]
+			return
+		if key in self.orig and key not in self.blacklist:
+			self.blacklist[key] = True
+			return
+		raise KeyError(key)
+
+
+	def __iter__(self):
+		for k in self.new.iterkeys():
+			yield k
+		for k in self.orig.iterkeys():
+			if k not in self.blacklist and k not in self.new:
+				yield k
+
+
+	def keys(self):
+		return list(self.__iter__())
+
+
+	def has_key(self, key):
+		return key in self.new or (key not in self.blacklist and key in self.orig)
+
+
+class LazyLoad(UserDict.DictMixin):
+	"""
+	Lazy loading of values for a dict
+	"""
+	__slots__ = ("pull", "d")
+
+	def __init__(self, pull_items_func, initial_items=[]):
+		self.d = {}
+		for k, v in initial_items:
+			self.d[k] = v
+		self.pull = pull_items_func
+
+	def __getitem__(self, key):
+		if key in self.d:
+			return self.d[key]
+		elif self.pull is not None:
+			self.d.update(self.pull())
+			self.pull = None
+		return self.d[key]
+
+
+	def __iter__(self):
+		return iter(self.keys())
+
+	def keys(self):
+		if self.pull is not None:
+			self.d.update(self.pull())
+			self.pull = None
+		return self.d.keys()
+
+
+	def has_key(self, key):
+		return key in self
+
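+	# a membership test triggers the deferred pull the first time an unloaded key is probed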
+	def __contains__(self, key):
+		if key in self.d:
+			return True
+		elif self.pull is not None:
+			self.d.update(self.pull())
+			self.pull = None
+		return key in self.d

diff --git a/pym/cache/metadata.py b/pym/cache/metadata.py
new file mode 100644
index 000000000..5e5f0bde7
--- /dev/null
+++ b/pym/cache/metadata.py
@@ -0,0 +1,88 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: metadata.py 1964 2005-09-03 00:16:16Z ferringb $
+
+import os, stat
+import flat_hash
+import cache_errors
+import eclass_cache
+from template import reconstruct_eclasses, serialize_eclasses
+from mappings import ProtectedDict, LazyLoad
+
+# this is the line count of the old cache format, flat_list; it's maintained here.
+magic_line_count = 22
+
+# store the current key order *here*.
+class database(flat_hash.database):
+	complete_eclass_entries = False
+	auxdbkey_order = ('DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
+		'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION',
+		'KEYWORDS', 'INHERITED', 'IUSE', 'CDEPEND',
+		'PDEPEND', 'PROVIDE', 'EAPI')
+
+	autocommits = True
+
+	def __init__(self, location, *args, **config):
+		loc = location
+		super(database, self).__init__(location, *args, **config)
+		self.location = os.path.join(loc, "metadata", "cache")
+		self.ec = eclass_cache.cache(loc)
+
+	def __getitem__(self, cpv):
+		return flat_hash.database.__getitem__(self, cpv)
+
+
+	def _parse_data(self, data, cpv, mtime=0):
+		# easy attempt first.
+		data = list(data)
+		if len(data) != magic_line_count:
+			d = flat_hash.database._parse_data(self, data, cpv, mtime)
+		else:
+			# this one's interesting.
+			d = {}
+
+			for line in data:
+				# yes, meant to iterate over the characters of a string.
+				hashed = False
+				# poor man's enumerate. replace when python 2.3 is required
+				for idx, c in zip(range(len(line)), line):
+					if not c.isalpha():
+						if c == "=" and idx > 0:
+							hashed = True
+							d[line[:idx]] = line[idx + 1:]
+						elif c == "_" or c.isdigit():
+							continue
+						break
+					elif not c.isupper():
+						break
+
+				if not hashed:
+					# not hashed, so it's positional: map the lines onto auxdbkey_order.
+					d.clear()
+					# poor man's enumerate. replace when python 2.3 is required
+					for idx, key in zip(range(len(self.auxdbkey_order)), self.auxdbkey_order):
+						d[key] = data[idx].strip()
+					break
+
+		if "_eclasses_" not in d:
+			if "INHERITED" in d:
+				d["_eclasses_"] = self.ec.get_eclass_data(d["INHERITED"].split(), from_master_only=True)
+				del d["INHERITED"]
+		elif isinstance(d["_eclasses_"], basestring):
+			# flat_hash._parse_data has already expanded this to a dict otherwise.
+			d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+
+		return d
+
+
+	def _setitem(self, cpv, values):
+		values = ProtectedDict(values)
+
+		# hack. the proper solution is to make this a __setitem__ override, since
+		# template.__setitem__ serializes _eclasses_; here we reconstruct it.
+		if "_eclasses_" in values:
+			values["INHERITED"] = ' '.join(reconstruct_eclasses(cpv, values["_eclasses_"]).keys())
+			del values["_eclasses_"]
+
+		flat_hash.database._setitem(self, cpv, values)
diff --git a/pym/cache/sql_template.py b/pym/cache/sql_template.py
new file mode 100644
index 000000000..c69ed772f
--- /dev/null
+++ b/pym/cache/sql_template.py
@@ -0,0 +1,275 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: sql_template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import template, cache_errors
+from template import reconstruct_eclasses
+
+class SQLDatabase(template.database):
+	"""template class for RDBMS-based caches
+
+	This class is designed so that derivatives don't have to change much code; mostly constant strings.
+	_BaseError must be an exception class that all exceptions thrown by the derived RDBMS
+	are derived from.
+
+	SCHEMA_INSERT_CPV_INTO_PACKAGE should be modified dependent on the RDBMS, as should
+	SCHEMA_PACKAGE_CREATE; basically you need to deal with creation of a unique pkgid. If the
+	dbapi2 rdbms class has a method of recovering that id, then modify _insert_cpv to remove the
+	extra select.
+
+	Creation of a derived class involves supplying _initdb_con and _table_exists.
+	Additionally, the default schemas may have to be modified.
+	"""
+
+	SCHEMA_PACKAGE_NAME = "package_cache"
+	SCHEMA_PACKAGE_CREATE = "CREATE TABLE %s (\
+		pkgid INTEGER PRIMARY KEY, label VARCHAR(255), cpv VARCHAR(255), UNIQUE(label, cpv))" % SCHEMA_PACKAGE_NAME
+	SCHEMA_PACKAGE_DROP = "DROP TABLE %s" % SCHEMA_PACKAGE_NAME
+
+	SCHEMA_VALUES_NAME = "values_cache"
+	SCHEMA_VALUES_CREATE = "CREATE TABLE %s ( pkgid integer references %s (pkgid) on delete cascade, \
+		key varchar(255), value text, UNIQUE(pkgid, key))" % (SCHEMA_VALUES_NAME, SCHEMA_PACKAGE_NAME)
+	SCHEMA_VALUES_DROP = "DROP TABLE %s" % SCHEMA_VALUES_NAME
+	SCHEMA_INSERT_CPV_INTO_PACKAGE = "INSERT INTO %s (label, cpv) VALUES(%%s, %%s)" % SCHEMA_PACKAGE_NAME
+
+	_BaseError = ()
+	_dbClass = None
+
+	autocommits = False
+#	cleanse_keys = True
+
+	# boolean indicating if the derived RDBMS class supports replace syntax
+	_supports_replace = False
+
+	def __init__(self, location, label, auxdbkeys, *args, **config):
+		"""initialize the instance.
+		derived classes shouldn't need to override this"""
+
+		super(SQLDatabase, self).__init__(location, label, auxdbkeys, *args, **config)
+
+		config.setdefault("host", "127.0.0.1")
+		config.setdefault("autocommit", self.autocommits)
+		self._initdb_con(config)
+
+		self.label = self._sfilter(self.label)
+
+
+	def _dbconnect(self, config):
+		"""should be overridden if the derived class needs special parameters for
+		initializing the db connection or cursor"""
+		self.db = self._dbClass(**config)
+		self.con = self.db.cursor()
+
+
+	def _initdb_con(self, config):
+		"""ensure needed tables are in place.
+		If the derived class needs a different set of table creation commands, overload the
+		appropriate SCHEMA_ attributes.
+		If it needs additional execution beyond that, override this method."""
+
+		self._dbconnect(config)
+		if not self._table_exists(self.SCHEMA_PACKAGE_NAME):
+			if self.readonly:
+				raise cache_errors.ReadOnlyRestriction("table %s doesn't exist" % \
+					self.SCHEMA_PACKAGE_NAME)
+			try:	self.con.execute(self.SCHEMA_PACKAGE_CREATE)
+			except self._BaseError, e:
+				raise cache_errors.InitializationError(self.__class__, e)
+
+		if not self._table_exists(self.SCHEMA_VALUES_NAME):
+			if self.readonly:
+				raise cache_errors.ReadOnlyRestriction("table %s doesn't exist" % \
+					self.SCHEMA_VALUES_NAME)
+			try:	self.con.execute(self.SCHEMA_VALUES_CREATE)
+			except self._BaseError, e:
+				raise cache_errors.InitializationError(self.__class__, e)
+
+
+	def _table_exists(self, tbl):
+		"""return true if a table exists.
+		derived classes must override this"""
+		raise NotImplementedError
+
+
+	def _sfilter(self, s):
+		"""meta escaping; returns a quoted string for use in sql statements"""
+		return "\"%s\"" % s.replace("\\", "\\\\").replace("\"", "\\\"")
+
+
+	def _getitem(self, cpv):
+		try:	self.con.execute("SELECT key, value FROM %s NATURAL JOIN %s "
+			"WHERE label=%s AND cpv=%s" % (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME,
+			self.label, self._sfilter(cpv)))
+		except self._BaseError, e:
+			raise cache_errors.CacheCorruption(cpv, e)
+
+		rows = self.con.fetchall()
+
+		if len(rows) == 0:
+			raise KeyError(cpv)
+
+		vals = dict([(k, "") for k in self._known_keys])
+		vals.update(dict(rows))
+		return vals
+
+
+	def _delitem(self, cpv):
+		"""delete a cpv cache entry.
+		derived RDBMS classes *must* either support cascaded deletes, or
+		override this method"""
+		try:
+			try:
+				self.con.execute("DELETE FROM %s WHERE label=%s AND cpv=%s" % \
+					(self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+				if self.autocommits:
+					self.commit()
+			except self._BaseError, e:
+				raise cache_errors.CacheCorruption(cpv, e)
+			if self.con.rowcount <= 0:
+				raise KeyError(cpv)
+		except Exception:
+			if not self.autocommits:
+				self.db.rollback()
+				# yes, this can roll back a lot more than just the delete. deal.
+			raise
+
+	def __del__(self):
+		# just to be safe.
+		if "db" in self.__dict__ and self.db is not None:
+			self.commit()
+			self.db.close()
+
+	def _setitem(self, cpv, values):
+
+		try:
+			# insert.
+			try:	pkgid = self._insert_cpv(cpv)
+			except self._BaseError, e:
+				raise cache_errors.CacheCorruption(cpv, e)
+
+			# __getitem__ fills out missing values,
+			# so we store only what's handed to us and is a known key
+			db_values = []
+			for key in self._known_keys:
+				if values.has_key(key) and values[key] != '':
+					db_values.append({"key": key, "value": values[key]})
+
+			if len(db_values) > 0:
+				try:	self.con.executemany("INSERT INTO %s (pkgid, key, value) VALUES(\"%s\", %%(key)s, %%(value)s)" % \
+					(self.SCHEMA_VALUES_NAME, str(pkgid)), db_values)
+				except self._BaseError, e:
+					raise cache_errors.CacheCorruption(cpv, e)
+			if self.autocommits:
+				self.commit()
+
+		except Exception:
+			if not self.autocommits:
+				try:	self.db.rollback()
+				except self._BaseError:	pass
+			raise
+
+
+	def _insert_cpv(self, cpv):
+		"""uses SCHEMA_INSERT_CPV_INTO_PACKAGE, which must be overloaded if the table definition
+		doesn't support auto-increment columns for pkgid.
+		returns the cpv's new pkgid.
+		note this doesn't commit the transaction. The caller is expected to."""
+
+		cpv = self._sfilter(cpv)
+		if self._supports_replace:
+			query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT", "REPLACE", 1)
+		else:
+			# just delete it.
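+			# (a delete followed by a plain INSERT emulates REPLACE for backends lacking it)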
+			try:	del self[cpv]
+			except (cache_errors.CacheCorruption, KeyError):	pass
+			query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE
+		try:
+			self.con.execute(query_str % (self.label, cpv))
+		except self._BaseError:
+			self.db.rollback()
+			raise
+		self.con.execute("SELECT pkgid FROM %s WHERE label=%s AND cpv=%s" % \
+			(self.SCHEMA_PACKAGE_NAME, self.label, cpv))
+
+		if self.con.rowcount != 1:
+			raise cache_errors.CacheCorruption(cpv,
+				"Tried to insert the cpv, but found %i matches upon the following select!" % self.con.rowcount)
+		return self.con.fetchone()[0]
+
+
+	def has_key(self, cpv):
+		if not self.autocommits:
+			try:	self.commit()
+			except self._BaseError, e:
+				raise cache_errors.GeneralCacheCorruption(e)
+
+		try:	self.con.execute("SELECT cpv FROM %s WHERE label=%s AND cpv=%s" % \
+			(self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+		except self._BaseError, e:
+			raise cache_errors.GeneralCacheCorruption(e)
+		return self.con.rowcount > 0
+
+
+	def iterkeys(self):
+		if not self.autocommits:
+			try:	self.commit()
+			except self._BaseError, e:
+				raise cache_errors.GeneralCacheCorruption(e)
+
+		try:	self.con.execute("SELECT cpv FROM %s WHERE label=%s" %
+			(self.SCHEMA_PACKAGE_NAME, self.label))
+		except self._BaseError, e:
+			raise cache_errors.GeneralCacheCorruption(e)
+		for x in self.con.fetchall():
+			yield x[0]
+
+	def iteritems(self):
+		try:	self.con.execute("SELECT cpv, key, value FROM %s NATURAL JOIN %s "
+			"WHERE label=%s" % (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME,
+			self.label))
+		except self._BaseError, e:
+			raise cache_errors.GeneralCacheCorruption(e)
+
+		oldcpv = None
+		l = []
+		for x, y, v in self.con.fetchall():
+			if oldcpv != x:
+				if oldcpv is not None:
+					d = dict(l)
+					if "_eclasses_" in d:
+						d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
+					yield oldcpv, d
+				l = []
+				oldcpv = x
+			l.append((y, v))
+		if oldcpv is not None:
+			d = dict(l)
+			if "_eclasses_" in d:
+				d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
+			yield oldcpv, d
+
+	def commit(self):
+		self.db.commit()
+
+	def get_matches(self, match_dict):
+		query_list = []
+		for k, v in match_dict.items():
+			if k not in self._known_keys:
+				raise cache_errors.InvalidRestriction(k, v, "key isn't known to this cache instance")
+			v = v.replace("%", "\\%")
+			v = v.replace(".*", "%")
+			query_list.append("(key=%s AND value LIKE %s)" % (self._sfilter(k), self._sfilter(v)))
+
+		if len(query_list):
+			query = " AND " + " AND ".join(query_list)
+		else:
+			query = ''
+
+		try:	self.con.execute("SELECT cpv from package_cache natural join values_cache WHERE label=%s %s" % \
+			(self.label, query))
+		except self._BaseError, e:
+			raise cache_errors.GeneralCacheCorruption(e)
+
+		return [ row[0] for row in self.con.fetchall() ]

diff --git a/pym/cache/sqlite.py b/pym/cache/sqlite.py
new file mode 100644
index 000000000..4114eee32
--- /dev/null
+++ b/pym/cache/sqlite.py
@@ -0,0 +1,67 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: sqlite.py 1911 2005-08-25 03:44:21Z ferringb $
+
+sqlite_module = __import__("sqlite")
+import os
+import sql_template, fs_template
+import cache_errors
+
+class database(fs_template.FsBased, sql_template.SQLDatabase):
+
+	SCHEMA_DELETE_NAME = "delete_package_values"
+	SCHEMA_DELETE_TRIGGER = """CREATE TRIGGER %s AFTER DELETE on %s
+	begin
+	DELETE FROM %s WHERE pkgid=old.pkgid;
+	end;""" % (SCHEMA_DELETE_NAME, sql_template.SQLDatabase.SCHEMA_PACKAGE_NAME,
+		sql_template.SQLDatabase.SCHEMA_VALUES_NAME)
+
+	_BaseError = sqlite_module.Error
+	_dbClass = sqlite_module
+	_supports_replace = True
+
+	def _dbconnect(self, config):
+		self._dbpath = os.path.join(self.location, fs_template.gen_label(self.location, self.label) + ".sqldb")
+		try:
+			self.db = sqlite_module.connect(self._dbpath, mode=self._perms, autocommit=False)
+			if not self._ensure_access(self._dbpath):
+				raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
+			self.con = self.db.cursor()
+		except self._BaseError, e:
+			raise cache_errors.InitializationError(self.__class__, e)
+
+
+	def _initdb_con(self, config):
+		sql_template.SQLDatabase._initdb_con(self, config)
+		try:
+			self.con.execute("SELECT name FROM sqlite_master WHERE type=\"trigger\" AND name=%s" % \
+				self._sfilter(self.SCHEMA_DELETE_NAME))
+			if self.con.rowcount == 0:
+				self.con.execute(self.SCHEMA_DELETE_TRIGGER)
+				self.db.commit()
+		except self._BaseError, e:
+			raise cache_errors.InitializationError(self.__class__, e)
+
+	def _table_exists(self, tbl):
+		"""return true/false dependent on the table existing"""
+		try:	self.con.execute("SELECT name FROM sqlite_master WHERE type=\"table\" AND name=%s" %
+			self._sfilter(tbl))
+		except self._BaseError:
+			# XXX crappy.
+			return False
+		return len(self.con.fetchall()) == 1
+
+	# we can skip the extra select by using the rowid.
+	def _insert_cpv(self, cpv):
+		cpv = self._sfilter(cpv)
+		try:	self.con.execute(self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT", "REPLACE", 1) % \
+			(self.label, cpv))
+		except self._BaseError, e:
+			raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed: %s" % str(e))
+
+		# the count may also include the cascaded delete
+		if self.con.rowcount <= 0 or self.con.rowcount > 2:
+			raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed- %i rows modified" % self.con.rowcount)
+		return self.con.lastrowid

diff --git a/pym/cache/template.py b/pym/cache/template.py
new file mode 100644
index 000000000..c230b9389
--- /dev/null
+++ b/pym/cache/template.py
@@ -0,0 +1,171 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import cache_errors
+from mappings import ProtectedDict
+
+class database(object):
+	# this is for metadata/cache transfer: it flags whether entries from this
+	# cache carry complete eclass data when transferring cache to cache.
+	# leave this.
+	complete_eclass_entries = True
+
+	autocommits = False
+	cleanse_keys = False
+
+	def __init__(self, location, label, auxdbkeys, readonly=False):
+		"""initialize the derived class; specifically, store label/keys"""
+		self._known_keys = auxdbkeys
+		self.location = location
+		self.label = label
+		self.readonly = readonly
+		self.sync_rate = 0
+		self.updates = 0
+
+
+	def __getitem__(self, cpv):
+		"""get a cpv's values.
+		This shouldn't be overridden in derived classes since it handles the _eclasses_ conversion;
+		that said, if a derived class handles the conversion itself, it can override this."""
+		if self.updates > self.sync_rate:
+			self.commit()
+			self.updates = 0
+		d = self._getitem(cpv)
+		if "_eclasses_" in d:
+			d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+		return d
+
+	def _getitem(self, cpv):
+		"""get a cpv's values;
+		override this in derived classes"""
+		raise NotImplementedError
+
+
+	def __setitem__(self, cpv, values):
+		"""set a cpv to values.
+		This shouldn't be overridden in derived classes since it handles the readonly checks"""
+		if self.readonly:
+			raise cache_errors.ReadOnlyRestriction()
+		if self.cleanse_keys:
+			d = ProtectedDict(values)
+			for k in d.keys():
+				if d[k] == '':
+					del d[k]
+			if "_eclasses_" in values:
+				d["_eclasses_"] = serialize_eclasses(d["_eclasses_"])
+		elif "_eclasses_" in values:
+			d = ProtectedDict(values)
+			d["_eclasses_"] = serialize_eclasses(d["_eclasses_"])
+		else:
+			d = values
+		self._setitem(cpv, d)
+		if not self.autocommits:
+			self.updates += 1
+			if self.updates > self.sync_rate:
+				self.commit()
+				self.updates = 0
+
+
+	def _setitem(self, name, values):
+		"""__setitem__ calls this after readonly checks. override it in derived classes;
+		note the _eclasses_ key *must* be handled"""
+		raise NotImplementedError
+
+
+	def __delitem__(self, cpv):
+		"""delete a key from the cache.
+		This shouldn't be overridden in derived classes since it handles the readonly checks"""
+		if self.readonly:
+			raise cache_errors.ReadOnlyRestriction()
+		if not self.autocommits:
+			self.updates += 1
+		self._delitem(cpv)
+		if self.updates > self.sync_rate:
+			self.commit()
+			self.updates = 0
+
+
+	def _delitem(self, cpv):
+		"""__delitem__ calls this after readonly checks. override it in derived classes"""
+		raise NotImplementedError
+
+
+	def has_key(self, cpv):
+		raise NotImplementedError
+
+
+	def keys(self):
+		return tuple(self.iterkeys())
+
+	def iterkeys(self):
+		raise NotImplementedError
+
+	def iteritems(self):
+		for x in self.iterkeys():
+			yield (x, self[x])
+
+	def items(self):
+		return list(self.iteritems())
+
+	def sync(self, rate=0):
+		self.sync_rate = rate
+		if rate == 0:
+			self.commit()
+
+	def commit(self):
+		if not self.autocommits:
+			raise NotImplementedError
+
+	def get_matches(self, match_dict):
+		"""generic function for walking the entire cache db, matching restrictions to
+		filter which cpv's are returned. Derived classes should override this if they
+		can implement a faster method than pulling each cpv:values and checking it.
+
+		For example, RDBMS-derived classes should push the matching logic down to the
+		actual RDBMS."""
+
+		import re
+		restricts = {}
+		for key, match in match_dict.iteritems():
+			# XXX this sucks.
+			try:
+				if isinstance(match, str):
+					restricts[key] = re.compile(match).match
+				else:
+					restricts[key] = re.compile(match[0], match[1]).match
+			except re.error, e:
+				raise cache_errors.InvalidRestriction(key, match, e)
+			if key not in self._known_keys:
+				raise cache_errors.InvalidRestriction(key, match, "Key isn't valid")
+
+		for cpv in self.keys():
+			cont = True
+			vals = self[cpv]
+			for key, match in restricts.iteritems():
+				if not match(vals[key]):
+					cont = False
+					break
+			if cont:
+				yield cpv
+
+
+def serialize_eclasses(eclass_dict):
+	"""takes a dict, returns a string representing said dict"""
+	return "\t".join(["%s\t%s\t%s" % (k, v[0], str(v[1])) for k, v in eclass_dict.items()])
+
+def reconstruct_eclasses(cpv, eclass_string):
+	"""returns a dict when handed a string generated by serialize_eclasses"""
+	eclasses = eclass_string.strip().split("\t")
+	if eclasses == [""]:
+		# occasionally this occurs in the fs backends. they suck.
+		return {}
+	if len(eclasses) % 3 != 0:
+		raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses))
+	d = {}
+	for x in range(0, len(eclasses), 3):
+		d[eclasses[x]] = (eclasses[x + 1], long(eclasses[x + 2]))
+	del eclasses
+	return d
diff --git a/pym/cache/util.py b/pym/cache/util.py
new file mode 100644
index 000000000..26d917af3
--- /dev/null
+++ b/pym/cache/util.py
@@ -0,0 +1,103 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: util.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import cache_errors
+
+def mirror_cache(valid_nodes_iterable, src_cache, trg_cache, eclass_cache=None, verbose_instance=None):
+
+	if not src_cache.complete_eclass_entries and not eclass_cache:
+		raise Exception("eclass_cache is required for caches of class %s!" % src_cache.__class__)
+
+	if verbose_instance is None:
+		noise = quiet_mirroring()
+	else:
+		noise = verbose_instance
+
+	dead_nodes = dict.fromkeys(trg_cache.keys())
+	count = 0
+
+	if not trg_cache.autocommits:
+		trg_cache.sync(100)
+
+	for x in valid_nodes_iterable:
+		count += 1
+		if dead_nodes.has_key(x):
+			del dead_nodes[x]
+		try:	entry = src_cache[x]
+		except KeyError, e:
+			noise.missing_entry(x)
+			del e
+			continue
+		write_it = True
+		try:
+			trg = trg_cache[x]
+			if long(trg["_mtime_"]) == long(entry["_mtime_"]) and eclass_cache.is_eclass_data_valid(trg["_eclasses_"]):
+				write_it = False
+		except (cache_errors.CacheError, KeyError):
+			pass
+
+		if write_it:
+			if entry.get("INHERITED", ""):
+				if src_cache.complete_eclass_entries:
+					if not "_eclasses_" in entry:
+						noise.corruption(x, "missing _eclasses_ field")
+						continue
+					if not eclass_cache.is_eclass_data_valid(entry["_eclasses_"]):
+						noise.eclass_stale(x)
+						continue
+				else:
+					entry["_eclasses_"] = eclass_cache.get_eclass_data(entry["INHERITED"].split(), \
+						from_master_only=True)
+					if not entry["_eclasses_"]:
+						noise.eclass_stale(x)
+						continue
+
+			# by the time it reaches here, the eclass data has been validated, and the entry
+			# has been updated/translated (if need be, for metadata/cache mainly)
+			try:	trg_cache[x] = entry
+			except cache_errors.CacheError, ce:
+				noise.exception(x, ce)
+				del ce
+				continue
+		if count >= noise.call_update_min:
+			noise.update(x)
+			count = 0
+
+	if not trg_cache.autocommits:
+		trg_cache.commit()
+
+	# ok. by this time the trg_cache is up to date, and we have a dict of the
+	# leftover cpvs; walk the target db and purge them.
+	for key in dead_nodes:
+		try:	del trg_cache[key]
+		except cache_errors.CacheError, ce:
+			noise.exception(key, ce)
+			del ce
+	dead_nodes.clear()
+	noise.finish()


+class quiet_mirroring(object):
+	# call_update_min is used by mirror_cache to determine how often to call update.
+	# quiet defaults to 2^24 - 1. Don't call update, 'cept once every 16 million or so :)
+	call_update_min = 0xffffff
+	def update(self, key, *arg):	pass
+	def exception(self, key, *arg):	pass
+	def eclass_stale(self, *arg):	pass
+	def missing_entry(self, key):	pass
+	def misc(self, key, *arg):	pass
+	def corruption(self, key, s):	pass
+	def finish(self, *arg):	pass

+class non_quiet_mirroring(quiet_mirroring):
+	call_update_min = 1
+	def update(self, key, *arg):	print "processed", key
+	def exception(self, key, *arg):	print "exception", key, arg
+	def missing_entry(self, key):	print "key %s is missing" % key
+	def corruption(self, key, *arg):	print "corrupt %s:" % key, arg
+	def eclass_stale(self, key, *arg):	print "stale %s:" % key, arg
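
For orientation, a minimal usage sketch of the modules above, under a few stated assumptions: the locations, label, cpv, and mtime value are invented for illustration; pym/ is assumed to be on sys.path so the modules import as the cache package (and portage_data resolves for fs_template); only APIs defined in this patch are used.

	from cache import flat_hash, util

	auxdbkeys = ("DEPEND", "RDEPEND", "SLOT", "SRC_URI", "RESTRICT", "HOMEPAGE",
		"LICENSE", "DESCRIPTION", "KEYWORDS", "IUSE", "CDEPEND", "PDEPEND", "PROVIDE")

	# one database per repository label; entries are flat files under location/label
	src = flat_hash.database("/tmp/depcache", "/usr/portage", auxdbkeys)

	# _mtime_ is mandatory for flat_hash._setitem; normally it's the ebuild's mtime
	src["app-misc/foo-1.0"] = {"SLOT": "0", "DESCRIPTION": "example entry",
		"_mtime_": 1131168585}
	print src["app-misc/foo-1.0"]["SLOT"]

	# mirror everything into a second cache, printing progress per entry
	trg = flat_hash.database("/tmp/depcache-copy", "/usr/portage", auxdbkeys)
	util.mirror_cache(src.iterkeys(), src, trg,
		verbose_instance=util.non_quiet_mirroring())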