replacement cache subsystem that gestated in 2.1 and 3.0.
author    Brian Harring <ferringb@gentoo.org>
          Sat, 5 Nov 2005 06:09:45 +0000 (06:09 -0000)
committer Brian Harring <ferringb@gentoo.org>
          Sat, 5 Nov 2005 06:09:45 +0000 (06:09 -0000)
it rocks your world, baby.

svn path=/main/branches/2.0/; revision=2257

12 files changed:
pym/cache/__init__.py [new file with mode: 0644]
pym/cache/anydbm.py [new file with mode: 0644]
pym/cache/cache_errors.py [new file with mode: 0644]
pym/cache/flat_hash.py [new file with mode: 0644]
pym/cache/flat_list.py [new file with mode: 0644]
pym/cache/fs_template.py [new file with mode: 0644]
pym/cache/mappings.py [new file with mode: 0644]
pym/cache/metadata.py [new file with mode: 0644]
pym/cache/sql_template.py [new file with mode: 0644]
pym/cache/sqlite.py [new file with mode: 0644]
pym/cache/template.py [new file with mode: 0644]
pym/cache/util.py [new file with mode: 0644]

diff --git a/pym/cache/__init__.py b/pym/cache/__init__.py
new file mode 100644 (file)
index 0000000..189885c
--- /dev/null
@@ -0,0 +1,5 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: __init__.py 1911 2005-08-25 03:44:21Z ferringb $
+
diff --git a/pym/cache/anydbm.py b/pym/cache/anydbm.py
new file mode 100644 (file)
index 0000000..bc380ae
--- /dev/null
@@ -0,0 +1,75 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: anydbm.py 1911 2005-08-25 03:44:21Z ferringb $
+
+anydbm_module = __import__("anydbm")
+try:
+       import cPickle as pickle
+except ImportError:
+       import pickle
+import os
+import fs_template
+import cache_errors
+
+
+class database(fs_template.FsBased):
+
+       autocommits = True
+       cleanse_keys = True
+
+       def __init__(self, *args, **config):
+               super(database,self).__init__(*args, **config)
+
+               default_db = config.get("dbtype","anydbm")
+               if not default_db.startswith("."):
+                       default_db = '.' + default_db
+
+               self._db_path = os.path.join(self.location, fs_template.gen_label(self.location, self.label)+default_db)
+               self.__db = None
+               try:
+                       self.__db = anydbm_module.open(self._db_path, "w", self._perms)
+                               
+               except anydbm_module.error:
+                       # XXX handle this at some point
+                       try:
+                               self._ensure_dirs()
+                               self._ensure_dirs(self._db_path)
+                               self._ensure_access(self._db_path)
+                       except (OSError, IOError), e:
+                               raise cache_errors.InitializationError(self.__class__, e)
+
+                       # try again if failed
+                       try:
+                               if self.__db == None:
+                                       self.__db = anydbm_module.open(self._db_path, "c", self._perms)
+                       except anydbm_module.error, e:
+                               raise cache_errors.InitializationError(self.__class__, e)
+
+       def iteritems(self):
+               return self.__db.iteritems()
+
+       def __getitem__(self, cpv):
+               # we override getitem because it's just a cpickling of the data handed in.
+               return pickle.loads(self.__db[cpv])
+
+
+       def _setitem(self, cpv, values):
+               self.__db[cpv] = pickle.dumps(values,pickle.HIGHEST_PROTOCOL)
+
+       def _delitem(self, cpv):
+               del self.__db[cpv]
+
+
+       def iterkeys(self):
+               return iter(self.__db)
+
+
+       def has_key(self, cpv):
+               return cpv in self.__db
+
+
+       def __del__(self):
+               # name mangling: self.__db lands in __dict__ as _database__db
+               if "_database__db" in self.__dict__ and self.__db != None:
+                       self.__db.sync()
+                       self.__db.close()
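
For illustration, a rough usage sketch of the anydbm backend (the location,
label, and key tuple below are invented, and it assumes pym/cache is importable
as "cache"; autocommits is True, so no explicit commit() is needed):

    from cache import anydbm as anydbm_cache

    auxdbkeys = ("DEPEND", "RDEPEND", "SLOT", "DESCRIPTION")
    db = anydbm_cache.database("/var/cache/edb/dep", "gentoo-x86", auxdbkeys)
    db["dev-util/foo-1.0"] = {"SLOT": "0", "DESCRIPTION": "example", "_mtime_": 1130000000}
    print db["dev-util/foo-1.0"]["SLOT"]    # entries are stored as one pickle per cpv
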
diff --git a/pym/cache/cache_errors.py b/pym/cache/cache_errors.py
new file mode 100644 (file)
index 0000000..2f5b831
--- /dev/null
@@ -0,0 +1,41 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: cache_errors.py 1911 2005-08-25 03:44:21Z ferringb $
+
+class CacheError(Exception):   pass
+
+class InitializationError(CacheError):
+       def __init__(self, class_name, error):
+               self.error, self.class_name = error, class_name
+       def __str__(self):
+               return "Creation of instance %s failed due to %s" % \
+                       (self.class_name, str(self.error))
+
+
+class CacheCorruption(CacheError):
+       def __init__(self, key, ex):
+               self.key, self.ex = key, ex
+       def __str__(self):
+               return "%s is corrupt: %s" % (self.key, str(self.ex))
+
+
+class GeneralCacheCorruption(CacheError):
+       def __init__(self,ex):  self.ex = ex
+       def __str__(self):      return "corruption detected: %s" % str(self.ex)
+
+
+class InvalidRestriction(CacheError):
+       def __init__(self, key, restriction, exception=None):
+               if exception == None:   exception = ''
+               self.key, self.restriction, self.ex = key, restriction, exception
+       def __str__(self):
+               return "%s:%s is not valid: %s" % \
+                       (self.key, self.restriction, str(self.ex))
+
+
+class ReadOnlyRestriction(CacheError):
+       def __init__(self, info=''):
+               self.info = info
+       def __str__(self):
+               return "cache is non-modifiable "+str(self.info)
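
Everything above derives from CacheError, so callers that don't care which
backend failed or how can catch the whole family at once; a small hypothetical
sketch:

    import cache_errors

    try:
        entry = db["dev-util/foo-1.0"]    # db: any cache backend instance
    except KeyError:
        entry = None                      # missing entries surface as plain KeyError
    except cache_errors.CacheError, e:
        print "cache failure:", e         # corruption, init failure, readonly, etc.
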
diff --git a/pym/cache/flat_hash.py b/pym/cache/flat_hash.py
new file mode 100644 (file)
index 0000000..6882afd
--- /dev/null
@@ -0,0 +1,129 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: flat_hash.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import fs_template
+import cache_errors
+import os, stat
+from mappings import LazyLoad, ProtectedDict
+from template import reconstruct_eclasses
+# store the current key order *here*.
+class database(fs_template.FsBased):
+
+       autocommits = True
+
+       def __init__(self, *args, **config):
+               super(database,self).__init__(*args, **config)
+               self.location = os.path.join(self.location, 
+                       self.label.lstrip(os.path.sep).rstrip(os.path.sep))
+
+               if not os.path.exists(self.location):
+                       self._ensure_dirs()
+
+       def __getitem__(self, cpv):
+               fp = os.path.join(self.location, cpv)
+               try:
+                       def curry(*args):
+                               def callit(*args2):
+                                       return args[0](*args[1:]+args2)
+                               return callit
+                       return ProtectedDict(LazyLoad(curry(self._pull, fp, cpv), initial_items=[("_mtime_", os.stat(fp).st_mtime)]))
+               except OSError:
+                       raise KeyError(cpv)
+
+       def _pull(self, fp, cpv):
+               try:
+                       myf = open(fp,"r")
+               except IOError:
+                       raise KeyError(cpv)
+               except OSError, e:
+                       raise cache_errors.CacheCorruption(cpv, e)
+               try:
+                       d = self._parse_data(myf, cpv)
+               except (OSError, ValueError), e:
+                       myf.close()
+                       raise cache_errors.CacheCorruption(cpv, e)
+               myf.close()
+               return d
+
+
+       def _parse_data(self, data, cpv, mtime=0):
+               d = dict(map(lambda x:x.rstrip().split("=", 1), data))
+               if mtime != 0:
+                       d["_mtime_"] = long(mtime)
+               if "_eclasses_" in d:
+                       d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+               return d
+
+
+       def _setitem(self, cpv, values):
+#              import pdb;pdb.set_trace()
+               s = cpv.rfind("/")
+               fp = os.path.join(self.location,cpv[:s],".update.%i.%s" % (os.getpid(), cpv[s+1:]))
+               try:    myf=open(fp, "w")
+               except IOError, ie:
+                       if ie.errno == 2:
+                               try:
+                                       self._ensure_dirs(cpv)
+                                       myf=open(fp,"w")
+                               except (OSError, IOError),e:
+                                       raise cache_errors.CacheCorruption(cpv, e)
+                       else:
+                               raise cache_errors.CacheCorruption(cpv, ie)
+               except OSError, e:
+                       raise cache_errors.CacheCorruption(cpv, e)
+               
+               for k, v in values.items():
+                       if k != "_mtime_":
+                               myf.write("%s=%s\n" % (k, v))
+
+               myf.close()
+               self._ensure_access(fp, mtime=values["_mtime_"])
+
+               #update written.  now we move it.
+
+               new_fp = os.path.join(self.location,cpv)
+               try:    os.rename(fp, new_fp)
+               except (OSError, IOError), e:
+                       os.remove(fp)
+                       raise cache_errors.CacheCorruption(cpv, e)
+
+
+       def _delitem(self, cpv):
+#              import pdb;pdb.set_trace()
+               try:
+                       os.remove(os.path.join(self.location,cpv))
+               except OSError, e:
+                       if e.errno == 2:
+                               raise KeyError(cpv)
+                       else:
+                               raise cache_errors.CacheCorruption(cpv, e)
+
+
+       def has_key(self, cpv):
+               return os.path.exists(os.path.join(self.location, cpv))
+
+
+       def iterkeys(self):
+               """generator for walking the dir struct"""
+               dirs = [self.location]
+               len_base = len(self.location)
+               while len(dirs):
+                       for l in os.listdir(dirs[0]):
+                               if l.endswith(".cpickle"):
+                                       continue
+                               p = os.path.join(dirs[0],l)
+                               st = os.lstat(p)
+                               if stat.S_ISDIR(st.st_mode):
+                                       dirs.append(p)
+                                       continue
+                               yield p[len_base+1:]
+                       dirs.pop(0)
+
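
Worth noting: __getitem__ above only stat()s the entry up front; courtesy of
LazyLoad, the flat file isn't parsed until something other than _mtime_ is
requested. A hypothetical sketch (path and keys invented):

    from cache import flat_hash

    db = flat_hash.database("/var/cache/edb/dep", "gentoo-x86", ("SLOT", "DEPEND"))
    entry = db["dev-util/foo-1.0"]   # os.stat only; raises KeyError if absent
    mtime = entry["_mtime_"]         # served from the LazyLoad seed, no file read
    slot = entry["SLOT"]             # first real access triggers _pull/_parse_data
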
diff --git a/pym/cache/flat_list.py b/pym/cache/flat_list.py
new file mode 100644 (file)
index 0000000..944f740
--- /dev/null
@@ -0,0 +1,109 @@
+import fs_template
+import cache_errors
+import os, stat
+
+# store the current key order *here*.
+class database(fs_template.FsBased):
+
+       autocommits = True
+
+       # do not screw with this ordering. _eclasses_ needs to be last
+       auxdbkey_order=('DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
+               'RESTRICT',  'HOMEPAGE',  'LICENSE', 'DESCRIPTION',
+               'KEYWORDS',  'IUSE', 'CDEPEND',
+               'PDEPEND',   'PROVIDE','_eclasses_')
+
+       def __init__(self, label, auxdbkeys, **config):
+               super(database,self).__init__(label, auxdbkeys, **config)
+               self._base = os.path.join(self._base, 
+                       self.label.lstrip(os.path.sep).rstrip(os.path.sep))
+
+               if len(self._known_keys) > len(self.auxdbkey_order) + 2:
+                       raise Exception("fewer ordered keys than auxdbkeys")
+               if not os.path.exists(self._base):
+                       self._ensure_dirs()
+
+
+       def _getitem(self, cpv):
+               d = {}
+               try:
+                       myf = open(os.path.join(self._base, cpv),"r")
+                       for k,v in zip(self.auxdbkey_order, myf):
+                               d[k] = v.rstrip("\n")
+               except (OSError, IOError),e:
+                       if isinstance(e,IOError) and e.errno == 2:
+#                              print "caught for %s" % cpv, e
+#                              l=os.listdir(os.path.dirname(os.path.join(self._base,cpv)))
+#                              l.sort()
+#                              print l
+                               raise KeyError(cpv)
+                       raise cache_errors.CacheCorruption(cpv, e)
+
+               try:    d["_mtime_"] = os.fstat(myf.fileno()).st_mtime
+               except OSError, e:      
+                       myf.close()
+                       raise cache_errors.CacheCorruption(cpv, e)
+               myf.close()
+               return d
+
+
+       def _setitem(self, cpv, values):
+               s = cpv.rfind("/")
+               fp=os.path.join(self._base,cpv[:s],".update.%i.%s" % (os.getpid(), cpv[s+1:]))
+               try:    myf=open(fp, "w")
+               except (OSError, IOError), e:
+                       if e.errno == 2:
+                               try:
+                                       self._ensure_dirs(cpv)
+                                       myf=open(fp,"w")
+                               except (OSError, IOError),e:
+                                       raise cache_errors.CacheCorruption(cpv, e)
+                       else:
+                               raise cache_errors.CacheCorruption(cpv, e)
+               
+
+               for x in self.auxdbkey_order:
+                       myf.write(values.get(x,"")+"\n")
+
+               myf.close()
+               self._ensure_access(fp, mtime=values["_mtime_"])
+               #update written.  now we move it.
+               new_fp = os.path.join(self._base,cpv)
+               try:    os.rename(fp, new_fp)
+               except (OSError, IOError), e:
+                       os.remove(fp)
+                       raise cache_errors.CacheCorruption(cpv, e)
+
+
+       def _delitem(self, cpv):
+               try:
+                       os.remove(os.path.join(self._base,cpv))
+               except OSError, e:
+                       if e.errno == 2:
+                               raise KeyError(cpv)
+                       else:
+                               raise cache_errors.CacheCorruption(cpv, e)
+
+
+       def has_key(self, cpv):
+               return os.path.exists(os.path.join(self._base, cpv))
+
+
+       def iterkeys(self):
+               """generator for walking the dir struct"""
+               dirs = [self._base]
+               len_base = len(self._base)
+               while len(dirs):
+                       for l in os.listdir(dirs[0]):
+                               if l.endswith(".cpickle"):
+                                       continue
+                               p = os.path.join(dirs[0],l)
+                               st = os.lstat(p)
+                               if stat.S_ISDIR(st.st_mode):
+                                       dirs.append(p)
+                                       continue
+                               yield p[len_base+1:]
+                       dirs.pop(0)
+
+
+       def commit(self):       pass
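
flat_list entries are positional: one line per key, written in auxdbkey_order,
so the files are only decipherable with that ordering in hand. A hypothetical
peek at a raw entry (path invented):

    from cache.flat_list import database

    f = open("/var/cache/edb/dep/usr/portage/dev-util/foo-1.0")
    # line N holds the value for auxdbkey_order[N]; unset keys are blank lines
    for key, line in zip(database.auxdbkey_order, f):
        print key, "=>", line.rstrip("\n")
    f.close()
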
diff --git a/pym/cache/fs_template.py b/pym/cache/fs_template.py
new file mode 100644 (file)
index 0000000..49d5ca0
--- /dev/null
@@ -0,0 +1,74 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: fs_template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import os
+import template, cache_errors
+from portage_data import portage_gid
+
+class FsBased(template.database):
+       """template wrapping fs needed options, and providing _ensure_access as a way to 
+       attempt to ensure files have the specified owners/perms"""
+
+       def __init__(self, *args, **config):
+               """throws InitializationError if needed args aren't specified
+               gid and perms aren't listed due to an oddity in python's currying mechanism
+               gid=portage_gid
+               perms=0664"""
+
+               for x,y in (("gid",portage_gid),("perms",0664)):
+                       if x in config:
+                               setattr(self, "_"+x, config[x])
+                               del config[x]
+                       else:
+                               setattr(self, "_"+x, y)
+               super(FsBased, self).__init__(*args, **config)
+
+               if self.label.startswith(os.path.sep):
+                       # normpath.
+                       self.label = os.path.sep + os.path.normpath(self.label).lstrip(os.path.sep)
+
+
+       def _ensure_access(self, path, mtime=-1):
+               """returns true or false if it's able to ensure that path is properly chmod'd and chowned.
+               if mtime is specified, attempts to ensure that's correct also"""
+               try:
+                       os.chown(path, -1, self._gid)
+                       os.chmod(path, self._perms)
+                       if mtime != -1:
+                               mtime=long(mtime)
+                               os.utime(path, (mtime, mtime))
+               except (OSError, IOError):
+                       return False
+               return True
+
+       def _ensure_dirs(self, path=None):
+               """with path!=None, ensure beyond self.location.  otherwise, ensure self.location"""
+               if path:
+                       path = os.path.dirname(path)
+                       base = self.location
+               else:
+                       path = self.location
+                       base='/'
+
+               for dir in path.lstrip(os.path.sep).rstrip(os.path.sep).split(os.path.sep):
+                       base = os.path.join(base,dir)
+                       if not os.path.exists(base):
+                               um=os.umask(0)
+                               try:
+                                       os.mkdir(base, self._perms | 0111)
+                                       os.chown(base, -1, self._gid)
+                               finally:
+                                       os.umask(um)
+
+       
+def gen_label(base, label):
+       """if supplied label is a path, generate a unique label based upon label, and supplied base path"""
+       if label.find(os.path.sep) == -1:
+               return label
+       label = label.strip("\"").strip("'")
+       label = os.path.join(*(label.rstrip(os.path.sep).split(os.path.sep)))
+       tail = os.path.split(label)[1]
+       return "%s-%X" % (tail, abs(label.__hash__()))
+
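
gen_label leaves simple labels alone and flattens path-style labels into a
single component, the path's tail plus a hash suffix; roughly (the hex value
below is illustrative only):

    >>> gen_label("/var/cache/edb/dep", "rsync")
    'rsync'
    >>> gen_label("/var/cache/edb/dep", "/usr/portage")
    'portage-6A3F0E12'
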
diff --git a/pym/cache/mappings.py b/pym/cache/mappings.py
new file mode 100644 (file)
index 0000000..2251c44
--- /dev/null
@@ -0,0 +1,103 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: mappings.py 2015 2005-09-20 23:14:26Z ferringb $
+
+import UserDict
+
+class ProtectedDict(UserDict.DictMixin):
+       """
+       given an initial dict, this wraps that dict storing changes in a secondary dict, protecting
+       the underlying dict from changes
+       """
+       __slots__=("orig","new","blacklist")
+
+       def __init__(self, orig):
+               self.orig = orig
+               self.new = {}
+               self.blacklist = {}
+
+
+       def __setitem__(self, key, val):
+               self.new[key] = val
+               if key in self.blacklist:
+                       del self.blacklist[key]
+
+
+       def __getitem__(self, key):
+               if key in self.new:
+                       return self.new[key]
+               if key in self.blacklist:
+                       raise KeyError(key)
+               return self.orig[key]
+
+
+       def __delitem__(self, key):
+               if key in self.new:
+                       del self.new[key]
+               elif key in self.orig:
+                       if key not in self.blacklist:
+                               self.blacklist[key] = True
+                               return
+               raise KeyError(key)
+                       
+
+       def __iter__(self):
+               for k in self.new.iterkeys():
+                       yield k
+               for k in self.orig.iterkeys():
+                       if k not in self.blacklist and k not in self.new:
+                               yield k
+
+
+       def keys(self):
+               return list(self.__iter__())
+
+
+       def has_key(self, key):
+               return key in self.new or (key not in self.blacklist and key in self.orig)
+
+
+class LazyLoad(UserDict.DictMixin):
+       """
+       Lazy loading of values for a dict
+       """
+       __slots__=("pull", "d")
+
+       def __init__(self, pull_items_func, initial_items=[]):
+               self.d = {}
+               for k, v in initial_items:
+                       self.d[k] = v
+               self.pull = pull_items_func
+
+       def __getitem__(self, key):
+               if key in self.d:
+                       return self.d[key]
+               elif self.pull != None:
+                       self.d.update(self.pull())
+                       self.pull = None
+               return self.d[key]
+
+
+       def __iter__(self):
+               return iter(self.keys())
+
+       def keys(self):
+               if self.pull != None:
+                       self.d.update(self.pull())
+                       self.pull = None
+               return self.d.keys()
+
+
+       def has_key(self, key):
+               return key in self
+
+
+       def __contains__(self, key):
+               if key in self.d:
+                       return True
+               elif self.pull != None:
+                       self.d.update(self.pull())
+                       self.pull = None
+               return key in self.d
+
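
ProtectedDict amounts to copy-on-write over an existing dict; a quick sketch of
the semantics:

    from cache.mappings import ProtectedDict

    orig = {"a": 1, "b": 2}
    d = ProtectedDict(orig)
    d["a"] = 10                     # shadowed in d.new; orig is untouched
    del d["b"]                      # recorded in d.blacklist; orig is untouched
    print d["a"], d.has_key("b")    # -> 10 False
    print orig                      # -> {'a': 1, 'b': 2}
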
diff --git a/pym/cache/metadata.py b/pym/cache/metadata.py
new file mode 100644 (file)
index 0000000..5e5f0bd
--- /dev/null
@@ -0,0 +1,88 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: metadata.py 1964 2005-09-03 00:16:16Z ferringb $
+
+import os, stat
+import flat_hash
+import cache_errors
+import eclass_cache 
+from template import reconstruct_eclasses, serialize_eclasses
+from mappings import ProtectedDict, LazyLoad
+
+# this is the old cache format, flat_list.  count maintained here.
+magic_line_count = 22
+
+# store the current key order *here*.
+class database(flat_hash.database):
+       complete_eclass_entries = False
+       auxdbkey_order=('DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
+               'RESTRICT',  'HOMEPAGE',  'LICENSE', 'DESCRIPTION',
+               'KEYWORDS',  'INHERITED', 'IUSE', 'CDEPEND',
+               'PDEPEND',   'PROVIDE', 'EAPI')
+
+       autocommits = True
+
+       def __init__(self, location, *args, **config):
+               loc = location
+               super(database, self).__init__(location, *args, **config)
+               self.location = os.path.join(loc, "metadata","cache")
+               self.ec = eclass_cache.cache(loc)
+
+       def __getitem__(self, cpv):
+               return flat_hash.database.__getitem__(self, cpv)
+
+
+       def _parse_data(self, data, cpv, mtime=0):
+               # easy attempt first.
+               data = list(data)
+               if len(data) != magic_line_count:
+                       d = flat_hash.database._parse_data(self, data, cpv, mtime)
+               else:
+                       # this one's interesting.
+                       d = {}
+
+                       for line in data:
+                               # yes, meant to iterate over a string.
+                               hashed = False
+                               # poor mans enumerate.  replace when python 2.3 is required
+                               for idx, c in zip(range(len(line)), line):
+                                       if not c.isalpha():
+                                               if c == "=" and idx > 0:
+                                                       hashed = True
+                                                       d[line[:idx]] = line[idx + 1:]
+                                               elif c == "_" or c.isdigit():
+                                                       continue
+                                               break
+                                       elif not c.isupper():
+                                               break
+                       
+                               if not hashed:
+                                       # non hashed.
+                                       d.clear()
+                                       # poor mans enumerate.  replace when python 2.3 is required
+                                       for idx, key in zip(range(len(self.auxdbkey_order)), self.auxdbkey_order):
+                                               d[key] = data[idx].strip()
+                                       break
+
+               if "_eclasses_" not in d:
+                       if "INHERITED" in d:
+                               d["_eclasses_"] = self.ec.get_eclass_data(d["INHERITED"].split(), from_master_only=True)
+                               del d["INHERITED"]
+               else:
+                       d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+
+               return d
+
+
+               
+       def _setitem(self, cpv, values):
+               values = ProtectedDict(values)
+               
+               # hack.  proper solution is to make this a __setitem__ override, since template.__setitem__ 
+               # serializes _eclasses_, then we reconstruct it.
+               if "_eclasses_" in values:
+                       values["INHERITED"] = ' '.join(reconstruct_eclasses(cpv, values["_eclasses_"]).keys())
+                       del values["_eclasses_"]
+
+               flat_hash.database._setitem(self, cpv, values)
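
In other words, writes fold _eclasses_ back into INHERITED, and reads expand
INHERITED via the eclass cache. A hypothetical round trip (paths and the
auxdbkeys tuple invented; it assumes the eclass cache can actually resolve the
named eclass on disk):

    from cache import metadata

    db = metadata.database("/usr/portage", "portage", auxdbkeys)
    db["dev-util/foo-1.0"] = {"SLOT": "0", "_mtime_": 1130000000,
        "_eclasses_": {"eutils": ("/usr/portage/eclass", 1130000000L)}}
    # on disk: INHERITED=eutils; on read, _eclasses_ is rebuilt via self.ec
    print db["dev-util/foo-1.0"]["_eclasses_"]
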
diff --git a/pym/cache/sql_template.py b/pym/cache/sql_template.py
new file mode 100644 (file)
index 0000000..c69ed77
--- /dev/null
@@ -0,0 +1,275 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: sql_template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import template, cache_errors
+from template import reconstruct_eclasses
+
+class SQLDatabase(template.database):
+       """template class for RDBM based caches
+       
+       This class is designed such that derivatives don't have to change much code, mostly constant strings.
+       _BaseError must be an exception class that all Exceptions thrown from the derived RDBMS are derived
+       from.
+
+       SCHEMA_INSERT_CPV_INTO_PACKAGE should be modified dependent on the RDBMS, as should SCHEMA_PACKAGE_CREATE;
+       basically you need to deal with creation of a unique pkgid.  If the dbapi2 rdbms class has a method of 
+       recovering that id, then modify _insert_cpv to remove the extra select.
+
+       Creation of a derived class involves supplying _initdb_con, and table_exists.
+       Additionally, the default schemas may have to be modified.
+       """
+       
+       SCHEMA_PACKAGE_NAME     = "package_cache"
+       SCHEMA_PACKAGE_CREATE   = "CREATE TABLE %s (\
+               pkgid INTEGER PRIMARY KEY, label VARCHAR(255), cpv VARCHAR(255), UNIQUE(label, cpv))" % SCHEMA_PACKAGE_NAME
+       SCHEMA_PACKAGE_DROP     = "DROP TABLE %s" % SCHEMA_PACKAGE_NAME
+
+       SCHEMA_VALUES_NAME      = "values_cache"
+       SCHEMA_VALUES_CREATE    = "CREATE TABLE %s ( pkgid integer references %s (pkgid) on delete cascade, \
+               key varchar(255), value text, UNIQUE(pkgid, key))" % (SCHEMA_VALUES_NAME, SCHEMA_PACKAGE_NAME)
+       SCHEMA_VALUES_DROP      = "DROP TABLE %s" % SCHEMA_VALUES_NAME
+       SCHEMA_INSERT_CPV_INTO_PACKAGE  = "INSERT INTO %s (label, cpv) VALUES(%%s, %%s)" % SCHEMA_PACKAGE_NAME
+
+       _BaseError = ()
+       _dbClass = None
+
+       autocommits = False
+#      cleanse_keys = True
+
+       # boolean indicating if the derived RDBMS class supports replace syntax
+       _supports_replace = False
+
+       def __init__(self, location, label, auxdbkeys, *args, **config):
+               """initialize the instance.
+               derived classes shouldn't need to override this"""
+
+               super(SQLDatabase, self).__init__(location, label, auxdbkeys, *args, **config)
+
+               config.setdefault("host","127.0.0.1")
+               config.setdefault("autocommit", self.autocommits)
+               self._initdb_con(config)
+
+               self.label = self._sfilter(self.label)
+
+
+       def _dbconnect(self, config):
+               """should be overridden if the derived class needs special parameters for initializing
+               the db connection, or cursor"""
+               self.db = self._dbClass(**config)
+               self.con = self.db.cursor()
+
+
+       def _initdb_con(self,config):
+               """ensure needed tables are in place.
+               If the derived class needs a different set of table creation commands, overload the appropriate
+               SCHEMA_ attributes.  If it needs additional execution beyond, override"""
+
+               self._dbconnect(config)
+               if not self._table_exists(self.SCHEMA_PACKAGE_NAME):
+                       if self.readonly:
+                               raise cache_errors.ReadOnlyRestriction("table %s doesn't exist" % \
+                                       self.SCHEMA_PACKAGE_NAME)
+                       try:    self.con.execute(self.SCHEMA_PACKAGE_CREATE)
+                       except  self._BaseError, e:
+                               raise cache_errors.InitializationError(self.__class__, e)
+
+               if not self._table_exists(self.SCHEMA_VALUES_NAME):
+                       if self.readonly:
+                               raise cache_errors.ReadOnlyRestriction("table %s doesn't exist" % \
+                                       self.SCHEMA_VALUES_NAME)
+                       try:    self.con.execute(self.SCHEMA_VALUES_CREATE)
+                       except  self._BaseError, e:
+                               raise cache_errors.InitializationError(self.__class__, e)
+
+
+       def _table_exists(self, tbl):
+               """return true if a table exists
+               derived classes must override this"""
+               raise NotImplementedError
+
+
+       def _sfilter(self, s):
+               """meta escaping, returns quoted string for use in sql statements"""
+               return "\"%s\"" % s.replace("\\","\\\\").replace("\"","\\\"")
+
+
+       def _getitem(self, cpv):
+               try:    self.con.execute("SELECT key, value FROM %s NATURAL JOIN %s "
+                       "WHERE label=%s AND cpv=%s" % (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME,
+                       self.label, self._sfilter(cpv)))
+               except self._BaseError, e:
+                       raise cache_errors.CacheCorruption(cpv, e)
+
+               rows = self.con.fetchall()
+
+               if len(rows) == 0:
+                       raise KeyError(cpv)
+
+               vals = dict([(k,"") for k in self._known_keys])
+               vals.update(dict(rows))
+               return vals
+
+
+       def _delitem(self, cpv):
+               """delete a cpv cache entry
+               derived RDBM classes for this *must* either support cascaded deletes, or 
+               override this method"""
+               try:
+                       try:    
+                               self.con.execute("DELETE FROM %s WHERE label=%s AND cpv=%s" % \
+                               (self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+                               if self.autocommits:
+                                       self.commit()
+                       except self._BaseError, e:
+                               raise cache_errors.CacheCorruption(cpv, e)
+                       if self.con.rowcount <= 0:
+                               raise KeyError(cpv)
+               except Exception:
+                       if not self.autocommits:
+                               self.db.rollback()
+                               # yes, this can roll back a lot more than just the delete.  deal.
+                       raise
+
+       def __del__(self):
+               # just to be safe.
+               if "db" in self.__dict__ and self.db != None:
+                       self.commit()
+                       self.db.close()
+
+       def _setitem(self, cpv, values):
+
+               try:
+                       # insert.
+                       try:    pkgid = self._insert_cpv(cpv)
+                       except self._BaseError, e:
+                               raise cache_errors.CacheCorruption(cpv, e)
+
+                       # __getitem__ fills out missing values, 
+                       # so we store only what's handed to us and is a known key
+                       db_values = []
+                       for key in self._known_keys:
+                               if values.has_key(key) and values[key] != '':
+                                       db_values.append({"key":key, "value":values[key]})
+
+                       if len(db_values) > 0:
+                               try:    self.con.executemany("INSERT INTO %s (pkgid, key, value) VALUES(\"%s\", %%(key)s, %%(value)s)" % \
+                                       (self.SCHEMA_VALUES_NAME, str(pkgid)), db_values)
+                               except self._BaseError, e:
+                                       raise cache_errors.CacheCorruption(cpv, e)
+                       if self.autocommits:
+                               self.commit()
+
+               except Exception:
+                       if not self.autocommits:
+                               try:    self.db.rollback()
+                               except self._BaseError: pass
+                       raise
+
+
+       def _insert_cpv(self, cpv):
+               """uses SCHEMA_INSERT_CPV_INTO_PACKAGE, which must be overloaded if the table definition
+               doesn't support auto-increment columns for pkgid.
+               returns the cpv's new pkgid
+               note this doesn't commit the transaction.  The caller is expected to."""
+               
+               if self._supports_replace:
+                       query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT","REPLACE",1)
+               else:
+                       # just delete it; _delitem applies _sfilter itself, so hand it the raw cpv
+                       try:    del self[cpv]
+                       except  (cache_errors.CacheCorruption, KeyError):       pass
+                       query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE
+               cpv = self._sfilter(cpv)
+               try:
+                       self.con.execute(query_str % (self.label, cpv))
+               except self._BaseError:
+                       self.db.rollback()
+                       raise
+               self.con.execute("SELECT pkgid FROM %s WHERE label=%s AND cpv=%s" % \
+                       (self.SCHEMA_PACKAGE_NAME, self.label, cpv))
+                       
+               if self.con.rowcount != 1:
+                       raise cache_errors.CacheCorruption(cpv, "Tried to insert the cpv, but found "
+                               " %i matches upon the following select!" % self.con.rowcount)
+               return self.con.fetchone()[0]
+
+
+       def has_key(self, cpv):
+               if not self.autocommits:
+                       try:    self.commit()
+                       except self._BaseError, e:
+                               raise cache_errors.GeneralCacheCorruption(e)
+
+               try:    self.con.execute("SELECT cpv FROM %s WHERE label=%s AND cpv=%s" % \
+                               (self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+               except self._BaseError, e:
+                       raise cache_errors.GeneralCacheCorruption(e)
+               return self.con.rowcount > 0
+
+
+       def iterkeys(self):
+               if not self.autocommits:
+                       try:    self.commit()
+                       except self._BaseError, e:
+                               raise cache_errors.GeneralCacheCorruption(e)
+
+               try:    self.con.execute("SELECT cpv FROM %s WHERE label=%s" % 
+                               (self.SCHEMA_PACKAGE_NAME, self.label))
+               except self._BaseError, e:
+                       raise cache_errors.GeneralCacheCorruption(e)
+#              return [ row[0] for row in self.con.fetchall() ]
+               for x in self.con.fetchall():
+                       yield x[0]
+
+       def iteritems(self):
+               try:    self.con.execute("SELECT cpv, key, value FROM %s NATURAL JOIN %s "
+                       "WHERE label=%s" % (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME,
+                       self.label))
+               except self._BaseError, e:
+                       raise cache_errors.GeneralCacheCorruption(e)
+               
+               oldcpv = None
+               l = []
+               for x, y, v in self.con.fetchall():
+                       if oldcpv != x:
+                               if oldcpv != None:
+                                       d = dict(l)
+                                       if "_eclasses_" in d:
+                                               d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
+                                       yield oldcpv, d
+                               l = []
+                               oldcpv = x
+                       l.append((y,v))
+               if oldcpv != None:
+                       d = dict(l)
+                       if "_eclasses_" in d:
+                               d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
+                       yield oldcpv, d
+
+       def commit(self):
+               self.db.commit()
+
+       def get_matches(self,match_dict):
+               query_list = []
+               for k,v in match_dict.items():
+                       if k not in self._known_keys:
+                               raise cache_errors.InvalidRestriction(k, v, "key isn't known to this cache instance")
+                       v = v.replace("%","\\%")
+                       v = v.replace(".*","%")
+                       query_list.append("(key=%s AND value LIKE %s)" % (self._sfilter(k), self._sfilter(v)))
+
+               if len(query_list):
+                       query = " AND "+" AND ".join(query_list)
+               else:
+                       query = ''
+
+#              print "query = SELECT cpv from package_cache natural join values_cache WHERE label=%s %s" % (self.label, query)
+               try:    self.con.execute("SELECT cpv from package_cache natural join values_cache WHERE label=%s %s" % \
+                               (self.label, query))
+               except self._BaseError, e:
+                       raise cache_errors.GeneralCacheCorruption(e)
+
+               return [ row[0] for row in self.con.fetchall() ]
+
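
get_matches pushes restriction matching down into SQL, translating the ".*"
regex idiom into a LIKE wildcard; a hypothetical call against any derived
backend:

    # every cpv whose DESCRIPTION mentions "python" (db: a derived SQLDatabase)
    for cpv in db.get_matches({"DESCRIPTION": ".*python.*"}):
        print cpv
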
diff --git a/pym/cache/sqlite.py b/pym/cache/sqlite.py
new file mode 100644 (file)
index 0000000..4114eee
--- /dev/null
@@ -0,0 +1,67 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: sqlite.py 1911 2005-08-25 03:44:21Z ferringb $
+
+sqlite_module = __import__("sqlite")
+import os
+import sql_template, fs_template
+import cache_errors
+
+class database(fs_template.FsBased, sql_template.SQLDatabase):
+
+       SCHEMA_DELETE_NAME      = "delete_package_values"
+       SCHEMA_DELETE_TRIGGER   = """CREATE TRIGGER %s AFTER DELETE on %s
+       begin
+       DELETE FROM %s WHERE pkgid=old.pkgid;
+       end;""" % (SCHEMA_DELETE_NAME, sql_template.SQLDatabase.SCHEMA_PACKAGE_NAME, 
+               sql_template.SQLDatabase.SCHEMA_VALUES_NAME)
+
+       _BaseError = sqlite_module.Error
+       _dbClass = sqlite_module
+       _supports_replace = True
+
+       def _dbconnect(self, config):
+               self._dbpath = os.path.join(self.location, fs_template.gen_label(self.location, self.label)+".sqldb")
+               try:
+                       self.db = sqlite_module.connect(self._dbpath, mode=self._perms, autocommit=False)
+                       if not self._ensure_access(self._dbpath):
+                               raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
+                       self.con = self.db.cursor()
+               except self._BaseError, e:
+                       raise cache_errors.InitializationError(self.__class__, e)
+
+               
+       def _initdb_con(self, config):
+               sql_template.SQLDatabase._initdb_con(self, config)
+               try:
+                       self.con.execute("SELECT name FROM sqlite_master WHERE type=\"trigger\" AND name=%s" % \
+                               self._sfilter(self.SCHEMA_DELETE_NAME))
+                       if self.con.rowcount == 0:
+                               self.con.execute(self.SCHEMA_DELETE_TRIGGER)
+                               self.db.commit()
+               except self._BaseError, e:
+                       raise cache_errors.InitializationError(self.__class__, e)
+
+       def _table_exists(self, tbl):
+               """return true/false dependant on a tbl existing"""
+               try:    self.con.execute("SELECT name FROM sqlite_master WHERE type=\"table\" AND name=%s" % 
+                       self._sfilter(tbl))
+               except self._BaseError, e:
+                       # XXX crappy.
+                       return False
+               return len(self.con.fetchall()) == 1
+
+       # we can do it minus a query via rowid.
+       def _insert_cpv(self, cpv):
+               cpv = self._sfilter(cpv)
+               try:    self.con.execute(self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT","REPLACE",1) % \
+                       (self.label, cpv))
+               except self._BaseError, e:
+                       raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed: %s" % str(e))
+
+               # rowcount includes the implicit delete REPLACE performs as well
+               if self.con.rowcount <= 0 or self.con.rowcount > 2:
+                       raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed: %i rows modified" % self.con.rowcount)
+               return self.con.lastrowid
+
diff --git a/pym/cache/template.py b/pym/cache/template.py
new file mode 100644 (file)
index 0000000..c230b93
--- /dev/null
@@ -0,0 +1,171 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import cache_errors
+from mappings import ProtectedDict
+
+class database(object):
+       # this is for metadata/cache transfer.
+       # basically flags that the cache needs to be updated when transferred cache to cache.
+       # leave this.
+
+       complete_eclass_entries = True
+       autocommits = False
+       cleanse_keys = False
+
+       def __init__(self, location, label, auxdbkeys, readonly=False):
+               """ initialize the derived class; specifically, store label/keys"""
+               self._known_keys = auxdbkeys
+               self.location = location
+               self.label = label
+               self.readonly = readonly
+               self.sync_rate = 0
+               self.updates = 0
+
+       
+       def __getitem__(self, cpv):
+               """get a cpv's values.
+               This shouldn't be overridden in derived classes since it handles the _eclasses_ conversion.
+               that said, if the class handles it, they can override it."""
+               if self.updates > self.sync_rate:
+                       self.commit()
+                       self.updates = 0
+               d=self._getitem(cpv)
+               if "_eclasses_" in d:
+                       d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+               return d
+
+       def _getitem(self, cpv):
+               """get cpv's values.
+               override this in derived classes"""
+               raise NotImplementedError
+
+
+       def __setitem__(self, cpv, values):
+               """set a cpv to values
+               This shouldn't be overridden in derived classes since it handles the readonly checks"""
+               if self.readonly:
+                       raise cache_errors.ReadOnlyRestriction()
+               if self.cleanse_keys:
+                       d=ProtectedDict(values)
+                       for k in d.keys():
+                               if d[k] == '':
+                                       del d[k]
+                       if "_eclasses_" in d:
+                               d["_eclasses_"] = serialize_eclasses(d["_eclasses_"])
+               elif "_eclasses_" in values:
+                       d = ProtectedDict(values)
+                       d["_eclasses_"] = serialize_eclasses(d["_eclasses_"])
+               else:
+                       d = values
+               self._setitem(cpv, d)
+               if not self.autocommits:
+                       self.updates += 1
+                       if self.updates > self.sync_rate:
+                               self.commit()
+                               self.updates = 0
+
+
+       def _setitem(self, name, values):
+               """__setitem__ calls this after readonly checks.  override it in derived classes
+               note the _eclasses_ key *must* be handled"""
+               raise NotImplementedError
+
+
+       def __delitem__(self, cpv):
+               """delete a key from the cache.
+               This shouldn't be overridden in derived classes since it handles the readonly checks"""
+               if self.readonly:
+                       raise cache_errors.ReadOnlyRestriction()
+               if not self.autocommits:
+                       self.updates += 1
+               self._delitem(cpv)
+               if self.updates > self.sync_rate:
+                       self.commit()
+                       self.updates = 0
+
+
+       def _delitem(self,cpv):
+               """__delitem__ calls this after readonly checks.  override it in derived classes"""
+               raise NotImplementedError
+
+
+       def has_key(self, cpv):
+               raise NotImplementedError
+
+
+       def keys(self):
+               return tuple(self.iterkeys())
+
+       def iterkeys(self):
+               raise NotImplementedError
+
+       def iteritems(self):
+               for x in self.iterkeys():
+                       yield (x, self[x])
+
+       def items(self):
+               return list(self.iteritems())
+
+       def sync(self, rate=0):
+               self.sync_rate = rate
+               if rate == 0:
+                       self.commit()
+
+       def commit(self):
+               if not self.autocommits:
+                       raise NotImplementedError
+
+       def get_matches(self, match_dict):
+               """generic function for walking the entire cache db, matching restrictions to
+               filter what cpv's are returned.  Derived classes should override this if they
+               can implement a faster method than pulling each cpv:values, and checking it.
+               
+               For example, RDBMS derived classes should push the matching logic down to the
+               actual RDBM."""
+
+               import re
+               restricts = {}
+               for key,match in match_dict.iteritems():
+                       # XXX this sucks.
+                       try:
+                               if isinstance(match, str):
+                                       restricts[key] = re.compile(match).match
+                               else:
+                                       restricts[key] = re.compile(match[0],match[1]).match
+                       except re.error, e:
+                               raise cache_errors.InvalidRestriction(key, match, e)
+                       if key not in self._known_keys:
+                               raise cache_errors.InvalidRestriction(key, match, "Key isn't valid")
+
+               for cpv in self.keys():
+                       cont = True
+                       vals = self[cpv]
+                       for key, match in restricts.iteritems():
+                               if not match(vals[key]):
+                                       cont = False
+                                       break
+                       if cont:
+#                              yield cpv,vals
+                               yield cpv
+
+
+def serialize_eclasses(eclass_dict):
+       """takes a dict, returns a string representing said dict"""
+       return "\t".join(["%s\t%s\t%s" % (k, v[0], str(v[1])) for k,v in eclass_dict.items()])
+
+def reconstruct_eclasses(cpv, eclass_string):
+       """returns a dict when handed a string generated by serialize_eclasses"""
+       eclasses = eclass_string.strip().split("\t")
+       if eclasses == [""]:
+               # occasionally this occurs in the fs backends.  they suck.
+               return {}
+       if len(eclasses) % 3 != 0:
+               raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses))
+       d={}
+       for x in range(0, len(eclasses), 3):
+               d[eclasses[x]] = (eclasses[x + 1], long(eclasses[x + 2]))
+       del eclasses
+       return d
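
serialize_eclasses and reconstruct_eclasses are inverses; the stored form is a
flat tab-separated run of (name, location, mtime) triples. A quick sketch:

    ec = {"eutils": ("/usr/portage/eclass", 1130000000L)}
    s = serialize_eclasses(ec)
    # s == "eutils\t/usr/portage/eclass\t1130000000"
    assert reconstruct_eclasses("dev-util/foo-1.0", s) == ec
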
diff --git a/pym/cache/util.py b/pym/cache/util.py
new file mode 100644 (file)
index 0000000..26d917a
--- /dev/null
@@ -0,0 +1,103 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: util.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import cache_errors
+
+def mirror_cache(valid_nodes_iterable, src_cache, trg_cache, eclass_cache=None, verbose_instance=None):
+
+       if not src_cache.complete_eclass_entries and not eclass_cache:
+               raise Exception("eclass_cache required for caches of class %s!" % src_cache.__class__)
+
+       if verbose_instance == None:
+               noise=quiet_mirroring()
+       else:
+               noise=verbose_instance
+
+       dead_nodes = dict.fromkeys(trg_cache.keys())
+       count=0
+
+       if not trg_cache.autocommits:
+               trg_cache.sync(100)
+
+       for x in valid_nodes_iterable:
+#              print "processing x=",x
+               count+=1
+               if dead_nodes.has_key(x):
+                       del dead_nodes[x]
+               try:    entry = src_cache[x]
+               except KeyError, e:
+                       noise.missing_entry(x)
+                       del e
+                       continue
+               write_it = True
+               try:
+                       trg = trg_cache[x]
+                       if long(trg["_mtime_"]) == long(entry["_mtime_"]) and eclass_cache.is_eclass_data_valid(trg["_eclasses_"]):
+                               write_it = False
+               except (cache_errors.CacheError, KeyError):
+                       pass
+
+               if write_it:
+                       if entry.get("INHERITED",""):
+                               if src_cache.complete_eclass_entries:
+                                       if not "_eclasses_" in entry:
+                                               noise.corruption(x,"missing _eclasses_ field")
+                                               continue
+                                       if not eclass_cache.is_eclass_data_valid(entry["_eclasses_"]):
+                                               noise.eclass_stale(x)
+                                               continue
+                               else:
+                                       entry["_eclasses_"] = eclass_cache.get_eclass_data(entry["INHERITED"].split(), \
+                                               from_master_only=True)
+                                       if not entry["_eclasses_"]:
+                                               noise.eclass_stale(x)
+                                               continue
+
+                       # by this time, if it reaches here, the eclass has been validated, and the entry has 
+                       # been updated/translated (if needs be, for metadata/cache mainly)
+                       try:    trg_cache[x] = entry
+                       except cache_errors.CacheError, ce:
+                               noise.exception(x, ce)
+                               del ce
+                               continue
+               if count >= noise.call_update_min:
+                       noise.update(x)
+                       count = 0
+
+       if not trg_cache.autocommits:
+               trg_cache.commit()
+
+       # ok.  by this time, the trg_cache is up to date, and we have a dict
+       # with a crapload of cpv's.  we now walk the target db, removing stuff if it's in the list.
+       for key in dead_nodes:
+               try:    del trg_cache[key]
+               except cache_errors.CacheError, ce:
+                       noise.exception(ce)
+                       del ce
+       dead_nodes.clear()
+       noise.finish()
+
+
+class quiet_mirroring(object):
+       # call_update_min is used by mirror_cache to determine how often to call update.
+       # quiet defaults to 2^24 -1.  Don't call update, 'cept once every 16 million or so :)
+       call_update_min = 0xffffff
+       def update(self,key,*arg):              pass
+       def exception(self,key,*arg):   pass
+       def eclass_stale(self,*arg):    pass
+       def missing_entry(self, key):   pass
+       def misc(self,key,*arg):                pass
+       def corruption(self, key, s):   pass
+       def finish(self, *arg):                 pass
+       
+class non_quiet_mirroring(quiet_mirroring):
+       call_update_min=1
+       def update(self,key,*arg):      print "processed",key
+       def exception(self, key, *arg): print "exception",key,arg
+       def missing_entry(self,key):    print "key %s is missing" % key
+       def corruption(self,key,*arg):  print "corrupt %s:" % key,arg
+       def eclass_stale(self,key,*arg):print "stale %s:"%key,arg
+
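
The expected caller transfers the rsync-distributed metadata/cache into a local
backend; a hypothetical sketch (paths, the auxdbkeys tuple, and the eclass
cache construction are invented):

    from cache import metadata, flat_hash, util
    import eclass_cache

    ec = eclass_cache.cache("/usr/portage")
    src = metadata.database("/usr/portage", "portage", auxdbkeys)
    trg = flat_hash.database("/var/cache/edb/dep", "/usr/portage", auxdbkeys)
    util.mirror_cache(src.iterkeys(), src, trg, eclass_cache=ec,
        verbose_instance=util.non_quiet_mirroring())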