Move LinkageMap to a new portage.util._dyn_libs.LinkageMapELF module.
author Zac Medico <zmedico@gentoo.org>
Mon, 30 Aug 2010 03:28:45 +0000 (20:28 -0700)
committer Zac Medico <zmedico@gentoo.org>
Mon, 30 Aug 2010 05:25:48 +0000 (22:25 -0700)
pym/portage/dbapi/vartree.py
pym/portage/util/_dyn_libs/LinkageMapELF.py [new file with mode: 0644]

index fe8e0eac42d70eadbe060538d4717e1836db274d..48b0bc09fe3d91fc4a2bb41c1a0cb6c3890f4e67 100644 (file)
@@ -1,7 +1,7 @@
 # Copyright 1998-2010 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
-__all__ = ["LinkageMap",
+__all__ = [
        "vardbapi", "vartree", "dblink"] + \
        ["write_contents", "tar_contents"]
 
@@ -21,11 +21,12 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.update:fixdbentries',
        'portage.util:apply_secpass_permissions,ConfigProtect,ensure_dirs,' + \
                'writemsg,writemsg_level,write_atomic,atomic_ofstream,writedict,' + \
-               'grabfile,grabdict,normalize_path,new_protect_filename,getlibpaths',
+               'grabdict,normalize_path,new_protect_filename',
        'portage.util.digraph:digraph',
        'portage.util.env_update:env_update',
        'portage.util.listdir:dircache,listdir',
        'portage.util._dyn_libs.PreservedLibsRegistry:PreservedLibsRegistry',
+       'portage.util._dyn_libs.LinkageMapELF:LinkageMapELF@LinkageMap',
        'portage.versions:best,catpkgsplit,catsplit,cpv_getkey,pkgcmp,' + \
                '_pkgsplit@pkgsplit',
 )
@@ -49,7 +50,6 @@ from portage import _selinux_merge
 from portage import _unicode_decode
 from portage import _unicode_encode
 
-from portage.cache.mappings import slot_dict_class
 from _emerge.TaskScheduler import TaskScheduler
 from _emerge.MiscFunctionsProcess import MiscFunctionsProcess
 
@@ -73,617 +73,6 @@ if sys.hexversion >= 0x3000000:
        basestring = str
        long = int
 
-class LinkageMap(object):
-
-       """Models dynamic linker dependencies."""
-
-       _needed_aux_key = "NEEDED.ELF.2"
-       _soname_map_class = slot_dict_class(
-               ("consumers", "providers"), prefix="")
-
-       def __init__(self, vardbapi):
-               self._dbapi = vardbapi
-               self._root = self._dbapi._eroot
-               self._libs = {}
-               self._obj_properties = {}
-               self._obj_key_cache = {}
-               self._defpath = set()
-               self._path_key_cache = {}
-
-       def _clear_cache(self):
-               self._libs.clear()
-               self._obj_properties.clear()
-               self._obj_key_cache.clear()
-               self._defpath.clear()
-               self._path_key_cache.clear()
-
-       def _path_key(self, path):
-               key = self._path_key_cache.get(path)
-               if key is None:
-                       key = self._ObjectKey(path, self._root)
-                       self._path_key_cache[path] = key
-               return key
-
-       def _obj_key(self, path):
-               key = self._obj_key_cache.get(path)
-               if key is None:
-                       key = self._ObjectKey(path, self._root)
-                       self._obj_key_cache[path] = key
-               return key
-
-       class _ObjectKey(object):
-
-               """Helper class used as _obj_properties keys for objects."""
-
-               __slots__ = ("__weakref__", "_key")
-
-               def __init__(self, obj, root):
-                       """
-                       This takes a path to an object.
-
-                       @param object: path to a file
-                       @type object: string (example: '/usr/bin/bar')
-
-                       """
-                       self._key = self._generate_object_key(obj, root)
-
-               def __hash__(self):
-                       return hash(self._key)
-
-               def __eq__(self, other):
-                       return self._key == other._key
-
-               def _generate_object_key(self, obj, root):
-                       """
-                       Generate object key for a given object.
-
-                       @param object: path to a file
-                       @type object: string (example: '/usr/bin/bar')
-                       @rtype: 2-tuple of types (long, int) if object exists. string if
-                               object does not exist.
-                       @return:
-                               1. 2-tuple of object's inode and device from a stat call, if object
-                                       exists.
-                               2. realpath of object if object does not exist.
-
-                       """
-
-                       os = _os_merge
-
-                       try:
-                               _unicode_encode(obj,
-                                       encoding=_encodings['merge'], errors='strict')
-                       except UnicodeEncodeError:
-                               # The package appears to have been merged with a 
-                               # different value of sys.getfilesystemencoding(),
-                               # so fall back to utf_8 if appropriate.
-                               try:
-                                       _unicode_encode(obj,
-                                               encoding=_encodings['fs'], errors='strict')
-                               except UnicodeEncodeError:
-                                       pass
-                               else:
-                                       os = portage.os
-
-                       abs_path = os.path.join(root, obj.lstrip(os.sep))
-                       try:
-                               object_stat = os.stat(abs_path)
-                       except OSError:
-                               # Use the realpath as the key if the file does not exists on the
-                               # filesystem.
-                               return os.path.realpath(abs_path)
-                       # Return a tuple of the device and inode.
-                       return (object_stat.st_dev, object_stat.st_ino)
-
-               def file_exists(self):
-                       """
-                       Determine if the file for this key exists on the filesystem.
-
-                       @rtype: Boolean
-                       @return:
-                               1. True if the file exists.
-                               2. False if the file does not exist or is a broken symlink.
-
-                       """
-                       return isinstance(self._key, tuple)
-
-       class _LibGraphNode(_ObjectKey):
-               __slots__ = ("alt_paths",)
-
-               def __init__(self, obj, root):
-                       LinkageMap._ObjectKey.__init__(self, obj, root)
-                       self.alt_paths = set()
-
-               def __str__(self):
-                       return str(sorted(self.alt_paths))
-
-       def rebuild(self, exclude_pkgs=None, include_file=None):
-               """
-               Raises CommandNotFound if there are preserved libs
-               and the scanelf binary is not available.
-               """
-
-               os = _os_merge
-               root = self._root
-               root_len = len(root) - 1
-               self._clear_cache()
-               self._defpath.update(getlibpaths(self._root))
-               libs = self._libs
-               obj_key_cache = self._obj_key_cache
-               obj_properties = self._obj_properties
-
-               lines = []
-
-               # Data from include_file is processed first so that it
-               # overrides any data from previously installed files.
-               if include_file is not None:
-                       lines += grabfile(include_file)
-
-               aux_keys = [self._needed_aux_key]
-               for cpv in self._dbapi.cpv_all():
-                       if exclude_pkgs is not None and cpv in exclude_pkgs:
-                               continue
-                       lines += self._dbapi.aux_get(cpv, aux_keys)[0].split('\n')
-               # Cache NEEDED.* files avoid doing excessive IO for every rebuild.
-               self._dbapi.flush_cache()
-
-               # have to call scanelf for preserved libs here as they aren't 
-               # registered in NEEDED.ELF.2 files
-               plibs = set()
-               if self._dbapi.plib_registry and self._dbapi.plib_registry.getPreservedLibs():
-                       args = ["/usr/bin/scanelf", "-qF", "%a;%F;%S;%r;%n"]
-                       for items in self._dbapi.plib_registry.getPreservedLibs().values():
-                               plibs.update(items)
-                               args.extend(os.path.join(root, x.lstrip("." + os.sep)) \
-                                       for x in items)
-                       try:
-                               proc = subprocess.Popen(args, stdout=subprocess.PIPE)
-                       except EnvironmentError as e:
-                               if e.errno != errno.ENOENT:
-                                       raise
-                               raise CommandNotFound(args[0])
-                       else:
-                               for l in proc.stdout:
-                                       try:
-                                               l = _unicode_decode(l,
-                                                       encoding=_encodings['content'], errors='strict')
-                                       except UnicodeDecodeError:
-                                               l = _unicode_decode(l,
-                                                       encoding=_encodings['content'], errors='replace')
-                                               writemsg_level(_("\nError decoding characters " \
-                                                       "returned from scanelf: %s\n\n") % (l,),
-                                                       level=logging.ERROR, noiselevel=-1)
-                                       l = l[3:].rstrip("\n")
-                                       if not l:
-                                               continue
-                                       fields = l.split(";")
-                                       if len(fields) < 5:
-                                               writemsg_level(_("\nWrong number of fields " \
-                                                       "returned from scanelf: %s\n\n") % (l,),
-                                                       level=logging.ERROR, noiselevel=-1)
-                                               continue
-                                       fields[1] = fields[1][root_len:]
-                                       plibs.discard(fields[1])
-                                       lines.append(";".join(fields))
-                               proc.wait()
-
-               if plibs:
-                       # Preserved libraries that did not appear in the scanelf output.
-                       # This is known to happen with statically linked libraries.
-                       # Generate dummy lines for these, so we can assume that every
-                       # preserved library has an entry in self._obj_properties. This
-                       # is important in order to prevent findConsumers from raising
-                       # an unwanted KeyError.
-                       for x in plibs:
-                               lines.append(";".join(['', x, '', '', '']))
-
-               for l in lines:
-                       l = l.rstrip("\n")
-                       if not l:
-                               continue
-                       fields = l.split(";")
-                       if len(fields) < 5:
-                               writemsg_level(_("\nWrong number of fields " \
-                                       "in %s: %s\n\n") % (self._needed_aux_key, l),
-                                       level=logging.ERROR, noiselevel=-1)
-                               continue
-                       arch = fields[0]
-                       obj = fields[1]
-                       soname = fields[2]
-                       path = set([normalize_path(x) \
-                               for x in filter(None, fields[3].replace(
-                               "${ORIGIN}", os.path.dirname(obj)).replace(
-                               "$ORIGIN", os.path.dirname(obj)).split(":"))])
-                       needed = [x for x in fields[4].split(",") if x]
-
-                       obj_key = self._obj_key(obj)
-                       indexed = True
-                       myprops = obj_properties.get(obj_key)
-                       if myprops is None:
-                               indexed = False
-                               myprops = (arch, needed, path, soname, set())
-                               obj_properties[obj_key] = myprops
-                       # All object paths are added into the obj_properties tuple.
-                       myprops[4].add(obj)
-
-                       # Don't index the same file more that once since only one
-                       # set of data can be correct and therefore mixing data
-                       # may corrupt the index (include_file overrides previously
-                       # installed).
-                       if indexed:
-                               continue
-
-                       arch_map = libs.get(arch)
-                       if arch_map is None:
-                               arch_map = {}
-                               libs[arch] = arch_map
-                       if soname:
-                               soname_map = arch_map.get(soname)
-                               if soname_map is None:
-                                       soname_map = self._soname_map_class(
-                                               providers=set(), consumers=set())
-                                       arch_map[soname] = soname_map
-                               soname_map.providers.add(obj_key)
-                       for needed_soname in needed:
-                               soname_map = arch_map.get(needed_soname)
-                               if soname_map is None:
-                                       soname_map = self._soname_map_class(
-                                               providers=set(), consumers=set())
-                                       arch_map[needed_soname] = soname_map
-                               soname_map.consumers.add(obj_key)
-
-       def listBrokenBinaries(self, debug=False):
-               """
-               Find binaries and their needed sonames, which have no providers.
-
-               @param debug: Boolean to enable debug output
-               @type debug: Boolean
-               @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])})
-               @return: The return value is an object -> set-of-sonames mapping, where
-                       object is a broken binary and the set consists of sonames needed by
-                       object that have no corresponding libraries to fulfill the dependency.
-
-               """
-
-               os = _os_merge
-
-               class _LibraryCache(object):
-
-                       """
-                       Caches properties associated with paths.
-
-                       The purpose of this class is to prevent multiple instances of
-                       _ObjectKey for the same paths.
-
-                       """
-
-                       def __init__(cache_self):
-                               cache_self.cache = {}
-
-                       def get(cache_self, obj):
-                               """
-                               Caches and returns properties associated with an object.
-
-                               @param obj: absolute path (can be symlink)
-                               @type obj: string (example: '/usr/lib/libfoo.so')
-                               @rtype: 4-tuple with types
-                                       (string or None, string or None, 2-tuple, Boolean)
-                               @return: 4-tuple with the following components:
-                                       1. arch as a string or None if it does not exist,
-                                       2. soname as a string or None if it does not exist,
-                                       3. obj_key as 2-tuple,
-                                       4. Boolean representing whether the object exists.
-                                       (example: ('libfoo.so.1', (123L, 456L), True))
-
-                               """
-                               if obj in cache_self.cache:
-                                       return cache_self.cache[obj]
-                               else:
-                                       obj_key = self._obj_key(obj)
-                                       # Check that the library exists on the filesystem.
-                                       if obj_key.file_exists():
-                                               # Get the arch and soname from LinkageMap._obj_properties if
-                                               # it exists. Otherwise, None.
-                                               arch, _needed, _path, soname, _objs = \
-                                                               self._obj_properties.get(obj_key, (None,)*5)
-                                               return cache_self.cache.setdefault(obj, \
-                                                               (arch, soname, obj_key, True))
-                                       else:
-                                               return cache_self.cache.setdefault(obj, \
-                                                               (None, None, obj_key, False))
-
-               rValue = {}
-               cache = _LibraryCache()
-               providers = self.listProviders()
-
-               # Iterate over all obj_keys and their providers.
-               for obj_key, sonames in providers.items():
-                       arch, _needed, path, _soname, objs = self._obj_properties[obj_key]
-                       path = path.union(self._defpath)
-                       # Iterate over each needed soname and the set of library paths that
-                       # fulfill the soname to determine if the dependency is broken.
-                       for soname, libraries in sonames.items():
-                               # validLibraries is used to store libraries, which satisfy soname,
-                               # so if no valid libraries are found, the soname is not satisfied
-                               # for obj_key.  If unsatisfied, objects associated with obj_key
-                               # must be emerged.
-                               validLibraries = set()
-                               # It could be the case that the library to satisfy the soname is
-                               # not in the obj's runpath, but a symlink to the library is (eg
-                               # libnvidia-tls.so.1 in nvidia-drivers).  Also, since LinkageMap
-                               # does not catalog symlinks, broken or missing symlinks may go
-                               # unnoticed.  As a result of these cases, check that a file with
-                               # the same name as the soname exists in obj's runpath.
-                               # XXX If we catalog symlinks in LinkageMap, this could be improved.
-                               for directory in path:
-                                       cachedArch, cachedSoname, cachedKey, cachedExists = \
-                                                       cache.get(os.path.join(directory, soname))
-                                       # Check that this library provides the needed soname.  Doing
-                                       # this, however, will cause consumers of libraries missing
-                                       # sonames to be unnecessarily emerged. (eg libmix.so)
-                                       if cachedSoname == soname and cachedArch == arch:
-                                               validLibraries.add(cachedKey)
-                                               if debug and cachedKey not in \
-                                                               set(map(self._obj_key_cache.get, libraries)):
-                                                       # XXX This is most often due to soname symlinks not in
-                                                       # a library's directory.  We could catalog symlinks in
-                                                       # LinkageMap to avoid checking for this edge case here.
-                                                       writemsg(
-                                                               _("Found provider outside of findProviders:") + \
-                                                               (" %s -> %s %s\n" % (os.path.join(directory, soname),
-                                                               self._obj_properties[cachedKey][4], libraries)),
-                                                               noiselevel=-1)
-                                               # A valid library has been found, so there is no need to
-                                               # continue.
-                                               break
-                                       if debug and cachedArch == arch and \
-                                                       cachedKey in self._obj_properties:
-                                               writemsg((_("Broken symlink or missing/bad soname: " + \
-                                                       "%(dir_soname)s -> %(cachedKey)s " + \
-                                                       "with soname %(cachedSoname)s but expecting %(soname)s") % \
-                                                       {"dir_soname":os.path.join(directory, soname),
-                                                       "cachedKey": self._obj_properties[cachedKey],
-                                                       "cachedSoname": cachedSoname, "soname":soname}) + "\n",
-                                                       noiselevel=-1)
-                               # This conditional checks if there are no libraries to satisfy the
-                               # soname (empty set).
-                               if not validLibraries:
-                                       for obj in objs:
-                                               rValue.setdefault(obj, set()).add(soname)
-                                       # If no valid libraries have been found by this point, then
-                                       # there are no files named with the soname within obj's runpath,
-                                       # but if there are libraries (from the providers mapping), it is
-                                       # likely that soname symlinks or the actual libraries are
-                                       # missing or broken.  Thus those libraries are added to rValue
-                                       # in order to emerge corrupt library packages.
-                                       for lib in libraries:
-                                               rValue.setdefault(lib, set()).add(soname)
-                                               if debug:
-                                                       if not os.path.isfile(lib):
-                                                               writemsg(_("Missing library:") + " %s\n" % (lib,),
-                                                                       noiselevel=-1)
-                                                       else:
-                                                               writemsg(_("Possibly missing symlink:") + \
-                                                                       "%s\n" % (os.path.join(os.path.dirname(lib), soname)),
-                                                                       noiselevel=-1)
-               return rValue
-
-       def listProviders(self):
-               """
-               Find the providers for all object keys in LinkageMap.
-
-               @rtype: dict (example:
-                       {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}})
-               @return: The return value is an object key -> providers mapping, where
-                       providers is a mapping of soname -> set-of-library-paths returned
-                       from the findProviders method.
-
-               """
-               rValue = {}
-               if not self._libs:
-                       self.rebuild()
-               # Iterate over all object keys within LinkageMap.
-               for obj_key in self._obj_properties:
-                       rValue.setdefault(obj_key, self.findProviders(obj_key))
-               return rValue
-
-       def isMasterLink(self, obj):
-               """
-               Determine whether an object is a master link.
-
-               @param obj: absolute path to an object
-               @type obj: string (example: '/usr/bin/foo')
-               @rtype: Boolean
-               @return:
-                       1. True if obj is a master link
-                       2. False if obj is not a master link
-
-               """
-               os = _os_merge
-               basename = os.path.basename(obj)
-               obj_key = self._obj_key(obj)
-               if obj_key not in self._obj_properties:
-                       raise KeyError("%s (%s) not in object list" % (obj_key, obj))
-               soname = self._obj_properties[obj_key][3]
-               return (len(basename) < len(soname))
-
-       def listLibraryObjects(self):
-               """
-               Return a list of library objects.
-
-               Known limitation: library objects lacking an soname are not included.
-
-               @rtype: list of strings
-               @return: list of paths to all providers
-
-               """
-               rValue = []
-               if not self._libs:
-                       self.rebuild()
-               for arch_map in self._libs.values():
-                       for soname_map in arch_map.values():
-                               for obj_key in soname_map.providers:
-                                       rValue.extend(self._obj_properties[obj_key][4])
-               return rValue
-
-       def getSoname(self, obj):
-               """
-               Return the soname associated with an object.
-
-               @param obj: absolute path to an object
-               @type obj: string (example: '/usr/bin/bar')
-               @rtype: string
-               @return: soname as a string
-
-               """
-               if not self._libs:
-                       self.rebuild()
-               if isinstance(obj, self._ObjectKey):
-                       obj_key = obj
-                       if obj_key not in self._obj_properties:
-                               raise KeyError("%s not in object list" % obj_key)
-                       return self._obj_properties[obj_key][3]
-               if obj not in self._obj_key_cache:
-                       raise KeyError("%s not in object list" % obj)
-               return self._obj_properties[self._obj_key_cache[obj]][3]
-
-       def findProviders(self, obj):
-               """
-               Find providers for an object or object key.
-
-               This method may be called with a key from _obj_properties.
-
-               In some cases, not all valid libraries are returned.  This may occur when
-               an soname symlink referencing a library is in an object's runpath while
-               the actual library is not.  We should consider cataloging symlinks within
-               LinkageMap as this would avoid those cases and would be a better model of
-               library dependencies (since the dynamic linker actually searches for
-               files named with the soname in the runpaths).
-
-               @param obj: absolute path to an object or a key from _obj_properties
-               @type obj: string (example: '/usr/bin/bar') or _ObjectKey
-               @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])})
-               @return: The return value is a soname -> set-of-library-paths, where
-               set-of-library-paths satisfy soname.
-
-               """
-
-               os = _os_merge
-
-               rValue = {}
-
-               if not self._libs:
-                       self.rebuild()
-
-               # Determine the obj_key from the arguments.
-               if isinstance(obj, self._ObjectKey):
-                       obj_key = obj
-                       if obj_key not in self._obj_properties:
-                               raise KeyError("%s not in object list" % obj_key)
-               else:
-                       obj_key = self._obj_key(obj)
-                       if obj_key not in self._obj_properties:
-                               raise KeyError("%s (%s) not in object list" % (obj_key, obj))
-
-               arch, needed, path, _soname, _objs = self._obj_properties[obj_key]
-               path_keys = set(self._path_key(x) for x in path.union(self._defpath))
-               for soname in needed:
-                       rValue[soname] = set()
-                       if arch not in self._libs or soname not in self._libs[arch]:
-                               continue
-                       # For each potential provider of the soname, add it to rValue if it
-                       # resides in the obj's runpath.
-                       for provider_key in self._libs[arch][soname].providers:
-                               providers = self._obj_properties[provider_key][4]
-                               for provider in providers:
-                                       if self._path_key(os.path.dirname(provider)) in path_keys:
-                                               rValue[soname].add(provider)
-               return rValue
-
-       def findConsumers(self, obj):
-               """
-               Find consumers of an object or object key.
-
-               This method may be called with a key from _obj_properties.  If this
-               method is going to be called with an object key, to avoid not catching
-               shadowed libraries, do not pass new _ObjectKey instances to this method.
-               Instead pass the obj as a string.
-
-               In some cases, not all consumers are returned.  This may occur when
-               an soname symlink referencing a library is in an object's runpath while
-               the actual library is not. For example, this problem is noticeable for
-               binutils since it's libraries are added to the path via symlinks that
-               are gemerated in the /usr/$CHOST/lib/ directory by binutils-config.
-               Failure to recognize consumers of these symlinks makes preserve-libs
-               fail to preserve binutils libs that are needed by these unrecognized
-               consumers.
-
-               Note that library consumption via dlopen (common for kde plugins) is
-               currently undetected. However, it is possible to use the
-               corresponding libtool archive (*.la) files to detect such consumers
-               (revdep-rebuild is able to detect them).
-
-               @param obj: absolute path to an object or a key from _obj_properties
-               @type obj: string (example: '/usr/bin/bar') or _ObjectKey
-               @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
-               @return: The return value is a soname -> set-of-library-paths, where
-               set-of-library-paths satisfy soname.
-
-               """
-
-               os = _os_merge
-
-               rValue = set()
-
-               if not self._libs:
-                       self.rebuild()
-
-               # Determine the obj_key and the set of objects matching the arguments.
-               if isinstance(obj, self._ObjectKey):
-                       obj_key = obj
-                       if obj_key not in self._obj_properties:
-                               raise KeyError("%s not in object list" % obj_key)
-                       objs = self._obj_properties[obj_key][4]
-               else:
-                       objs = set([obj])
-                       obj_key = self._obj_key(obj)
-                       if obj_key not in self._obj_properties:
-                               raise KeyError("%s (%s) not in object list" % (obj_key, obj))
-
-               # If there is another version of this lib with the
-               # same soname and the master link points to that
-               # other version, this lib will be shadowed and won't
-               # have any consumers.
-               if not isinstance(obj, self._ObjectKey):
-                       soname = self._obj_properties[obj_key][3]
-                       master_link = os.path.join(self._root,
-                               os.path.dirname(obj).lstrip(os.path.sep), soname)
-                       try:
-                               master_st = os.stat(master_link)
-                               obj_st = os.stat(obj)
-                       except OSError:
-                               pass
-                       else:
-                               if (obj_st.st_dev, obj_st.st_ino) != \
-                                       (master_st.st_dev, master_st.st_ino):
-                                       return set()
-
-               # Determine the directory(ies) from the set of objects.
-               objs_dir_keys = set(self._path_key(os.path.dirname(x)) for x in objs)
-               defpath_keys = set(self._path_key(x) for x in self._defpath)
-
-               arch, _needed, _path, soname, _objs = self._obj_properties[obj_key]
-               if arch in self._libs and soname in self._libs[arch]:
-                       # For each potential consumer, add it to rValue if an object from the
-                       # arguments resides in the consumer's runpath.
-                       for consumer_key in self._libs[arch][soname].consumers:
-                               _arch, _needed, path, _soname, consumer_objs = \
-                                               self._obj_properties[consumer_key]
-                               path_keys = defpath_keys.union(self._path_key(x) for x in path)
-                               if objs_dir_keys.intersection(path_keys):
-                                       rValue.update(consumer_objs)
-               return rValue
-
 class vardbapi(dbapi):
 
        _excluded_dirs = ["CVS", "lost+found"]
diff --git a/pym/portage/util/_dyn_libs/LinkageMapELF.py b/pym/portage/util/_dyn_libs/LinkageMapELF.py
new file mode 100644 (file)
index 0000000..a159f7a
--- /dev/null
@@ -0,0 +1,633 @@
+# Copyright 1998-2010 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import logging
+import subprocess
+
+import portage
+from portage import _encodings
+from portage import _os_merge
+from portage import _unicode_decode
+from portage import _unicode_encode
+from portage.cache.mappings import slot_dict_class
+from portage.exception import CommandNotFound
+from portage.localization import _
+from portage.util import getlibpaths
+from portage.util import grabfile
+from portage.util import normalize_path
+from portage.util import writemsg_level
+
+class LinkageMapELF(object):
+
+       """Models dynamic linker dependencies."""
+
+       _needed_aux_key = "NEEDED.ELF.2"
+       _soname_map_class = slot_dict_class(
+               ("consumers", "providers"), prefix="")
+
+       def __init__(self, vardbapi):
+               self._dbapi = vardbapi
+               self._eroot = self._dbapi._eroot
+               self._libs = {}
+               self._obj_properties = {}
+               self._obj_key_cache = {}
+               self._defpath = set()
+               self._path_key_cache = {}
+
+       def _clear_cache(self):
+               self._libs.clear()
+               self._obj_properties.clear()
+               self._obj_key_cache.clear()
+               self._defpath.clear()
+               self._path_key_cache.clear()
+
+       def _path_key(self, path):
+               key = self._path_key_cache.get(path)
+               if key is None:
+                       key = self._ObjectKey(path, self._eroot)
+                       self._path_key_cache[path] = key
+               return key
+
+       def _obj_key(self, path):
+               key = self._obj_key_cache.get(path)
+               if key is None:
+                       key = self._ObjectKey(path, self._eroot)
+                       self._obj_key_cache[path] = key
+               return key
+
+       class _ObjectKey(object):
+
+               """Helper class used as _obj_properties keys for objects."""
+
+               __slots__ = ("__weakref__", "_key")
+
+               def __init__(self, obj, root):
+                       """
+                       This takes a path to an object.
+
+                       @param obj: path to a file
+                       @type obj: string (example: '/usr/bin/bar')
+
+                       """
+                       self._key = self._generate_object_key(obj, root)
+
+               def __hash__(self):
+                       return hash(self._key)
+
+               def __eq__(self, other):
+                       return self._key == other._key
+
+               def _generate_object_key(self, obj, root):
+                       """
+                       Generate object key for a given object.
+
+                       @param obj: path to a file
+                       @type obj: string (example: '/usr/bin/bar')
+                       @rtype: 2-tuple of types (long, int) if object exists. string if
+                               object does not exist.
+                       @return:
+                               1. 2-tuple of object's inode and device from a stat call, if object
+                                       exists.
+                               2. realpath of object if object does not exist.
+
+                       """
+
+                       os = _os_merge
+
+                       try:
+                               _unicode_encode(obj,
+                                       encoding=_encodings['merge'], errors='strict')
+                       except UnicodeEncodeError:
+                               # The package appears to have been merged with a 
+                               # different value of sys.getfilesystemencoding(),
+                               # so fall back to utf_8 if appropriate.
+                               try:
+                                       _unicode_encode(obj,
+                                               encoding=_encodings['fs'], errors='strict')
+                               except UnicodeEncodeError:
+                                       pass
+                               else:
+                                       os = portage.os
+
+                       abs_path = os.path.join(root, obj.lstrip(os.sep))
+                       try:
+                               object_stat = os.stat(abs_path)
+                       except OSError:
+                               # Use the realpath as the key if the file does not exist on the
+                               # filesystem.
+                               return os.path.realpath(abs_path)
+                       # Return a tuple of the device and inode.
+                       return (object_stat.st_dev, object_stat.st_ino)
+
+               def file_exists(self):
+                       """
+                       Determine if the file for this key exists on the filesystem.
+
+                       @rtype: Boolean
+                       @return:
+                               1. True if the file exists.
+                               2. False if the file does not exist or is a broken symlink.
+
+                       """
+                       return isinstance(self._key, tuple)
+
+       class _LibGraphNode(_ObjectKey):
+               __slots__ = ("alt_paths",)
+
+               def __init__(self, obj, root):
+                       LinkageMapELF._ObjectKey.__init__(self, obj, root)
+                       self.alt_paths = set()
+
+               def __str__(self):
+                       return str(sorted(self.alt_paths))
+
+       def rebuild(self, exclude_pkgs=None, include_file=None):
+               """
+               Raises CommandNotFound if there are preserved libs
+               and the scanelf binary is not available.
+               """
+
+               os = _os_merge
+               root = self._eroot
+               root_len = len(root) - 1
+               self._clear_cache()
+               self._defpath.update(getlibpaths(self._eroot))
+               libs = self._libs
+               obj_properties = self._obj_properties
+
+               lines = []
+
+               # Data from include_file is processed first so that it
+               # overrides any data from previously installed files.
+               if include_file is not None:
+                       lines += grabfile(include_file)
+
+               aux_keys = [self._needed_aux_key]
+               for cpv in self._dbapi.cpv_all():
+                       if exclude_pkgs is not None and cpv in exclude_pkgs:
+                               continue
+                       lines += self._dbapi.aux_get(cpv, aux_keys)[0].split('\n')
+               # Cache NEEDED.* files to avoid doing excessive IO for every rebuild.
+               self._dbapi.flush_cache()
+
+               # have to call scanelf for preserved libs here as they aren't 
+               # registered in NEEDED.ELF.2 files
+               plibs = set()
+               if self._dbapi.plib_registry and self._dbapi.plib_registry.getPreservedLibs():
+                       args = ["/usr/bin/scanelf", "-qF", "%a;%F;%S;%r;%n"]
+                       for items in self._dbapi.plib_registry.getPreservedLibs().values():
+                               plibs.update(items)
+                               args.extend(os.path.join(root, x.lstrip("." + os.sep)) \
+                                       for x in items)
+                       try:
+                               proc = subprocess.Popen(args, stdout=subprocess.PIPE)
+                       except EnvironmentError as e:
+                               if e.errno != errno.ENOENT:
+                                       raise
+                               raise CommandNotFound(args[0])
+                       else:
+                               for l in proc.stdout:
+                                       try:
+                                               l = _unicode_decode(l,
+                                                       encoding=_encodings['content'], errors='strict')
+                                       except UnicodeDecodeError:
+                                               l = _unicode_decode(l,
+                                                       encoding=_encodings['content'], errors='replace')
+                                               writemsg_level(_("\nError decoding characters " \
+                                                       "returned from scanelf: %s\n\n") % (l,),
+                                                       level=logging.ERROR, noiselevel=-1)
+                                       l = l[3:].rstrip("\n")
+                                       if not l:
+                                               continue
+                                       fields = l.split(";")
+                                       if len(fields) < 5:
+                                               writemsg_level(_("\nWrong number of fields " \
+                                                       "returned from scanelf: %s\n\n") % (l,),
+                                                       level=logging.ERROR, noiselevel=-1)
+                                               continue
+                                       fields[1] = fields[1][root_len:]
+                                       plibs.discard(fields[1])
+                                       lines.append(";".join(fields))
+                               proc.wait()
+
+               if plibs:
+                       # Preserved libraries that did not appear in the scanelf output.
+                       # This is known to happen with statically linked libraries.
+                       # Generate dummy lines for these, so we can assume that every
+                       # preserved library has an entry in self._obj_properties. This
+                       # is important in order to prevent findConsumers from raising
+                       # an unwanted KeyError.
+                       for x in plibs:
+                               lines.append(";".join(['', x, '', '', '']))
+
+               for l in lines:
+                       l = l.rstrip("\n")
+                       if not l:
+                               continue
+                       fields = l.split(";")
+                       if len(fields) < 5:
+                               writemsg_level(_("\nWrong number of fields " \
+                                       "in %s: %s\n\n") % (self._needed_aux_key, l),
+                                       level=logging.ERROR, noiselevel=-1)
+                               continue
+                       arch = fields[0]
+                       obj = fields[1]
+                       soname = fields[2]
+                       path = set([normalize_path(x) \
+                               for x in filter(None, fields[3].replace(
+                               "${ORIGIN}", os.path.dirname(obj)).replace(
+                               "$ORIGIN", os.path.dirname(obj)).split(":"))])
+                       needed = [x for x in fields[4].split(",") if x]
+
+                       obj_key = self._obj_key(obj)
+                       indexed = True
+                       myprops = obj_properties.get(obj_key)
+                       if myprops is None:
+                               indexed = False
+                               myprops = (arch, needed, path, soname, set())
+                               obj_properties[obj_key] = myprops
+                       # All object paths are added into the obj_properties tuple.
+                       myprops[4].add(obj)
+
+                       # Don't index the same file more than once since only one
+                       # set of data can be correct and therefore mixing data
+                       # may corrupt the index (include_file overrides previously
+                       # installed).
+                       if indexed:
+                               continue
+
+                       arch_map = libs.get(arch)
+                       if arch_map is None:
+                               arch_map = {}
+                               libs[arch] = arch_map
+                       if soname:
+                               soname_map = arch_map.get(soname)
+                               if soname_map is None:
+                                       soname_map = self._soname_map_class(
+                                               providers=set(), consumers=set())
+                                       arch_map[soname] = soname_map
+                               soname_map.providers.add(obj_key)
+                       for needed_soname in needed:
+                               soname_map = arch_map.get(needed_soname)
+                               if soname_map is None:
+                                       soname_map = self._soname_map_class(
+                                               providers=set(), consumers=set())
+                                       arch_map[needed_soname] = soname_map
+                               soname_map.consumers.add(obj_key)
+
+       def listBrokenBinaries(self, debug=False):
+               """
+               Find binaries and their needed sonames, which have no providers.
+
+               @param debug: Boolean to enable debug output
+               @type debug: Boolean
+               @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])})
+               @return: The return value is an object -> set-of-sonames mapping, where
+                       object is a broken binary and the set consists of sonames needed by
+                       object that have no corresponding libraries to fulfill the dependency.
+
+               """
+
+               os = _os_merge
+
+               class _LibraryCache(object):
+
+                       """
+                       Caches properties associated with paths.
+
+                       The purpose of this class is to prevent multiple instances of
+                       _ObjectKey for the same paths.
+
+                       """
+
+                       def __init__(cache_self):
+                               cache_self.cache = {}
+
+                       def get(cache_self, obj):
+                               """
+                               Caches and returns properties associated with an object.
+
+                               @param obj: absolute path (can be symlink)
+                               @type obj: string (example: '/usr/lib/libfoo.so')
+                               @rtype: 4-tuple with types
+                                       (string or None, string or None, 2-tuple, Boolean)
+                               @return: 4-tuple with the following components:
+                                       1. arch as a string or None if it does not exist,
+                                       2. soname as a string or None if it does not exist,
+                                       3. obj_key as 2-tuple,
+                                       4. Boolean representing whether the object exists.
+                                       (example: ('libfoo.so.1', (123L, 456L), True))
+
+                               """
+                               if obj in cache_self.cache:
+                                       return cache_self.cache[obj]
+                               else:
+                                       obj_key = self._obj_key(obj)
+                                       # Check that the library exists on the filesystem.
+                                       if obj_key.file_exists():
+                                               # Get the arch and soname from LinkageMap._obj_properties if
+                                               # it exists. Otherwise, None.
+                                               arch, _needed, _path, soname, _objs = \
+                                                               self._obj_properties.get(obj_key, (None,)*5)
+                                               return cache_self.cache.setdefault(obj, \
+                                                               (arch, soname, obj_key, True))
+                                       else:
+                                               return cache_self.cache.setdefault(obj, \
+                                                               (None, None, obj_key, False))
+
+               rValue = {}
+               cache = _LibraryCache()
+               providers = self.listProviders()
+
+               # Iterate over all obj_keys and their providers.
+               for obj_key, sonames in providers.items():
+                       arch, _needed, path, _soname, objs = self._obj_properties[obj_key]
+                       path = path.union(self._defpath)
+                       # Iterate over each needed soname and the set of library paths that
+                       # fulfill the soname to determine if the dependency is broken.
+                       for soname, libraries in sonames.items():
+                               # validLibraries is used to store libraries, which satisfy soname,
+                               # so if no valid libraries are found, the soname is not satisfied
+                               # for obj_key.  If unsatisfied, objects associated with obj_key
+                               # must be emerged.
+                               validLibraries = set()
+                               # It could be the case that the library to satisfy the soname is
+                               # not in the obj's runpath, but a symlink to the library is (eg
+                               # libnvidia-tls.so.1 in nvidia-drivers).  Also, since LinkageMap
+                               # does not catalog symlinks, broken or missing symlinks may go
+                               # unnoticed.  As a result of these cases, check that a file with
+                               # the same name as the soname exists in obj's runpath.
+                               # XXX If we catalog symlinks in LinkageMap, this could be improved.
+                               for directory in path:
+                                       cachedArch, cachedSoname, cachedKey, cachedExists = \
+                                                       cache.get(os.path.join(directory, soname))
+                                       # Check that this library provides the needed soname.  Doing
+                                       # this, however, will cause consumers of libraries missing
+                                       # sonames to be unnecessarily emerged. (eg libmix.so)
+                                       if cachedSoname == soname and cachedArch == arch:
+                                               validLibraries.add(cachedKey)
+                                               if debug and cachedKey not in \
+                                                               set(map(self._obj_key_cache.get, libraries)):
+                                                       # XXX This is most often due to soname symlinks not in
+                                                       # a library's directory.  We could catalog symlinks in
+                                                       # LinkageMap to avoid checking for this edge case here.
+                                                       writemsg_level(
+                                                               _("Found provider outside of findProviders:") + \
+                                                               (" %s -> %s %s\n" % (os.path.join(directory, soname),
+                                                               self._obj_properties[cachedKey][4], libraries)),
+                                                               level=logging.DEBUG,
+                                                               noiselevel=-1)
+                                               # A valid library has been found, so there is no need to
+                                               # continue.
+                                               break
+                                       if debug and cachedArch == arch and \
+                                                       cachedKey in self._obj_properties:
+                                               writemsg_level((_("Broken symlink or missing/bad soname: " + \
+                                                       "%(dir_soname)s -> %(cachedKey)s " + \
+                                                       "with soname %(cachedSoname)s but expecting %(soname)s") % \
+                                                       {"dir_soname":os.path.join(directory, soname),
+                                                       "cachedKey": self._obj_properties[cachedKey],
+                                                       "cachedSoname": cachedSoname, "soname":soname}) + "\n",
+                                                       level=logging.DEBUG,
+                                                       noiselevel=-1)
+                               # This conditional checks if there are no libraries to satisfy the
+                               # soname (empty set).
+                               if not validLibraries:
+                                       for obj in objs:
+                                               rValue.setdefault(obj, set()).add(soname)
+                                       # If no valid libraries have been found by this point, then
+                                       # there are no files named with the soname within obj's runpath,
+                                       # but if there are libraries (from the providers mapping), it is
+                                       # likely that soname symlinks or the actual libraries are
+                                       # missing or broken.  Thus those libraries are added to rValue
+                                       # in order to emerge corrupt library packages.
+                                       for lib in libraries:
+                                               rValue.setdefault(lib, set()).add(soname)
+                                               if debug:
+                                                       if not os.path.isfile(lib):
+                                                               writemsg_level(_("Missing library:") + " %s\n" % (lib,),
+                                                                       level=logging.DEBUG,
+                                                                       noiselevel=-1)
+                                                       else:
+                                                               writemsg_level(_("Possibly missing symlink:") + \
+                                                                       "%s\n" % (os.path.join(os.path.dirname(lib), soname)),
+                                                                       level=logging.DEBUG,
+                                                                       noiselevel=-1)
+               return rValue
+
+       def listProviders(self):
+               """
+               Find the providers for all object keys in LinkageMap.
+
+               @rtype: dict (example:
+                       {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}})
+               @return: The return value is an object key -> providers mapping, where
+                       providers is a mapping of soname -> set-of-library-paths returned
+                       from the findProviders method.
+
+               """
+               rValue = {}
+               if not self._libs:
+                       self.rebuild()
+               # Iterate over all object keys within LinkageMap.
+               for obj_key in self._obj_properties:
+                       rValue.setdefault(obj_key, self.findProviders(obj_key))
+               return rValue
+
+       def isMasterLink(self, obj):
+               """
+               Determine whether an object is a master link.
+
+               @param obj: absolute path to an object
+               @type obj: string (example: '/usr/bin/foo')
+               @rtype: Boolean
+               @return:
+                       1. True if obj is a master link
+                       2. False if obj is not a master link
+
+               """
+               os = _os_merge
+               basename = os.path.basename(obj)
+               obj_key = self._obj_key(obj)
+               if obj_key not in self._obj_properties:
+                       raise KeyError("%s (%s) not in object list" % (obj_key, obj))
+               soname = self._obj_properties[obj_key][3]
+               return (len(basename) < len(soname))
+
+       def listLibraryObjects(self):
+               """
+               Return a list of library objects.
+
+               Known limitation: library objects lacking an soname are not included.
+
+               @rtype: list of strings
+               @return: list of paths to all providers
+
+               """
+               rValue = []
+               if not self._libs:
+                       self.rebuild()
+               for arch_map in self._libs.values():
+                       for soname_map in arch_map.values():
+                               for obj_key in soname_map.providers:
+                                       rValue.extend(self._obj_properties[obj_key][4])
+               return rValue
+
+       def getSoname(self, obj):
+               """
+               Return the soname associated with an object.
+
+               @param obj: absolute path to an object
+               @type obj: string (example: '/usr/bin/bar')
+               @rtype: string
+               @return: soname as a string
+
+               """
+               if not self._libs:
+                       self.rebuild()
+               if isinstance(obj, self._ObjectKey):
+                       obj_key = obj
+                       if obj_key not in self._obj_properties:
+                               raise KeyError("%s not in object list" % obj_key)
+                       return self._obj_properties[obj_key][3]
+               if obj not in self._obj_key_cache:
+                       raise KeyError("%s not in object list" % obj)
+               return self._obj_properties[self._obj_key_cache[obj]][3]
+
+       def findProviders(self, obj):
+               """
+               Find providers for an object or object key.
+
+               This method may be called with a key from _obj_properties.
+
+               In some cases, not all valid libraries are returned.  This may occur when
+               an soname symlink referencing a library is in an object's runpath while
+               the actual library is not.  We should consider cataloging symlinks within
+               LinkageMap as this would avoid those cases and would be a better model of
+               library dependencies (since the dynamic linker actually searches for
+               files named with the soname in the runpaths).
+
+               @param obj: absolute path to an object or a key from _obj_properties
+               @type obj: string (example: '/usr/bin/bar') or _ObjectKey
+               @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])})
+               @return: The return value is a soname -> set-of-library-paths, where
+               set-of-library-paths satisfy soname.
+
+               """
+
+               os = _os_merge
+
+               rValue = {}
+
+               if not self._libs:
+                       self.rebuild()
+
+               # Determine the obj_key from the arguments.
+               if isinstance(obj, self._ObjectKey):
+                       obj_key = obj
+                       if obj_key not in self._obj_properties:
+                               raise KeyError("%s not in object list" % obj_key)
+               else:
+                       obj_key = self._obj_key(obj)
+                       if obj_key not in self._obj_properties:
+                               raise KeyError("%s (%s) not in object list" % (obj_key, obj))
+
+               arch, needed, path, _soname, _objs = self._obj_properties[obj_key]
+               path_keys = set(self._path_key(x) for x in path.union(self._defpath))
+               for soname in needed:
+                       rValue[soname] = set()
+                       if arch not in self._libs or soname not in self._libs[arch]:
+                               continue
+                       # For each potential provider of the soname, add it to rValue if it
+                       # resides in the obj's runpath.
+                       for provider_key in self._libs[arch][soname].providers:
+                               providers = self._obj_properties[provider_key][4]
+                               for provider in providers:
+                                       if self._path_key(os.path.dirname(provider)) in path_keys:
+                                               rValue[soname].add(provider)
+               return rValue
+
+       def findConsumers(self, obj):
+               """
+               Find consumers of an object or object key.
+
+               This method may be called with a key from _obj_properties.  If this
+               method is going to be called with an object key, to avoid not catching
+               shadowed libraries, do not pass new _ObjectKey instances to this method.
+               Instead pass the obj as a string.
+
+               In some cases, not all consumers are returned.  This may occur when
+               an soname symlink referencing a library is in an object's runpath while
+               the actual library is not. For example, this problem is noticeable for
+               binutils since its libraries are added to the path via symlinks that
+               are generated in the /usr/$CHOST/lib/ directory by binutils-config.
+               Failure to recognize consumers of these symlinks makes preserve-libs
+               fail to preserve binutils libs that are needed by these unrecognized
+               consumers.
+
+               Note that library consumption via dlopen (common for kde plugins) is
+               currently undetected. However, it is possible to use the
+               corresponding libtool archive (*.la) files to detect such consumers
+               (revdep-rebuild is able to detect them).
+
+               @param obj: absolute path to an object or a key from _obj_properties
+               @type obj: string (example: '/usr/bin/bar') or _ObjectKey
+               @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
+               @return: The return value is a set of paths of the objects that
+               consume obj.
+
+               """
+
+               os = _os_merge
+
+               rValue = set()
+
+               if not self._libs:
+                       self.rebuild()
+
+               # Determine the obj_key and the set of objects matching the arguments.
+               if isinstance(obj, self._ObjectKey):
+                       obj_key = obj
+                       if obj_key not in self._obj_properties:
+                               raise KeyError("%s not in object list" % obj_key)
+                       objs = self._obj_properties[obj_key][4]
+               else:
+                       objs = set([obj])
+                       obj_key = self._obj_key(obj)
+                       if obj_key not in self._obj_properties:
+                               raise KeyError("%s (%s) not in object list" % (obj_key, obj))
+
+               # If there is another version of this lib with the
+               # same soname and the master link points to that
+               # other version, this lib will be shadowed and won't
+               # have any consumers.
+               if not isinstance(obj, self._ObjectKey):
+                       soname = self._obj_properties[obj_key][3]
+                       master_link = os.path.join(self._eroot,
+                               os.path.dirname(obj).lstrip(os.path.sep), soname)
+                       try:
+                               master_st = os.stat(master_link)
+                               obj_st = os.stat(obj)
+                       except OSError:
+                               pass
+                       else:
+                               if (obj_st.st_dev, obj_st.st_ino) != \
+                                       (master_st.st_dev, master_st.st_ino):
+                                       return set()
+
+               # Determine the directory(ies) from the set of objects.
+               objs_dir_keys = set(self._path_key(os.path.dirname(x)) for x in objs)
+               defpath_keys = set(self._path_key(x) for x in self._defpath)
+
+               arch, _needed, _path, soname, _objs = self._obj_properties[obj_key]
+               if arch in self._libs and soname in self._libs[arch]:
+                       # For each potential consumer, add it to rValue if an object from the
+                       # arguments resides in the consumer's runpath.
+                       for consumer_key in self._libs[arch][soname].consumers:
+                               _arch, _needed, path, _soname, consumer_objs = \
+                                               self._obj_properties[consumer_key]
+                               path_keys = defpath_keys.union(self._path_key(x) for x in path)
+                               if objs_dir_keys.intersection(path_keys):
+                                       rValue.update(consumer_objs)
+               return rValue