From 35c837dac4af646ca36c2f08a9586a07605a8e46 Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Fri, 22 Aug 2008 06:21:26 +0000 Subject: [PATCH] Optimize LinkageMap to use tuples of device and inode numbers from stat calls, instead of paths from realpath, as unique keys for identification of files. This is the same approach used by dblink.isowner() for cases in which path comparison fails due to symlinks. Thanks to Lucian Poston for submitting this patch (along with the missing-rebuild package set which I haven't merged yet). These patches are hosted in the following location: http://repo.or.cz/w/revdep-rebuild-reimplementation.git?a=tree;h=refs/heads/rc3;hb=refs/heads/rc3 svn path=/main/trunk/; revision=11447 --- pym/portage/dbapi/vartree.py | 430 +++++++++++++++++++++-------------- 1 file changed, 264 insertions(+), 166 deletions(-) diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py index 0abe96846..847349818 100644 --- a/pym/portage/dbapi/vartree.py +++ b/pym/portage/dbapi/vartree.py @@ -143,10 +143,12 @@ class LinkageMap(object): self._dbapi = vardbapi self._libs = {} self._obj_properties = {} - self._defpath = getlibpaths() - + self._defpath = set(getlibpaths()) + self._obj_key_cache = {} + def rebuild(self, include_file=None): libs = {} + obj_key_cache = {} obj_properties = {} lines = [] for cpv in self._dbapi.cpv_all(): @@ -176,29 +178,61 @@ class LinkageMap(object): # insufficient field length continue arch = fields[0] - obj = os.path.realpath(fields[1]) + obj = fields[1] + obj_key = self._generateObjKey(obj) soname = fields[2] - path = filter(None, fields[3].replace( + path = set([normalize_path(x) + for x in filter(None, fields[3].replace( "${ORIGIN}", os.path.dirname(obj)).replace( - "$ORIGIN", os.path.dirname(obj)).split(":")) + "$ORIGIN", os.path.dirname(obj)).split(":"))]) needed = filter(None, fields[4].split(",")) if soname: - libs.setdefault(soname, {arch: {"providers": [], "consumers": []}}) - libs[soname].setdefault(arch, {"providers": [], "consumers": []}) - libs[soname][arch]["providers"].append(obj) + libs.setdefault(soname, \ + {arch: {"providers": set(), "consumers": set()}}) + libs[soname].setdefault(arch, \ + {"providers": set(), "consumers": set()}) + libs[soname][arch]["providers"].add(obj_key) for x in needed: - libs.setdefault(x, {arch: {"providers": [], "consumers": []}}) - libs[x].setdefault(arch, {"providers": [], "consumers": []}) - libs[x][arch]["consumers"].append(obj) - obj_properties[obj] = (arch, needed, path, soname) - + libs.setdefault(x, \ + {arch: {"providers": set(), "consumers": set()}}) + libs[x].setdefault(arch, {"providers": set(), "consumers": set()}) + libs[x][arch]["consumers"].add(obj_key) + obj_key_cache.setdefault(obj, obj_key) + # All object paths are added into the obj_properties tuple + obj_properties.setdefault(obj_key, \ + (arch, needed, path, soname, set()))[4].add(obj) + self._libs = libs self._obj_properties = obj_properties + self._obj_key_cache = obj_key_cache - def listBrokenBinaries(self): + def _generateObjKey(self, obj): + """ + Generate obj key for a given object. + + @param obj: path to an existing file + @type obj: string (example: '/usr/bin/bar') + @rtype: 2-tuple of longs if obj exists. string if obj does not exist. + @return: + 1. 2-tuple of obj's inode and device from a stat call, if obj exists. + 2. realpath of object if obj does not exist. + + """ + try: + obj_st = os.stat(obj) + except OSError: + # Use the realpath as the key if the file does not exists on the + # filesystem. + return os.path.realpath(obj) + # Return a tuple of the device and inode. + return (obj_st.st_dev, obj_st.st_ino) + + def listBrokenBinaries(self, debug=False): """ Find binaries and their needed sonames, which have no providers. + @param debug: Boolean to enable debug output + @type debug: Boolean @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])}) @return: The return value is an object -> set-of-sonames mapping, where object is a broken binary and the set consists of sonames needed by @@ -208,65 +242,66 @@ class LinkageMap(object): class LibraryCache(object): """ - Caches sonames and realpaths associated with paths. + Caches properties associated with paths. The purpose of this class is to prevent multiple calls of - os.path.realpath and os.path.isfile on the same paths. + _generateObjKey on the same paths. """ def __init__(cache_self): cache_self.cache = {} - def get(cache_self, path): + def get(cache_self, obj): """ - Caches and returns the soname and realpath for a path. - - @param path: absolute path (can be symlink) - @type path: string (example: '/usr/lib/libfoo.so') - @rtype: 3-tuple with types (string or None, string, boolean) - @return: 3-tuple with the following components: - 1. soname as a string or None if it does not exist, - 2. realpath as a string, - 3. the result of os.path.isfile(realpath) - (example: ('libfoo.so.1', '/usr/lib/libfoo.so.1.5.1', True)) + Caches and returns properties associated with an object. + + @param obj: absolute path (can be symlink) + @type obj: string (example: '/usr/lib/libfoo.so') + @rtype: 4-tuple with types + (string or None, string or None, 2-tuple, Boolean) + @return: 4-tuple with the following components: + 1. arch as a string or None if it does not exist, + 2. soname as a string or None if it does not exist, + 3. obj_key as 2-tuple, + 4. Boolean representing whether the object exists. + (example: ('libfoo.so.1', (123L, 456L), True)) """ - if path in cache_self.cache: - return cache_self.cache[path] + if obj in cache_self.cache: + return cache_self.cache[obj] else: - realpath = os.path.realpath(path) + if obj in self._obj_key_cache: + obj_key = self._obj_key_cache.get(obj) + else: + obj_key = self._generateObjKey(obj) # Check that the library exists on the filesystem. - if os.path.isfile(realpath): - # Get the soname from LinkageMap._obj_properties if it - # exists. Otherwise, None. - soname = self._obj_properties.get(realpath, (None,)*3)[3] - # Both path and realpath are cached and the result is - # returned. - cache_self.cache.setdefault(realpath, \ - (soname, realpath, True)) - return cache_self.cache.setdefault(path, \ - (soname, realpath, True)) + if isinstance(obj_key, tuple): + # Get the arch and soname from LinkageMap._obj_properties if + # it exists. Otherwise, None. + arch, _, _, soname, _ = \ + self._obj_properties.get(obj_key, (None,)*5) + return cache_self.cache.setdefault(obj, \ + (arch, soname, obj_key, True)) else: - # realpath is not cached here, because the majority of cases - # where realpath is not a file, path is the same as realpath. - # Thus storing twice slows down the cache performance. - return cache_self.cache.setdefault(path, \ - (None, realpath, False)) + return cache_self.cache.setdefault(obj, \ + (None, None, obj_key, False)) - debug = False rValue = {} cache = LibraryCache() providers = self.listProviders() - # Iterate over all binaries and their providers. - for obj, sonames in providers.items(): + # Iterate over all obj_keys and their providers. + for obj_key, sonames in providers.items(): + arch, _, path, _, objs = self._obj_properties[obj_key] + path = path.union(self._defpath) # Iterate over each needed soname and the set of library paths that # fulfill the soname to determine if the dependency is broken. for soname, libraries in sonames.items(): # validLibraries is used to store libraries, which satisfy soname, # so if no valid libraries are found, the soname is not satisfied - # for obj. Thus obj must be emerged. + # for obj_key. If unsatisfied, objects associated with obj_key + # must be emerged. validLibraries = set() # It could be the case that the library to satisfy the soname is # not in the obj's runpath, but a symlink to the library is (eg @@ -274,67 +309,60 @@ class LinkageMap(object): # does not catalog symlinks, broken or missing symlinks may go # unnoticed. As a result of these cases, check that a file with # the same name as the soname exists in obj's runpath. - path = self._obj_properties[obj][2] + self._defpath - for d in path: - cachedSoname, cachedRealpath, cachedExists = \ - cache.get(os.path.join(d, soname)) - # Check that the this library provides the needed soname. Doing + # XXX If we catalog symlinks in LinkageMap, this could be improved. + for directory in path: + cachedArch, cachedSoname, cachedKey, cachedExists = \ + cache.get(os.path.join(directory, soname)) + # Check that this library provides the needed soname. Doing # this, however, will cause consumers of libraries missing # sonames to be unnecessarily emerged. (eg libmix.so) - if cachedSoname == soname: - validLibraries.add(cachedRealpath) - if debug and cachedRealpath not in libraries: + if cachedSoname == soname and cachedArch == arch: + validLibraries.add(cachedKey) + if debug and cachedKey not in \ + set(map(self._obj_key_cache.get, libraries)): + # XXX This is most often due to soname symlinks not in + # a library's directory. We could catalog symlinks in + # LinkageMap to avoid checking for this edge case here. print "Found provider outside of findProviders:", \ - os.path.join(d, soname), "->", cachedRealpath + os.path.join(directory, soname), "->", \ + self._obj_properties[cachedKey][4], libraries # A valid library has been found, so there is no need to # continue. break - if debug and cachedRealpath in self._obj_properties: + if debug and cachedArch == arch and \ + cachedKey in self._obj_properties: print "Broken symlink or missing/bad soname:", \ - os.path.join(d, soname), '->', cachedRealpath, \ - "with soname", cachedSoname, "but expecting", soname + os.path.join(directory, soname), '->', \ + self._obj_properties[cachedKey], "with soname", \ + cachedSoname, "but expecting", soname # This conditional checks if there are no libraries to satisfy the # soname (empty set). if not validLibraries: - rValue.setdefault(obj, set()).add(soname) + for obj in objs: + rValue.setdefault(obj, set()).add(soname) # If no valid libraries have been found by this point, then # there are no files named with the soname within obj's runpath, # but if there are libraries (from the providers mapping), it is - # likely that symlinks or the actual libraries are missing. - # Thus possible symlinks and missing libraries are added to - # rValue in order to emerge corrupt library packages. + # likely that soname symlinks or the actual libraries are + # missing or broken. Thus those libraries are added to rValue + # in order to emerge corrupt library packages. for lib in libraries: - cachedSoname, cachedRealpath, cachedExists = cache.get(lib) - if not cachedExists: - # The library's package needs to be emerged to repair the - # missing library. - rValue.setdefault(lib, set()).add(soname) - else: - # A library providing the soname exists in the obj's - # runpath, but no file named as the soname exists, so add - # the path constructed from the lib's directory and the - # soname to rValue to fix cases of vanishing (or modified) - # symlinks. This path is not guaranteed to exist, but it - # follows the symlink convention found in the majority of - # packages. - rValue.setdefault(os.path.join(os.path.dirname(lib), \ - soname), set()).add(soname) + rValue.setdefault(lib, set()).add(soname) if debug: - if not cachedExists: + if not os.path.isfile(lib): print "Missing library:", lib else: print "Possibly missing symlink:", \ os.path.join(os.path.dirname(lib), soname) - return rValue def listProviders(self): """ - Find the providers for all binaries. + Find the providers for all object keys in LinkageMap. @rtype: dict (example: - {'/usr/bin/foo': {'libbar.so': set(['/lib/libbar.so.1.5'])}}) - @return: The return value is an object -> providers mapping, where + {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}}) + @return: The return value is an object key -> providers mapping, where providers is a mapping of soname -> set-of-library-paths returned from the findProviders method. @@ -342,118 +370,188 @@ class LinkageMap(object): rValue = {} if not self._libs: self.rebuild() - # Iterate over all binaries within LinkageMap. - for obj in self._obj_properties: - rValue.setdefault(obj, self.findProviders(obj)) + # Iterate over all object keys within LinkageMap. + for obj_key in self._obj_properties: + rValue.setdefault(obj_key, self.findProviders(obj_key=obj_key)) return rValue def isMasterLink(self, obj): + """ + Determine whether an object is a master link. + + @param obj: absolute path to an object + @type obj: string (example: '/usr/bin/foo') + @rtype: Boolean + @return: + 1. True if obj is a master link + 2. False if obj is not a master link + + """ basename = os.path.basename(obj) - if obj not in self._obj_properties: - obj = os.path.realpath(obj) - if obj not in self._obj_properties: - raise KeyError("%s not in object list" % obj) - soname = self._obj_properties[obj][3] + obj_key = self._generateObjKey(obj) + if obj_key not in self._obj_properties: + raise KeyError("%s (%s) not in object list" % (obj_key, obj)) + soname = self._obj_properties[obj_key][3] return (len(basename) < len(soname)) - + def listLibraryObjects(self): + """ + Return a list of library objects. + + Known limitation: library objects lacking an soname are not included. + + @rtype: list of strings + @return: list of paths to all providers + + """ rValue = [] if not self._libs: self.rebuild() for soname in self._libs: for arch in self._libs[soname]: - rValue.extend(self._libs[soname][arch]["providers"]) + for obj_key in self._libs[soname][arch]["providers"]: + rValue.extend(self._obj_properties[obj_key][4]) return rValue def getSoname(self, obj): + """ + Return the soname associated with an object. + + @param obj: absolute path to an object + @type obj: string (example: '/usr/bin/bar') + @rtype: string + @return: soname as a string + + """ if not self._libs: self.rebuild() - if obj not in self._obj_properties: - obj = os.path.realpath(obj) - if obj not in self._obj_properties: - raise KeyError("%s not in object list" % obj) - arch, needed, path, soname = self._obj_properties[obj] - return soname - - def findProviders(self, obj): - if not self._libs: - self.rebuild() + if obj not in self._obj_key_cache: + raise KeyError("%s not in object list" % obj) + return self._obj_properties[self._obj_key_cache[obj]][3] - realpath_cache = {} - def realpath(p): - real_path = realpath_cache.get(p) - if real_path is None: - real_path = os.path.realpath(p) - realpath_cache[p] = real_path - return real_path + def findProviders(self, obj=None, obj_key=None): + """ + Find providers for an object or object key. + + This method should be called with either an obj or obj_key. If called + with both, the obj_key is ignored. If called with neither, KeyError is + raised as if an invalid obj was passed. + + In some cases, not all valid libraries are returned. This may occur when + an soname symlink referencing a library is in an object's runpath while + the actual library is not. + @param obj: absolute path to an object + @type obj: string (example: '/usr/bin/bar') + @param obj_key: key from LinkageMap._generateObjKey + @type obj_key: 2-tuple of longs or string + @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])}) + @return: The return value is a soname -> set-of-library-paths, where + set-of-library-paths satisfy soname. + + """ rValue = {} - if obj not in self._obj_properties: - obj = realpath(obj) - if obj not in self._obj_properties: - raise KeyError("%s not in object list" % obj) - arch, needed, path, soname = self._obj_properties[obj] - path = path[:] - path.extend(self._defpath) - path = set(realpath(x) for x in path) - for x in needed: - rValue[x] = set() - if x not in self._libs or arch not in self._libs[x]: + + if not self._libs: + self.rebuild() + + # Determine the obj_key from the arguments. + if obj is not None: + obj_key = self._obj_key_cache.get(obj) + if obj_key not in self._obj_properties: + obj_key = self._generateObjKey(obj) + if obj_key not in self._obj_properties: + raise KeyError("%s (%s) not in object list" % (obj_key, obj)) + elif obj_key not in self._obj_properties: + raise KeyError("%s not in object list" % obj_key) + + arch, needed, path, _, _ = self._obj_properties[obj_key] + path = path.union(self._defpath) + for soname in needed: + rValue[soname] = set() + if soname not in self._libs or arch not in self._libs[soname]: continue - for y in self._libs[x][arch]["providers"]: - if x[0] == os.sep and realpath(x) == realpath(y): - rValue[x].add(y) - elif realpath(os.path.dirname(y)) in path: - rValue[x].add(y) + # For each potential provider of the soname, add it to rValue if it + # resides in the obj's runpath. + for provider_key in self._libs[soname][arch]["providers"]: + providers = self._obj_properties[provider_key][4] + for provider in providers: + if os.path.dirname(provider) in path: + rValue[soname].add(provider) return rValue - - def findConsumers(self, obj): + + def findConsumers(self, obj=None, obj_key=None): + """ + Find consumers of an object or object key. + + This method should be called with either an obj or obj_key. If called + with both, the obj_key is ignored. If called with neither, KeyError is + raised as if an invalid obj was passed. + + In some cases, not all consumers are returned. This may occur when + an soname symlink referencing a library is in an object's runpath while + the actual library is not. + + @param obj: absolute path to an object + @type obj: string (example: '/usr/bin/bar') + @param obj_key: key from LinkageMap._generateObjKey + @type obj_key: 2-tuple of longs or string + @rtype: set of strings (example: ) + @return: The return value is a soname -> set-of-library-paths, where + set-of-library-paths satisfy soname. + + """ + rValue = set() + if not self._libs: self.rebuild() - realpath_cache = {} - def realpath(p): - real_path = realpath_cache.get(p) - if real_path is None: - real_path = os.path.realpath(p) - realpath_cache[p] = real_path - return real_path + # Determine the obj_key and the set of objects matching the arguments. + if obj is not None: + objs = set([obj]) + obj_key = self._obj_key_cache.get(obj) + if obj_key not in self._obj_properties: + obj_key = self._generateObjKey(obj) + if obj_key not in self._obj_properties: + raise KeyError("%s (%s) not in object list" % (obj_key, obj)) + else: + if obj_key not in self._obj_properties: + raise KeyError("%s not in object list" % obj_key) + objs = self._obj_properties[obj_key][4] - if obj not in self._obj_properties: - obj = realpath(obj) - if obj not in self._obj_properties: - raise KeyError("%s not in object list" % obj) + # Determine the directory(ies) from the set of objects. + objs_dirs = set([os.path.dirname(x) for x in objs]) # If there is another version of this lib with the # same soname and the master link points to that # other version, this lib will be shadowed and won't # have any consumers. - arch, needed, path, soname = self._obj_properties[obj] - obj_dir = os.path.dirname(obj) - master_link = os.path.join(obj_dir, soname) - try: - master_st = os.stat(master_link) - obj_st = os.stat(obj) - except OSError: - pass - else: - if (obj_st.st_dev, obj_st.st_ino) != \ - (master_st.st_dev, master_st.st_ino): - return set() - - rValue = set() - for soname in self._libs: - for arch in self._libs[soname]: - if obj in self._libs[soname][arch]["providers"]: - for x in self._libs[soname][arch]["consumers"]: - path = self._obj_properties[x][2] - path = [realpath(y) for y in path+self._defpath] - if soname[0] == os.sep and realpath(soname) == realpath(obj): - rValue.add(x) - elif realpath(obj_dir) in path: - rValue.add(x) + if obj is not None: + soname = self._obj_properties[obj_key][3] + obj_dir = os.path.dirname(obj) + master_link = os.path.join(obj_dir, soname) + try: + master_st = os.stat(master_link) + obj_st = os.stat(obj) + except OSError: + pass + else: + if (obj_st.st_dev, obj_st.st_ino) != \ + (master_st.st_dev, master_st.st_ino): + return set() + + arch, _, _, soname, _ = self._obj_properties[obj_key] + if soname in self._libs and arch in self._libs[soname]: + # For each potential consumer, add it to rValue if an object from the + # arguments resides in the consumer's runpath. + for consumer_key in self._libs[soname][arch]["consumers"]: + _, _, path, _, consumer_objs = \ + self._obj_properties[consumer_key] + path = path.union(self._defpath) + if objs_dirs.intersection(path): + rValue.update(consumer_objs) return rValue - + class vardbapi(dbapi): _excluded_dirs = ["CVS", "lost+found"] -- 2.26.2