#!/usr/bin/python

"""Analysis phase for revdep-rebuild: collects system binaries and
libraries, checks their dynamic-linking consistency with scanelf and
returns the set of installed packages that own the broken files.
"""

import os
import re
import glob
import logging

from portage.output import bold, blue, yellow, green

from stuff import scan
from collect import (prepare_search_dirs, parse_revdep_config,
        collect_libraries_from_dir, collect_binaries_from_dir)
# save_cache was called below but 'cache' was never imported (NameError)
from cache import save_cache
from assign import assign_packages


USE_TMP_FILES = True  # if program should use temporary files from previous run
CMD_MAX_ARGS = 1000   # number of maximum allowed files to be parsed at once


def prepare_checks(files_to_check, libraries, bits):
    ''' Calls scanelf for all files_to_check, then returns found libraries
    and dependencies.

    @param files_to_check: list of file paths to scan
    @param libraries: kept for interface stability (unused here)
    @param bits: ELF class to restrict the scan to (32 or 64)
    @return tuple (libs, dependencies) where dependencies[i] is the list
        of files (from files_to_check) that link against libs[i]
    '''
    libs = []          # libs found by scanelf
    dependencies = []  # dependencies[i] lists the files using libs[i]

    for line in scan(['-M', str(bits), '-nBF', '%F %n'], files_to_check, CMD_MAX_ARGS):
        r = line.strip().split(' ')
        if len(r) < 2:  # no dependencies?
            continue

        deps = r[1].split(',')
        for d in deps:
            if d in libs:
                i = libs.index(d)
                dependencies[i].append(r[0])
            else:
                libs.append(d)
                dependencies.append([r[0], ])
    return (libs, dependencies)


def extract_dependencies_from_la(la, libraries, to_check, logger):
    ''' Scans .la files for dependency_libs entries referencing libraries
    that are no longer installed.

    @param la: list of .la file paths
    @param libraries: list of known library paths/names
    @param to_check: optional iterable restricting which libs are reported
    @param logger: logging.Logger-like object
    @return list of broken .la file paths
    '''
    broken = []
    for f in la:
        if not os.path.exists(f):
            continue

        for line in open(f, 'r').readlines():
            line = line.strip()
            if line.startswith('dependency_libs='):
                m = re.match("dependency_libs='([^']+)'", line)
                if m is not None:
                    for el in m.group(1).split(' '):
                        el = el.strip()
                        if len(el) < 1 or el.startswith('-'):
                            continue

                        if el in la or el in libraries:
                            pass
                        else:
                            if to_check:
                                _break = False
                                for tc in to_check:
                                    if tc in el:
                                        _break = True
                                        break
                                if not _break:
                                    continue

                            # closing paren was missing in the message
                            logger.info(yellow(' * ') + f +
                                    ' is broken (requires: ' + bold(el) + ')')
                            broken.append(f)
    return broken


def find_broken(found_libs, system_libraries, to_check):
    ''' Search for broken libraries.

    Check if system_libraries contains found_libs, where
    system_libraries is a list of absolute paths and found_libs
    is a list of library names.

    @return list of indexes (into found_libs) of broken libraries
    '''
    # joining the libraries and searching the resulting string is much
    # faster than nested for-loops
    broken = []
    sl = '|'.join(system_libraries)

    if not to_check:
        for f in found_libs:
            if f + '|' not in sl:
                broken.append(found_libs.index(f))
    else:
        for tc in to_check:
            for f in found_libs:
                if tc in f:  # and f+'|' not in sl:
                    broken.append(found_libs.index(f))

    return broken


def main_checks(found_libs, broken, dependencies, logger):
    ''' Checks for broken dependencies.

    found_libs have to be the same as returned by prepare_checks,
    broken is the index list returned by find_broken and
    dependencies is the value returned by prepare_checks.

    @return list of file paths that need their owners rebuilt
    '''
    broken_pathes = []

    for b in broken:
        f = found_libs[b]
        logger.info('Broken files that requires: ' + bold(f))
        for d in dependencies[b]:
            logger.info(yellow(' * ') + d)
            broken_pathes.append(d)
    return broken_pathes


def analyse(logger=logging, libraries=None, la_libraries=None,
        libraries_links=None, binaries=None, _libs_to_check=None):
    """Main program body.  It will collect all info and determine the
    pkgs needing rebuilding.

    @param logger: logger used for logging messages, instance of
        logging.Logger class.  Can be logging (RootLogger).
    @param _libs_to_check: libraries that need to be checked only
    @rtype list: list of pkgs that need rebuilding
    """
    if _libs_to_check is None:
        # None instead of a mutable default set() shared across calls
        _libs_to_check = set()

    if libraries and la_libraries and libraries_links and binaries:
        logger.info(blue(' * ') + bold('Found a valid cache, skipping collecting phase'))
    else:
        #TODO: add partial cache (for ex. only libraries) when found for some reason
        logger.info(green(' * ') + bold('Collecting system binaries and libraries'))
        bin_dirs, lib_dirs = prepare_search_dirs(logger)

        masked_dirs, masked_files, ld = parse_revdep_config()
        lib_dirs = lib_dirs.union(ld)
        bin_dirs = bin_dirs.union(ld)
        masked_dirs = masked_dirs.union(set(['/lib/modules', '/lib32/modules', '/lib64/modules', ]))

        logger.info(green(' * ') + bold('Collecting dynamic linking informations'))
        libraries, la_libraries, libraries_links, symlink_pairs = \
                collect_libraries_from_dir(lib_dirs, masked_dirs, logger)
        binaries = collect_binaries_from_dir(bin_dirs, masked_dirs, logger)

        if USE_TMP_FILES:
            save_cache(to_save={'libraries': libraries,
                    'la_libraries': la_libraries,
                    'libraries_links': libraries_links,
                    'binaries': binaries})

    logger.debug('Found ' + str(len(libraries)) + ' libraries (+' +
            str(len(libraries_links)) + ' symlinks) and ' +
            str(len(binaries)) + ' binaries')

    logger.warn(green(' * ') + bold('Checking dynamic linking consistency'))
    logger.debug('Search for ' + str(len(binaries) + len(libraries)) +
            ' within ' + str(len(libraries) + len(libraries_links)))
    libs_and_bins = libraries + binaries

    found_libs = []
    dependencies = []

    # the previous platform.architecture() probe was dead code: 'bits' is
    # unconditionally recomputed from the /libXX directory name below
    for av_bits in glob.glob('/lib[0-9]*') or ('/lib32', ):
        bits = int(av_bits[4:])
        _libraries = scan(['-M', str(bits), '-BF', '%F'],
                libraries + libraries_links, CMD_MAX_ARGS)

        found_libs, dependencies = prepare_checks(libs_and_bins, _libraries, bits)

        # NOTE(review): results of earlier loop iterations are overwritten;
        # only the last ELF class scanned is reported -- preserved as-is,
        # confirm whether aggregation across bitnesses was intended
        broken = find_broken(found_libs, _libraries, _libs_to_check)
        broken_la = extract_dependencies_from_la(la_libraries, _libraries,
                _libs_to_check, logger)

        bits //= 2  # same integer result as the old 'bits /= 2; int(bits)'

    broken_pathes = main_checks(found_libs, broken, dependencies, logger)
    broken_pathes += broken_la

    logger.warn(green(' * ') + bold('Assign files to packages'))

    return assign_packages(broken_pathes, logger)


if __name__ == '__main__':
    print("This script shouldn't be called directly")
#!/usr/bin/python

"""Map broken files back to the installed packages that own them, and
reduce cpv lists to cp:slot atoms using portage."""

import os
import re
import logging
import portage
from portage import portdb
from portage.output import bold, red, blue, yellow, green, nocolor


def assign_packages(broken, logger=logging):
    ''' Finds and returns packages that own files placed in broken.

    @param broken: iterable of absolute file paths
    @param logger: logging.Logger-like object
    @return set of "category/package-version" strings
    '''
    assigned = set()
    # set gives O(1) membership tests; the CONTENTS scan below performs
    # one test per installed file
    broken = set(broken)
    for group in os.listdir('/var/db/pkg'):
        for pkg in os.listdir('/var/db/pkg/' + group):
            f = '/var/db/pkg/' + group + '/' + pkg + '/CONTENTS'
            if os.path.exists(f):
                try:
                    with open(f, 'r') as cnt:
                        for line in cnt.readlines():
                            m = re.match('^obj (/[^ ]+)', line)
                            if m is not None:
                                m = m.group(1)
                                if m in broken:
                                    found = group + '/' + pkg
                                    if found not in assigned:
                                        assigned.add(found)
                                        logger.info('\t' + m + ' -> ' + bold(found))
                except Exception:
                    logger.warn(red(' !! Failed to read ' + f))

    return assigned


def get_best_match(cpv, cp, logger):
    """Tries to find another version of the pkg with the same slot
    as the deprecated installed version.  Failing that attempt to get any
    version of the same app.

    @param cpv: string
    @param cp: string
    @param logger: logging.Logger-like object
    @rtype tuple: ([cpv,...], SLOT)
    """
    # aux_get returns a list with one value per requested key; the atom
    # below must be built from slot[0], not from the list itself
    slot = portage.db[portage.root]["vartree"].dbapi.aux_get(cpv, ["SLOT"])
    logger.warn(yellow('Warning: ebuild "' + cpv + '" not found.'))
    logger.info('Looking for %s:%s' % (cp, slot[0]))
    try:
        m = portdb.match('%s:%s' % (cp, slot[0]))
    except portage.exception.InvalidAtom:
        m = None

    if not m:
        logger.warn(red('!!') + ' ' +
                yellow('Could not find ebuild for %s:%s' % (cp, slot[0])))
        slot = ['']
        m = portdb.match(cp)
        if not m:
            logger.warn(red('!!') + ' ' +
                    yellow('Could not find ebuild for ' + cp))
    return m, slot


def get_slotted_cps(cpvs, logger):
    """Uses portage to reduce the cpv list into a cp:slot list and returns it
    """
    from portage.versions import catpkgsplit
    from portage import portdb

    cps = []
    for cpv in cpvs:
        parts = catpkgsplit(cpv)
        cp = parts[0] + '/' + parts[1]
        try:
            slot = portdb.aux_get(cpv, ["SLOT"])
        except KeyError:
            m, slot = get_best_match(cpv, cp, logger)
            if not m:
                logger.warn(red("Installed package: %s is no longer available" % cp))
                continue

        if slot[0]:
            cps.append(cp + ":" + slot[0])
        else:
            cps.append(cp)

    return cps


if __name__ == '__main__':
    print('Nothing to call here')
#!/usr/bin/python
# NOTE: the original shebang was '#!/bin/bash' although this is Python

"""Simple on-disk cache for the results of the revdep-rebuild collecting
phase (lists of libraries, .la files, symlinks and binaries)."""

import os
import time
import logging


DEFAULT_TMP_DIR = '/tmp/revdep-rebuild'  # cache default location


def read_cache(temp_path=DEFAULT_TMP_DIR):
    ''' Reads cache information needed by analyse function.

    This function does not check whether files exist nor their
    timestamps; check_temp_files should be called first.

    @param temp_path: directory where all temp files should reside
    @return tuple with values of: libraries, la_libraries,
        libraries_links, binaries
    '''
    ret = {'libraries': [], 'la_libraries': [],
            'libraries_links': [], 'binaries': []}
    try:
        # .items() instead of Py2-only .iteritems()
        for key, val in ret.items():
            with open(os.path.join(temp_path, key)) as f:
                for line in f.readlines():
                    val.append(line.strip())
    except EnvironmentError:
        # a missing or unreadable cache file just yields empty lists
        pass

    return (ret['libraries'], ret['la_libraries'],
            ret['libraries_links'], ret['binaries'])


def save_cache(logger=logging, to_save=None, temp_path=DEFAULT_TMP_DIR):
    ''' Tries to store caching information.

    @param logger: logging.Logger-like object
    @param to_save: dict with keys: libraries, la_libraries,
        libraries_links and binaries
    @param temp_path: directory where all temp files should reside
    '''
    if to_save is None:
        # None instead of a mutable default dict shared across calls
        to_save = {}

    if not os.path.exists(temp_path):
        os.makedirs(temp_path)

    try:
        with open(os.path.join(temp_path, 'timestamp'), 'w') as f:
            f.write(str(int(time.time())))

        for key, val in to_save.items():
            with open(os.path.join(temp_path, key), 'w') as f:
                for line in val:
                    f.write(line + '\n')
    except Exception as ex:
        # best-effort: the original wrapped this message in red(), which
        # was never imported here and raised NameError on the error path
        logger.warn('Could not save cache: %s' % str(ex))


def check_temp_files(temp_path=DEFAULT_TMP_DIR, max_delay=3600):
    ''' Checks if temporary files from previous run are still available
    and if they aren't too old.

    @param temp_path: directory where temporary files should be found
    @param max_delay: maximum time difference (in seconds) when those
        files are still considered fresh and useful
    @return True when files can be used, or False when they don't
        exist or they are too old
    '''
    if not os.path.isdir(temp_path):
        return False

    timestamp_path = os.path.join(temp_path, 'timestamp')
    if not os.path.isfile(timestamp_path):
        return False

    try:
        with open(timestamp_path) as f:
            timestamp = int(f.readline())
    except (EnvironmentError, ValueError):
        # unreadable or corrupted timestamp -> cache unusable
        return False

    diff = int(time.time()) - timestamp
    return max_delay > diff


if __name__ == '__main__':
    print('Preparing cache ... ')

    from collect import (prepare_search_dirs, parse_revdep_config,
            collect_libraries_from_dir, collect_binaries_from_dir)

    bin_dirs, lib_dirs = prepare_search_dirs()

    masked_dirs, masked_files, ld = parse_revdep_config()
    lib_dirs = lib_dirs.union(ld)
    bin_dirs = bin_dirs.union(ld)
    masked_dirs = masked_dirs.union(set(['/lib/modules', '/lib32/modules', '/lib64/modules', ]))

    libraries, la_libraries, libraries_links, symlink_pairs = \
            collect_libraries_from_dir(lib_dirs, masked_dirs)
    binaries = collect_binaries_from_dir(bin_dirs, masked_dirs)

    save_cache(to_save={'libraries': libraries,
            'la_libraries': la_libraries,
            'libraries_links': libraries_links,
            'binaries': binaries})

    print('Done.')
#!/usr/bin/python

"""Filesystem collection helpers for revdep-rebuild: locate candidate
libraries, .la files, symlinks and executable binaries."""

import re
import os
import glob
import stat
import logging
import portage
from portage.output import bold, red, blue, yellow, green, nocolor

DEFAULT_LD_FILE = 'etc/ld.so.conf'
DEFAULT_ENV_FILE = 'etc/profile.env'

try:
    _STRING_TYPES = basestring  # Python 2
except NameError:
    _STRING_TYPES = str  # Python 3


def parse_conf(conf_file=None, visited=None, logger=logging):
    ''' Parses supplied conf_file for library paths.

    @param conf_file: file or files to parse; defaults to
        ${ROOT}etc/ld.so.conf
    @param visited: set of files already parsed
    @param logger: logging.Logger-like object
    @return set of library directories
    '''
    if conf_file is None:
        # resolved at call time, not in the default argument, so that
        # portage.root is not frozen at import time
        conf_file = os.path.join(portage.root, DEFAULT_LD_FILE)

    lib_dirs = set()
    to_parse = set()

    if isinstance(conf_file, _STRING_TYPES):
        conf_file = [conf_file]

    for conf in conf_file:
        try:
            with open(conf) as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith('#'):
                        continue
                    elif line.startswith('include'):
                        include_line = line.split()[1:]
                        for included in include_line:
                            if not included.startswith('/'):
                                # relative includes are resolved against
                                # the including file's directory
                                path = os.path.join(os.path.dirname(conf), \
                                        included)
                            else:
                                path = included

                            to_parse = to_parse.union(glob.glob(path))
                    else:
                        lib_dirs.add(line)
        except EnvironmentError:
            logger.warn(yellow('Error when parsing file %s' % conf))

    if visited is None:
        visited = set()

    visited = visited.union(conf_file)
    to_parse = to_parse.difference(visited)
    if to_parse:
        # recurse into included files we have not seen yet
        lib_dirs = lib_dirs.union(parse_conf(to_parse, visited, logger=logger))

    return lib_dirs


def prepare_search_dirs(logger=logging):
    ''' Lookup for search dirs.

    @param logger: logging.Logger-like object
    @return tuple with two sets: (bin_dirs, lib_dirs)
    '''
    bin_dirs = set(['/bin', '/usr/bin', ])
    lib_dirs = set(['/lib', '/usr/lib', ])

    # path kept in a local so the except branch can report it; the
    # original referenced the possibly-unbound file object there
    env_path = os.path.join(portage.root, DEFAULT_ENV_FILE)
    try:
        with open(env_path, 'r') as f:
            for line in f.readlines():
                line = line.strip()
                m = re.match("^export (ROOT)?PATH='([^']+)'", line)
                if m is not None:
                    bin_dirs = bin_dirs.union(set(m.group(2).split(':')))
    except EnvironmentError:
        logger.debug(yellow('Could not open file %s' % env_path))

    lib_dirs = parse_conf(logger=logger)
    return (bin_dirs, lib_dirs)


def parse_revdep_config(revdep_confdir='/etc/revdep-rebuild/'):
    ''' Parses all files under the configuration directory and returns
    a tuple of: (masked_dirs, masked_files, search_dirs).

    @param revdep_confdir: directory holding revdep-rebuild control files
    '''
    search_dirs = set()
    masked_dirs = set()
    masked_files = set()

    for f in os.listdir(revdep_confdir):
        for line in open(os.path.join(revdep_confdir, f)):
            line = line.strip()
            # first check for comment, we do not want to regex all lines
            if not line.startswith('#'):
                m = re.match('LD_LIBRARY_MASK=\\"([^"]+)\\"', line)
                if m is not None:
                    s = m.group(1).split(' ')
                    masked_files = masked_files.union(s)
                    continue
                m = re.match('SEARCH_DIRS_MASK=\\"([^"]+)\\"', line)
                if m is not None:
                    s = m.group(1).split(' ')
                    for ss in s:
                        masked_dirs = masked_dirs.union(glob.glob(ss))
                    continue
                m = re.match('SEARCH_DIRS=\\"([^"]+)\\"', line)
                if m is not None:
                    s = m.group(1).split()
                    for ss in s:
                        search_dirs = search_dirs.union(glob.glob(ss))
                    continue

    return (masked_dirs, masked_files, search_dirs)


def collect_libraries_from_dir(dirs, mask, logger=logging):
    ''' Collects all libraries from specified list of directories.

    mask is a list of paths omitted from scanning; an entry can be
    either a single file or an entire directory.

    @return tuple composed of: list of libraries, list of .la files,
        list of symlinks, and list of (symlink_id, library_id) pairs
        for resolving dependencies
    '''
    found_directories = []  # allows us to reduce the number of fnc calls
    found_files = []
    found_symlinks = []
    found_la_files = []     # la libraries
    symlink_pairs = []      # list of pairs symlink_id->library_id

    for d in dirs:
        if d in mask:
            continue

        try:
            for l in os.listdir(d):
                l = os.path.join(d, l)
                if l in mask:
                    continue

                if os.path.isdir(l):
                    if os.path.islink(l):
                        # we do not want to scan symlink-directories
                        pass
                    else:
                        found_directories.append(l)
                elif os.path.isfile(l):
                    if l.endswith('.so') or '.so.' in l:
                        if l in found_files or l in found_symlinks:
                            continue

                        if os.path.islink(l):
                            found_symlinks.append(l)
                            abs_path = os.path.realpath(l)
                            if abs_path in found_files:
                                i = found_files.index(abs_path)
                            else:
                                found_files.append(abs_path)
                                i = len(found_files) - 1
                            symlink_pairs.append((len(found_symlinks) - 1, i, ))
                        else:
                            found_files.append(l)
                        continue
                    elif l.endswith('.la'):
                        if l in found_la_files:
                            continue

                        found_la_files.append(l)
                    else:
                        # sometimes there are binaries in libs' subdir,
                        # for example in nagios
                        if not os.path.islink(l):
                            if l in found_files or l in found_symlinks:
                                continue
                            prv = os.stat(l)[stat.ST_MODE]
                            if prv & stat.S_IXUSR == stat.S_IXUSR or \
                                    prv & stat.S_IXGRP == stat.S_IXGRP or \
                                    prv & stat.S_IXOTH == stat.S_IXOTH:
                                found_files.append(l)
        except Exception as ex:
            logger.debug(yellow('Exception during collecting libraries: %s' % str(ex)))

    if found_directories:
        # recurse into subdirectories discovered above
        f, a, l, p = collect_libraries_from_dir(found_directories, mask, logger)
        found_files += f
        found_la_files += a
        found_symlinks += l
        symlink_pairs += p

    return (found_files, found_la_files, found_symlinks, symlink_pairs)


def collect_binaries_from_dir(dirs, mask, logger=logging):
    ''' Collects all binaries from specified list of directories.

    mask is a list of paths omitted from scanning; an entry can be
    either a single file or an entire directory.

    @return list of binaries
    '''
    found_directories = []  # allows us to reduce the number of fnc calls
    found_files = []

    for d in dirs:
        if d in mask:
            continue

        try:
            for l in os.listdir(d):
                l = os.path.join(d, l)
                # the original tested 'd in mask' here, so per-file
                # masking was silently ignored for binaries
                if l in mask:
                    continue

                if os.path.isdir(l):
                    if os.path.islink(l):
                        # we do not want to scan symlink-directories
                        pass
                    else:
                        found_directories.append(l)
                elif os.path.isfile(l):
                    # we're looking for binaries, and with binaries we do
                    # not need links, thus we can optimize a bit
                    if not os.path.islink(l):
                        prv = os.stat(l)[stat.ST_MODE]
                        if prv & stat.S_IXUSR == stat.S_IXUSR or \
                                prv & stat.S_IXGRP == stat.S_IXGRP or \
                                prv & stat.S_IXOTH == stat.S_IXOTH:
                            found_files.append(l)
        except Exception as e:
            logger.debug(yellow('Exception during binaries collecting: %s' % str(e)))

    if found_directories:
        found_files += collect_binaries_from_dir(found_directories, mask, logger)

    return found_files


if __name__ == '__main__':
    bin_dirs, lib_dirs = prepare_search_dirs(logging)

    masked_dirs, masked_files, ld = parse_revdep_config()
    lib_dirs = lib_dirs.union(ld)
    bin_dirs = bin_dirs.union(ld)
    masked_dirs = masked_dirs.union(set(['/lib/modules', '/lib32/modules', '/lib64/modules', ]))

    libraries, la_libraries, libraries_links, symlink_pairs = \
            collect_libraries_from_dir(lib_dirs, masked_dirs, logging)
    binaries = collect_binaries_from_dir(bin_dirs, masked_dirs, logging)

    print('Found: %i binaries and %i libraries.' % (len(binaries), len(libraries)))
%(len(binaries), len(libraries)) + + + diff --git a/pym/gentoolkit/revdep_rebuild/revdep-rebuild.py b/pym/gentoolkit/revdep_rebuild/revdep-rebuild.py index ac2a6c8..c28b1b0 100755 --- a/pym/gentoolkit/revdep_rebuild/revdep-rebuild.py +++ b/pym/gentoolkit/revdep_rebuild/revdep-rebuild.py @@ -20,12 +20,18 @@ import stat import time import glob import portage -import platform +import logging from portage import portdb from portage.output import bold, red, blue, yellow, green, nocolor +from analyse import analyse +from stuff import * +from cache import * +from assign import get_slotted_cps + + APP_NAME = sys.argv[0] -VERSION = '0.1-r4' +VERSION = '0.1-r5' __productname__ = "revdep-ng" @@ -49,51 +55,11 @@ VERBOSITY = 1 #verbosity level; 0-quiet, 1-norm., 2-verbose IS_DEV = True #True for dev. version, False for stable #used when IS_DEV is True, False forces to call emerge with --pretend # can be set True from the cli with the --no-pretend option -NO_PRETEND = False +NO_PRETEND = False CMD_MAX_ARGS = 1000 # number of maximum allowed files to be parsed at once -# util. functions -def call_program(args): - ''' Calls program with specified parameters and returns stdout ''' - subp = subprocess.Popen(args, stdout=subprocess.PIPE, \ - stderr=subprocess.PIPE) - stdout, stderr = subp.communicate() - return stdout - - -def scan(params, files): - ''' Calls scanelf with given params and files to scan. - @param params is list of parameters that should be passed into scanelf app. - @param files list of files to scan. 
- - When files count is greater CMD_MAX_ARGS, it'll be divided - into several parts - - @return scanelf output (joined if was called several times) - ''' - out = [] - for i in range(0, len(files), CMD_MAX_ARGS): - out += call_program(['scanelf'] + params + files[i:i+CMD_MAX_ARGS]).strip().split('\n') - return out - - -def print_v(verbosity, args): - """Prints to stdout according to the verbosity level - and the global VERBOSITY level - - @param verbosity: integer - @param args: string to print - """ - if verbosity > VERBOSITY: - return - print args - - -def exithandler(signum, frame): - sys.exit(1) - def print_usage(): print APP_NAME + ': (' + VERSION +')' @@ -127,225 +93,6 @@ def print_usage(): # functions -def parse_conf(conf_file=None, visited=None): - ''' Parses supplied conf_file for libraries pathes. - conf_file is file or files to parse - visited is set of files already parsed - ''' - - if conf_file is None: - conf_file = os.path.join(portage.root, DEFAULT_LD_FILE) - - lib_dirs = set() - to_parse = set() - - if isinstance(conf_file, basestring): - conf_file = [conf_file] - - for conf in conf_file: - try: - with open(conf) as f: - for line in f.readlines(): - line = line.strip() - if line.startswith('#'): - continue - elif line.startswith('include'): - include_line = line.split()[1:] - for included in include_line: - if not included.startswith('/'): - path = os.path.join(os.path.dirname(conf), \ - included) - else: - path = included - - to_parse = to_parse.union(glob.glob(path)) - else: - lib_dirs.add(line) - except EnvironmentError: - print_v(2, 'Error when parsing file %s' %conf) - - if visited is None: - visited = set() - - visited = visited.union(conf_file) - to_parse = to_parse.difference(visited) - if to_parse: - lib_dirs = lib_dirs.union(parse_conf(to_parse, visited)) - - return lib_dirs - - -def prepare_search_dirs(): - ''' Lookup for search dirs. 
Returns tuple with two lists, - (list_of_bin_dirs, list_of_lib_dirs) - ''' - - bin_dirs = set(['/bin', '/usr/bin', ]) - lib_dirs = set(['/lib', '/usr/lib', ]) - - try: - with open(os.path.join(portage.root, DEFAULT_ENV_FILE), 'r') as f: - for line in f.readlines(): - line = line.strip() - m = re.match("^export (ROOT)?PATH='([^']+)'", line) - if m is not None: - bin_dirs = bin_dirs.union(set(m.group(2).split(':'))) - except EnvironmentError: - print_v(2, 'Could not open file %s' % f) - - lib_dirs = parse_conf() - return (bin_dirs, lib_dirs) - - -def parse_revdep_config(): - ''' Parses all files under /etc/revdep-rebuild/ and returns - tuple of: (masked_dirs, masked_files, search_dirs)''' - - search_dirs = set() - masked_dirs = set() - masked_files = set() - - for f in os.listdir('/etc/revdep-rebuild/'): - for line in open(os.path.join('/etc/revdep-rebuild', f)): - line = line.strip() - if not line.startswith('#'): #first check for comment, we do not want to regex all lines - m = re.match('LD_LIBRARY_MASK=\\"([^"]+)\\"', line) - if m is not None: - s = m.group(1).split(' ') - masked_files = masked_files.union(s) - continue - m = re.match('SEARCH_DIRS_MASK=\\"([^"]+)\\"', line) - if m is not None: - s = m.group(1).split(' ') - for ss in s: - masked_dirs = masked_dirs.union(glob.glob(ss)) - continue - m = re.match('SEARCH_DIRS=\\"([^"]+)\\"', line) - if m is not None: - s = m.group(1).split() - for ss in s: - search_dirs = search_dirs.union(glob.glob(ss)) - continue - - return (masked_dirs, masked_files, search_dirs) - - -def collect_libraries_from_dir(dirs, mask): - ''' Collects all libraries from specified list of directories. 
- mask is list of pathes, that are ommited in scanning, can be eighter single file or entire directory - Returns tuple composed of: list of libraries, list of symlinks, and toupe with pair - (symlink_id, library_id) for resolving dependencies - ''' - - - found_directories = [] # contains list of directories found; allow us to reduce number of fnc calls - found_files = [] - found_symlinks = [] - found_la_files = [] # la libraries - symlink_pairs = [] # list of pairs symlink_id->library_id - - for d in dirs: - if d in mask: - continue - - try: - for l in os.listdir(d): - l = os.path.join(d, l) - if l in mask: - continue - - if os.path.isdir(l): - if os.path.islink(l): - #we do not want scan symlink-directories - pass - else: - found_directories.append(l) - elif os.path.isfile(l): - if l.endswith('.so') or '.so.' in l: - if l in found_files or l in found_symlinks: - continue - - if os.path.islink(l): - found_symlinks.append(l) - abs_path = os.path.realpath(l) - if abs_path in found_files: - i = found_files.index(abs_path) - else: - found_files.append(abs_path) - i = len(found_files)-1 - symlink_pairs.append((len(found_symlinks)-1, i,)) - else: - found_files.append(l) - continue - elif l.endswith('.la'): - if l in found_la_files: - continue - - found_la_files.append(l) - else: - # sometimes there are binaries in libs' subdir, for example in nagios - if not os.path.islink(l): - if l in found_files or l in found_symlinks: - continue - prv = os.stat(l)[stat.ST_MODE] - if prv & stat.S_IXUSR == stat.S_IXUSR or \ - prv & stat.S_IXGRP == stat.S_IXGRP or \ - prv & stat.S_IXOTH == stat.S_IXOTH: - found_files.append(l) - except: - pass - - - if found_directories: - f,a,l,p = collect_libraries_from_dir(found_directories, mask) - found_files+=f - found_la_files+=a - found_symlinks+=l - symlink_pairs+=p - - return (found_files, found_la_files, found_symlinks, symlink_pairs) - - -def collect_binaries_from_dir(dirs, mask): - ''' Collects all binaries from specified list of 
directories. - mask is list of pathes, that are ommited in scanning, can be eighter single file or entire directory - Returns list of binaries - ''' - - found_directories = [] # contains list of directories found; allow us to reduce number of fnc calls - found_files = [] - - for d in dirs: - if d in mask: - continue - - try: - for l in os.listdir(d): - l = os.path.join(d, l) - if d in mask: - continue - - if os.path.isdir(l): - if os.path.islink(l): - #we do not want scan symlink-directories - pass - else: - found_directories.append(l) - elif os.path.isfile(l): - #we're looking for binaries, and with binaries we do not need links, thus we can optimize a bit - if not os.path.islink(l): - prv = os.stat(l)[stat.ST_MODE] - if prv & stat.S_IXUSR == stat.S_IXUSR or \ - prv & stat.S_IXGRP == stat.S_IXGRP or \ - prv & stat.S_IXOTH == stat.S_IXOTH: - found_files.append(l) - except: - pass - - if found_directories: - found_files += collect_binaries_from_dir(found_directories, mask) - - return found_files def _match_str_in_list(lst, stri): @@ -355,333 +102,15 @@ def _match_str_in_list(lst, stri): return False -def prepare_checks(files_to_check, libraries, bits): - ''' Calls scanelf for all files_to_check, then returns found libraries and dependencies - ''' - - libs = [] # libs found by scanelf - dependencies = [] # list of lists of files (from file_to_check) that uses - # library (for dependencies[id] and libs[id] => id==id) - - for line in scan(['-M', str(bits), '-nBF', '%F %n'], files_to_check): - #call_program(['scanelf', '-M', str(bits), '-nBF', '%F %n',]+files_to_check).strip().split('\n'): - r = line.strip().split(' ') - if len(r) < 2: # no dependencies? 
- continue - - deps = r[1].split(',') - for d in deps: - if d in libs: - i = libs.index(d) - dependencies[i].append(r[0]) - else: - libs.append(d) - dependencies.append([r[0],]) - return (libs, dependencies) - - -def extract_dependencies_from_la(la, libraries, to_check): - broken = [] - for f in la: - if not os.path.exists(f): - continue - - for line in open(f, 'r').readlines(): - line = line.strip() - if line.startswith('dependency_libs='): - m = re.match("dependency_libs='([^']+)'", line) - if m is not None: - for el in m.group(1).split(' '): - el = el.strip() - if len(el) < 1 or el.startswith('-'): - continue - - if el in la or el in libraries: - pass - else: - if to_check: - _break = False - for tc in to_check: - if tc in el: - _break = True - break - if not _break: - continue - - print_v(1, yellow(' * ') + f + ' is broken (requires: ' + bold(el)) - broken.append(f) - return broken - - - -def find_broken(found_libs, system_libraries, to_check): - ''' Search for broken libraries. - Check if system_libraries contains found_libs, where - system_libraries is list of obsolute pathes and found_libs - is list of library names. - ''' - - # join libraries and looking at it as string is way too faster than for-jumping - - broken = [] - sl = '|'.join(system_libraries) - - if not to_check: - for f in found_libs: - if f+'|' not in sl: - broken.append(found_libs.index(f)) - else: - for tc in to_check: - for f in found_libs: - if tc in f:# and f+'|' not in sl: - broken.append(found_libs.index(f)) - - return broken - - -def main_checks(found_libs, broken, dependencies): - ''' Checks for broken dependencies. 
- found_libs have to be the same as returned by prepare_checks - broken is list of libraries found by scanelf - dependencies is the value returned by prepare_checks - ''' - - broken_pathes = [] - - for b in broken: - f = found_libs[b] - print_v(1, 'Broken files that requires: ' + bold(f)) - for d in dependencies[b]: - print_v(1, yellow(' * ') + d) - broken_pathes.append(d) - return broken_pathes - - -def assign_packages(broken, output): - ''' Finds and returns packages that owns files placed in broken. - Broken is list of files - ''' - assigned = set() - for group in os.listdir('/var/db/pkg'): - for pkg in os.listdir('/var/db/pkg/' + group): - f = '/var/db/pkg/' + group + '/' + pkg + '/CONTENTS' - if os.path.exists(f): - try: - with open(f, 'r') as cnt: - for line in cnt.readlines(): - m = re.match('^obj (/[^ ]+)', line) - if m is not None: - m = m.group(1) - if m in broken: - found = group+'/'+pkg - if found not in assigned: - assigned.add(found) - print_v(1, '\t' + m + ' -> ' + bold(found)) - except Exception as e: - output(1, red(' !! Failed to read ' + f)) - - return assigned - - -def get_best_match(cpv, cp): - """Tries to find another version of the pkg with the same slot - as the deprecated installed version. 
Failing that attempt to get any version - of the same app - - @param cpv: string - @param cp: string - @rtype tuple: ([cpv,...], SLOT) - """ - - slot = portage.db[portage.root]["vartree"].dbapi.aux_get(cpv, ["SLOT"]) - print_v(1, yellow('Warning: ebuild "' + cpv + '" not found.')) - print_v(1, 'Looking for %s:%s' %(cp, slot)) - try: - m = portdb.match('%s:%s' %(cp, slot)) - except portage.exception.InvalidAtom: - m = None - - if not m: - print_v(1, red('Could not find ebuild for %s:%s' %(cp, slot))) - slot = [''] - m = portdb.match(cp) - if not m: - print_v(1, red('Could not find ebuild for ' + cp)) - return m, slot - - -def get_slotted_cps(cpvs): - """Uses portage to reduce the cpv list into a cp:slot list and returns it - """ - from portage.versions import catpkgsplit - from portage import portdb - - cps = [] - for cpv in cpvs: - parts = catpkgsplit(cpv) - cp = parts[0] + '/' + parts[1] - try: - slot = portdb.aux_get(cpv, ["SLOT"]) - except KeyError: - m, slot = get_best_match(cpv, cp) - if not m: - print_v(1, red("Installed package: %s is no longer available" %cp)) - continue - - if slot[0]: - cps.append(cp + ":" + slot[0]) - else: - cps.append(cp) - - return cps - - -def read_cache(temp_path=DEFAULT_TMP_DIR): - ''' Reads cache information needed by analyse function. 
- This function does not checks if files exists nor timestamps, - check_temp_files should be called first - @param temp_path: directory where all temp files should reside - @return tuple with values of: libraries, la_libraries, libraries_links, symlink_pairs, binaries - ''' - - ret = {'libraries':[], 'la_libraries':[], 'libraries_links':[], 'binaries':[]} - try: - for key,val in ret.iteritems(): - f = open(os.path.join(temp_path, key)) - for line in f.readlines(): - val.append(line.strip()) - #libraries.remove('\n') - f.close() - except EnvironmentError: - pass - - return (ret['libraries'], ret['la_libraries'], ret['libraries_links'], ret['binaries']) - - -def save_cache(output=print_v, to_save={}, temp_path=DEFAULT_TMP_DIR): - ''' Tries to store caching information. - @param to_save have to be dict with keys: libraries, la_libraries, libraries_links and binaries - ''' - - if not os.path.exists(temp_path): - os.makedirs(temp_path) - - try: - f = open(os.path.join(temp_path, 'timestamp'), 'w') - f.write(str(int(time.time()))) - f.close() - - for key,val in to_save.iteritems(): - f = open(os.path.join(temp_path, key), 'w') - for line in val: - f.write(line + '\n') - f.close() - except Exception as ex: - output(1, red('Could not save cache %s' %str(ex))) - -def analyse(output=print_v, libraries=None, la_libraries=None, libraries_links=None, binaries=None): - """Main program body. It will collect all info and determine the - pkgs needing rebuilding. - - @param output: optional print/data gathering routine. Defaults to print_v - which prints to sys.stdout. Refer to print_v parameters for more detail. - @rtype list: list of pkgs that need rebuilding - """ - - if libraries and la_libraries and libraries_links and binaries: - output(1, blue(' * ') + bold('Found a valid cache, skipping collecting phase')) - else: - #TODO: add partial cache (for ex. 
only libraries) when found for some reason - - output(1, green(' * ') + bold('Collecting system binaries and libraries')) - bin_dirs, lib_dirs = prepare_search_dirs() - - masked_dirs, masked_files, ld = parse_revdep_config() - lib_dirs = lib_dirs.union(ld) - bin_dirs = bin_dirs.union(ld) - masked_dirs = masked_dirs.union(set(['/lib/modules', '/lib32/modules', '/lib64/modules',])) - - output(1, green(' * ') + bold('Collecting dynamic linking informations')) - libraries, la_libraries, libraries_links, symlink_pairs = collect_libraries_from_dir(lib_dirs, masked_dirs) - binaries = collect_binaries_from_dir(bin_dirs, masked_dirs) - - if USE_TMP_FILES: - save_cache(to_save={'libraries':libraries, 'la_libraries':la_libraries, 'libraries_links':libraries_links, 'binaries':binaries}) - - - output(2, 'Found '+ str(len(libraries)) + ' libraries (+' + str(len(libraries_links)) + ' symlinks) and ' + str(len(binaries)) + ' binaries') - - output(1, green(' * ') + bold('Checking dynamic linking consistency')) - output(2,'Search for ' + str(len(binaries)+len(libraries)) + ' within ' + str(len(libraries)+len(libraries_links))) - libs_and_bins = libraries+binaries - - #l = [] - #for line in call_program(['scanelf', '-M', '64', '-BF', '%F',] + libraries).strip().split('\n'): - #l.append(line) - #libraries = l - - found_libs = [] - dependencies = [] - - - _bits, linkg = platform.architecture() - if _bits.startswith('32'): - bits = 32 - elif _bits.startswith('64'): - bits = 64 - - for av_bits in glob.glob('/lib[0-9]*') or ('/lib32',): - bits = int(av_bits[4:]) - _libraries = scan(['-M', str(bits), '-BF', '%F'], libraries+libraries_links) - #call_program(['scanelf', '-M', str(bits), '-BF', '%F',] + libraries+libraries_links).strip().split('\n') - - found_libs, dependencies = prepare_checks(libs_and_bins, _libraries, bits) - - broken = find_broken(found_libs, _libraries, _libs_to_check) - broken_la = extract_dependencies_from_la(la_libraries, _libraries, _libs_to_check) - - bits /= 2 - 
bits = int(bits) - - broken_pathes = main_checks(found_libs, broken, dependencies) - broken_pathes += broken_la - - output(1, green(' * ') + bold('Assign files to packages')) - - return assign_packages(broken_pathes, output) - - -def check_temp_files(temp_path=DEFAULT_TMP_DIR, max_delay=3600): - ''' Checks if temporary files from previous run are still available - and if they aren't too old - @param temp_path is directory, where temporary files should be found - @param max_delay is maximum time difference (in seconds) when those files - are still considered fresh and useful - returns True, when files can be used, or False, when they don't - exists or they are too old - ''' - - if not os.path.exists(temp_path) or not os.path.isdir(temp_path): - return False - - timestamp_path = os.path.join(temp_path, 'timestamp') - if not os.path.exists(timestamp_path) or not os.path.isfile(timestamp_path): - return False - - try: - f = open(timestamp_path) - timestamp = int(f.readline()) - f.close() - except: - timestamp = 0 - return False - - diff = int(time.time()) - timestamp - return max_delay > diff - - # Runs from here if __name__ == "__main__": + logger = logging.getLogger() + log_handler = logging.StreamHandler() + log_fmt = logging.Formatter('%(msg)s') + log_handler.setFormatter(log_fmt) + logger.addHandler(log_handler) + logger.setLevel(logging.WARNING) + _libs_to_check = set() try: @@ -694,10 +123,13 @@ if __name__ == "__main__": sys.exit(0) elif key in ('-q', '--quiet'): VERBOSITY = 0 + logger.setLevel(logging.ERROR) elif key in ('-v', '--verbose'): VERBOSITY = 2 + logger.setLevel(logging.INFO) elif key in ('-d', '--debug'): PRINT_DEBUG = True + logger.setLevel(logging.DEBUG) elif key in ('-p', '--pretend'): PRETEND = True elif key == '--no-pretend': @@ -713,7 +145,7 @@ if __name__ == "__main__": args = " " + " ".join(args) except getopt.GetoptError: - print_v(1, red('Unrecognized option\n')) + logging.info(red('Unrecognized option\n')) print_usage() sys.exit(2) @@ 
-721,12 +153,12 @@ if __name__ == "__main__": nocolor() if os.getuid() != 0 and not PRETEND: - print_v(1, blue(' * ') + yellow('You are not root, adding --pretend to portage options')) + logger.warn(blue(' * ') + yellow('You are not root, adding --pretend to portage options')) PRETEND = True elif not PRETEND and IS_DEV and not NO_PRETEND: - print_v(1, blue(' * ') + yellow('This is a development version, so it may not work correctly')) - print_v(1, blue(' * ') + yellow('Adding --pretend to portage options anyway')) - print_v(1, blue(' * ') + 'If you\'re sure, you can add --no-pretend to revdep options') + logger.warn(blue(' * ') + yellow('This is a development version, so it may not work correctly')) + logger.warn(blue(' * ') + yellow('Adding --pretend to portage options anyway')) + logger.info(blue(' * ') + 'If you\'re sure, you can add --no-pretend to revdep options') PRETEND = True @@ -738,18 +170,18 @@ if __name__ == "__main__": if USE_TMP_FILES and check_temp_files(): libraries, la_libraries, libraries_links, binaries = read_cache() assigned = analyse(libraries=libraries, la_libraries=la_libraries, \ - libraries_links=libraries_links, binaries=binaries) + libraries_links=libraries_links, binaries=binaries, _libs_to_check=_libs_to_check) else: assigned = analyse() if not assigned: - print_v(1, '\n' + bold('Your system is consistent')) + logger.warn('\n' + bold('Your system is consistent')) sys.exit(0) if EXACT: emerge_command = '=' + ' ='.join(assigned) else: - emerge_command = ' '.join(get_slotted_cps(assigned)) + emerge_command = ' '.join(get_slotted_cps(assigned, logger)) if PRETEND: args += ' --pretend' if VERBOSITY >= 2: @@ -758,15 +190,11 @@ if __name__ == "__main__": args += ' --quiet' if len(emerge_command) == 0: - print_v(1, bold('\nThere is nothing to emerge. Exiting.')) + logger.warn(bold('\nThere is nothing to emerge. 
Exiting.')) sys.exit(0) emerge_command = args + ' --oneshot ' + emerge_command - - #if PRETEND: - print_v(1, yellow('\nemerge') + bold(emerge_command)) - #else: - #call_program(emerge_command.split()) + logger.warn(yellow('\nemerge') + bold(emerge_command)) os.system('emerge ' + emerge_command) diff --git a/pym/gentoolkit/revdep_rebuild/revdep_rebuild.kdev4 b/pym/gentoolkit/revdep_rebuild/revdep_rebuild.kdev4 new file mode 100644 index 0000000..7c395ce --- /dev/null +++ b/pym/gentoolkit/revdep_rebuild/revdep_rebuild.kdev4 @@ -0,0 +1,3 @@ +[Project] +Manager=KDevGenericManager +Name=revdep_rebuild diff --git a/pym/gentoolkit/revdep_rebuild/stuff.py b/pym/gentoolkit/revdep_rebuild/stuff.py new file mode 100644 index 0000000..dd8e572 --- /dev/null +++ b/pym/gentoolkit/revdep_rebuild/stuff.py @@ -0,0 +1,39 @@ +#!/usr/bin/python + +import subprocess + + +# util. functions +def call_program(args): + ''' Calls program with specified parameters and returns stdout ''' + subp = subprocess.Popen(args, stdout=subprocess.PIPE, \ + stderr=subprocess.PIPE) + stdout, stderr = subp.communicate() + return stdout + + +def scan(params, files, max_args): + ''' Calls scanelf with given params and files to scan. + @param params is list of parameters that should be passed into scanelf app. + @param files list of files to scan. + @param max_args number of files to process at once + + When files count is greater CMD_MAX_ARGS, it'll be divided + into several parts + + @return scanelf output (joined if was called several times) + ''' + out = [] + for i in range(0, len(files), max_args): + out += call_program(['scanelf'] + params + files[i:i+max_args]).strip().split('\n') + return out + + + +def exithandler(signum, frame): + sys.exit(1) + + + +if __name__ == '__main__': + print "There is nothing to run here." 
diff --git a/pym/gentoolkit/revdep_rebuild/test.py b/pym/gentoolkit/revdep_rebuild/test.py new file mode 100644 index 0000000..9f87f32 --- /dev/null +++ b/pym/gentoolkit/revdep_rebuild/test.py @@ -0,0 +1,7 @@ +#!/usr/bin/python + +import logging + +logging.basicConfig(format='%(msg)s', level=logging.DEBUG) +#logging.basicConfig() +logging.info('test') \ No newline at end of file -- 2.26.2