From 98961815eda7c7ff44cfca3643d8b280f3873d47 Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Tue, 31 Mar 2009 20:14:03 +0000 Subject: [PATCH] Add a new egencache --rsync option which enables a stat collision workaround for cases in which the content of a cache entry changes and neither the file mtime nor size changes (preventing rsync from detecting changes). See bug #139134. This option should only be needed for distribution via something like rsync, which relies on timestamps and file sizes to detect changes. It's not needed with git since that uses a more thorough mechanism which allows it to detect changed inode numbers (described in racy-git.txt in the git technical docs). svn path=/main/trunk/; revision=13262 --- bin/egencache | 51 +++++++++++++++++++++++++------ pym/portage/cache/cache_errors.py | 22 +++++++++++++ pym/portage/cache/metadata.py | 49 ++++++++++++++++++++++++----- 3 files changed, 104 insertions(+), 18 deletions(-) diff --git a/bin/egencache b/bin/egencache index de6db88dc..e8981deba 100755 --- a/bin/egencache +++ b/bin/egencache @@ -24,7 +24,7 @@ import optparse import os import portage import _emerge -from portage.cache.cache_errors import CacheError +from portage.cache.cache_errors import CacheError, StatCollision from portage.util import writemsg_level def parse_args(args): @@ -46,6 +46,10 @@ def parse_args(args): action="store", help="max load allowed when spawning multiple jobs", dest="load_average") + parser.add_option("--rsync", + action="store_true", + help="enable rsync stat collision workaround " + \ + "for bug 139134 (use with --update)") options, args = parser.parse_args(args) if not options.update: @@ -73,7 +77,8 @@ def parse_args(args): return options, args class GenCache(object): - def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None): + def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None, + rsync=False): self._portdb = portdb # We can globally cleanse stale cache only if we # iterate over 
every single cp. @@ -90,22 +95,47 @@ class GenCache(object): metadbmodule = portdb.mysettings.load_best_module("portdbapi.metadbmodule") self._trg_cache = metadbmodule(portdb.porttree_root, "metadata/cache", portage.auxdbkeys[:]) + if rsync: + self._trg_cache.raise_stat_collision = True self._existing_nodes = set() def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata): self._existing_nodes.add(cpv) if metadata is not None: - # TODO: Implement a workaround for bug 139134 here. The cache - # should be able to optionally raise an exception in order to - # indicate any mtime + size collisions that will prevent rsync - # from detecting changes. These exceptions will be handled by - # bumping the mtime on the ebuild (and the corresponding cache - # entry). if metadata.get('EAPI') == '0': del metadata['EAPI'] try: - self._trg_cache[cpv] = metadata + try: + self._trg_cache[cpv] = metadata + except StatCollision, sc: + # If the content of a cache entry changes and neither the + # file mtime nor size changes, it will prevent rsync from + # detecting changes. Cache backends may raise this + # exception from _setitem() if they detect this type of stat + # collision. These exceptions are handled by bumping the + # mtime on the ebuild (and the corresponding cache entry). + # See bug #139134. 
+ max_mtime = sc.mtime + for ec, (loc, ec_mtime) in metadata['_eclasses_'].iteritems(): + if max_mtime < ec_mtime: + max_mtime = ec_mtime + if max_mtime == sc.mtime: + max_mtime += 1 + max_mtime = long(max_mtime) + try: + os.utime(ebuild_path, (max_mtime, max_mtime)) + except OSError, e: + self.returncode |= 1 + writemsg_level( + "%s writing target: %s\n" % (cpv, e), + level=logging.ERROR, noiselevel=-1) + else: + metadata['_mtime_'] = max_mtime + self._trg_cache[cpv] = metadata + self._portdb.auxdb[repo_path][cpv] = metadata + except CacheError, ce: + self.returncode |= 1 writemsg_level( "%s writing target: %s\n" % (cpv, ce), level=logging.ERROR, noiselevel=-1) @@ -195,7 +225,8 @@ def egencache_main(args): gen_cache = GenCache(portdb, cp_iter=cp_iter, max_jobs=options.jobs, - max_load=options.load_average) + max_load=options.load_average, + rsync=options.rsync) gen_cache.run() return gen_cache.returncode diff --git a/pym/portage/cache/cache_errors.py b/pym/portage/cache/cache_errors.py index f63e5994b..e1e8eead0 100644 --- a/pym/portage/cache/cache_errors.py +++ b/pym/portage/cache/cache_errors.py @@ -39,3 +39,25 @@ class ReadOnlyRestriction(CacheError): self.info = info def __str__(self): return "cache is non-modifiable"+str(self.info) + +class StatCollision(CacheError): + """ + If the content of a cache entry changes and neither the file mtime nor + size changes, it will prevent rsync from detecting changes. Cache backends + may raise this exception from _setitem() if they detect this type of stat + collision. See bug #139134. 
+ """ + def __init__(self, key, filename, mtime, size): + self.key = key + self.filename = filename + self.mtime = mtime + self.size = size + + def __str__(self): + return "%s has stat collision with size %s and mtime %s" % \ + (self.key, self.size, self.mtime) + + def __repr__(self): + return "portage.cache.cache_errors.StatCollision(%s)" % \ + (', '.join((repr(self.key), repr(self.filename), + repr(self.mtime), repr(self.size))),) diff --git a/pym/portage/cache/metadata.py b/pym/portage/cache/metadata.py index 5222223c6..a8be01095 100644 --- a/pym/portage/cache/metadata.py +++ b/pym/portage/cache/metadata.py @@ -3,7 +3,7 @@ # License: GPL2 # $Id$ -import errno, os, re +import errno, os, re, sys from portage.cache import cache_errors, flat_hash import portage.eclass_cache from portage.cache.template import reconstruct_eclasses @@ -30,6 +30,7 @@ class database(flat_hash.database): super(database, self).__init__(location, *args, **config) self.location = os.path.join(loc, "metadata","cache") self.ec = portage.eclass_cache.cache(loc) + self.raise_stat_collision = False def _parse_data(self, data, cpv): _hashed_re_match = self._hashed_re.match @@ -73,31 +74,63 @@ class database(flat_hash.database): values = ProtectedDict(values) values["INHERITED"] = ' '.join(sorted(values["_eclasses_"])) + new_content = [] + for k in self.auxdbkey_order: + new_content.append(unicode(values.get(k, ''), errors='replace')) + new_content.append(u'\n') + for i in xrange(magic_line_count - len(self.auxdbkey_order)): + new_content.append(u'\n') + new_content = u''.join(new_content) + new_content = new_content.encode( + sys.getdefaultencoding(), 'backslashreplace') + + new_fp = os.path.join(self.location, cpv) + try: + f = open(new_fp, 'rb') + except EnvironmentError: + pass + else: + try: + try: + existing_st = os.fstat(f.fileno()) + existing_content = f.read() + finally: + f.close() + except EnvironmentError: + pass + else: + existing_mtime = long(existing_st.st_mtime) + if 
values['_mtime_'] == existing_mtime and \ + existing_content == new_content: + return + + if self.raise_stat_collision and \ + values['_mtime_'] == existing_mtime and \ + len(new_content) == existing_st.st_size: + raise cache_errors.StatCollision(cpv, new_fp, + existing_mtime, existing_st.st_size) + s = cpv.rfind("/") fp = os.path.join(self.location,cpv[:s], ".update.%i.%s" % (os.getpid(), cpv[s+1:])) try: - myf = open(fp, "w") + myf = open(fp, 'wb') except EnvironmentError, e: if errno.ENOENT == e.errno: try: self._ensure_dirs(cpv) - myf = open(fp, "w") + myf = open(fp, 'wb') except EnvironmentError, e: raise cache_errors.CacheCorruption(cpv, e) else: raise cache_errors.CacheCorruption(cpv, e) try: - for k in self.auxdbkey_order: - myf.write(values.get(k, "") + "\n") - for i in xrange(magic_line_count - len(self.auxdbkey_order)): - myf.write("\n") + myf.write(new_content) finally: myf.close() self._ensure_access(fp, mtime=values["_mtime_"]) - new_fp = os.path.join(self.location, cpv) try: os.rename(fp, new_fp) except EnvironmentError, e: -- 2.26.2