From: Zac Medico Date: Wed, 26 Nov 2008 22:31:23 +0000 (-0000) Subject: Bug #235642 - Create hardlinks when merging identical files. This works by using a X-Git-Tag: v2.2_rc17~24 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=2c72bb29c7ba405e2deef81248a9f14ea049344d;p=portage.git Bug #235642 - Create hardlinks when merging identical files. This works by using a tuple of (md5, st_size) as a key to a list of hardlink candidates. Multiple candidates are used in case some happen to be merged to separate devices. svn path=/main/trunk/; revision=12109 --- diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py index db5f0a061..bedbbf3f9 100644 --- a/pym/portage/__init__.py +++ b/pym/portage/__init__.py @@ -5968,7 +5968,8 @@ def _movefile(src, dest, **kwargs): raise portage.exception.PortageException( "mv '%s' '%s'" % (src, dest)) -def movefile(src,dest,newmtime=None,sstat=None,mysettings=None): +def movefile(src, dest, newmtime=None, sstat=None, mysettings=None, + hardlink_candidates=None): """moves a file from src to dest, preserving all permissions and attributes; mtime will be preserved even when moving across filesystems. Returns true on success and false on failure. Move is atomic.""" @@ -6040,8 +6041,45 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None): print "!!!",e return None + hardlinked = False + # Identical files might be merged to multiple filesystems, in which + # case os.link() calls might fail for some paths, so try them all. + # For atomic replacement, first create the link as a temp file + # and then use os.rename() to replace the destination. + if hardlink_candidates is not None: + head, tail = os.path.split(dest) + hardlink_tmp = os.path.join(head, ".%s._portage_merge_.%s" % \ + (tail, os.getpid())) + try: + os.unlink(hardlink_tmp) + except OSError, e: + if e.errno != errno.ENOENT: + writemsg("!!! Failed to remove hardlink temp file: %s\n" % \ + (hardlink_tmp,), noiselevel=-1) + writemsg("!!! 
%s\n" % (e,), noiselevel=-1) + return None + del e + for hardlink_src in hardlink_candidates: + try: + os.link(hardlink_src, hardlink_tmp) + except OSError: + continue + else: + try: + os.rename(hardlink_tmp, dest) + except OSError, e: + writemsg("!!! Failed to rename %s to %s\n" % \ + (hardlink_tmp, dest), noiselevel=-1) + writemsg("!!! %s\n" % (e,), noiselevel=-1) + return None + hardlinked = True + break + renamefailed=1 - if sstat[stat.ST_DEV]==dstat[stat.ST_DEV] or selinux_enabled: + if hardlinked: + renamefailed = False + if not hardlinked and \ + (selinux_enabled or sstat[stat.ST_DEV] == dstat[stat.ST_DEV]): try: if selinux_enabled: ret=selinux.secure_rename(src,dest) @@ -6102,11 +6140,14 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None): return None try: - if newmtime is not None: - os.utime(dest, (newmtime, newmtime)) + if hardlinked: + newmtime = long(os.stat(dest).st_mtime) else: - os.utime(dest, (sstat.st_atime, sstat.st_mtime)) - newmtime = long(sstat.st_mtime) + if newmtime is not None: + os.utime(dest, (newmtime, newmtime)) + else: + os.utime(dest, (sstat.st_atime, sstat.st_mtime)) + newmtime = long(sstat.st_mtime) except OSError: # The utime can fail here with EPERM even though the move succeeded. # Instead of failing, use stat to return the mtime if possible. 
diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py index e78d32a6f..90e7a8693 100644 --- a/pym/portage/dbapi/vartree.py +++ b/pym/portage/dbapi/vartree.py @@ -1756,6 +1756,7 @@ class dblink(object): self._contents_inodes = None self._contents_basenames = None self._linkmap_broken = False + self._md5_merge_map = {} def lockdb(self): if self._lock_vdb: @@ -3366,6 +3367,7 @@ class dblink(object): if self.mergeme(srcroot, destroot, outfile, None, secondhand, cfgfiledict, mymtime): return 1 + self._md5_merge_map.clear() #restore umask os.umask(prevmask) @@ -3767,9 +3769,17 @@ class dblink(object): # whether config protection or not, we merge the new file the # same way. Unless moveme=0 (blocking directory) if moveme: - mymtime = movefile(mysrc, mydest, newmtime=thismtime, sstat=mystat, mysettings=self.settings) + hardlink_key = (mymd5, mystat.st_size) + hardlink_candidates = self._md5_merge_map.get(hardlink_key) + if hardlink_candidates is None: + hardlink_candidates = [] + self._md5_merge_map[hardlink_key] = hardlink_candidates + mymtime = movefile(mysrc, mydest, newmtime=thismtime, + sstat=mystat, mysettings=self.settings, + hardlink_candidates=hardlink_candidates) if mymtime is None: return 1 + hardlink_candidates.append(mydest) zing = ">>>" if mymtime != None: