Bug #235642 - Create hardlinks when merging identical files. This works by using a
author Zac Medico <zmedico@gentoo.org>
Wed, 26 Nov 2008 22:31:23 +0000 (22:31 -0000)
committer Zac Medico <zmedico@gentoo.org>
Wed, 26 Nov 2008 22:31:23 +0000 (22:31 -0000)
tuple of (md5, st_size) as a key to a list of hardlink candidates. Multiple candidates
are used in case some happen to be merged to separate devices.

svn path=/main/trunk/; revision=12109

pym/portage/__init__.py
pym/portage/dbapi/vartree.py

index db5f0a0611122aea0b804a0131b9c0ff4f35bdd9..bedbbf3f9cdbcb892c36fc1d55d045ebf3724c2a 100644 (file)
@@ -5968,7 +5968,8 @@ def _movefile(src, dest, **kwargs):
                raise portage.exception.PortageException(
                        "mv '%s' '%s'" % (src, dest))
 
-def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
+def movefile(src, dest, newmtime=None, sstat=None, mysettings=None,
+               hardlink_candidates=None):
        """moves a file from src to dest, preserving all permissions and attributes; mtime will
        be preserved even when moving across filesystems.  Returns true on success and false on
        failure.  Move is atomic."""
@@ -6040,8 +6041,45 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
                        print "!!!",e
                        return None
 
+       hardlinked = False
+       # Identical files might be merged to multiple filesystems, in which
+       # case os.link() calls might fail for some paths, so try them all.
+       # For atomic replacement, first create the link as a temp file
+       # and then use os.rename() to replace the destination.
+       if hardlink_candidates is not None:
+               head, tail = os.path.split(dest)
+               hardlink_tmp = os.path.join(head, ".%s._portage_merge_.%s" % \
+                       (tail, os.getpid()))
+               try:
+                       os.unlink(hardlink_tmp)
+               except OSError, e:
+                       if e.errno != errno.ENOENT:
+                               writemsg("!!! Failed to remove hardlink temp file: %s\n" % \
+                                       (hardlink_tmp,), noiselevel=-1)
+                               writemsg("!!! %s\n" % (e,), noiselevel=-1)
+                               return None
+                       del e
+               for hardlink_src in hardlink_candidates:
+                       try:
+                               os.link(hardlink_src, hardlink_tmp)
+                       except OSError:
+                               continue
+                       else:
+                               try:
+                                       os.rename(hardlink_tmp, dest)
+                               except OSError, e:
+                                       writemsg("!!! Failed to rename %s to %s\n" % \
+                                               (hardlink_tmp, dest), noiselevel=-1)
+                                       writemsg("!!! %s\n" % (e,), noiselevel=-1)
+                                       return None
+                               hardlinked = True
+                               break
+
        renamefailed=1
-       if sstat[stat.ST_DEV]==dstat[stat.ST_DEV] or selinux_enabled:
+       if hardlinked:
+               renamefailed = False
+       if not hardlinked and \
+               (selinux_enabled or sstat[stat.ST_DEV] == dstat[stat.ST_DEV]):
                try:
                        if selinux_enabled:
                                ret=selinux.secure_rename(src,dest)
@@ -6102,11 +6140,14 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None):
                        return None
 
        try:
-               if newmtime is not None:
-                       os.utime(dest, (newmtime, newmtime))
+               if hardlinked:
+                       newmtime = long(os.stat(dest).st_mtime)
                else:
-                       os.utime(dest, (sstat.st_atime, sstat.st_mtime))
-                       newmtime = long(sstat.st_mtime)
+                       if newmtime is not None:
+                               os.utime(dest, (newmtime, newmtime))
+                       else:
+                               os.utime(dest, (sstat.st_atime, sstat.st_mtime))
+                               newmtime = long(sstat.st_mtime)
        except OSError:
                # The utime can fail here with EPERM even though the move succeeded.
                # Instead of failing, use stat to return the mtime if possible.
index e78d32a6f8d32fef69b51ca81399b443a5194db3..90e7a8693db7908ed5703165d5f553109c72cd2a 100644 (file)
@@ -1756,6 +1756,7 @@ class dblink(object):
                self._contents_inodes = None
                self._contents_basenames = None
                self._linkmap_broken = False
+               self._md5_merge_map = {}
 
        def lockdb(self):
                if self._lock_vdb:
@@ -3366,6 +3367,7 @@ class dblink(object):
                        if self.mergeme(srcroot, destroot, outfile, None,
                                secondhand, cfgfiledict, mymtime):
                                return 1
+               self._md5_merge_map.clear()
 
                #restore umask
                os.umask(prevmask)
@@ -3767,9 +3769,17 @@ class dblink(object):
                                # whether config protection or not, we merge the new file the
                                # same way.  Unless moveme=0 (blocking directory)
                                if moveme:
-                                       mymtime = movefile(mysrc, mydest, newmtime=thismtime, sstat=mystat, mysettings=self.settings)
+                                       hardlink_key = (mymd5, mystat.st_size)
+                                       hardlink_candidates = self._md5_merge_map.get(hardlink_key)
+                                       if hardlink_candidates is None:
+                                               hardlink_candidates = []
+                                               self._md5_merge_map[hardlink_key] = hardlink_candidates
+                                       mymtime = movefile(mysrc, mydest, newmtime=thismtime,
+                                               sstat=mystat, mysettings=self.settings,
+                                               hardlink_candidates=hardlink_candidates)
                                        if mymtime is None:
                                                return 1
+                                       hardlink_candidates.append(mydest)
                                        zing = ">>>"
 
                                if mymtime != None: