Handle symlink content with bad encoding.
author Zac Medico <zmedico@gentoo.org>
Sat, 3 Sep 2011 23:46:09 +0000 (16:46 -0700)
committer Zac Medico <zmedico@gentoo.org>
Sat, 3 Sep 2011 23:46:09 +0000 (16:46 -0700)
This fixes bug #381629. The case is tested in tests/emerge. During
forced charset conversion, the 'ascii' codec is now used, since otherwise
we somehow end up with wrongly encoded file names when running the
test with Python 3.2 (which causes encoding problems for quickpkg when
it calls tarfile.gettarinfo).

pym/portage/dbapi/vartree.py
pym/portage/package/ebuild/doebuild.py
pym/portage/tests/emerge/test_simple.py

index 3a6d99f026193923eb54caa041570c9a7f12b24f..bafe13885d30e7c14b09801dd1dd442133f12638 100644 (file)
@@ -3304,7 +3304,7 @@ class dblink(object):
                                        new_parent = _unicode_decode(parent,
                                                encoding=_encodings['merge'], errors='replace')
                                        new_parent = _unicode_encode(new_parent,
-                                               encoding=_encodings['merge'], errors='backslashreplace')
+                                               encoding='ascii', errors='backslashreplace')
                                        new_parent = _unicode_decode(new_parent,
                                                encoding=_encodings['merge'], errors='replace')
                                        os.rename(parent, new_parent)
@@ -3322,7 +3322,7 @@ class dblink(object):
                                                new_fname = _unicode_decode(fname,
                                                        encoding=_encodings['merge'], errors='replace')
                                                new_fname = _unicode_encode(new_fname,
-                                                       encoding=_encodings['merge'], errors='backslashreplace')
+                                                       encoding='ascii', errors='backslashreplace')
                                                new_fname = _unicode_decode(new_fname,
                                                        encoding=_encodings['merge'], errors='replace')
                                                new_fpath = os.path.join(parent, new_fname)
@@ -3994,11 +3994,29 @@ class dblink(object):
 
                        if stat.S_ISLNK(mymode):
                                # we are merging a symbolic link
+                               # The file name of mysrc and the actual file that it points to
+                               # will have earlier been forcefully converted to the 'merge'
+                               # encoding if necessary, but the content of the symbolic link
+                               # may need to be forcefully converted here.
+                               myto = _os.readlink(_unicode_encode(mysrc,
+                                       encoding=_encodings['merge'], errors='strict'))
+                               try:
+                                       myto = _unicode_decode(myto,
+                                               encoding=_encodings['merge'], errors='strict')
+                               except UnicodeDecodeError:
+                                       myto = _unicode_decode(myto, encoding=_encodings['merge'],
+                                               errors='replace')
+                                       myto = _unicode_encode(myto, encoding='ascii',
+                                               errors='backslashreplace')
+                                       myto = _unicode_decode(myto, encoding=_encodings['merge'],
+                                               errors='replace')
+                                       os.unlink(mysrc)
+                                       os.symlink(myto, mysrc)
+
                                myabsto = abssymlink(mysrc)
                                if myabsto.startswith(srcroot):
                                        myabsto = myabsto[len(srcroot):]
                                myabsto = myabsto.lstrip(sep)
-                               myto = os.readlink(mysrc)
                                if self.settings and self.settings["D"]:
                                        if myto.startswith(self.settings["D"]):
                                                myto = myto[len(self.settings["D"]):]
@@ -4454,6 +4472,7 @@ def write_contents(contents, root, f):
 
 def tar_contents(contents, root, tar, protect=None, onProgress=None):
        os = _os_merge
+       encoding = _encodings['merge']
 
        try:
                for x in contents:
@@ -4473,6 +4492,7 @@ def tar_contents(contents, root, tar, protect=None, onProgress=None):
                        pass
                else:
                        os = portage.os
+                       encoding = _encodings['fs']
 
        root = normalize_path(root).rstrip(os.path.sep) + os.path.sep
        id_strings = {}
@@ -4524,7 +4544,7 @@ def tar_contents(contents, root, tar, protect=None, onProgress=None):
                                f.close()
                        else:
                                f = open(_unicode_encode(path,
-                                       encoding=object.__getattribute__(os, '_encoding'),
+                                       encoding=encoding,
                                        errors='strict'), 'rb')
                                try:
                                        tar.addfile(tarinfo, f)
index 7b3561e778fcf99f61631ea6e2b391086e0c2c88..eef1d329fd1bd340762c14e58a90cf41d2fd6517 100644 (file)
@@ -1604,7 +1604,7 @@ def _post_src_install_uid_fix(mysettings, out):
                                new_parent = _unicode_decode(parent,
                                        encoding=_encodings['merge'], errors='replace')
                                new_parent = _unicode_encode(new_parent,
-                                       encoding=_encodings['merge'], errors='backslashreplace')
+                                       encoding='ascii', errors='backslashreplace')
                                new_parent = _unicode_decode(new_parent,
                                        encoding=_encodings['merge'], errors='replace')
                                os.rename(parent, new_parent)
@@ -1622,7 +1622,7 @@ def _post_src_install_uid_fix(mysettings, out):
                                        new_fname = _unicode_decode(fname,
                                                encoding=_encodings['merge'], errors='replace')
                                        new_fname = _unicode_encode(new_fname,
-                                               encoding=_encodings['merge'], errors='backslashreplace')
+                                               encoding='ascii', errors='backslashreplace')
                                        new_fname = _unicode_decode(new_fname,
                                                encoding=_encodings['merge'], errors='replace')
                                        new_fpath = os.path.join(parent, new_fname)
index 038192e0170dc593a08660eab8d8ac86721cbfb0..cfb6cae552843d823dde72280686ddb6e5e4b3fb 100644 (file)
@@ -23,8 +23,18 @@ src_install() {
        einfo "installing something..."
        # TODO: Add prefix support to shell code/helpers, so we
        #       can use things like dodir and doins here.
-       mkdir -p "${ED}"/usr/lib/${P}
-       echo "blah blah blah" > "${ED}"/usr/lib/${P}/regular-file
+       mkdir -p "${ED}"/usr/lib/${P} || die
+       echo "blah blah blah" > "${ED}"/usr/lib/${P}/regular-file || die
+       ln -s regular-file "${ED}"/usr/lib/${P}/symlink || die
+
+       # Test code for bug #381629, using a copyright symbol encoded with latin-1.
+       # We use $(printf "\\xa9") rather than $'\\xa9', since printf apparently
+       # works in any case, while $'\\xa9' transforms to \\xef\\xbf\\xbd under
+       # some conditions. TODO: Find out why it transforms to \\xef\\xbf\\xbd when
+       # running tests for Python 3.2 (even though it's bash that is ultimately
+       # responsible for performing the transformation).
+       echo "blah blah blah" > "${ED}"/usr/lib/${P}/latin-1-$(printf "\\xa9")-regular-file || die
+       ln -s latin-1-$(printf "\\xa9")-regular-file "${ED}"/usr/lib/${P}/latin-1-$(printf "\\xa9")-symlink || die
 }
 """