From: Zac Medico Date: Sat, 3 Sep 2011 23:46:09 +0000 (-0700) Subject: Handle symlink content with bad encoding. X-Git-Tag: v2.2.0_alpha53~1 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=a62537685afbe492d4686cb6a636cbe556fe74d9;p=portage.git Handle symlink content with bad encoding. This will fix bug #381629. The case is tested in tests/emerge. During forced charset conversion, 'ascii' codec is forced, since otherwise we somehow end up with the wrongly encoded file names when running the test with Python 3.2 (which causes encoding problems for quickpkg when it calls tarfile.gettarinfo). --- diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py index 3a6d99f02..bafe13885 100644 --- a/pym/portage/dbapi/vartree.py +++ b/pym/portage/dbapi/vartree.py @@ -3304,7 +3304,7 @@ class dblink(object): new_parent = _unicode_decode(parent, encoding=_encodings['merge'], errors='replace') new_parent = _unicode_encode(new_parent, - encoding=_encodings['merge'], errors='backslashreplace') + encoding='ascii', errors='backslashreplace') new_parent = _unicode_decode(new_parent, encoding=_encodings['merge'], errors='replace') os.rename(parent, new_parent) @@ -3322,7 +3322,7 @@ class dblink(object): new_fname = _unicode_decode(fname, encoding=_encodings['merge'], errors='replace') new_fname = _unicode_encode(new_fname, - encoding=_encodings['merge'], errors='backslashreplace') + encoding='ascii', errors='backslashreplace') new_fname = _unicode_decode(new_fname, encoding=_encodings['merge'], errors='replace') new_fpath = os.path.join(parent, new_fname) @@ -3994,11 +3994,29 @@ class dblink(object): if stat.S_ISLNK(mymode): # we are merging a symbolic link + # The file name of mysrc and the actual file that it points to + # will have earlier been forcefully converted to the 'merge' + # encoding if necessary, but the content of the symbolic link + # may need to be forcefully converted here. + myto = _os.readlink(_unicode_encode(mysrc, + encoding=_encodings['merge'], errors='strict')) + try: + myto = _unicode_decode(myto, + encoding=_encodings['merge'], errors='strict') + except UnicodeDecodeError: + myto = _unicode_decode(myto, encoding=_encodings['merge'], + errors='replace') + myto = _unicode_encode(myto, encoding='ascii', + errors='backslashreplace') + myto = _unicode_decode(myto, encoding=_encodings['merge'], + errors='replace') + os.unlink(mysrc) + os.symlink(myto, mysrc) + myabsto = abssymlink(mysrc) if myabsto.startswith(srcroot): myabsto = myabsto[len(srcroot):] myabsto = myabsto.lstrip(sep) - myto = os.readlink(mysrc) if self.settings and self.settings["D"]: if myto.startswith(self.settings["D"]): myto = myto[len(self.settings["D"]):] @@ -4454,6 +4472,7 @@ def write_contents(contents, root, f): def tar_contents(contents, root, tar, protect=None, onProgress=None): os = _os_merge + encoding = _encodings['merge'] try: for x in contents: @@ -4473,6 +4492,7 @@ def tar_contents(contents, root, tar, protect=None, onProgress=None): pass else: os = portage.os + encoding = _encodings['fs'] root = normalize_path(root).rstrip(os.path.sep) + os.path.sep id_strings = {} @@ -4524,7 +4544,7 @@ def tar_contents(contents, root, tar, protect=None, onProgress=None): f.close() else: f = open(_unicode_encode(path, - encoding=object.__getattribute__(os, '_encoding'), + encoding=encoding, errors='strict'), 'rb') try: tar.addfile(tarinfo, f) diff --git a/pym/portage/package/ebuild/doebuild.py b/pym/portage/package/ebuild/doebuild.py index 7b3561e77..eef1d329f 100644 --- a/pym/portage/package/ebuild/doebuild.py +++ b/pym/portage/package/ebuild/doebuild.py @@ -1604,7 +1604,7 @@ def _post_src_install_uid_fix(mysettings, out): new_parent = _unicode_decode(parent, encoding=_encodings['merge'], errors='replace') new_parent = _unicode_encode(new_parent, - encoding=_encodings['merge'], errors='backslashreplace') + encoding='ascii', errors='backslashreplace') new_parent = _unicode_decode(new_parent, encoding=_encodings['merge'], errors='replace') os.rename(parent, new_parent) @@ -1622,7 +1622,7 @@ def _post_src_install_uid_fix(mysettings, out): new_fname = _unicode_decode(fname, encoding=_encodings['merge'], errors='replace') new_fname = _unicode_encode(new_fname, - encoding=_encodings['merge'], errors='backslashreplace') + encoding='ascii', errors='backslashreplace') new_fname = _unicode_decode(new_fname, encoding=_encodings['merge'], errors='replace') new_fpath = os.path.join(parent, new_fname) diff --git a/pym/portage/tests/emerge/test_simple.py b/pym/portage/tests/emerge/test_simple.py index 038192e01..cfb6cae55 100644 --- a/pym/portage/tests/emerge/test_simple.py +++ b/pym/portage/tests/emerge/test_simple.py @@ -23,8 +23,18 @@ src_install() { einfo "installing something..." # TODO: Add prefix support to shell code/helpers, so we # can use things like dodir and doins here. - mkdir -p "${ED}"/usr/lib/${P} - echo "blah blah blah" > "${ED}"/usr/lib/${P}/regular-file + mkdir -p "${ED}"/usr/lib/${P} || die + echo "blah blah blah" > "${ED}"/usr/lib/${P}/regular-file || die + ln -s regular-file "${ED}"/usr/lib/${P}/symlink || die + + # Test code for bug #381629, using a copyright symbol encoded with latin-1. + # We use $(printf "\\xa9") rather than $'\\xa9', since printf apparently + # works in any case, while $'\\xa9' transforms to \\xef\\xbf\\xbd under + # some conditions. TODO: Find out why it transforms to \\xef\\xbf\\xbd when + # running tests for Python 3.2 (even though it's bash that is ultimately + # responsible for performing the transformation). + echo "blah blah blah" > "${ED}"/usr/lib/${P}/latin-1-$(printf "\\xa9")-regular-file || die + ln -s latin-1-$(printf "\\xa9")-regular-file "${ED}"/usr/lib/${P}/latin-1-$(printf "\\xa9")-symlink || die } """