Inside dblink.treewalk(), handle filenames with incorrect encoding like we
author Zac Medico <zmedico@gentoo.org>
Fri, 14 Aug 2009 22:11:34 +0000 (22:11 -0000)
committer Zac Medico <zmedico@gentoo.org>
Fri, 14 Aug 2009 22:11:34 +0000 (22:11 -0000)
do after src_install. The check needs to be repeated here for binary
packages (it's inexpensive since we call os.walk() here anyway).

svn path=/main/trunk/; revision=14050

pym/portage/__init__.py
pym/portage/dbapi/vartree.py

index f5cc1f41a6753673ecbe1dd55cad4c6fad592d9d..35addedf6887ba0952343b0c6ea14ee57048f21f 100644 (file)
@@ -5623,34 +5623,10 @@ def _post_src_install_uid_fix(mysettings, out=None):
                        break
 
        if unicode_errors:
-               from textwrap import wrap
                from portage.elog.messages import eerror
-               def _eerror(l):
+               for l in _merge_unicode_error(unicode_errors):
                        eerror(l, phase='install', key=mysettings.mycpv, out=out)
 
-               msg = "This package installs one or more file names containing " + \
-                       "characters that do not match your current locale " + \
-                       "settings. The current setting for filesystem encoding is '%s'." \
-                       % _merge_encoding
-               for l in wrap(msg, 72):
-                       _eerror(l)
-
-               _eerror("")
-               for x in unicode_errors:
-                       _eerror("\t" + x)
-               _eerror("")
-
-               if _merge_encoding.lower().replace('_', '').replace('-', '') != 'utf8':
-                       msg = "For best results, UTF-8 encoding is recommended. See " + \
-                               "the Gentoo Linux Localization Guide for instructions " + \
-                               "about how to configure your locale for UTF-8 encoding:"
-                       for l in wrap(msg, 72):
-                               _eerror(l)
-                       _eerror("")
-                       _eerror("\t" + \
-                               "http://www.gentoo.org/doc/en/guide-localization.xml")
-                       _eerror("")
-
        open(_unicode_encode(os.path.join(mysettings['PORTAGE_BUILDDIR'],
                'build-info', 'SIZE')), 'w').write(str(size) + '\n')
 
@@ -5660,6 +5636,33 @@ def _post_src_install_uid_fix(mysettings, out=None):
                        (_shell_quote(mysettings["D"]),
                        _shell_quote(os.path.join(mysettings["T"], "bsdflags.mtree"))))
 
+def _merge_unicode_error(errors):
+       """
+       Build the message lines that report file names which do not match
+       the filesystem encoding (_merge_encoding).
+
+       @param errors: iterable of offending path strings; it is not modified
+       @return: list of message lines, with the paths listed in sorted order
+       """
+       from textwrap import wrap
+       msg = "This package installs one or more file names containing " + \
+               "characters that do not match your current locale " + \
+               "settings. The current setting for filesystem encoding is '%s'." \
+               % _merge_encoding
+       lines = wrap(msg, 72)
+       lines.append("")
+       # sorted() works on a copy, so the caller's list keeps its own order.
+       lines.extend("\t" + x for x in sorted(errors))
+       lines.append("")
+
+       if _merge_encoding.lower().replace('_', '').replace('-', '') != 'utf8':
+               msg = "For best results, UTF-8 encoding is recommended. See " + \
+                       "the Gentoo Linux Localization Guide for instructions " + \
+                       "about how to configure your locale for UTF-8 encoding:"
+               lines.extend(wrap(msg, 72))
+               lines.extend(["", "\thttp://www.gentoo.org/doc/en/guide-localization.xml", ""])
+       return lines
+
 def _post_pkg_preinst_cmd(mysettings):
        """
        Post phase logic and tasks that have been factored out of
index 10e48fc19e49b87a3506c5fef0d70171cbb7930f..b42e1c6eabf157805b39af000ffc36438a9df473 100644 (file)
@@ -40,6 +40,7 @@ from portage import os
 from portage import _merge_encoding
 from portage import _os_merge
 from portage import _selinux_merge
+from portage import _unicode_decode
 from portage import _unicode_encode
 
 from portage.cache.mappings import slot_dict_class
@@ -3292,30 +3293,81 @@ class dblink(object):
                                        max_dblnk = dblnk
                        self._installed_instance = max_dblnk
 
-               myfilelist = []
-               mylinklist = []
-               paths_with_newlines = []
-               srcroot_len = len(srcroot)
-               def onerror(e):
-                       raise
-               for parent, dirs, files in os.walk(srcroot, onerror=onerror):
-                       parent = portage._unicode_decode(parent, encoding=_merge_encoding)
-                       for f in files:
-                               f = portage._unicode_decode(f, encoding=_merge_encoding)
-                               file_path = os.path.join(parent, f)
-                               relative_path = file_path[srcroot_len:]
-
-                               if "\n" in relative_path:
-                                       paths_with_newlines.append(relative_path)
-
-                               file_mode = os.lstat(file_path).st_mode
-                               if stat.S_ISREG(file_mode):
-                                       myfilelist.append(relative_path)
-                               elif stat.S_ISLNK(file_mode):
-                                       # Note: os.walk puts symlinks to directories in the "dirs"
-                                       # list and it does not traverse them since that could lead
-                                       # to an infinite recursion loop.
-                                       mylinklist.append(relative_path)
+               # We check for unicode encoding issues after src_install. However,
+               # the check must be repeated here for binary packages (it's
+               # inexpensive since we call os.walk() here anyway).
+               unicode_errors = []
+
+               while True:
+
+                       unicode_error = False
+
+                       myfilelist = []
+                       mylinklist = []
+                       paths_with_newlines = []
+                       srcroot_len = len(srcroot)
+                       def onerror(e):
+                               raise
+                       for parent, dirs, files in os.walk(srcroot, onerror=onerror):
+                               try:
+                                       parent = _unicode_decode(parent,
+                                               encoding=_merge_encoding, errors='strict')
+                               except UnicodeDecodeError:
+                                       new_parent = _unicode_decode(parent,
+                                               encoding=_merge_encoding, errors='replace')
+                                       new_parent = _unicode_encode(new_parent,
+                                               encoding=_merge_encoding, errors='backslashreplace')
+                                       new_parent = _unicode_decode(new_parent,
+                                               encoding=_merge_encoding, errors='replace')
+                                       os.rename(parent, new_parent)
+                                       unicode_error = True
+                                       unicode_errors.append(new_parent[srcroot_len:])
+                                       break
+
+                               for fname in files:
+                                       try:
+                                               fname = _unicode_decode(fname,
+                                                       encoding=_merge_encoding, errors='strict')
+                                       except UnicodeDecodeError:
+                                               fpath = portage._os.path.join(
+                                                       parent.encode(_merge_encoding), fname)
+                                               new_fname = _unicode_decode(fname,
+                                                       encoding=_merge_encoding, errors='replace')
+                                               new_fname = _unicode_encode(new_fname,
+                                                       encoding=_merge_encoding, errors='backslashreplace')
+                                               new_fname = _unicode_decode(new_fname,
+                                                       encoding=_merge_encoding, errors='replace')
+                                               new_fpath = os.path.join(parent, new_fname)
+                                               os.rename(fpath, new_fpath)
+                                               unicode_error = True
+                                               unicode_errors.append(new_fpath[srcroot_len:])
+                                               fname = new_fname
+                                               fpath = new_fpath
+                                       else:
+                                               fpath = os.path.join(parent, fname)
+
+                                       relative_path = fpath[srcroot_len:]
+
+                                       if "\n" in relative_path:
+                                               paths_with_newlines.append(relative_path)
+
+                                       file_mode = os.lstat(fpath).st_mode
+                                       if stat.S_ISREG(file_mode):
+                                               myfilelist.append(relative_path)
+                                       elif stat.S_ISLNK(file_mode):
+                                               # Note: os.walk puts symlinks to directories in the "dirs"
+                                               # list and it does not traverse them since that could lead
+                                               # to an infinite recursion loop.
+                                               mylinklist.append(relative_path)
+
+                               if unicode_error:
+                                       break
+
+                       if not unicode_error:
+                               break
+
+               if unicode_errors:
+                       eerror(portage._merge_unicode_error(unicode_errors))
 
                if paths_with_newlines:
                        msg = []