From: Zac Medico Date: Fri, 14 Aug 2009 22:11:34 +0000 (-0000) Subject: Inside dblink.treewalk(), handle filenames with incorrect encoding like we X-Git-Tag: v2.2_rc39~71 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=226a52844f6967720a1e4509aceaeb76557593ad;p=portage.git Inside dblink.treewalk(), handle filenames with incorrect encoding like we do after src_install. The check needs to be repeated here for binary packages (it's inexpensive since we call os.walk() here anyway). svn path=/main/trunk/; revision=14050 --- diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py index f5cc1f41a..35addedf6 100644 --- a/pym/portage/__init__.py +++ b/pym/portage/__init__.py @@ -5623,34 +5623,10 @@ def _post_src_install_uid_fix(mysettings, out=None): break if unicode_errors: - from textwrap import wrap from portage.elog.messages import eerror - def _eerror(l): + for l in _merge_unicode_error(unicode_errors): eerror(l, phase='install', key=mysettings.mycpv, out=out) - msg = "This package installs one or more file names containing " + \ - "characters that do not match your current locale " + \ - "settings. The current setting for filesystem encoding is '%s'." \ - % _merge_encoding - for l in wrap(msg, 72): - _eerror(l) - - _eerror("") - for x in unicode_errors: - _eerror("\t" + x) - _eerror("") - - if _merge_encoding.lower().replace('_', '').replace('-', '') != 'utf8': - msg = "For best results, UTF-8 encoding is recommended. See " + \ - "the Gentoo Linux Localization Guide for instructions " + \ - "about how to configure your locale for UTF-8 encoding:" - for l in wrap(msg, 72): - _eerror(l) - _eerror("") - _eerror("\t" + \ - "http://www.gentoo.org/doc/en/guide-localization.xml") - _eerror("") - open(_unicode_encode(os.path.join(mysettings['PORTAGE_BUILDDIR'], 'build-info', 'SIZE')), 'w').write(str(size) + '\n') @@ -5660,6 +5636,33 @@ def _post_src_install_uid_fix(mysettings, out=None): (_shell_quote(mysettings["D"]), _shell_quote(os.path.join(mysettings["T"], "bsdflags.mtree")))) +def _merge_unicode_error(errors): + from textwrap import wrap + lines = [] + + msg = "This package installs one or more file names containing " + \ + "characters that do not match your current locale " + \ + "settings. The current setting for filesystem encoding is '%s'." \ + % _merge_encoding + lines.extend(wrap(msg, 72)) + + lines.append("") + errors.sort() + lines.extend("\t" + x for x in errors) + lines.append("") + + if _merge_encoding.lower().replace('_', '').replace('-', '') != 'utf8': + msg = "For best results, UTF-8 encoding is recommended. See " + \ + "the Gentoo Linux Localization Guide for instructions " + \ + "about how to configure your locale for UTF-8 encoding:" + lines.extend(wrap(msg, 72)) + lines.append("") + lines.append("\t" + \ + "http://www.gentoo.org/doc/en/guide-localization.xml") + lines.append("") + + return lines + def _post_pkg_preinst_cmd(mysettings): """ Post phase logic and tasks that have been factored out of diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py index 10e48fc19..b42e1c6ea 100644 --- a/pym/portage/dbapi/vartree.py +++ b/pym/portage/dbapi/vartree.py @@ -40,6 +40,7 @@ from portage import os from portage import _merge_encoding from portage import _os_merge from portage import _selinux_merge +from portage import _unicode_decode from portage import _unicode_encode from portage.cache.mappings import slot_dict_class @@ -3292,30 +3293,81 @@ class dblink(object): max_dblnk = dblnk self._installed_instance = max_dblnk - myfilelist = [] - mylinklist = [] - paths_with_newlines = [] - srcroot_len = len(srcroot) - def onerror(e): - raise - for parent, dirs, files in os.walk(srcroot, onerror=onerror): - parent = portage._unicode_decode(parent, encoding=_merge_encoding) - for f in files: - f = portage._unicode_decode(f, encoding=_merge_encoding) - file_path = os.path.join(parent, f) - relative_path = file_path[srcroot_len:] - - if "\n" in relative_path: - paths_with_newlines.append(relative_path) - - file_mode = os.lstat(file_path).st_mode - if stat.S_ISREG(file_mode): - myfilelist.append(relative_path) - elif stat.S_ISLNK(file_mode): - # Note: os.walk puts symlinks to directories in the "dirs" - # list and it does not traverse them since that could lead - # to an infinite recursion loop. - mylinklist.append(relative_path) + # We check for unicode encoding issues after src_install. However, + # the check must be repeated here for binary packages (it's + # inexpensive since we call os.walk() here anyway). + unicode_errors = [] + + while True: + + unicode_error = False + + myfilelist = [] + mylinklist = [] + paths_with_newlines = [] + srcroot_len = len(srcroot) + def onerror(e): + raise + for parent, dirs, files in os.walk(srcroot, onerror=onerror): + try: + parent = _unicode_decode(parent, + encoding=_merge_encoding, errors='strict') + except UnicodeDecodeError: + new_parent = _unicode_decode(parent, + encoding=_merge_encoding, errors='replace') + new_parent = _unicode_encode(new_parent, + encoding=_merge_encoding, errors='backslashreplace') + new_parent = _unicode_decode(new_parent, + encoding=_merge_encoding, errors='replace') + os.rename(parent, new_parent) + unicode_error = True + unicode_errors.append(new_parent[srcroot_len:]) + break + + for fname in files: + try: + fname = _unicode_decode(fname, + encoding=_merge_encoding, errors='strict') + except UnicodeDecodeError: + fpath = portage._os.path.join( + parent.encode(_merge_encoding), fname) + new_fname = _unicode_decode(fname, + encoding=_merge_encoding, errors='replace') + new_fname = _unicode_encode(new_fname, + encoding=_merge_encoding, errors='backslashreplace') + new_fname = _unicode_decode(new_fname, + encoding=_merge_encoding, errors='replace') + new_fpath = os.path.join(parent, new_fname) + os.rename(fpath, new_fpath) + unicode_error = True + unicode_errors.append(new_fpath[srcroot_len:]) + fname = new_fname + fpath = new_fpath + else: + fpath = os.path.join(parent, fname) + + relative_path = fpath[srcroot_len:] + + if "\n" in relative_path: + paths_with_newlines.append(relative_path) + + file_mode = os.lstat(fpath).st_mode + if stat.S_ISREG(file_mode): + myfilelist.append(relative_path) + elif stat.S_ISLNK(file_mode): + # Note: os.walk puts symlinks to directories in the "dirs" + # list and it does not traverse them since that could lead + # to an infinite recursion loop. + mylinklist.append(relative_path) + + if unicode_error: + break + + if not unicode_error: + break + + if unicode_errors: + eerror(portage._merge_unicode_error(unicode_errors)) if paths_with_newlines: msg = []