Fix unicode support for compatibility with the new wrapped os module, and
author: Zac Medico <zmedico@gentoo.org>
Tue, 11 Aug 2009 01:38:07 +0000 (01:38 -0000)
committer: Zac Medico <zmedico@gentoo.org>
Tue, 11 Aug 2009 01:38:07 +0000 (01:38 -0000)
use _unicode_decode() and _unicode_encode() where appropriate.

svn path=/main/trunk/; revision=13990

pym/portage/__init__.py
pym/portage/dbapi/vartree.py

index 8d09333d696fde821387e5ca77ea65816361cf32..2d24e0508210382edfa0c917aa8eb97db8e537ee 100644 (file)
@@ -313,8 +313,6 @@ def cacheddir(my_original_path, ignorecvs, ignorelist, EmptyOnError, followSymli
                        raise portage.exception.PermissionDenied(mypath)
                ftype = []
                for x in list:
-                       if not isinstance(x, unicode):
-                               x = unicode(x, encoding='utf_8', errors='replace')
                        try:
                                if followSymlinks:
                                        pathstat = os.stat(mypath+"/"+x)
@@ -1588,13 +1586,8 @@ class config(object):
                                env = os.environ
 
                        # Avoid potential UnicodeDecodeError exceptions later.
-                       env_unicode = {}
-                       for k, v in env.iteritems():
-                               if not isinstance(k, unicode):
-                                       k = unicode(k, encoding='utf_8', errors='replace')
-                               if not isinstance(v, unicode):
-                                       v = unicode(v, encoding='utf_8', errors='replace')
-                               env_unicode[k] = v
+                       env_unicode = dict((_unicode_decode(k), _unicode_decode(v))
+                               for k, v in env.iteritems())
 
                        self.backupenv = env_unicode
 
@@ -3266,10 +3259,8 @@ class config(object):
                        raise ValueError("Invalid type being used as a value: '%s': '%s'" % (str(mykey),str(myvalue)))
 
                # Avoid potential UnicodeDecodeError exceptions later.
-               if not isinstance(mykey, unicode):
-                       mykey = unicode(mykey, encoding='utf_8', errors='replace')
-               if not isinstance(myvalue, unicode):
-                       myvalue = unicode(myvalue, encoding='utf_8', errors='replace')
+               mykey = _unicode_decode(mykey)
+               myvalue = _unicode_decode(myvalue)
 
                self.modifying()
                self.modifiedkeys.append(mykey)
@@ -4971,16 +4962,14 @@ def digestcheck(myfiles, mysettings, strict=0, justmanifest=0):
        """ epatch will just grab all the patches out of a directory, so we have to
        make sure there aren't any foreign files that it might grab."""
        filesdir = os.path.join(pkgdir, "files")
-       if isinstance(filesdir, unicode):
-               # Avoid UnicodeDecodeError raised from
-               # os.path.join when called by os.walk.
-               filesdir = filesdir.encode('utf_8', 'replace')
 
        for parent, dirs, files in os.walk(filesdir):
+               parent = _unicode_decode(parent)
                for d in dirs:
                        if d.startswith(".") or d == "CVS":
                                dirs.remove(d)
                for f in files:
+                       f = _unicode_decode(f)
                        if f.startswith("."):
                                continue
                        f = os.path.join(parent, f)[len(filesdir) + 1:]
@@ -5234,16 +5223,14 @@ def _post_src_install_uid_fix(mysettings):
                        (_shell_quote(mysettings["D"]),))
 
        destdir = mysettings["D"]
-       if isinstance(destdir, unicode):
-               # Avoid UnicodeDecodeError raised from
-               # os.path.join when called by os.walk.
-               destdir = destdir.encode('utf_8', 'replace')
 
        size = 0
        counted_inodes = set()
 
        for parent, dirs, files in os.walk(destdir):
+               parent = _unicode_decode(parent)
                for fname in chain(dirs, files):
+                       fname = _unicode_decode(fname)
                        fpath = os.path.join(parent, fname)
                        mystat = os.lstat(fpath)
                        if stat.S_ISREG(mystat.st_mode) and \
@@ -6188,8 +6175,6 @@ def doebuild(myebuild, mydo, myroot, mysettings, debug=0, listonly=0,
                                        if not mybytes[-1]:
                                                break
                                os.close(pr)
-                               mybytes = u''.join(unicode(chunk,
-                                       encoding='utf_8', errors='replace') for chunk in mybytes)
                                global auxdbkeys
                                for k, v in izip(auxdbkeys, mybytes.splitlines()):
                                        dbkey[k] = v
index f89d6793eb327765d79834e9b83d6d4fb2768c2c..fd838393822fd9b57f5fd14836f7499a2a3f02ce 100644 (file)
@@ -33,8 +33,10 @@ from portage.localization import _
 
 from portage import listdir, dep_expand, digraph, flatten, key_expand, \
        doebuild_environment, doebuild, env_update, prepare_build_dirs, \
-       abssymlink, movefile, _movefile, bsd_chflags, cpv_getkey, \
-       _unicode_module_wrapper
+       abssymlink, movefile, _movefile, bsd_chflags, cpv_getkey
+
+# This is a special version of the os module, wrapped for unicode support.
+from portage import os
 
 from portage.cache.mappings import slot_dict_class
 
@@ -44,8 +46,6 @@ import logging
 import sys
 from itertools import izip
 
-os = portage.os
-
 try:
        import cPickle as pickle
 except ImportError:
@@ -311,8 +311,7 @@ class LinkageMap(object):
                                raise CommandNotFound(args[0])
                        else:
                                for l in proc.stdout:
-                                       if not isinstance(l, unicode):
-                                               l = unicode(l, encoding='utf_8', errors='replace')
+                                       l = portage._unicode_decode(l)
                                        l = l[3:].rstrip("\n")
                                        if not l:
                                                continue
@@ -740,7 +739,7 @@ class vardbapi(dbapi):
                now has a categories property that is generated from the
                available packages.
                """
-               self.root = root[:]
+               self.root = portage._unicode_decode(root)
 
                #cache for category directory mtimes
                self.mtdircache = {}
@@ -904,8 +903,6 @@ class vardbapi(dbapi):
 
                returnme = []
                for x in dir_list:
-                       if not isinstance(x, unicode):
-                               x = unicode(x, encoding='utf_8', errors='replace')
                        if self._excluded_dirs.match(x) is not None:
                                continue
                        ps = pkgsplit(x)
@@ -937,9 +934,7 @@ class vardbapi(dbapi):
                else:
                        def listdir(p, **kwargs):
                                try:
-                                       return [isinstance(x, unicode) and x or \
-                                               unicode(x, encoding='utf_8', errors='replace') \
-                                               for x in os.listdir(p) \
+                                       return [x for x in os.listdir(p) \
                                                if os.path.isdir(os.path.join(p, x))]
                                except EnvironmentError, e:
                                        if e.errno == PermissionDenied.errno:
@@ -1183,11 +1178,9 @@ class vardbapi(dbapi):
                        cache_mtime, metadata = pkg_data
                        cache_valid = cache_mtime == mydir_mtime
                if cache_valid:
+                       # Migrate old metadata to unicode.
                        for k, v in metadata.iteritems():
-                               if not isinstance(v, unicode):
-                                       # Migrate old metadata to unicode.
-                                       metadata[k] = unicode(v,
-                                               encoding='utf_8', errors='replace')
+                               metadata[k] = portage._unicode_decode(v)
 
                        mydata.update(metadata)
                        pull_me.difference_update(mydata)
@@ -1363,16 +1356,12 @@ class vardbapi(dbapi):
                if not hasattr(pkg, "getcontents"):
                        pkg = self._dblink(pkg)
                root = self.root
-               if not isinstance(root, unicode):
-                       root = unicode(root, encoding='utf_8', errors='replace')
                root_len = len(root) - 1
                new_contents = pkg.getcontents().copy()
                removed = 0
 
                for filename in paths:
-                       if not isinstance(filename, unicode):
-                               filename = unicode(filename, 
-                                       encoding='utf_8', errors='replace')
+                       filename = portage._unicode_decode(filename)
                        filename = normalize_path(filename)
                        if relative_paths:
                                relative_filename = filename
@@ -1440,7 +1429,7 @@ class vardbapi(dbapi):
                        h = self._new_hash()
                        # Always use a constant utf_8 encoding here, since
                        # the "default" encoding can change.
-                       h.update(s.encode('utf_8', 'replace'))
+                       h.update(portage._unicode_encode(s))
                        h = h.hexdigest()
                        h = h[-self._hex_chars:]
                        h = int(h, 16)
@@ -2546,13 +2535,9 @@ class dblink(object):
                        if the file is not owned by this package.
                """
 
-               if not isinstance(filename, unicode):
-                       filename = unicode(filename,
-                               encoding='utf_8', errors='replace')
+               filename = portage._unicode_decode(filename)
 
-               if not isinstance(destroot, unicode):
-                       destroot = unicode(destroot,
-                               encoding='utf_8', errors='replace')
+               destroot = portage._unicode_decode(destroot)
 
                destfile = normalize_path(
                        os.path.join(destroot, filename.lstrip(os.path.sep)))
@@ -2736,8 +2721,7 @@ class dblink(object):
                new_contents = self.getcontents().copy()
                old_contents = self._installed_instance.getcontents()
                for f in sorted(preserve_paths):
-                       if not isinstance(f, unicode):
-                               f = unicode(f, encoding='utf_8', errors='replace')
+                       f = portage._unicode_decode(f)
                        f_abs = os.path.join(root, f.lstrip(os.sep))
                        contents_entry = old_contents.get(f_abs)
                        if contents_entry is None:
@@ -3161,6 +3145,11 @@ class dblink(object):
                not existing; we will merge these symlinks at a later time.
                """
 
+               srcroot = portage._unicode_decode(srcroot)
+               destroot = portage._unicode_decode(destroot)
+               inforoot = portage._unicode_decode(inforoot)
+               myebuild = portage._unicode_decode(myebuild)
+
                showMessage = self._display_merge
                scheduler = self._scheduler
 
@@ -3248,17 +3237,13 @@ class dblink(object):
                myfilelist = []
                mylinklist = []
                paths_with_newlines = []
-
-               if isinstance(srcroot, unicode):
-                       # Avoid UnicodeDecodeError raised from
-                       # os.path.join when called by os.walk.
-                       srcroot = srcroot.encode('utf_8', 'replace')
-
                srcroot_len = len(srcroot)
                def onerror(e):
                        raise
                for parent, dirs, files in os.walk(srcroot, onerror=onerror):
+                       parent = portage._unicode_decode(parent)
                        for f in files:
+                               f = portage._unicode_decode(f)
                                file_path = os.path.join(parent, f)
                                relative_path = file_path[srcroot_len:]
 
@@ -3738,7 +3723,8 @@ class dblink(object):
                writemsg = self._display_merge
                scheduler = self._scheduler
 
-               from os.path import sep, join
+               sep = portage.os.sep
+               join = portage.os.path.join
                srcroot = normalize_path(srcroot).rstrip(sep) + sep
                destroot = normalize_path(destroot).rstrip(sep) + sep
                
@@ -3761,9 +3747,6 @@ class dblink(object):
                        mydest = join(destroot, offset, x)
                        # myrealdest is mydest without the $ROOT prefix (makes a difference if ROOT!="/")
                        myrealdest = join(sep, offset, x)
-                       if not isinstance(myrealdest, unicode):
-                               myrealdest = unicode(myrealdest,
-                                       encoding='utf_8', errors='replace')
                        # stat file once, test using S_* macros many times (faster that way)
                        mystat = os.lstat(mysrc)
                        mymode = mystat[stat.ST_MODE]