Always pass encoding='utf_8' to codecs.open(), since otherwise it can
author Zac Medico <zmedico@gentoo.org>
Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)
committer Zac Medico <zmedico@gentoo.org>
Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)
return non-unicode strings (at least in some cases, observed with
python-2.6.2). Don't use unicode in portage.util.getconfig() for now,
since shlex doesn't seem to support it (spurious \0 characters). If we
use unicode for config variables, it breaks shlex.split() calls on those
variables due to the same issue (spurious \0 characters).

svn path=/main/trunk/; revision=13845

bin/repoman
pym/portage/__init__.py
pym/portage/env/loaders.py
pym/portage/output.py
pym/portage/util.py

index 3ea879c35835fc067036cbf09872126ff2ba51fa..c8f32f4a4b56360f694fb350a2c722dcf79df5d8 100755 (executable)
@@ -590,7 +590,8 @@ for path in portdb.porttrees:
 
        desc_path = os.path.join(path, 'profiles', 'profiles.desc')
        try:
-               desc_file = codecs.open(desc_path, mode='r', errors='replace')
+               desc_file = codecs.open(desc_path, mode='r',
+                       encoding='utf_8', errors='replace')
        except EnvironmentError:
                pass
        else:
@@ -960,7 +961,8 @@ for x in scanlist:
                        continue
                try:
                        line = 1
-                       for l in codecs.open(checkdir+"/"+y, "r", "utf8"):
+                       for l in codecs.open(os.path.join(checkdir, y), mode='r',
+                               encoding='utf_8'):
                                line +=1
                except UnicodeDecodeError, ue:
                        stats["file.UTF8"] += 1
index 3a969bbce81e892da146952aec92d0c01d2c136a..d897af0e8b5f2ece05a50524dc44e8b79c5aa31c 100644 (file)
@@ -1682,8 +1682,8 @@ class config(object):
                                repo_conf_parser = SafeConfigParser()
                                try:
                                        repo_conf_parser.readfp(
-                                               codecs.open(self._local_repo_conf_path,
-                                               mode='r', errors='replace'))
+                                               codecs.open(self._local_repo_conf_path, mode='r',
+                                               encoding='utf_8', errors='replace'))
                                except EnvironmentError, e:
                                        if e.errno != errno.ENOENT:
                                                raise
index 7cd2600aefd237224ab9ce98cdb5bad6d22ade16..7b4d727213bb8face02bc92b62af196d4de5614e 100644 (file)
@@ -139,7 +139,7 @@ class FileLoader(DataLoader):
                # once, which may be expensive due to digging in child classes.
                func = self.lineParser
                for fn in RecursiveFileLoader(self.fname):
-                       f = codecs.open(fn, mode='r', errors='replace')
+                       f = codecs.open(fn, mode='r', encoding='utf_8', errors='replace')
                        for line_num, line in enumerate(f):
                                func(line, line_num, data, errors)
                return (data, errors)
index 4c533c427a462178d163f306fa08d4b57342ec80..eb0d247b7bb61c5e111b1c1b20671ac84e4fd8ac 100644 (file)
@@ -165,7 +165,8 @@ def _parse_color_map(onerror=None):
                return token
        try:
                lineno=0
-               for line in codecs.open( myfile, mode = 'r', errors = 'replace' ):
+               for line in codecs.open( myfile, mode='r',
+                       encoding='utf_8', errors='replace' ):
                        lineno += 1
 
                        commenter_pos = line.find("#")
index 6d9a23cd1a15940d2a4ccca6befaba1ffb1ee1e9..63b504cf91c4aa146865a9a385f8a606f551dc13 100644 (file)
@@ -318,7 +318,7 @@ def grablines(myfilename,recursive=0):
        else:
                try:
                        myfile = codecs.open(myfilename, mode='r',
-                               encoding=sys.getdefaultencoding(), errors='replace')
+                               encoding='utf_8', errors='replace')
                        mylines = myfile.readlines()
                        myfile.close()
                except IOError, e:
@@ -368,10 +368,11 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True):
                # Workaround for avoiding a silent error in shlex that
                # is triggered by a source statement at the end of the file without a
                # trailing newline after the source statement
-               content = codecs.open(mycfg, mode='r', errors='replace').read()
-               if content and content[-1] != u'\n':
-                       content += u'\n'
-               f = StringIO(content)
+               # NOTE: shlex doesn't seem to support unicode objects
+               # (produces spurious \0 characters with python-2.6.2)
+               content = open(mycfg).read()
+               if content and content[-1] != '\n':
+                       content += '\n'
        except IOError, e:
                if e.errno == PermissionDenied.errno:
                        raise PermissionDenied(mycfg)
@@ -387,7 +388,7 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True):
                # The default shlex.sourcehook() implementation
                # only joins relative paths when the infile
                # attribute is properly set.
-               lex = shlex_class(f, infile=mycfg, posix=True)
+               lex = shlex_class(content, infile=mycfg, posix=True)
                lex.wordchars = string.digits + string.ascii_letters + \
                        "~!@#$%*_\:;?,./-+{}"
                lex.quotes="\"'"
@@ -874,6 +875,7 @@ class atomic_ofstream(ObjectProxy):
                        open_func = open
                else:
                        open_func = codecs.open
+                       kargs.setdefault('encoding', 'utf_8')
                        kargs.setdefault('errors', 'replace')
 
                if follow_links: