Use _content_encoding and _fs_encoding for unicode encoding/decoding.
author Zac Medico <zmedico@gentoo.org>
Mon, 17 Aug 2009 00:15:11 +0000 (00:15 -0000)
committer Zac Medico <zmedico@gentoo.org>
Mon, 17 Aug 2009 00:15:11 +0000 (00:15 -0000)
svn path=/main/trunk/; revision=14072

pym/portage/cache/ebuild_xattr.py
pym/portage/env/loaders.py
pym/portage/tests/__init__.py
pym/portage/update.py
pym/portage/util.py

index baba94321b801b1cff78c7ac1f46743761e34741..bcaf30640e86d7d681651d89ecfe489853ba1dfa 100644 (file)
@@ -10,6 +10,7 @@ from portage.cache import fs_template
 from portage.versions import catsplit
 from portage import cpv_getkey
 from portage import os
+from portage import _fs_encoding
 from portage import _unicode_decode
 import xattr
 from errno import ENODATA,ENOSPC,E2BIG
@@ -156,7 +157,11 @@ class database(fs_template.FsBased):
 
                for root, dirs, files in os.walk(self.portdir):
                        for file in files:
-                               file = _unicode_decode(file)
+                               try:
+                                       file = _unicode_decode(file,
+                                               encoding=_fs_encoding, errors='strict')
+                               except UnicodeDecodeError:
+                                       continue
                                if file[-7:] == '.ebuild':
                                        cat = os.path.basename(os.path.dirname(root))
                                        pn_pv = file[:-7]
index 854304125f331d8794d8483fd72d90287a95be75..e878ba449c64ce6a3645afb7a2d4415abf1b1570 100644 (file)
@@ -4,8 +4,13 @@
 # $Id$
 
 import codecs
-import os
+import errno
 import stat
+from portage import os
+from portage import _content_encoding
+from portage import _fs_encoding
+from portage import _unicode_decode
+from portage import _unicode_encode
 from portage.localization import _
 
 class LoaderError(Exception):
@@ -40,11 +45,6 @@ def RecursiveFileLoader(filename):
        @returns: List of files to process
        """
 
-       if isinstance(filename, unicode):
-               # Avoid UnicodeDecodeError raised from
-               # os.path.join when called by os.walk.
-               filename = filename.encode('utf_8', 'replace')
-
        try:
                st = os.stat(filename)
        except OSError:
@@ -55,6 +55,11 @@ def RecursiveFileLoader(filename):
                                if d[:1] == '.' or d == 'CVS':
                                        dirs.remove(d)
                        for f in files:
+                               try:
+                                       f = _unicode_decode(f,
+                                               encoding=_fs_encoding, errors='strict')
+                               except UnicodeDecodeError:
+                                       continue
                                if f[:1] == '.' or f[-1:] == '~':
                                        continue
                                yield os.path.join(root, f)
@@ -145,9 +150,18 @@ class FileLoader(DataLoader):
                # once, which may be expensive due to digging in child classes.
                func = self.lineParser
                for fn in RecursiveFileLoader(self.fname):
-                       f = codecs.open(fn, mode='r', encoding='utf_8', errors='replace')
+                       try:
+                               f = codecs.open(_unicode_encode(fn,
+                                       encoding=_fs_encoding, errors='strict'), mode='r',
+                                       encoding=_content_encoding, errors='replace')
+                       except EnvironmentError, e:
+                               if e.errno not in (errno.ENOENT, errno.ESTALE):
+                                       raise
+                               del e
+                               continue
                        for line_num, line in enumerate(f):
                                func(line, line_num, data, errors)
+                       f.close()
                return (data, errors)
 
        def lineParser(self, line, line_num, data, errors):
index 8676c6ae25253b2cadeeb1602fc578e615b40c93..4a5ced8a35b8dbbd60f1ed31f03907f0185f9787 100644 (file)
@@ -3,14 +3,21 @@
 # Distributed under the terms of the GNU General Public License v2
 # $Id$
 
-import os
 import sys
 import time
 import unittest
 
+from portage import os
+from portage import _fs_encoding
+from portage import _unicode_encode
+from portage import _unicode_decode
+
 def main():
 
-       TEST_FILE = '__test__'
+       TEST_FILE = _unicode_encode('__test__',
+               encoding=_fs_encoding, errors='strict')
+       svn_dirname = _unicode_encode('.svn',
+               encoding=_fs_encoding, errors='strict')
        suite = unittest.TestSuite()
        basedir = os.path.dirname(os.path.realpath(__file__))
        testDirs = []
@@ -19,8 +26,14 @@ def main():
        # I was tired of adding dirs to the list, so now we add __test__
        # to each dir we want tested.
        for root, dirs, files in os.walk(basedir):
-               if ".svn" in dirs:
-                       dirs.remove('.svn')
+               if svn_dirname in dirs:
+                       dirs.remove(svn_dirname)
+               try:
+                       root = _unicode_decode(root,
+                               encoding=_fs_encoding, errors='strict')
+               except UnicodeDecodeError:
+                       continue
+
                if TEST_FILE in files:
                        testDirs.append(root)
 
index 4129565915ba924fcc4698b3ac3f067ecef40692..d38ddf942ce60211538f320f604869b7d24505bf 100644 (file)
@@ -2,8 +2,16 @@
 # Distributed under the terms of the GNU General Public License v2
 # $Id$
 
-import errno, os, re, sys
-
+import codecs
+import errno
+import re
+import sys
+
+from portage import os
+from portage import _content_encoding
+from portage import _fs_encoding
+from portage import _unicode_decode
+from portage import _unicode_encode
 import portage
 portage.proxy.lazyimport.lazyimport(globals(),
        'portage.dep:dep_getkey,get_operator,isvalidatom,isjustname,remove_slot',
@@ -12,7 +20,7 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.versions:ververify'
 )
 
-from portage.const import USER_CONFIG_PATH, WORLD_FILE
+from portage.const import USER_CONFIG_PATH
 from portage.exception import DirectoryNotFound, PortageException
 from portage.localization import _
 
@@ -63,9 +71,10 @@ def fixdbentries(update_iter, dbdir):
        mydata = {}
        for myfile in [f for f in os.listdir(dbdir) if f not in ignored_dbentries]:
                file_path = os.path.join(dbdir, myfile)
-               f = open(file_path, "r")
-               mydata[myfile] = f.read()
-               f.close()
+               mydata[myfile] = codecs.open(_unicode_encode(file_path,
+                       encoding=_fs_encoding, errors='strict'),
+                       mode='r', encoding=_content_encoding,
+                       errors='replace').read()
        updated_items = update_dbentries(update_iter, mydata)
        for myfile, mycontent in updated_items.iteritems():
                file_path = os.path.join(dbdir, myfile)
@@ -100,9 +109,9 @@ def grab_updates(updpath, prev_mtimes=None):
                mystat = os.stat(file_path)
                if file_path not in prev_mtimes or \
                long(prev_mtimes[file_path]) != long(mystat.st_mtime):
-                       f = open(file_path)
-                       content = f.read()
-                       f.close()
+                       content = codecs.open(_unicode_encode(file_path,
+                               encoding=_fs_encoding, errors='strict'),
+                               mode='r', encoding=_content_encoding, errors='replace').read()
                        update_data.append((file_path, mystat, content))
        return update_data
 
@@ -142,17 +151,12 @@ def parse_updates(mycontent):
        return myupd, errors
 
 def update_config_files(config_root, protect, protect_mask, update_iter):
-       """Perform global updates on /etc/portage/package.* and the world file.
+       """Perform global updates on /etc/portage/package.*.
        config_root - location of files to update
        protect - list of paths from CONFIG_PROTECT
        protect_mask - list of paths from CONFIG_PROTECT_MASK
        update_iter - list of update commands as returned from parse_updates()"""
 
-       if isinstance(config_root, unicode):
-               # Avoid UnicodeDecodeError raised from
-               # os.path.join when called by os.walk.
-               config_root = config_root.encode('utf_8', 'replace')
-
        config_root = normalize_path(config_root)
        update_files = {}
        file_contents = {}
@@ -166,9 +170,20 @@ def update_config_files(config_root, protect, protect_mask, update_iter):
                if os.path.isdir(config_file):
                        for parent, dirs, files in os.walk(config_file):
                                for y in dirs:
+                                       try:
+                                               y = _unicode_decode(y,
+                                                       encoding=_fs_encoding, errors='strict')
+                                       except UnicodeDecodeError:
+                                               dirs.remove(y)
+                                               continue
                                        if y.startswith("."):
                                                dirs.remove(y)
                                for y in files:
+                                       try:
+                                               y = _unicode_decode(y,
+                                                       encoding=_fs_encoding, errors='strict')
+                                       except UnicodeDecodeError:
+                                               continue
                                        if y.startswith("."):
                                                continue
                                        recursivefiles.append(
@@ -178,9 +193,11 @@ def update_config_files(config_root, protect, protect_mask, update_iter):
        myxfiles = recursivefiles
        for x in myxfiles:
                try:
-                       myfile = open(os.path.join(abs_user_config, x),"r")
-                       file_contents[x] = myfile.readlines()
-                       myfile.close()
+                       file_contents[x] = codecs.open(
+                               _unicode_encode(os.path.join(abs_user_config, x),
+                               encoding=_fs_encoding, errors='strict'),
+                               mode='r', encoding=_content_encoding,
+                               errors='replace').readlines()
                except IOError:
                        if file_contents.has_key(x):
                                del file_contents[x]
index 67e502df8010206876f2a30d2edb37e6a20cdd3c..521429aadb46a61293003d8bcd851541798ff9b7 100644 (file)
@@ -14,7 +14,6 @@ __all__ = ['apply_permissions', 'apply_recursive_permissions',
 
 import commands
 import codecs
-import os
 import errno
 import logging
 import shlex
@@ -24,7 +23,8 @@ import sys
 
 import portage
 from portage import os
-from portage import _merge_encoding
+from portage import _content_encoding
+from portage import _fs_encoding
 from portage import _os_merge
 from portage import _unicode_encode
 from portage import _unicode_decode
@@ -327,8 +327,9 @@ def grablines(myfilename,recursive=0):
                                        os.path.join(myfilename, f), recursive))
        else:
                try:
-                       myfile = codecs.open(_unicode_encode(myfilename),
-                               mode='r', encoding='utf_8', errors='replace')
+                       myfile = codecs.open(_unicode_encode(myfilename,
+                               encoding=_fs_encoding, errors='strict'),
+                               mode='r', encoding=_content_encoding, errors='replace')
                        mylines = myfile.readlines()
                        myfile.close()
                except IOError, e:
@@ -394,10 +395,12 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True):
                # NOTE: shex doesn't seem to support unicode objects
                # (produces spurious \0 characters with python-2.6.2)
                if sys.hexversion < 0x3000000:
-                       content = open(_unicode_encode(mycfg), 'rb').read()
+                       content = open(_unicode_encode(mycfg,
+                               encoding=_fs_encoding, errors='strict'), 'rb').read()
                else:
-                       content = open(_unicode_encode(mycfg), mode='r',
-                               encoding='utf_8', errors='replace').read()
+                       content = open(_unicode_encode(mycfg,
+                               encoding=_fs_encoding, errors='strict'), mode='r',
+                               encoding=_content_encoding, errors='replace').read()
                if content and content[-1] != '\n':
                        content += '\n'
        except IOError, e:
@@ -589,7 +592,8 @@ def pickle_read(filename,default=None,debug=0):
                return default
        data = None
        try:
-               myf = open(_unicode_encode(filename), 'rb')
+               myf = open(_unicode_encode(filename,
+                       encoding=_fs_encoding, errors='strict'), 'rb')
                mypickle = pickle.Unpickler(myf)
                data = mypickle.load()
                myf.close()
@@ -904,7 +908,7 @@ class atomic_ofstream(ObjectProxy):
                        open_func = open
                else:
                        open_func = codecs.open
-                       kargs.setdefault('encoding', 'utf_8')
+                       kargs.setdefault('encoding', _content_encoding)
                        kargs.setdefault('errors', 'replace')
 
                if follow_links:
@@ -913,7 +917,9 @@ class atomic_ofstream(ObjectProxy):
                        tmp_name = "%s.%i" % (canonical_path, os.getpid())
                        try:
                                object.__setattr__(self, '_file',
-                                       open_func(_unicode_encode(tmp_name), mode=mode, **kargs))
+                                       open_func(_unicode_encode(tmp_name,
+                                               encoding=_fs_encoding, errors='strict'),
+                                               mode=mode, **kargs))
                                return
                        except IOError, e:
                                if canonical_path == filename:
@@ -925,7 +931,9 @@ class atomic_ofstream(ObjectProxy):
                object.__setattr__(self, '_real_name', filename)
                tmp_name = "%s.%i" % (filename, os.getpid())
                object.__setattr__(self, '_file',
-                       open_func(_unicode_encode(tmp_name), mode=mode, **kargs))
+                       open_func(_unicode_encode(tmp_name,
+                               encoding=_fs_encoding, errors='strict'),
+                               mode=mode, **kargs))
 
        def _get_target(self):
                return object.__getattribute__(self, '_file')