From cb0f273297db79c47d38df235bcf18378126e9eb Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Mon, 17 Aug 2009 00:15:11 +0000 Subject: [PATCH] Use _content_encoding and _fs_encoding for unicode encoding/decoding. svn path=/main/trunk/; revision=14072 --- pym/portage/cache/ebuild_xattr.py | 7 +++- pym/portage/env/loaders.py | 28 ++++++++++++---- pym/portage/tests/__init__.py | 21 +++++++++--- pym/portage/update.py | 53 ++++++++++++++++++++----------- pym/portage/util.py | 30 ++++++++++------- 5 files changed, 98 insertions(+), 41 deletions(-) diff --git a/pym/portage/cache/ebuild_xattr.py b/pym/portage/cache/ebuild_xattr.py index baba94321..bcaf30640 100644 --- a/pym/portage/cache/ebuild_xattr.py +++ b/pym/portage/cache/ebuild_xattr.py @@ -10,6 +10,7 @@ from portage.cache import fs_template from portage.versions import catsplit from portage import cpv_getkey from portage import os +from portage import _fs_encoding from portage import _unicode_decode import xattr from errno import ENODATA,ENOSPC,E2BIG @@ -156,7 +157,11 @@ class database(fs_template.FsBased): for root, dirs, files in os.walk(self.portdir): for file in files: - file = _unicode_decode(file) + try: + file = _unicode_decode(file, + encoding=_fs_encoding, errors='strict') + except UnicodeDecodeError: + continue if file[-7:] == '.ebuild': cat = os.path.basename(os.path.dirname(root)) pn_pv = file[:-7] diff --git a/pym/portage/env/loaders.py b/pym/portage/env/loaders.py index 854304125..e878ba449 100644 --- a/pym/portage/env/loaders.py +++ b/pym/portage/env/loaders.py @@ -4,8 +4,13 @@ # $Id$ import codecs -import os +import errno import stat +from portage import os +from portage import _content_encoding +from portage import _fs_encoding +from portage import _unicode_decode +from portage import _unicode_encode from portage.localization import _ class LoaderError(Exception): @@ -40,11 +45,6 @@ def RecursiveFileLoader(filename): @returns: List of files to process """ - if isinstance(filename, unicode): - # Avoid UnicodeDecodeError raised from - # os.path.join when called by os.walk. - filename = filename.encode('utf_8', 'replace') - try: st = os.stat(filename) except OSError: @@ -55,6 +55,11 @@ def RecursiveFileLoader(filename): if d[:1] == '.' or d == 'CVS': dirs.remove(d) for f in files: + try: + f = _unicode_decode(f, + encoding=_fs_encoding, errors='strict') + except UnicodeDecodeError: + continue if f[:1] == '.' or f[-1:] == '~': continue yield os.path.join(root, f) @@ -145,9 +150,18 @@ class FileLoader(DataLoader): # once, which may be expensive due to digging in child classes. func = self.lineParser for fn in RecursiveFileLoader(self.fname): - f = codecs.open(fn, mode='r', encoding='utf_8', errors='replace') + try: + f = codecs.open(_unicode_encode(fn, + encoding=_fs_encoding, errors='strict'), mode='r', + encoding=_content_encoding, errors='replace') + except EnvironmentError, e: + if e.errno not in (errno.ENOENT, errno.ESTALE): + raise + del e + continue for line_num, line in enumerate(f): func(line, line_num, data, errors) + f.close() return (data, errors) def lineParser(self, line, line_num, data, errors): diff --git a/pym/portage/tests/__init__.py b/pym/portage/tests/__init__.py index 8676c6ae2..4a5ced8a3 100644 --- a/pym/portage/tests/__init__.py +++ b/pym/portage/tests/__init__.py @@ -3,14 +3,21 @@ # Distributed under the terms of the GNU General Public License v2 # $Id$ -import os import sys import time import unittest +from portage import os +from portage import _fs_encoding +from portage import _unicode_encode +from portage import _unicode_decode + def main(): - TEST_FILE = '__test__' + TEST_FILE = _unicode_encode('__test__', + encoding=_fs_encoding, errors='strict') + svn_dirname = _unicode_encode('.svn', + encoding=_fs_encoding, errors='strict') suite = unittest.TestSuite() basedir = os.path.dirname(os.path.realpath(__file__)) testDirs = [] @@ -19,8 +26,14 @@ def main(): # I was tired of adding dirs to the list, so now we add __test__ # to each dir we want tested. for root, dirs, files in os.walk(basedir): - if ".svn" in dirs: - dirs.remove('.svn') + if svn_dirname in dirs: + dirs.remove(svn_dirname) + try: + root = _unicode_decode(root, + encoding=_fs_encoding, errors='strict') + except UnicodeDecodeError: + continue + if TEST_FILE in files: testDirs.append(root) diff --git a/pym/portage/update.py b/pym/portage/update.py index 412956591..d38ddf942 100644 --- a/pym/portage/update.py +++ b/pym/portage/update.py @@ -2,8 +2,16 @@ # Distributed under the terms of the GNU General Public License v2 # $Id$ -import errno, os, re, sys - +import codecs +import errno +import re +import sys + +from portage import os +from portage import _content_encoding +from portage import _fs_encoding +from portage import _unicode_decode +from portage import _unicode_encode import portage portage.proxy.lazyimport.lazyimport(globals(), 'portage.dep:dep_getkey,get_operator,isvalidatom,isjustname,remove_slot', @@ -12,7 +20,7 @@ portage.proxy.lazyimport.lazyimport(globals(), 'portage.versions:ververify' ) -from portage.const import USER_CONFIG_PATH, WORLD_FILE +from portage.const import USER_CONFIG_PATH from portage.exception import DirectoryNotFound, PortageException from portage.localization import _ @@ -63,9 +71,10 @@ def fixdbentries(update_iter, dbdir): mydata = {} for myfile in [f for f in os.listdir(dbdir) if f not in ignored_dbentries]: file_path = os.path.join(dbdir, myfile) - f = open(file_path, "r") - mydata[myfile] = f.read() - f.close() + mydata[myfile] = codecs.open(_unicode_encode(file_path, + encoding=_fs_encoding, errors='strict'), + mode='r', encoding=_content_encoding, + errors='replace').read() updated_items = update_dbentries(update_iter, mydata) for myfile, mycontent in updated_items.iteritems(): file_path = os.path.join(dbdir, myfile) @@ -100,9 +109,9 @@ def grab_updates(updpath, prev_mtimes=None): mystat = os.stat(file_path) if file_path not in prev_mtimes or \ long(prev_mtimes[file_path]) != long(mystat.st_mtime): - f = open(file_path) - content = f.read() - f.close() + content = codecs.open(_unicode_encode(file_path, + encoding=_fs_encoding, errors='strict'), + mode='r', encoding=_content_encoding, errors='replace').read() update_data.append((file_path, mystat, content)) return update_data @@ -142,17 +151,12 @@ def parse_updates(mycontent): return myupd, errors def update_config_files(config_root, protect, protect_mask, update_iter): - """Perform global updates on /etc/portage/package.* and the world file. + """Perform global updates on /etc/portage/package.*. config_root - location of files to update protect - list of paths from CONFIG_PROTECT protect_mask - list of paths from CONFIG_PROTECT_MASK update_iter - list of update commands as returned from parse_updates()""" - if isinstance(config_root, unicode): - # Avoid UnicodeDecodeError raised from - # os.path.join when called by os.walk. - config_root = config_root.encode('utf_8', 'replace') - config_root = normalize_path(config_root) update_files = {} file_contents = {} @@ -166,9 +170,20 @@ def update_config_files(config_root, protect, protect_mask, update_iter): if os.path.isdir(config_file): for parent, dirs, files in os.walk(config_file): for y in dirs: + try: + y = _unicode_decode(y, + encoding=_fs_encoding, errors='strict') + except UnicodeDecodeError: + dirs.remove(y) + continue if y.startswith("."): dirs.remove(y) for y in files: + try: + y = _unicode_decode(y, + encoding=_fs_encoding, errors='strict') + except UnicodeDecodeError: + continue if y.startswith("."): continue recursivefiles.append( @@ -178,9 +193,11 @@ def update_config_files(config_root, protect, protect_mask, update_iter): myxfiles = recursivefiles for x in myxfiles: try: - myfile = open(os.path.join(abs_user_config, x),"r") - file_contents[x] = myfile.readlines() - myfile.close() + file_contents[x] = codecs.open( + _unicode_encode(os.path.join(abs_user_config, x), + encoding=_fs_encoding, errors='strict'), + mode='r', encoding=_content_encoding, + errors='replace').readlines() except IOError: if file_contents.has_key(x): del file_contents[x] diff --git a/pym/portage/util.py b/pym/portage/util.py index 67e502df8..521429aad 100644 --- a/pym/portage/util.py +++ b/pym/portage/util.py @@ -14,7 +14,6 @@ __all__ = ['apply_permissions', 'apply_recursive_permissions', import commands import codecs -import os import errno import logging import shlex @@ -24,7 +23,8 @@ import sys import portage from portage import os -from portage import _merge_encoding +from portage import _content_encoding +from portage import _fs_encoding from portage import _os_merge from portage import _unicode_encode from portage import _unicode_decode @@ -327,8 +327,9 @@ def grablines(myfilename,recursive=0): os.path.join(myfilename, f), recursive)) else: try: - myfile = codecs.open(_unicode_encode(myfilename), - mode='r', encoding='utf_8', errors='replace') + myfile = codecs.open(_unicode_encode(myfilename, + encoding=_fs_encoding, errors='strict'), + mode='r', encoding=_content_encoding, errors='replace') mylines = myfile.readlines() myfile.close() except IOError, e: @@ -394,10 +395,12 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True): # NOTE: shex doesn't seem to support unicode objects # (produces spurious \0 characters with python-2.6.2) if sys.hexversion < 0x3000000: - content = open(_unicode_encode(mycfg), 'rb').read() + content = open(_unicode_encode(mycfg, + encoding=_fs_encoding, errors='strict'), 'rb').read() else: - content = open(_unicode_encode(mycfg), mode='r', - encoding='utf_8', errors='replace').read() + content = open(_unicode_encode(mycfg, + encoding=_fs_encoding, errors='strict'), mode='r', + encoding=_content_encoding, errors='replace').read() if content and content[-1] != '\n': content += '\n' except IOError, e: @@ -589,7 +592,8 @@ def pickle_read(filename,default=None,debug=0): return default data = None try: - myf = open(_unicode_encode(filename), 'rb') + myf = open(_unicode_encode(filename, + encoding=_fs_encoding, errors='strict'), 'rb') mypickle = pickle.Unpickler(myf) data = mypickle.load() myf.close() @@ -904,7 +908,7 @@ class atomic_ofstream(ObjectProxy): open_func = open else: open_func = codecs.open - kargs.setdefault('encoding', 'utf_8') + kargs.setdefault('encoding', _content_encoding) kargs.setdefault('errors', 'replace') if follow_links: @@ -913,7 +917,9 @@ class atomic_ofstream(ObjectProxy): tmp_name = "%s.%i" % (canonical_path, os.getpid()) try: object.__setattr__(self, '_file', - open_func(_unicode_encode(tmp_name), mode=mode, **kargs)) + open_func(_unicode_encode(tmp_name, + encoding=_fs_encoding, errors='strict'), + mode=mode, **kargs)) return except IOError, e: if canonical_path == filename: @@ -925,7 +931,9 @@ class atomic_ofstream(ObjectProxy): object.__setattr__(self, '_real_name', filename) tmp_name = "%s.%i" % (filename, os.getpid()) object.__setattr__(self, '_file', - open_func(_unicode_encode(tmp_name), mode=mode, **kargs)) + open_func(_unicode_encode(tmp_name, + encoding=_fs_encoding, errors='strict'), + mode=mode, **kargs)) def _get_target(self): return object.__getattribute__(self, '_file') -- 2.26.2