From: Zac Medico Date: Mon, 20 Jul 2009 23:50:20 +0000 (-0000) Subject: Always pass encoding='utf_8' to codecs.open(), since otherwise it can X-Git-Tag: v2.2_rc34~27 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=581381acffbedfdc553b9643d47924bf44e32238;p=portage.git Always pass encoding='utf_8' to codecs.open(), since otherwise it can return non-unicode strings (at least in some cases, observed with python-2.6.2). Don't use unicode in portage.util.getconfig() for now, since shlex doesn't seem to support it (spurious \0 characters). If we use unicode for config variables, it breaks shlex.split() calls on those variables due to the same issue (spurious \0 characters). svn path=/main/trunk/; revision=13845 --- diff --git a/bin/repoman b/bin/repoman index 3ea879c35..c8f32f4a4 100755 --- a/bin/repoman +++ b/bin/repoman @@ -590,7 +590,8 @@ for path in portdb.porttrees: desc_path = os.path.join(path, 'profiles', 'profiles.desc') try: - desc_file = codecs.open(desc_path, mode='r', errors='replace') + desc_file = codecs.open(desc_path, mode='r', + encoding='utf_8', errors='replace') except EnvironmentError: pass else: @@ -960,7 +961,8 @@ for x in scanlist: continue try: line = 1 - for l in codecs.open(checkdir+"/"+y, "r", "utf8"): + for l in codecs.open(os.path.join(checkdir, y), mode='r', + encoding='utf_8'): line +=1 except UnicodeDecodeError, ue: stats["file.UTF8"] += 1 diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py index 3a969bbce..d897af0e8 100644 --- a/pym/portage/__init__.py +++ b/pym/portage/__init__.py @@ -1682,8 +1682,8 @@ class config(object): repo_conf_parser = SafeConfigParser() try: repo_conf_parser.readfp( - codecs.open(self._local_repo_conf_path, - mode='r', errors='replace')) + codecs.open(self._local_repo_conf_path, mode='r', + encoding='utf_8', errors='replace')) except EnvironmentError, e: if e.errno != errno.ENOENT: raise diff --git a/pym/portage/env/loaders.py b/pym/portage/env/loaders.py index 7cd2600ae..7b4d72721 100644 --- 
a/pym/portage/env/loaders.py +++ b/pym/portage/env/loaders.py @@ -139,7 +139,7 @@ class FileLoader(DataLoader): # once, which may be expensive due to digging in child classes. func = self.lineParser for fn in RecursiveFileLoader(self.fname): - f = codecs.open(fn, mode='r', errors='replace') + f = codecs.open(fn, mode='r', encoding='utf_8', errors='replace') for line_num, line in enumerate(f): func(line, line_num, data, errors) return (data, errors) diff --git a/pym/portage/output.py b/pym/portage/output.py index 4c533c427..eb0d247b7 100644 --- a/pym/portage/output.py +++ b/pym/portage/output.py @@ -165,7 +165,8 @@ def _parse_color_map(onerror=None): return token try: lineno=0 - for line in codecs.open( myfile, mode = 'r', errors = 'replace' ): + for line in codecs.open( myfile, mode='r', + encoding='utf_8', errors='replace' ): lineno += 1 commenter_pos = line.find("#") diff --git a/pym/portage/util.py b/pym/portage/util.py index 6d9a23cd1..63b504cf9 100644 --- a/pym/portage/util.py +++ b/pym/portage/util.py @@ -318,7 +318,7 @@ def grablines(myfilename,recursive=0): else: try: myfile = codecs.open(myfilename, mode='r', - encoding=sys.getdefaultencoding(), errors='replace') + encoding='utf_8', errors='replace') mylines = myfile.readlines() myfile.close() except IOError, e: @@ -368,10 +368,11 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True): # Workaround for avoiding a silent error in shlex that # is triggered by a source statement at the end of the file without a # trailing newline after the source statement - content = codecs.open(mycfg, mode='r', errors='replace').read() - if content and content[-1] != u'\n': - content += u'\n' - f = StringIO(content) + # NOTE: shlex doesn't seem to support unicode objects + # (produces spurious \0 characters with python-2.6.2) + content = open(mycfg).read() + if content and content[-1] != '\n': + content += '\n' except IOError, e: if e.errno == PermissionDenied.errno: raise PermissionDenied(mycfg) @@ -387,7 
+388,7 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True): # The default shlex.sourcehook() implementation # only joins relative paths when the infile # attribute is properly set. - lex = shlex_class(f, infile=mycfg, posix=True) + lex = shlex_class(content, infile=mycfg, posix=True) lex.wordchars = string.digits + string.ascii_letters + \ "~!@#$%*_\:;?,./-+{}" lex.quotes="\"'" @@ -874,6 +875,7 @@ class atomic_ofstream(ObjectProxy): open_func = open else: open_func = codecs.open + kargs.setdefault('encoding', 'utf_8') kargs.setdefault('errors', 'replace') if follow_links: