Always pass encoding='utf_8' to codecs.open(), since otherwise it can ...

author Zac Medico <zmedico@gentoo.org>

Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)

committer Zac Medico <zmedico@gentoo.org>

Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)
author Zac Medico <zmedico@gentoo.org>
Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)
committer Zac Medico <zmedico@gentoo.org>
Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)
diff --git a/bin/repoman b/bin/repoman

index 3ea879c35835fc067036cbf09872126ff2ba51fa..c8f32f4a4b56360f694fb350a2c722dcf79df5d8 100755 (executable)
--- a/bin/repoman
+++ b/bin/repoman
@@ -590,7 +590,8 @@ for path in portdb.porttrees:
  
         desc_path = os.path.join(path, 'profiles', 'profiles.desc')
         try:
-               desc_file = codecs.open(desc_path, mode='r', errors='replace')
+               desc_file = codecs.open(desc_path, mode='r',
+                       encoding='utf_8', errors='replace')
         except EnvironmentError:
                 pass
         else:
@@ -960,7 +961,8 @@ for x in scanlist:
                         continue
                 try:
                         line = 1
-                       for l in codecs.open(checkdir+"/"+y, "r", "utf8"):
+                       for l in codecs.open(os.path.join(checkdir, y), mode='r',
+                               encoding='utf_8'):
                                 line +=1
                 except UnicodeDecodeError, ue:
                         stats["file.UTF8"] += 1
diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py

index 3a969bbce81e892da146952aec92d0c01d2c136a..d897af0e8b5f2ece05a50524dc44e8b79c5aa31c 100644 (file)
--- a/pym/portage/__init__.py
+++ b/pym/portage/__init__.py
@@ -1682,8 +1682,8 @@ class config(object):
                                 repo_conf_parser = SafeConfigParser()
                                 try:
                                         repo_conf_parser.readfp(
-                                               codecs.open(self._local_repo_conf_path,
-                                               mode='r', errors='replace'))
+                                               codecs.open(self._local_repo_conf_path, mode='r',
+                                               encoding='utf_8', errors='replace'))
                                 except EnvironmentError, e:
                                         if e.errno != errno.ENOENT:
                                                 raise
diff --git a/pym/portage/env/loaders.py b/pym/portage/env/loaders.py

index 7cd2600aefd237224ab9ce98cdb5bad6d22ade16..7b4d727213bb8face02bc92b62af196d4de5614e 100644 (file)
--- a/pym/portage/env/loaders.py
+++ b/pym/portage/env/loaders.py
@@ -139,7 +139,7 @@ class FileLoader(DataLoader):
                 # once, which may be expensive due to digging in child classes.
                 func = self.lineParser
                 for fn in RecursiveFileLoader(self.fname):
-                       f = codecs.open(fn, mode='r', errors='replace')
+                       f = codecs.open(fn, mode='r', encoding='utf_8', errors='replace')
                         for line_num, line in enumerate(f):
                                 func(line, line_num, data, errors)
                 return (data, errors)
diff --git a/pym/portage/output.py b/pym/portage/output.py

index 4c533c427a462178d163f306fa08d4b57342ec80..eb0d247b7bb61c5e111b1c1b20671ac84e4fd8ac 100644 (file)
--- a/pym/portage/output.py
+++ b/pym/portage/output.py
@@ -165,7 +165,8 @@ def _parse_color_map(onerror=None):
                 return token
         try:
                 lineno=0
-               for line in codecs.open( myfile, mode = 'r', errors = 'replace' ):
+               for line in codecs.open( myfile, mode='r',
+                       encoding='utf_8', errors='replace' ):
                         lineno += 1
  
                         commenter_pos = line.find("#")
diff --git a/pym/portage/util.py b/pym/portage/util.py

index 6d9a23cd1a15940d2a4ccca6befaba1ffb1ee1e9..63b504cf91c4aa146865a9a385f8a606f551dc13 100644 (file)
--- a/pym/portage/util.py
+++ b/pym/portage/util.py
@@ -318,7 +318,7 @@ def grablines(myfilename,recursive=0):
         else:
                 try:
                         myfile = codecs.open(myfilename, mode='r',
-                               encoding=sys.getdefaultencoding(), errors='replace')
+                               encoding='utf_8', errors='replace')
                         mylines = myfile.readlines()
                         myfile.close()
                 except IOError, e:
@@ -368,10 +368,11 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True):
                 # Workaround for avoiding a silent error in shlex that
                 # is triggered by a source statement at the end of the file without a
                 # trailing newline after the source statement
-               content = codecs.open(mycfg, mode='r', errors='replace').read()
-               if content and content[-1] != u'\n':
-                       content += u'\n'
-               f = StringIO(content)
+               # NOTE: shlex doesn't seem to support unicode objects
+               # (produces spurious \0 characters with python-2.6.2)
+               content = open(mycfg).read()
+               if content and content[-1] != '\n':
+                       content += '\n'
         except IOError, e:
                 if e.errno == PermissionDenied.errno:
                         raise PermissionDenied(mycfg)
@@ -387,7 +388,7 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True):
                 # The default shlex.sourcehook() implementation
                 # only joins relative paths when the infile
                 # attribute is properly set.
-               lex = shlex_class(f, infile=mycfg, posix=True)
+               lex = shlex_class(content, infile=mycfg, posix=True)
                 lex.wordchars = string.digits + string.ascii_letters + \
                         "~!@#$%*_\:;?,./-+{}"
                 lex.quotes="\"'"
@@ -874,6 +875,7 @@ class atomic_ofstream(ObjectProxy):
                         open_func = open
                 else:
                         open_func = codecs.open
+                       kargs.setdefault('encoding', 'utf_8')
                         kargs.setdefault('errors', 'replace')
  
                 if follow_links:
author	Zac Medico <zmedico@gentoo.org>
	Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)
committer	Zac Medico <zmedico@gentoo.org>
	Mon, 20 Jul 2009 23:50:20 +0000 (23:50 -0000)
bin/repoman		patch \| blob \| history
pym/portage/__init__.py		patch \| blob \| history
pym/portage/env/loaders.py		patch \| blob \| history
pym/portage/output.py		patch \| blob \| history
pym/portage/util.py		patch \| blob \| history