From a636c88eb998c562bfa8310862caa36315335aae Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Thu, 20 Jun 2013 03:11:37 -0700 Subject: [PATCH] Decode sys.argv with surrogateescape for Python 3 With Python 3, the surrogateescape encoding error handler makes it possible to access the original argv bytes, which can be useful if their actual encoding does not match the filesystem encoding. --- bin/portageq | 3 +-- bin/repoman | 3 +-- pym/_emerge/main.py | 6 ++---- pym/portage/__init__.py | 13 +++++++++++++ 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/bin/portageq b/bin/portageq index 1ae1fe16e..4be9f8890 100755 --- a/bin/portageq +++ b/bin/portageq @@ -1148,8 +1148,7 @@ else: def main(argv): - if argv and isinstance(argv[0], bytes): - argv = [portage._unicode_decode(x) for x in argv] + argv = portage._decode_argv(argv) nocolor = os.environ.get('NOCOLOR') if nocolor in ('yes', 'true'): diff --git a/bin/repoman b/bin/repoman index c4a5a220d..ff481d776 100755 --- a/bin/repoman +++ b/bin/repoman @@ -156,8 +156,7 @@ def ParseArgs(argv, qahelp): (opts, args), just like a call to parser.parse_args() """ - if argv and isinstance(argv[0], bytes): - argv = [portage._unicode_decode(x) for x in argv] + argv = portage._decode_argv(argv) modes = { 'commit' : 'Run a scan then commit changes', diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py index b26ce306b..fe9fb2924 100644 --- a/pym/_emerge/main.py +++ b/pym/_emerge/main.py @@ -981,10 +981,6 @@ def parse_opts(tmpcmdline, silent=False): if myaction is None and myoptions.deselect is True: myaction = 'deselect' - if myargs and isinstance(myargs[0], bytes): - for i in range(len(myargs)): - myargs[i] = portage._unicode_decode(myargs[i]) - myfiles += myargs return myaction, myopts, myfiles @@ -1014,6 +1010,8 @@ def emerge_main(args=None): if args is None: args = sys.argv[1:] + args = portage._decode_argv(args) + # Disable color until we're sure that it should be enabled (after # EMERGE_DEFAULT_OPTS has been parsed).
portage.output.havecolor = 0 diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py index 7656c6ebe..d6da9f744 100644 --- a/pym/portage/__init__.py +++ b/pym/portage/__init__.py @@ -174,6 +174,15 @@ _encodings = { } if sys.hexversion >= 0x3000000: + + def _decode_argv(argv): + # With Python 3, the surrogateescape encoding error handler makes it + # possible to access the original argv bytes, which can be useful + # if their actual encoding does not match the filesystem encoding. + fs_encoding = sys.getfilesystemencoding() + return [_unicode_decode(x.encode(fs_encoding, 'surrogateescape')) + for x in argv] + def _unicode_encode(s, encoding=_encodings['content'], errors='backslashreplace'): if isinstance(s, str): s = s.encode(encoding, errors) @@ -186,6 +195,10 @@ if sys.hexversion >= 0x3000000: _native_string = _unicode_decode else: + + def _decode_argv(argv): + return [_unicode_decode(x) for x in argv] + def _unicode_encode(s, encoding=_encodings['content'], errors='backslashreplace'): if isinstance(s, unicode): s = s.encode(encoding, errors) -- 2.26.2