From 2a9613be614edb48edc25258332aed5774ff4dc7 Mon Sep 17 00:00:00 2001
From: "W. Trevor King" <wking@drexel.edu>
Date: Thu, 16 Feb 2012 10:42:43 -0500
Subject: [PATCH] Improve unicode handling and increase the default textwrap
 width.

---
 update_copyright/project.py   | 55 ++++++++++++++++++++---------------
 update_copyright/utils.py     | 30 +++++++++++++++----
 update_copyright/vcs/git.py   |  3 +-
 update_copyright/vcs/utils.py | 19 ++++++++++--
 4 files changed, 73 insertions(+), 34 deletions(-)

diff --git a/update_copyright/project.py b/update_copyright/project.py
index 18280b1..4684d38 100644
--- a/update_copyright/project.py
+++ b/update_copyright/project.py
@@ -84,9 +84,11 @@ class Project (object):
         self.with_files = False
         self._ignored_paths = None
         self._pyfile = None
+        self._encoding = None
+        self._width = 79
 
         # unlikely to occur in the wild :p
-        self._copyright_tag = '-xyz-COPY' + '-RIGHT-zyx-'
+        self._copyright_tag = u'-xyz-COPY' + u'-RIGHT-zyx-'
 
     def load_config(self, stream):
         p = _configparser.RawConfigParser()
@@ -150,20 +152,24 @@ class Project (object):
         authors = self._vcs.authors()
         new_contents = u'{} was written by:\n{}\n'.format(
             self._name, u'\n'.join(authors))
-        _utils.set_contents('AUTHORS', new_contents, dry_run=dry_run)
+        _utils.set_contents(
+            'AUTHORS', new_contents, unicode=True, encoding=self._encoding,
+            dry_run=dry_run)
 
     def update_file(self, filename, dry_run=False):
         _LOG.info('update {}'.format(filename))
-        contents = _utils.get_contents(filename=filename)
+        contents = _utils.get_contents(
+            filename=filename, unicode=True, encoding=self._encoding)
         original_year = self._vcs.original_year(filename=filename)
         authors = self._vcs.authors(filename=filename)
         new_contents = _utils.update_copyright(
             contents=contents, original_year=original_year, authors=authors,
             text=self._copyright, info=self._info(), prefix='# ',
-            tag=self._copyright_tag)
+            width=self._width, tag=self._copyright_tag)
         _utils.set_contents(
             filename=filename, contents=new_contents,
-            original_contents=contents, dry_run=dry_run)
+            original_contents=contents, unicode=True, encoding=self._encoding,
+            dry_run=dry_run)
 
     def update_files(self, files=None, dry_run=False):
         if files is None or len(files) == 0:
@@ -185,37 +191,38 @@ class Project (object):
             _utils.copyright_string(
                 original_year=original_year, final_year=current_year,
                 authors=authors, text=self._copyright, info=self._info(),
-                prefix='# '),
-            '', 'import textwrap as _textwrap', '', '',
-            'LICENSE = """',
+                prefix=u'# ', width=self._width),
+            u'', u'import textwrap as _textwrap', u'', u'',
+            u'LICENSE = """',
             _utils.copyright_string(
                 original_year=original_year, final_year=current_year,
                 authors=authors, text=self._copyright, info=self._info(),
-                prefix=''),
-            '""".strip()',
-            '',
-            'def short_license(info, wrap=True, **kwargs):',
-            '    paragraphs = [',
+                prefix=u'', width=self._width),
+            u'""".strip()',
+            u'',
+            u'def short_license(info, wrap=True, **kwargs):',
+            u'    paragraphs = [',
             ]
         paragraphs = _utils.copyright_string(
             original_year=original_year, final_year=current_year,
             authors=authors, text=self._short_copyright, info=self._info(),
             author_format_fn=_utils.short_author_formatter, wrap=False,
-            ).split('\n\n')
+            ).split(u'\n\n')
         for p in paragraphs:
-            lines.append("        '{}' % info,".format(
-                    p.replace("'", r"\'")))
+            lines.append(u"        '{}' % info,".format(
+                    p.replace(u"'", ur"\'")))
         lines.extend([
-                '        ]',
-                '    if wrap:',
-                '        for i,p in enumerate(paragraphs):',
-                '            paragraphs[i] = _textwrap.fill(p, **kwargs)',
-                r"    return '\n\n'.join(paragraphs)",
-                '',  # for terminal endline
+                u'        ]',
+                u'    if wrap:',
+                u'        for i,p in enumerate(paragraphs):',
+                u'            paragraphs[i] = _textwrap.fill(p, **kwargs)',
+                ur"    return '\n\n'.join(paragraphs)",
+                u'',  # for terminal endline
                 ])
-        new_contents = '\n'.join(lines)
+        new_contents = u'\n'.join(lines)
         _utils.set_contents(
-            filename=self._pyfile, contents=new_contents, dry_run=dry_run)
+            filename=self._pyfile, contents=new_contents, unicode=True,
+            encoding=self._encoding, dry_run=dry_run)
 
     def _ignored_file(self, filename):
         """
diff --git a/update_copyright/utils.py b/update_copyright/utils.py
index 41da98f..37af5b8 100644
--- a/update_copyright/utils.py
+++ b/update_copyright/utils.py
@@ -16,15 +16,21 @@
 # along with update-copyright.  If not, see
 # <http://www.gnu.org/licenses/>.
 
+import codecs as _codecs
 import difflib as _difflib
+import locale as _locale
 import os as _os
 import os.path as _os_path
+import sys as _sys
 import textwrap as _textwrap
 import time as _time
 
 from . import LOG as _LOG
 
 
+ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding()
+
+
 def long_author_formatter(copyright_year_string, authors):
     """
     >>> print '\\n'.join(long_author_formatter(
@@ -188,24 +194,31 @@ def update_copyright(contents, tag=None, **kwargs):
     contents = tag_copyright(contents=contents, tag=tag)
     return contents.replace(tag, string)
 
-def get_contents(filename):
+def get_contents(filename, unicode=False, encoding=None):
     if _os_path.isfile(filename):
-        f = open(filename, 'r')
+        if unicode:
+            if encoding is None:
+                encoding = ENCODING
+            f = _codecs.open(filename, 'r', encoding=encoding)
+        else:
+            f = open(filename, 'r')
         contents = f.read()
         f.close()
         return contents
     return None
 
-def set_contents(filename, contents, original_contents=None, dry_run=False):
+def set_contents(filename, contents, original_contents=None, unicode=False,
+                 encoding=None, dry_run=False):
     if original_contents is None:
-        original_contents = get_contents(filename=filename)
+        original_contents = get_contents(
+            filename=filename, unicode=unicode, encoding=encoding)
     _LOG.debug('check contents of {}'.format(filename))
     if contents != original_contents:
         if original_contents is None:
             _LOG.info('creating {}'.format(filename))
         else:
             _LOG.info('updating {}'.format(filename))
-            _LOG.debug('\n'.join(
+            _LOG.debug(u'\n'.join(
                     _difflib.unified_diff(
                         original_contents.splitlines(), contents.splitlines(),
                         fromfile=_os_path.normpath(
@@ -213,7 +226,12 @@ def set_contents(filename, contents, original_contents=None, dry_run=False):
                         tofile=_os_path.normpath(_os_path.join('b', filename)),
                         n=3, lineterm='')))
         if dry_run == False:
-            f = file(filename, 'w')
+            if unicode:
+                if encoding is None:
+                    encoding = ENCODING
+                f = _codecs.open(filename, 'w', encoding=encoding)
+            else:
+                f = file(filename, 'w')
             f.write(contents)
             f.close()
     _LOG.debug('no change in {}'.format(filename))
diff --git a/update_copyright/vcs/git.py b/update_copyright/vcs/git.py
index e65acb2..46f15f1 100644
--- a/update_copyright/vcs/git.py
+++ b/update_copyright/vcs/git.py
@@ -25,7 +25,8 @@ class GitBackend (_VCSBackend):
 
     @staticmethod
     def _git_cmd(*args):
-        status,stdout,stderr = _utils.invoke(['git'] + list(args))
+        status,stdout,stderr = _utils.invoke(
+            ['git'] + list(args), unicode_output=True)
         return stdout.rstrip('\n')
 
     def __init__(self, **kwargs):
diff --git a/update_copyright/vcs/utils.py b/update_copyright/vcs/utils.py
index 3190f57..f7f0b40 100644
--- a/update_copyright/vcs/utils.py
+++ b/update_copyright/vcs/utils.py
@@ -23,15 +23,21 @@ import os.path as _os_path
 import subprocess as _subprocess
 import sys as _sys
 
+from ..utils import ENCODING as _ENCODING
+
 
 _MSWINDOWS = _sys.platform == 'win32'
 _POSIX = not _MSWINDOWS
 
 
 def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
-           expect=(0,)):
-    """
-    expect should be a tuple of allowed exit codes.
+           expect=(0,), unicode_output=False, encoding=None):
+    """Invoke an external program and return the results.
+
+    ``expect`` should be a tuple of allowed exit codes.
+
+    When ``unicode_output`` is ``True``, convert stdout and stderr
+    strings to unicode before returning them.
     """
     try :
         if _POSIX:
@@ -46,6 +52,13 @@ def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
         raise ValueError([args, e])
     stdout,stderr = q.communicate(input=stdin)
     status = q.wait()
+    if unicode_output == True:
+        if encoding is None:
+            encoding = _ENCODING
+        if stdout is not None:
+            stdout = unicode(stdout, encoding)
+        if stderr is not None:
+            stderr = unicode(stderr, encoding)
     if status not in expect:
         raise ValueError([args, status, stdout, stderr])
     return status, stdout, stderr
-- 
2.26.2