Improve unicode handling and increase the default textwrap width.

author W. Trevor King <wking@drexel.edu>

Thu, 16 Feb 2012 15:42:43 +0000 (10:42 -0500)

committer W. Trevor King <wking@drexel.edu>

Thu, 16 Feb 2012 15:42:43 +0000 (10:42 -0500)
author W. Trevor King <wking@drexel.edu>
Thu, 16 Feb 2012 15:42:43 +0000 (10:42 -0500)
committer W. Trevor King <wking@drexel.edu>
Thu, 16 Feb 2012 15:42:43 +0000 (10:42 -0500)
diff --git a/update_copyright/project.py b/update_copyright/project.py

index 18280b14b3508f251d8ebba48cb7861fa704ccf5..4684d380dd1fed225a89a707da2a584577d3fd8d 100644 (file)
--- a/update_copyright/project.py
+++ b/update_copyright/project.py
@@ -84,9 +84,11 @@ class Project (object):
          self.with_files = False
          self._ignored_paths = None
          self._pyfile = None
+        self._encoding = None
+        self._width = 79
  
          # unlikely to occur in the wild :p
-        self._copyright_tag = '-xyz-COPY' + '-RIGHT-zyx-'
+        self._copyright_tag = u'-xyz-COPY' + u'-RIGHT-zyx-'
  
      def load_config(self, stream):
          p = _configparser.RawConfigParser()
@@ -150,20 +152,24 @@ class Project (object):
          authors = self._vcs.authors()
          new_contents = u'{} was written by:\n{}\n'.format(
              self._name, u'\n'.join(authors))
-        _utils.set_contents('AUTHORS', new_contents, dry_run=dry_run)
+        _utils.set_contents(
+            'AUTHORS', new_contents, unicode=True, encoding=self._encoding,
+            dry_run=dry_run)
  
      def update_file(self, filename, dry_run=False):
          _LOG.info('update {}'.format(filename))
-        contents = _utils.get_contents(filename=filename)
+        contents = _utils.get_contents(
+            filename=filename, unicode=True, encoding=self._encoding)
          original_year = self._vcs.original_year(filename=filename)
          authors = self._vcs.authors(filename=filename)
          new_contents = _utils.update_copyright(
              contents=contents, original_year=original_year, authors=authors,
              text=self._copyright, info=self._info(), prefix='# ',
-            tag=self._copyright_tag)
+            width=self._width, tag=self._copyright_tag)
          _utils.set_contents(
              filename=filename, contents=new_contents,
-            original_contents=contents, dry_run=dry_run)
+            original_contents=contents, unicode=True, encoding=self._encoding,
+            dry_run=dry_run)
  
      def update_files(self, files=None, dry_run=False):
          if files is None or len(files) == 0:
@@ -185,37 +191,38 @@ class Project (object):
              _utils.copyright_string(
                  original_year=original_year, final_year=current_year,
                  authors=authors, text=self._copyright, info=self._info(),
-                prefix='# '),
-            '', 'import textwrap as _textwrap', '', '',
-            'LICENSE = """',
+                prefix=u'# ', width=self._width),
+            u'', u'import textwrap as _textwrap', u'', u'',
+            u'LICENSE = """',
              _utils.copyright_string(
                  original_year=original_year, final_year=current_year,
                  authors=authors, text=self._copyright, info=self._info(),
-                prefix=''),
-            '""".strip()',
-            '',
-            'def short_license(info, wrap=True, **kwargs):',
-            '    paragraphs = [',
+                prefix=u'', width=self._width),
+            u'""".strip()',
+            u'',
+            u'def short_license(info, wrap=True, **kwargs):',
+            u'    paragraphs = [',
              ]
          paragraphs = _utils.copyright_string(
              original_year=original_year, final_year=current_year,
              authors=authors, text=self._short_copyright, info=self._info(),
              author_format_fn=_utils.short_author_formatter, wrap=False,
-            ).split('\n\n')
+            ).split(u'\n\n')
          for p in paragraphs:
-            lines.append("        '{}' % info,".format(
-                    p.replace("'", r"\'")))
+            lines.append(u"        '{}' % info,".format(
+                    p.replace(u"'", ur"\'")))
          lines.extend([
-                '        ]',
-                '    if wrap:',
-                '        for i,p in enumerate(paragraphs):',
-                '            paragraphs[i] = _textwrap.fill(p, **kwargs)',
-                r"    return '\n\n'.join(paragraphs)",
-                '',  # for terminal endline
+                u'        ]',
+                u'    if wrap:',
+                u'        for i,p in enumerate(paragraphs):',
+                u'            paragraphs[i] = _textwrap.fill(p, **kwargs)',
+                ur"    return '\n\n'.join(paragraphs)",
+                u'',  # for terminal endline
                  ])
-        new_contents = '\n'.join(lines)
+        new_contents = u'\n'.join(lines)
          _utils.set_contents(
-            filename=self._pyfile, contents=new_contents, dry_run=dry_run)
+            filename=self._pyfile, contents=new_contents, unicode=True,
+            encoding=self._encoding, dry_run=dry_run)
  
      def _ignored_file(self, filename):
          """
diff --git a/update_copyright/utils.py b/update_copyright/utils.py

index 41da98f3409809814b131d6f07072dfe68219345..37af5b8b7dc57254f80c29f95ce41c853b55daf4 100644 (file)
--- a/update_copyright/utils.py
+++ b/update_copyright/utils.py
@@ -16,15 +16,21 @@
  # along with update-copyright.  If not, see
  # <http://www.gnu.org/licenses/>.
  
+import codecs as _codecs
  import difflib as _difflib
+import locale as _locale
  import os as _os
  import os.path as _os_path
+import sys as _sys
  import textwrap as _textwrap
  import time as _time
  
  from . import LOG as _LOG
  
  
+ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding()
+
+
  def long_author_formatter(copyright_year_string, authors):
      """
      >>> print '\\n'.join(long_author_formatter(
@@ -188,24 +194,31 @@ def update_copyright(contents, tag=None, **kwargs):
      contents = tag_copyright(contents=contents, tag=tag)
      return contents.replace(tag, string)
  
-def get_contents(filename):
+def get_contents(filename, unicode=False, encoding=None):
      if _os_path.isfile(filename):
-        f = open(filename, 'r')
+        if unicode:
+            if encoding is None:
+                encoding = ENCODING
+            f = _codecs.open(filename, 'r', encoding=encoding)
+        else:
+            f = open(filename, 'r')
          contents = f.read()
          f.close()
          return contents
      return None
  
-def set_contents(filename, contents, original_contents=None, dry_run=False):
+def set_contents(filename, contents, original_contents=None, unicode=False,
+                 encoding=None, dry_run=False):
      if original_contents is None:
-        original_contents = get_contents(filename=filename)
+        original_contents = get_contents(
+            filename=filename, unicode=unicode, encoding=encoding)
      _LOG.debug('check contents of {}'.format(filename))
      if contents != original_contents:
          if original_contents is None:
              _LOG.info('creating {}'.format(filename))
          else:
              _LOG.info('updating {}'.format(filename))
-            _LOG.debug('\n'.join(
+            _LOG.debug(u'\n'.join(
                      _difflib.unified_diff(
                          original_contents.splitlines(), contents.splitlines(),
                          fromfile=_os_path.normpath(
@@ -213,7 +226,12 @@ def set_contents(filename, contents, original_contents=None, dry_run=False):
                          tofile=_os_path.normpath(_os_path.join('b', filename)),
                          n=3, lineterm='')))
          if dry_run == False:
-            f = file(filename, 'w')
+            if unicode:
+                if encoding is None:
+                    encoding = ENCODING
+                f = _codecs.open(filename, 'w', encoding=encoding)
+            else:
+                f = file(filename, 'w')
              f.write(contents)
              f.close()
      _LOG.debug('no change in {}'.format(filename))
diff --git a/update_copyright/vcs/git.py b/update_copyright/vcs/git.py

index e65acb2fd0687b0c48629159e94088734a6878c2..46f15f13a323d68bc46b71158dab40725f16f918 100644 (file)
--- a/update_copyright/vcs/git.py
+++ b/update_copyright/vcs/git.py
@@ -25,7 +25,8 @@ class GitBackend (_VCSBackend):
  
      @staticmethod
      def _git_cmd(*args):
-        status,stdout,stderr = _utils.invoke(['git'] + list(args))
+        status,stdout,stderr = _utils.invoke(
+            ['git'] + list(args), unicode_output=True)
          return stdout.rstrip('\n')
  
      def __init__(self, **kwargs):
diff --git a/update_copyright/vcs/utils.py b/update_copyright/vcs/utils.py

index 3190f571b250b62b465f91aac62efad7328fc41a..f7f0b407d9130d125bf69373e274f7fee7b2e28c 100644 (file)
--- a/update_copyright/vcs/utils.py
+++ b/update_copyright/vcs/utils.py
@@ -23,15 +23,21 @@ import os.path as _os_path
  import subprocess as _subprocess
  import sys as _sys
  
+from ..utils import ENCODING as _ENCODING
+
  
  _MSWINDOWS = _sys.platform == 'win32'
  _POSIX = not _MSWINDOWS
  
  
  def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
-           expect=(0,)):
-    """
-    expect should be a tuple of allowed exit codes.
+           expect=(0,), unicode_output=False, encoding=None):
+    """Invoke an external program and return the results
+
+    ``expect`` should be a tuple of allowed exit codes.
+
+    When ``unicode_output`` is ``True``, convert stdout and stderr
+    strings to unicode before returning them.
      """
      try :
          if _POSIX:
@@ -46,6 +52,13 @@ def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
          raise ValueError([args, e])
      stdout,stderr = q.communicate(input=stdin)
      status = q.wait()
+    if unicode_output == True:
+        if encoding is None:
+            encoding = _ENCODING
+        if stdout is not None:
+            stdout = unicode(stdout, encoding)
+        if stderr is not None:
+            stderr = unicode(stderr, encoding)
      if status not in expect:
          raise ValueError([args, status, stdout, stderr])
      return status, stdout, stderr
author	W. Trevor King <wking@drexel.edu>
	Thu, 16 Feb 2012 15:42:43 +0000 (10:42 -0500)
committer	W. Trevor King <wking@drexel.edu>
	Thu, 16 Feb 2012 15:42:43 +0000 (10:42 -0500)
update_copyright/project.py		patch \| blob \| history
update_copyright/utils.py		patch \| blob \| history
update_copyright/vcs/git.py		patch \| blob \| history
update_copyright/vcs/utils.py		patch \| blob \| history