From: W. Trevor King Date: Thu, 16 Feb 2012 15:42:43 +0000 (-0500) Subject: Improve unicode handling and increase the default textwrap width. X-Git-Tag: v0.3~8 X-Git-Url: http://git.tremily.us/?p=update-copyright.git;a=commitdiff_plain;h=2a9613be614edb48edc25258332aed5774ff4dc7 Improve unicode handling and increase the default textwrap width. --- diff --git a/update_copyright/project.py b/update_copyright/project.py index 18280b1..4684d38 100644 --- a/update_copyright/project.py +++ b/update_copyright/project.py @@ -84,9 +84,11 @@ class Project (object): self.with_files = False self._ignored_paths = None self._pyfile = None + self._encoding = None + self._width = 79 # unlikely to occur in the wild :p - self._copyright_tag = '-xyz-COPY' + '-RIGHT-zyx-' + self._copyright_tag = u'-xyz-COPY' + u'-RIGHT-zyx-' def load_config(self, stream): p = _configparser.RawConfigParser() @@ -150,20 +152,24 @@ class Project (object): authors = self._vcs.authors() new_contents = u'{} was written by:\n{}\n'.format( self._name, u'\n'.join(authors)) - _utils.set_contents('AUTHORS', new_contents, dry_run=dry_run) + _utils.set_contents( + 'AUTHORS', new_contents, unicode=True, encoding=self._encoding, + dry_run=dry_run) def update_file(self, filename, dry_run=False): _LOG.info('update {}'.format(filename)) - contents = _utils.get_contents(filename=filename) + contents = _utils.get_contents( + filename=filename, unicode=True, encoding=self._encoding) original_year = self._vcs.original_year(filename=filename) authors = self._vcs.authors(filename=filename) new_contents = _utils.update_copyright( contents=contents, original_year=original_year, authors=authors, text=self._copyright, info=self._info(), prefix='# ', - tag=self._copyright_tag) + width=self._width, tag=self._copyright_tag) _utils.set_contents( filename=filename, contents=new_contents, - original_contents=contents, dry_run=dry_run) + original_contents=contents, unicode=True, encoding=self._encoding, + dry_run=dry_run) def 
update_files(self, files=None, dry_run=False): if files is None or len(files) == 0: @@ -185,37 +191,38 @@ class Project (object): _utils.copyright_string( original_year=original_year, final_year=current_year, authors=authors, text=self._copyright, info=self._info(), - prefix='# '), - '', 'import textwrap as _textwrap', '', '', - 'LICENSE = """', + prefix=u'# ', width=self._width), + u'', u'import textwrap as _textwrap', u'', u'', + u'LICENSE = """', _utils.copyright_string( original_year=original_year, final_year=current_year, authors=authors, text=self._copyright, info=self._info(), - prefix=''), - '""".strip()', - '', - 'def short_license(info, wrap=True, **kwargs):', - ' paragraphs = [', + prefix=u'', width=self._width), + u'""".strip()', + u'', + u'def short_license(info, wrap=True, **kwargs):', + u' paragraphs = [', ] paragraphs = _utils.copyright_string( original_year=original_year, final_year=current_year, authors=authors, text=self._short_copyright, info=self._info(), author_format_fn=_utils.short_author_formatter, wrap=False, - ).split('\n\n') + ).split(u'\n\n') for p in paragraphs: - lines.append(" '{}' % info,".format( - p.replace("'", r"\'"))) + lines.append(u" '{}' % info,".format( + p.replace(u"'", ur"\'"))) lines.extend([ - ' ]', - ' if wrap:', - ' for i,p in enumerate(paragraphs):', - ' paragraphs[i] = _textwrap.fill(p, **kwargs)', - r" return '\n\n'.join(paragraphs)", - '', # for terminal endline + u' ]', + u' if wrap:', + u' for i,p in enumerate(paragraphs):', + u' paragraphs[i] = _textwrap.fill(p, **kwargs)', + ur" return '\n\n'.join(paragraphs)", + u'', # for terminal endline ]) - new_contents = '\n'.join(lines) + new_contents = u'\n'.join(lines) _utils.set_contents( - filename=self._pyfile, contents=new_contents, dry_run=dry_run) + filename=self._pyfile, contents=new_contents, unicode=True, + encoding=self._encoding, dry_run=dry_run) def _ignored_file(self, filename): """ diff --git a/update_copyright/utils.py b/update_copyright/utils.py index 
41da98f..37af5b8 100644 --- a/update_copyright/utils.py +++ b/update_copyright/utils.py @@ -16,15 +16,21 @@ # along with update-copyright. If not, see # <http://www.gnu.org/licenses/>. +import codecs as _codecs import difflib as _difflib +import locale as _locale import os as _os import os.path as _os_path +import sys as _sys import textwrap as _textwrap import time as _time from . import LOG as _LOG +ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding() + + def long_author_formatter(copyright_year_string, authors): """ >>> print '\\n'.join(long_author_formatter( @@ -188,24 +194,31 @@ def update_copyright(contents, tag=None, **kwargs): contents = tag_copyright(contents=contents, tag=tag) return contents.replace(tag, string) -def get_contents(filename): +def get_contents(filename, unicode=False, encoding=None): if _os_path.isfile(filename): - f = open(filename, 'r') + if unicode: + if encoding is None: + encoding = ENCODING + f = _codecs.open(filename, 'r', encoding=encoding) + else: + f = open(filename, 'r') contents = f.read() f.close() return contents return None -def set_contents(filename, contents, original_contents=None, dry_run=False): +def set_contents(filename, contents, original_contents=None, unicode=False, + encoding=None, dry_run=False): if original_contents is None: - original_contents = get_contents(filename=filename) + original_contents = get_contents( + filename=filename, unicode=unicode, encoding=encoding) _LOG.debug('check contents of {}'.format(filename)) if contents != original_contents: if original_contents is None: _LOG.info('creating {}'.format(filename)) else: _LOG.info('updating {}'.format(filename)) - _LOG.debug('\n'.join( + _LOG.debug(u'\n'.join( _difflib.unified_diff( original_contents.splitlines(), contents.splitlines(), fromfile=_os_path.normpath( @@ -213,7 +226,12 @@ def set_contents(filename, contents, original_contents=None, dry_run=False): tofile=_os_path.normpath(_os_path.join('b', filename)), n=3, lineterm=''))) if dry_run == False: - f = 
file(filename, 'w') + if unicode: + if encoding is None: + encoding = ENCODING + f = _codecs.open(filename, 'w', encoding=encoding) + else: + f = file(filename, 'w') f.write(contents) f.close() _LOG.debug('no change in {}'.format(filename)) diff --git a/update_copyright/vcs/git.py b/update_copyright/vcs/git.py index e65acb2..46f15f1 100644 --- a/update_copyright/vcs/git.py +++ b/update_copyright/vcs/git.py @@ -25,7 +25,8 @@ class GitBackend (_VCSBackend): @staticmethod def _git_cmd(*args): - status,stdout,stderr = _utils.invoke(['git'] + list(args)) + status,stdout,stderr = _utils.invoke( + ['git'] + list(args), unicode_output=True) return stdout.rstrip('\n') def __init__(self, **kwargs): diff --git a/update_copyright/vcs/utils.py b/update_copyright/vcs/utils.py index 3190f57..f7f0b40 100644 --- a/update_copyright/vcs/utils.py +++ b/update_copyright/vcs/utils.py @@ -23,15 +23,21 @@ import os.path as _os_path import subprocess as _subprocess import sys as _sys +from ..utils import ENCODING as _ENCODING + _MSWINDOWS = _sys.platform == 'win32' _POSIX = not _MSWINDOWS def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, - expect=(0,)): - """ - expect should be a tuple of allowed exit codes. + expect=(0,), unicode_output=False, encoding=None): + """Invoke an external program and return the results + + ``expect`` should be a tuple of allowed exit codes. + + When ``unicode_output`` is ``True``, convert stdout and stderr + strings to unicode before returning them. 
""" try : if _POSIX: @@ -46,6 +52,13 @@ def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, raise ValueError([args, e]) stdout,stderr = q.communicate(input=stdin) status = q.wait() + if unicode_output == True: + if encoding is None: + encoding = _ENCODING + if stdout is not None: + stdout = unicode(stdout, encoding) + if stderr is not None: + stderr = unicode(stderr, encoding) if status not in expect: raise ValueError([args, status, stdout, stderr]) return status, stdout, stderr