Convert update-copyright.py to a more modular framework.
author W. Trevor King <wking@drexel.edu>
Thu, 16 Feb 2012 01:43:11 +0000 (20:43 -0500)
committer W. Trevor King <wking@drexel.edu>
Thu, 16 Feb 2012 02:16:01 +0000 (21:16 -0500)
bin/update-copyright.py [new file with mode: 0755]
update_copyright.py [deleted file]
update_copyright/__init__.py [new file with mode: 0644]
update_copyright/log.py [new file with mode: 0644]
update_copyright/project.py [new file with mode: 0644]
update_copyright/utils.py [new file with mode: 0644]
update_copyright/vcs/__init__.py [new file with mode: 0644]
update_copyright/vcs/bazaar.py [new file with mode: 0644]
update_copyright/vcs/git.py [new file with mode: 0644]
update_copyright/vcs/mercurial.py [new file with mode: 0644]
update_copyright/vcs/utils.py [new file with mode: 0644]

diff --git a/bin/update-copyright.py b/bin/update-copyright.py
new file mode 100755 (executable)
index 0000000..590f27b
--- /dev/null
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+#
+# Copyright
+
+"""Update copyright information with information from the VCS repository.
+
+Run from the project's repository root.
+
+Replaces every line starting with ``^# Copyright`` and continuing with
+``^#`` with an auto-generated copyright blurb.  If you want to add
+``#``-commented material after a copyright blurb, please insert a blank
+line between the blurb and your comment, so the next run of
+``update-copyright.py`` doesn't clobber your comment.
+
+If no files are given, a list of files to update is generated
+automatically.
+"""
+
+import logging as _logging
+
+from update_copyright import LOG as _LOG
+from update_copyright.project import Project
+
+
+if __name__ == '__main__':
+    import optparse
+    import sys
+
+    usage = "%prog [options] [file ...]"
+
+    p = optparse.OptionParser(usage=usage, description=__doc__)
+    p.add_option('--config', dest='config', default='.update-copyright.conf',
+                 metavar='PATH', help='path to project config file (%default)')
+    p.add_option('--no-authors', dest='authors', default=True,
+                 action='store_false', help="Don't generate AUTHORS")
+    p.add_option('--no-files', dest='files', default=True,
+                 action='store_false', help="Don't update file copyrights")
+    p.add_option('--no-pyfile', dest='pyfile', default=True,
+                 action='store_false', help="Don't update the pyfile")
+    p.add_option('--dry-run', dest='dry_run', default=False,
+                 action='store_true', help="Don't make any changes")
+    p.add_option('-v', '--verbose', dest='verbose', default=0,
+                 action='count', help='Increment verbosity')
+    options,args = p.parse_args()
+
+    _LOG.setLevel(max(_logging.DEBUG, _logging.ERROR - 10*options.verbose))
+
+    project = Project()
+    project.load_config(open(options.config, 'r'))
+    if options.authors:
+        project.update_authors(dry_run=options.dry_run)
+    if options.files:
+        project.update_files(files=args, dry_run=options.dry_run)
+    if options.pyfile:
+        project.update_pyfile(dry_run=options.dry_run)
diff --git a/update_copyright.py b/update_copyright.py
deleted file mode 100755 (executable)
index 647e09d..0000000
+++ /dev/null
@@ -1,754 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (C) 2009-2012 W. Trevor King <wking@drexel.edu>
-#
-# This file is part of update-copyright.
-#
-# update-copyright is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# update-copyright is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with update-copyright.  If not, see
-# <http://www.gnu.org/licenses/>.
-
-"""Automatically update copyright boilerplate.
-
-This script is adapted from one written for `Bugs Everywhere`_. and
-later modified for `Hooke`_ before returning to `Bugs Everywhere`_.  I
-finally gave up on maintaining separate versions, so here it is as a
-stand-alone module.
-
-.. _Bugs Everywhere: http://bugseverywhere.org/
-.. _Hooke: http://code.google.com/p/hooke/
-"""
-
-import difflib
-import email.utils
-import os
-import os.path
-import sys
-import textwrap
-import time
-
-
-PROJECT_INFO = {
-    'project': 'update-copyright',
-    'vcs': 'Git',
-    }
-
-# Break "copyright" into "copy" and "right" to avoid matching the
-# REGEXP if we decide to go back to regexps.
-COPY_RIGHT_TEXT = [
-    'This file is part of %(project)s.',
-    '%(project)s is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.',
-    '%(project)s is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.',
-    'You should have received a copy of the GNU General Public License along with %(project)s.  If not, see <http://www.gnu.org/licenses/>.'
-    ]
-
-SHORT_COPY_RIGHT_TEXT = [
-    '%(project)s comes with ABSOLUTELY NO WARRANTY and is licensed under the GNU General Public License.  For details, %(get-details)s.'
-    ]
-
-COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
-
-# Convert author names to canonical forms.
-# ALIASES[<canonical name>] = <list of aliases>
-# for example,
-# ALIASES = {
-#     'John Doe <jdoe@a.com>':
-#         ['John Doe', 'jdoe', 'J. Doe <j@doe.net>'],
-#     }
-# Git-based projects are encouraged to use .mailmap instead of
-# ALIASES.  See git-shortlog(1) for details.
-ALIASES = {}
-
-# List of paths that should not be scanned for copyright updates.
-# IGNORED_PATHS = ['./.git/']
-IGNORED_PATHS = ['./.git']
-# List of files that should not be scanned for copyright updates.
-# IGNORED_FILES = ['COPYING']
-IGNORED_FILES = ['COPYING']
-
-# Work around missing author holes in the VCS history.
-# AUTHOR_HACKS[<path tuple>] = [<missing authors]
-# for example, if John Doe contributed to module.py but wasn't listed
-# in the VCS history of that file:
-# AUTHOR_HACKS = {
-#     ('path', 'to', 'module.py'):['John Doe'],
-#     }
-AUTHOR_HACKS = {}
-
-# Work around missing year holes in the VCS history.
-# YEAR_HACKS[<path tuple>] = <original year>
-# for example, if module.py was published in 2008 but the VCS history
-# only goes back to 2010:
-# YEAR_HACKS = {
-#     ('path', 'to', 'module.py'):2008,
-#     }
-YEAR_HACKS = {}
-
-# Helpers for VCS-specific commands
-
-def splitpath(path):
-    """Recursively split a path into elements.
-
-    Examples
-    --------
-
-    >>> splitpath(os.path.join('a', 'b', 'c'))
-    ('a', 'b', 'c')
-    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
-    ('a', 'b', 'c')
-    """
-    path = os.path.normpath(path)
-    elements = []
-    while True:
-        dirname,basename = os.path.split(path)
-        elements.insert(0,basename)
-        if dirname in ['', '.']:
-            break
-        path = dirname
-    return tuple(elements)
-
-# VCS-specific commands
-
-if PROJECT_INFO['vcs'] == 'Git':
-
-    import subprocess
-
-    _MSWINDOWS = sys.platform == 'win32'
-    _POSIX = not _MSWINDOWS
-
-    def invoke(args, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, expect=(0,)):
-        """
-        expect should be a tuple of allowed exit codes.
-        """
-        try :
-            if _POSIX:
-                q = subprocess.Popen(args, stdin=subprocess.PIPE,
-                                     stdout=stdout, stderr=stderr)
-            else:
-                assert _MSWINDOWS == True, 'invalid platform'
-                # win32 don't have os.execvp() so run the command in a shell
-                q = subprocess.Popen(args, stdin=subprocess.PIPE,
-                                     stdout=stdout, stderr=stderr, shell=True)
-        except OSError, e:
-            raise ValueError([args, e])
-        stdout,stderr = q.communicate(input=stdin)
-        status = q.wait()
-        if status not in expect:
-            raise ValueError([args, status, stdout, stderr])
-        return status, stdout, stderr
-
-    def git_cmd(*args):
-        status,stdout,stderr = invoke(['git'] + list(args))
-        return stdout.rstrip('\n')
-
-    version = git_cmd('--version').split(' ')[-1]
-    if version.startswith('1.5.'):
-        # Author name <author email>
-        author_format = '--pretty=format:%an <%ae>'
-        year_format = ['--pretty=format:%ai']  # Author date
-        # YYYY-MM-DD HH:MM:SS Z
-        # Earlier versions of Git don't seem to recognize --date=short
-    else:
-        author_format = '--pretty=format:%aN <%aE>'
-        year_format = ['--pretty=format:%ad',  # Author date
-                       '--date=short']         # YYYY-MM-DD
-
-    def original_year(filename=None, year_hacks=YEAR_HACKS):
-        args = ['log'] + year_format
-        if filename is not None:
-            args.extend(['--follow'] + [filename])
-        output = git_cmd(*args)
-        if version.startswith('1.5.'):
-            output = '\n'.join([x.split()[0] for x in output.splitlines()])
-        years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
-        if filename is None:
-            years.extend(year_hacks.values())
-        elif splitpath(filename) in year_hacks:
-            years.append(year_hacks[splitpath(filename)])
-        years.sort()
-        return years[0]
-
-    def authors(filename, author_hacks=AUTHOR_HACKS):
-        output = git_cmd('log', '--follow', author_format,
-                         filename)
-        ret = list(set(output.splitlines()))
-        if splitpath(filename) in author_hacks:
-            ret.extend(author_hacks[splitpath(filename)])
-        return ret
-
-    def authors_list(author_hacks=AUTHOR_HACKS):
-        output = git_cmd('log', author_format)
-        ret = list(set(output.splitlines()))
-        for path,authors in author_hacks.items():
-            ret.extend(authors)
-        return ret
-
-    def is_versioned(filename):
-        output = git_cmd('log', '--follow', filename)
-        if len(output) == 0:
-            return False
-        return True
-
-elif PROJECT_INFO['vcs'] == 'Mercurial':
-
-    import StringIO
-    import mercurial
-    import mercurial.dispatch
-
-    def mercurial_cmd(*args):
-        cwd = os.getcwd()
-        stdout = sys.stdout
-        stderr = sys.stderr
-        tmp_stdout = StringIO.StringIO()
-        tmp_stderr = StringIO.StringIO()
-        sys.stdout = tmp_stdout
-        sys.stderr = tmp_stderr
-        try:
-            mercurial.dispatch.dispatch(list(args))
-        finally:
-            os.chdir(cwd)
-            sys.stdout = stdout
-            sys.stderr = stderr
-        return (tmp_stdout.getvalue().rstrip('\n'),
-                tmp_stderr.getvalue().rstrip('\n'))
-
-    def original_year(filename=None, year_hacks=YEAR_HACKS):
-        args = [
-            '--template', '{date|shortdate}\n',
-            # shortdate filter: YEAR-MONTH-DAY
-            ]
-        if filename != None:
-            args.extend(['--follow', filename])
-        output,error = mercurial_cmd('log', *args)
-        years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
-        if filename == None:
-            years.extend(year_hacks.values())
-        elif splitpath(filename) in year_hacks:
-            years.append(year_hacks[splitpath(filename)])
-        years.sort()
-        return years[0]
-
-    def authors(filename, author_hacks=AUTHOR_HACKS):
-        output,error = mercurial_cmd('log', '--follow',
-                                     '--template', '{author}\n',
-                                     filename)
-        ret = list(set(output.splitlines()))
-        if splitpath(filename) in author_hacks:
-            ret.extend(author_hacks[splitpath(filename)])
-        return ret
-
-    def authors_list(author_hacks=AUTHOR_HACKS):
-        output,error = mercurial_cmd('log', '--template', '{author}\n')
-        ret = list(set(output.splitlines()))
-        for path,authors in author_hacks.items():
-            ret.extend(authors)
-        return ret
-
-    def is_versioned(filename):
-        output,error = mercurial_cmd('log', '--follow', filename)
-        if len(error) > 0:
-            return False
-        return True
-
-elif PROJECT_INFO['vcs'] == 'Bazaar':
-
-    import StringIO
-    import bzrlib
-    import bzrlib.builtins
-    import bzrlib.log
-
-    class LogFormatter (bzrlib.log.LogFormatter):
-        supports_merge_revisions = True
-        preferred_levels = 0
-        supports_deta = False
-        supports_tags = False
-        supports_diff = False
-
-        def log_revision(self, revision):
-            raise NotImplementedError
-
-    class YearLogFormatter (LogFormatter):
-        def log_revision(self, revision):
-            self.to_file.write(
-                time.strftime('%Y', time.gmtime(revision.rev.timestamp))
-                +'\n')
-
-    class AuthorLogFormatter (LogFormatter):
-        def log_revision(self, revision):
-            authors = revision.rev.get_apparent_authors()
-            self.to_file.write('\n'.join(authors)+'\n')
-
-    def original_year(filename=None, year_hacks=YEAR_HACKS):
-        cmd = bzrlib.builtins.cmd_log()
-        cmd.outf = StringIO.StringIO()
-        kwargs = {'log_format':YearLogFormatter, 'levels':0}
-        if filename != None:
-            kwargs['file_list'] = [filename]
-        cmd.run(**kwargs)
-        years = [int(year) for year in set(cmd.outf.getvalue().splitlines())]
-        if filename == None:
-            years.append(year_hacks.values())
-        elif splitpath(filename) in year_hacks:
-            years.append(year_hacks[splitpath(filename)])
-        years.sort()
-        return years[0]
-
-    def authors(filename, author_hacks=AUTHOR_HACKS):
-        cmd = bzrlib.builtins.cmd_log()
-        cmd.outf = StringIO.StringIO()
-        cmd.run(file_list=[filename], log_format=AuthorLogFormatter, levels=0)
-        ret = list(set(cmd.outf.getvalue().splitlines()))
-        if splitpath(filename) in author_hacks:
-            ret.extend(author_hacks[splitpath(filename)])
-        return ret
-
-    def authors_list(author_hacks=AUTHOR_HACKS):
-        cmd = bzrlib.builtins.cmd_log()
-        cmd.outf = StringIO.StringIO()
-        cmd.run(log_format=AuthorLogFormatter, levels=0)
-        output = cmd.outf.getvalue()
-        ret = list(set(cmd.outf.getvalue().splitlines()))
-        for path,authors in author_hacks.items():
-            ret.extend(authors)
-        return ret
-
-    def is_versioned(filename):
-        cmd = bzrlib.builtins.cmd_log()
-        cmd.outf = StringIO.StringIO()
-        cmd.run(file_list=[filename])
-        return True
-
-else:
-    raise NotImplementedError('Unrecognized VCS: %(vcs)s' % PROJECT_INFO)
-
-# General utility commands
-
-def _strip_email(*args):
-    """Remove email addresses from a series of names.
-
-    Examples
-    --------
-
-    >>> _strip_email('J Doe')
-    ['J Doe']
-    >>> _strip_email('J Doe <jdoe@a.com>')
-    ['J Doe']
-    >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
-    ['J Doe', 'JJJ Smith']
-    """
-    args = list(args)
-    for i,arg in enumerate(args):
-        if arg == None:
-            continue
-        author,addr = email.utils.parseaddr(arg)
-        if author == '':
-            author = arg
-        args[i] = author
-    return args
-
-def _reverse_aliases(aliases):
-    """Reverse an `aliases` dict.
-
-    Input:   key: canonical name,  value: list of aliases
-    Output:  key: alias,           value: canonical name
-
-    Examples
-    --------
-
-    >>> aliases = {
-    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
-    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
-    ...     None:['Anonymous <a@a.com>'],
-    ...     }
-    >>> r = _reverse_aliases(aliases)
-    >>> for item in sorted(r.items()):
-    ...     print item
-    ('Anonymous <a@a.com>', None)
-    ('J', 'J Doe <jdoe@a.com>')
-    ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
-    ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
-    """
-    output = {}
-    for canonical_name,_aliases in aliases.items():
-        for alias in _aliases:
-            output[alias] = canonical_name
-    return output
-
-def _replace_aliases(authors, with_email=True, aliases=None):
-    """Consolidate and sort `authors`.
-
-    Make the replacements listed in the `aliases` dict (key: canonical
-    name, value: list of aliases).  If `aliases` is ``None``, default
-    to ``ALIASES``.
-
-    >>> aliases = {
-    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
-    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
-    ...     None:['Anonymous <a@a.com>'],
-    ...     }
-    >>> authors = [
-    ...     'JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
-    ...     'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>', 'Anonymous <a@a.com>']
-    >>> _replace_aliases(authors, with_email=True, aliases=aliases)
-    ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
-    >>> _replace_aliases(authors, with_email=False, aliases=aliases)
-    ['J Doe', 'JJJ Smith']
-    """
-    if aliases == None:
-        aliases = ALIASES
-    rev_aliases = _reverse_aliases(aliases)
-    for i,author in enumerate(authors):
-        if author in rev_aliases:
-            authors[i] = rev_aliases[author]
-    authors = sorted(list(set(authors)))
-    if None in authors:
-        authors.remove(None)
-    if with_email == False:
-        authors = _strip_email(*authors)
-    return authors
-
-def _long_author_formatter(copyright_year_string, authors):
-    """
-    >>> print '\\n'.join(_long_author_formatter(
-    ...     copyright_year_string='Copyright (C) 1990-2010',
-    ...     authors=['Jack', 'Jill', 'John']))
-    Copyright (C) 1990-2010 Jack
-                            Jill
-                            John
-    """
-    lines = ['%s %s' % (copyright_year_string, authors[0])]
-    for author in authors[1:]:
-        lines.append(' '*(len(copyright_year_string)+1) + author)
-    return lines
-
-def _short_author_formatter(copyright_year_string, authors):
-    """
-    >>> print '\\n'.join(_short_author_formatter(
-    ...     copyright_year_string='Copyright (C) 1990-2010',
-    ...     authors=['Jack', 'Jill', 'John']*5))
-    Copyright (C) 1990-2010 Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John
-    """
-    blurb = '%s %s' % (copyright_year_string, ', '.join(authors))
-    return [blurb]
-
-def _copyright_string(original_year, final_year, authors,
-                      text=COPY_RIGHT_TEXT, extra_info={},
-                      author_format_fn=_long_author_formatter,
-                      formatter_kwargs={}, prefix='', wrap=True,
-                      **wrap_kwargs):
-    """
-    >>> print _copyright_string(original_year=2005,
-    ...                         final_year=2005,
-    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
-    ...                         prefix='# '
-    ...                        ) # doctest: +ELLIPSIS
-    # Copyright (C) 2005 A <a@a.com>
-    #                    B <b@b.edu>
-    #
-    # This file...
-    >>> print _copyright_string(original_year=2005,
-    ...                         final_year=2009,
-    ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
-    ...                        ) # doctest: +ELLIPSIS
-    Copyright (C) 2005-2009 A <a@a.com>
-                            B <b@b.edu>
-    <BLANKLINE>
-    This file...
-    >>> print _copyright_string(original_year=2005,
-    ...                         final_year=2005,
-    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
-    ...                         text=SHORT_COPY_RIGHT_TEXT,
-    ...                         author_format_fn=_short_author_formatter,
-    ...                         extra_info={'get-details':'%(get-details)s'},
-    ...                         prefix='',
-    ...                         width=50,
-    ...                        )
-    Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
-    <BLANKLINE>
-    update-copyright comes with ABSOLUTELY NO WARRANTY
-    and is licensed under the GNU General Public
-    License.  For details, %(get-details)s.
-
-    >>> print _copyright_string(original_year=2005,
-    ...                         final_year=2005,
-    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
-    ...                         text=SHORT_COPY_RIGHT_TEXT,
-    ...                         extra_info={'get-details':'%(get-details)s'},
-    ...                         author_format_fn=_short_author_formatter,
-    ...                         wrap=False,
-    ...                         prefix='',
-    ...                        )
-    Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
-    <BLANKLINE>
-    update-copyright comes with ABSOLUTELY NO WARRANTY and is licensed under the GNU General Public License.  For details, %(get-details)s.
-    """
-    for key in ['initial_indent', 'subsequent_indent']:
-        if key not in wrap_kwargs:
-            wrap_kwargs[key] = prefix
-
-    if original_year == final_year:
-        date_range = '%s' % original_year
-    else:
-        date_range = '%s-%s' % (original_year, final_year)
-    copyright_year_string = 'Copyright (C) %s' % date_range
-
-    lines = author_format_fn(copyright_year_string, authors,
-                             **formatter_kwargs)
-    for i,line in enumerate(lines):
-        lines[i] = prefix + line
-
-    info = dict(PROJECT_INFO)
-    for key,value in extra_info.items():
-        info[key] = value
-    text = [paragraph % info for paragraph in text]
-
-    if wrap == True:
-        text = [textwrap.fill(p, **wrap_kwargs) for p in text]
-    else:
-        assert wrap_kwargs['subsequent_indent'] == '', \
-            wrap_kwargs['subsequent_indent']
-    sep = '\n%s\n' % prefix.rstrip()
-    return sep.join(['\n'.join(lines)] + text)
-
-def _tag_copyright(contents):
-    """
-    >>> contents = '''Some file
-    ... bla bla
-    ... # Copyright (copyright begins)
-    ... # (copyright continues)
-    ... # bla bla bla
-    ... (copyright ends)
-    ... bla bla bla
-    ... '''
-    >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
-    Some file
-    bla bla
-    -xyz-CR-zyx-
-    (copyright ends)
-    bla bla bla
-    <BLANKLINE>
-    """
-    lines = []
-    incopy = False
-    for line in contents.splitlines():
-        if incopy == False and line.startswith('# Copyright'):
-            incopy = True
-            lines.append(COPY_RIGHT_TAG)
-        elif incopy == True and not line.startswith('#'):
-            incopy = False
-        if incopy == False:
-            lines.append(line.rstrip('\n'))
-    return '\n'.join(lines)+'\n'
-
-def _update_copyright(contents, original_year, authors):
-    """
-    >>> contents = '''Some file
-    ... bla bla
-    ... # Copyright (copyright begins)
-    ... # (copyright continues)
-    ... # bla bla bla
-    ... (copyright ends)
-    ... bla bla bla
-    ... '''
-    >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
-    ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
-    Some file
-    bla bla
-    # Copyright (C) 2008-... Jack
-    #                         Jill
-    #
-    # This file...
-    (copyright ends)
-    bla bla bla
-    <BLANKLINE>
-    """
-    current_year = time.gmtime()[0]
-    copyright_string = _copyright_string(
-        original_year, current_year, authors, prefix='# ')
-    contents = _tag_copyright(contents)
-    return contents.replace(COPY_RIGHT_TAG, copyright_string)
-
-def ignored_file(filename, ignored_paths=None, ignored_files=None,
-                 check_disk=True, check_vcs=True):
-    """
-    >>> ignored_paths = ['./a/', './b/']
-    >>> ignored_files = ['x', 'y']
-    >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
-    True
-    >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
-    False
-    >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
-    True
-    >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
-    False
-    >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
-    False
-    """
-    if ignored_paths == None:
-        ignored_paths = IGNORED_PATHS
-    if ignored_files == None:
-        ignored_files = IGNORED_FILES
-    if check_disk == True and os.path.isfile(filename) == False:
-        return True
-    for path in ignored_paths:
-        if filename.startswith(path):
-            return True
-    if os.path.basename(filename) in ignored_files:
-        return True
-    if check_vcs == True and is_versioned(filename) == False:
-        return True
-    return False
-
-def _set_contents(filename, contents, original_contents=None, dry_run=False,
-                  verbose=0):
-    if original_contents == None and os.path.isfile(filename):
-        f = open(filename, 'r')
-        original_contents = f.read()
-        f.close()
-    if verbose > 0:
-        print "checking %s ... " % filename,
-    if contents != original_contents:
-        if verbose > 0:
-            if original_contents == None:
-                print "[creating]"
-            else:
-                print "[updating]"
-        if verbose > 1 and original_contents != None:
-            print '\n'.join(
-                difflib.unified_diff(
-                    original_contents.splitlines(), contents.splitlines(),
-                    fromfile=os.path.normpath(os.path.join('a', filename)),
-                    tofile=os.path.normpath(os.path.join('b', filename)),
-                    n=3, lineterm=''))
-        if dry_run == False:
-            f = file(filename, 'w')
-            f.write(contents)
-            f.close()
-    elif verbose > 0:
-        print "[no change]"
-
-# Update commands
-
-def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
-    authors = authors_fn()
-    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
-    new_contents = '%s was written by:\n%s\n' % (
-        PROJECT_INFO['project'],
-        '\n'.join(authors)
-        )
-    _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
-
-def update_file(filename, original_year_fn=original_year, authors_fn=authors,
-                dry_run=False, verbose=0):
-    f = file(filename, 'r')
-    contents = f.read()
-    f.close()
-
-    original_year = original_year_fn(filename)
-    authors = authors_fn(filename)
-    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
-
-    new_contents = _update_copyright(contents, original_year, authors)
-    _set_contents(filename, contents=new_contents, original_contents=contents,
-                  dry_run=dry_run, verbose=verbose)
-
-def update_files(files=None, dry_run=False, verbose=0):
-    if files == None or len(files) == 0:
-        files = []
-        for dirpath,dirnames,filenames in os.walk('.'):
-            for filename in filenames:
-                files.append(os.path.join(dirpath, filename))
-
-    for filename in files:
-        if ignored_file(filename) == True:
-            continue
-        update_file(filename, dry_run=dry_run, verbose=verbose)
-
-def update_pyfile(path, original_year_fn=original_year,
-                  authors_fn=authors_list, dry_run=False, verbose=0):
-    original_year = original_year_fn()
-    current_year = time.gmtime()[0]
-    authors = authors_fn()
-    authors = _replace_aliases(authors, with_email=False, aliases=ALIASES)
-    paragraphs = _copyright_string(
-        original_year, current_year, authors,
-        text=SHORT_COPY_RIGHT_TEXT,
-        extra_info={'get-details':'%(get-details)s'},
-        author_format_fn=_short_author_formatter, wrap=False,
-        ).split('\n\n')
-    lines = [
-        _copyright_string(original_year, current_year, authors, prefix='# '),
-        '', 'import textwrap', '', '',
-        'LICENSE = """',
-        _copyright_string(original_year, current_year, authors, prefix=''),
-        '""".strip()',
-        '',
-        'def short_license(extra_info, wrap=True, **kwargs):',
-        '    paragraphs = [',
-        ]
-    for p in paragraphs:
-        lines.append("        '%s' %% extra_info," % p.replace("'", r"\'"))
-    lines.extend([
-            '        ]',
-            '    if wrap == True:',
-            '        for i,p in enumerate(paragraphs):',
-            '            paragraphs[i] = textwrap.fill(p, **kwargs)',
-            r"    return '\n\n'.join(paragraphs)",
-            ])
-    new_contents = '\n'.join(lines)+'\n'
-    _set_contents(path, new_contents, dry_run=dry_run, verbose=verbose)
-
-
-def test():
-    import doctest
-    doctest.testmod()
-
-if __name__ == '__main__':
-    import optparse
-    import sys
-
-    usage = """%%prog [options] [file ...]
-
-Update copyright information in source code with information from
-the %(vcs)s repository.  Run from the %(project)s repository root.
-
-Replaces every line starting with '^# Copyright' and continuing with
-'^#' with an auto-generated copyright blurb.  If you want to add
-#-commented material after a copyright blurb, please insert a blank
-line between the blurb and your comment, so the next run of
-``update_copyright.py`` doesn't clobber your comment.
-
-If no files are given, a list of files to update is generated
-automatically.
-""" % PROJECT_INFO
-    p = optparse.OptionParser(usage)
-    p.add_option('--pyfile', dest='pyfile', default='update_copyright/license.py',
-                 metavar='PATH',
-                 help='Write project license info to a Python module at PATH')
-    p.add_option('--test', dest='test', default=False,
-                 action='store_true', help='Run internal tests and exit')
-    p.add_option('--dry-run', dest='dry_run', default=False,
-                 action='store_true', help="Don't make any changes")
-    p.add_option('-v', '--verbose', dest='verbose', default=0,
-                 action='count', help='Increment verbosity')
-    options,args = p.parse_args()
-
-    if options.test == True:
-        test()
-        sys.exit(0)
-
-    update_authors(dry_run=options.dry_run, verbose=options.verbose)
-    update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)
-    if options.pyfile != None:
-        update_pyfile(path=options.pyfile,
-                      dry_run=options.dry_run, verbose=options.verbose)
diff --git a/update_copyright/__init__.py b/update_copyright/__init__.py
new file mode 100644 (file)
index 0000000..e523c34
--- /dev/null
@@ -0,0 +1,20 @@
+# Copyright
+
+"""Automatically update copyright boilerplate.
+
+This package is adapted from a script written for `Bugs
+Everywhere`_ and later modified for `Hooke`_ before returning to
+`Bugs Everywhere`_.  I finally gave up on maintaining separate
+versions, so here it is as a stand-alone package.
+
+.. _Bugs Everywhere: http://bugseverywhere.org/
+.. _Hooke: http://code.google.com/p/hooke/
+"""
+
+from .log import get_basic_logger as _get_basic_logger
+
+
+__version__ = '0.2'
+
+
+LOG = _get_basic_logger(name='update-copyright')
diff --git a/update_copyright/log.py b/update_copyright/log.py
new file mode 100644 (file)
index 0000000..e679a82
--- /dev/null
@@ -0,0 +1,32 @@
+# Copyright
+
+"""Tools for setting up package logging.
+
+This module is separate from `tools` to avoid module dependency
+cycles.  This module has no internal dependencies, while `tools`
+depends on many of the other modules.  With this module separate, the
+other internal modules have access to the default logger before the
+package configuration is built up enough to configure it according to
+your external specifications.
+"""
+
+import logging as _logging
+
+
+def get_basic_logger(name, level=_logging.WARN):
+    """Return the logger called `name` with a stream handler attached.
+
+    The logger's own threshold is set to `level`.  Every call attaches
+    a new `logging.StreamHandler` (threshold `DEBUG`, timestamped
+    message format) and stores it in the logger's `_handler_cache`
+    dict under the key ``'stream'``.
+    """
+    handler = _logging.StreamHandler()
+    handler.setLevel(_logging.DEBUG)
+    handler.setFormatter(_logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+
+    logger = _logging.getLogger(name)
+    logger.setLevel(level)
+    logger.addHandler(handler)
+    # Cache handlers for easy swapping depending on config settings
+    logger._handler_cache = {'stream': handler}
+    return logger
diff --git a/update_copyright/project.py b/update_copyright/project.py
new file mode 100644 (file)
index 0000000..7680174
--- /dev/null
@@ -0,0 +1,235 @@
+# Copyright (C) 2009-2012 W. Trevor King <wking@drexel.edu>
+#
+# This file is part of update-copyright.
+#
+# update-copyright is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# update-copyright is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with update-copyright.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Project-specific configuration.
+
+# Convert author names to canonical forms.
+# ALIASES[<canonical name>] = <list of aliases>
+# for example,
+# ALIASES = {
+#     'John Doe <jdoe@a.com>':
+#         ['John Doe', 'jdoe', 'J. Doe <j@doe.net>'],
+#     }
+# Git-based projects are encouraged to use .mailmap instead of
+# ALIASES.  See git-shortlog(1) for details.
+
+# List of paths that should not be scanned for copyright updates.
+# IGNORED_PATHS = ['./.git/']
+IGNORED_PATHS = ['./.git']
+# List of files that should not be scanned for copyright updates.
+# IGNORED_FILES = ['COPYING']
+IGNORED_FILES = ['COPYING']
+
+# Work around missing author holes in the VCS history.
+# AUTHOR_HACKS[<path tuple>] = [<missing authors>]
+# for example, if John Doe contributed to module.py but wasn't listed
+# in the VCS history of that file:
+# AUTHOR_HACKS = {
+#     ('path', 'to', 'module.py'):['John Doe'],
+#     }
+AUTHOR_HACKS = {}
+
+# Work around missing year holes in the VCS history.
+# YEAR_HACKS[<path tuple>] = <original year>
+# for example, if module.py was published in 2008 but the VCS history
+# only goes back to 2010:
+# YEAR_HACKS = {
+#     ('path', 'to', 'module.py'):2008,
+#     }
+YEAR_HACKS = {}
+"""
+
+import ConfigParser as _configparser
+import fnmatch as _fnmatch
+import os.path as _os_path
+import sys
+import time as _time
+
+from . import LOG as _LOG
+from . import utils as _utils
+from .vcs.git import GitBackend as _GitBackend
+try:
+    from .vcs.bazaar import BazaarBackend as _BazaarBackend
+except ImportError, _bazaar_import_error:
+    _BazaarBackend = None
+try:
+    from .vcs.mercurial import MercurialBackend as _MercurialBackend
+except ImportError, _mercurial_import_error:
+    _MercurialBackend = None
+
+
+class Project (object):
+    """Copyright-update settings and actions for a single project.
+
+    A project holds its name, the VCS backend used to look up authors
+    and years, and the long and short copyright blurbs (lists of
+    paragraph strings).  The settings can be passed to the constructor
+    or read from a config file with `load_config`.
+
+    `with_authors` and `with_files` are public boolean flags read from
+    the ``[files]`` config section; this class only stores them.
+    """
+    def __init__(self, name=None, vcs=None, copyright=None,
+                 short_copyright=None):
+        self._name = name
+        self._vcs = vcs
+        # Lists of paragraph strings (or `None` until configured).
+        self._copyright = copyright
+        self._short_copyright = short_copyright
+        self.with_authors = False
+        self.with_files = False
+        # List of `fnmatch` patterns, or `None` for "ignore nothing".
+        self._ignored_paths = None
+        self._pyfile = None
+
+        # unlikely to occur in the wild :p
+        self._copyright_tag = '-xyz-COPY' + '-RIGHT-zyx-'
+
+    @staticmethod
+    def _get_option(parser, section, option, boolean=False):
+        """Return a config value, or `None` if it is not set.
+
+        With `boolean` set the value is parsed by the parser's
+        `getboolean`, which raises `ValueError` for values it does not
+        recognise.
+        """
+        try:
+            if boolean:
+                return parser.getboolean(section, option)
+            return parser.get(section, option)
+        except (_configparser.NoSectionError,
+                _configparser.NoOptionError):
+            return None
+
+    def load_config(self, stream):
+        """Read project settings from the config file object `stream`.
+
+        Every option is optional; options that are missing leave the
+        current value untouched.  Recognised options are
+        ``project.name``, ``project.vcs`` (``Git``, ``Bazaar`` or
+        ``Mercurial``), ``copyright.long``, ``copyright.short`` (one
+        paragraph per line), ``files.authors``, ``files.files``
+        (booleans), ``files.ignored`` and ``files.pyfile``.
+
+        Raises `NotImplementedError` for an unknown ``vcs`` value and
+        `ImportError` when the requested backend could not be imported.
+        """
+        parser = _configparser.RawConfigParser()
+        parser.readfp(stream)
+
+        name = self._get_option(parser, 'project', 'name')
+        if name is not None:
+            self._name = name
+
+        vcs = self._get_option(parser, 'project', 'vcs')
+        if vcs is not None:
+            backends = {
+                'Git': _GitBackend,
+                'Bazaar': _BazaarBackend,
+                'Mercurial': _MercurialBackend,
+                }
+            if vcs not in backends:
+                raise NotImplementedError('vcs: {}'.format(vcs))
+            backend = backends[vcs]
+            if backend is None:
+                # The optional backend module failed to import, so
+                # there is no class to instantiate.
+                raise ImportError(
+                    'vcs: {} backend is not available'.format(vcs))
+            self._vcs = backend()
+
+        long_text = self._get_option(parser, 'copyright', 'long')
+        if long_text is not None:
+            self._copyright = long_text.splitlines()
+        short_text = self._get_option(parser, 'copyright', 'short')
+        if short_text is not None:
+            self._short_copyright = short_text.splitlines()
+
+        # Parse the flags as booleans; the raw strings are truthy even
+        # for values such as "no".
+        with_authors = self._get_option(
+            parser, 'files', 'authors', boolean=True)
+        if with_authors is not None:
+            self.with_authors = with_authors
+        with_files = self._get_option(parser, 'files', 'files', boolean=True)
+        if with_files is not None:
+            self.with_files = with_files
+
+        ignored = self._get_option(parser, 'files', 'ignored')
+        if ignored is not None:
+            # `_ignored_file` iterates over patterns, so split the raw
+            # string into a list.
+            # NOTE(review): a comma is assumed as the separator --
+            # confirm against the documented config format.
+            patterns = [p.strip() for p in ignored.split(',')]
+            self._ignored_paths = [p for p in patterns if p]
+
+        pyfile = self._get_option(parser, 'files', 'pyfile')
+        if pyfile is not None:
+            self._pyfile = pyfile
+
+    def _info(self):
+        """Return the dict used to %-format the copyright paragraphs."""
+        return {
+            'project': self._name,
+            'vcs': self._vcs.name,
+            }
+
+    def update_authors(self, dry_run=False):
+        """Rewrite the ``AUTHORS`` file from the VCS author list."""
+        _LOG.info('update AUTHORS')
+        authors = self._vcs.authors()
+        new_contents = u'{} was written by:\n{}\n'.format(
+            self._name, u'\n'.join(authors))
+        _utils.set_contents('AUTHORS', new_contents, dry_run=dry_run)
+
+    def update_file(self, filename, dry_run=False):
+        """Regenerate the ``# Copyright`` blurb inside `filename`."""
+        _LOG.info('update {}'.format(filename))
+        contents = _utils.get_contents(filename=filename)
+        original_year = self._vcs.original_year(filename=filename)
+        authors = self._vcs.authors(filename=filename)
+        # Hand over a copy of the paragraphs so the formatting helper
+        # can never alter the project's template.
+        new_contents = _utils.update_copyright(
+            contents=contents, original_year=original_year, authors=authors,
+            text=list(self._copyright), info=self._info(), prefix='# ',
+            tag=self._copyright_tag)
+        _utils.set_contents(
+            filename=filename, contents=new_contents,
+            original_contents=contents, dry_run=dry_run)
+
+    def update_files(self, files=None, dry_run=False):
+        """Update every file in `files` that is not ignored.
+
+        With no `files`, every file found below the current directory
+        is considered.
+        """
+        if not files:
+            files = _utils.list_files(root='.')
+        for filename in files:
+            if self._ignored_file(filename=filename):
+                continue
+            self.update_file(filename=filename, dry_run=dry_run)
+
+    def update_pyfile(self, dry_run=False):
+        """Write the license module configured as ``files.pyfile``.
+
+        The generated module defines ``LICENSE`` (the long blurb) and
+        ``short_license()`` (the short blurb, optionally wrapped).
+        Nothing is done when no pyfile location is configured.
+        """
+        if self._pyfile is None:
+            _LOG.info('no pyfile location configured, skip `update_pyfile`')
+            return
+        _LOG.info('update pyfile at {}'.format(self._pyfile))
+        current_year = _time.gmtime()[0]
+        original_year = self._vcs.original_year()
+        authors = self._vcs.authors()
+        info = self._info()
+        # Each call gets its own copy of the paragraph list, so a
+        # helper that formats in place cannot format the template twice.
+        lines = [
+            _utils.copyright_string(
+                original_year=original_year, final_year=current_year,
+                authors=authors, text=list(self._copyright), info=info,
+                prefix='# '),
+            '', 'import textwrap as _textwrap', '', '',
+            'LICENSE = """',
+            _utils.copyright_string(
+                original_year=original_year, final_year=current_year,
+                authors=authors, text=list(self._copyright), info=info,
+                prefix=''),
+            '""".strip()',
+            '',
+            'def short_license(info, wrap=True, **kwargs):',
+            '    paragraphs = [',
+            ]
+        paragraphs = _utils.copyright_string(
+            original_year=original_year, final_year=current_year,
+            authors=authors, text=list(self._short_copyright), info=info,
+            author_format_fn=_utils.short_author_formatter, wrap=False,
+            ).split('\n\n')
+        for p in paragraphs:
+            # Escape single quotes; each paragraph becomes a
+            # single-quoted literal in the generated module.
+            lines.append("        '{}' % info,".format(
+                    p.replace("'", r"\'")))
+        lines.extend([
+                '        ]',
+                '    if wrap:',
+                '        for i,p in enumerate(paragraphs):',
+                '            paragraphs[i] = _textwrap.fill(p, **kwargs)',
+                r"    return '\n\n'.join(paragraphs)",
+                '',  # for terminal endline
+                ])
+        new_contents = '\n'.join(lines)
+        _utils.set_contents(
+            filename=self._pyfile, contents=new_contents, dry_run=dry_run)
+
+    def _ignored_file(self, filename):
+        """Return `True` if `filename` should not be updated.
+
+        A file is ignored when it matches one of the configured
+        `fnmatch` patterns, or when a VCS backend is set and reports
+        the file as unversioned.
+
+        >>> p = Project()
+        >>> p._ignored_paths = ['./a/*', '*/x']
+        >>> p._ignored_file('./a/z')
+        True
+        >>> p._ignored_file('./ab/z')
+        False
+        >>> p._ignored_file('./ab/x')
+        True
+        >>> p._ignored_file('./ab/xy')
+        False
+        >>> p._ignored_file('./z')
+        False
+        """
+        if self._ignored_paths is not None:
+            for path in self._ignored_paths:
+                if _fnmatch.fnmatch(filename, path):
+                    return True
+        if self._vcs and not self._vcs.is_versioned(filename):
+            return True
+        return False
diff --git a/update_copyright/utils.py b/update_copyright/utils.py
new file mode 100644 (file)
index 0000000..9aa9305
--- /dev/null
@@ -0,0 +1,208 @@
+# Copyright
+
+import difflib as _difflib
+import os as _os
+import os.path as _os_path
+import textwrap as _textwrap
+import time as _time
+
+from . import LOG as _LOG
+
+
+def long_author_formatter(copyright_year_string, authors):
+    """Return the copyright lines with one author per line.
+
+    The first author follows `copyright_year_string`; later authors
+    are indented so that the names line up in a column.
+
+    >>> print('\\n'.join(long_author_formatter(
+    ...     copyright_year_string='Copyright (C) 1990-2010',
+    ...     authors=['Jack', 'Jill', 'John'])))
+    Copyright (C) 1990-2010 Jack
+                            Jill
+                            John
+    """
+    first_line = '%s %s' % (copyright_year_string, authors[0])
+    indent = ' ' * (len(copyright_year_string) + 1)
+    return [first_line] + [indent + name for name in authors[1:]]
+
+def short_author_formatter(copyright_year_string, authors):
+    """Return a one-item list with all authors on a single line.
+
+    >>> print('\\n'.join(short_author_formatter(
+    ...     copyright_year_string='Copyright (C) 1990-2010',
+    ...     authors=['Jack', 'Jill', 'John']*5)))
+    Copyright (C) 1990-2010 Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John
+    """
+    names = ', '.join(authors)
+    return ['%s %s' % (copyright_year_string, names)]
+
+def copyright_string(original_year, final_year, authors, text, info=None,
+                     author_format_fn=long_author_formatter,
+                     formatter_kwargs=None, prefix='', wrap=True,
+                     **wrap_kwargs):
+    """Build a complete copyright blurb.
+
+    The blurb is the author lines produced by `author_format_fn`
+    followed by the paragraphs in `text`, each %-formatted with
+    `info`.  Every line starts with `prefix`.  With `wrap` set the
+    paragraphs are filled by `textwrap.fill`, which receives
+    `wrap_kwargs`.  `text` must be a list of paragraph strings; it is
+    not modified, so one template can be formatted repeatedly.
+
+    Raises `ValueError` or `TypeError` (after logging the problem)
+    when a paragraph cannot be formatted with `info` or when `text` is
+    a single string.
+
+    >>> print(copyright_string(original_year=2005, final_year=2005,
+    ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
+    ...                        text=['BLURB',], prefix='# '
+    ...                        )) # doctest: +REPORT_UDIFF
+    # Copyright (C) 2005 A <a@a.com>
+    #                    B <b@b.edu>
+    #
+    # BLURB
+    >>> print(copyright_string(original_year=2005, final_year=2009,
+    ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
+    ...                        text=['BLURB',]
+    ...                        )) # doctest: +REPORT_UDIFF
+    Copyright (C) 2005-2009 A <a@a.com>
+                            B <b@b.edu>
+    <BLANKLINE>
+    BLURB
+    >>> print(copyright_string(original_year=2005, final_year=2005,
+    ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
+    ...                        text=['This file is part of %(program)s.',],
+    ...                        author_format_fn=short_author_formatter,
+    ...                        info={'program':'update-copyright'},
+    ...                        width=25,
+    ...                        )) # doctest: +REPORT_UDIFF
+    Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
+    <BLANKLINE>
+    This file is part of
+    update-copyright.
+    >>> print(copyright_string(original_year=2005, final_year=2005,
+    ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
+    ...                        text=[('This file is part of %(program)s.  '*3
+    ...                               ).strip(),],
+    ...                        info={'program':'update-copyright'},
+    ...                        author_format_fn=short_author_formatter,
+    ...                        wrap=False,
+    ...                        )) # doctest: +REPORT_UDIFF
+    Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
+    <BLANKLINE>
+    This file is part of update-copyright.  This file is part of update-copyright.  This file is part of update-copyright.
+
+    The caller's template is left alone:
+
+    >>> template = ['Part of %(program)s.']
+    >>> blurb = copyright_string(2005, 2005, ['A'], template,
+    ...                          info={'program': 'x'})
+    >>> template
+    ['Part of %(program)s.']
+    """
+    if info is None:
+        info = {}
+    if formatter_kwargs is None:
+        formatter_kwargs = {}
+    for key in ('initial_indent', 'subsequent_indent'):
+        wrap_kwargs.setdefault(key, prefix)
+
+    if original_year == final_year:
+        date_range = '%s' % original_year
+    else:
+        date_range = '%s-%s' % (original_year, final_year)
+    copyright_year_string = 'Copyright (C) %s' % date_range
+
+    lines = [prefix + line for line in author_format_fn(
+            copyright_year_string, authors, **formatter_kwargs)]
+
+    if isinstance(text, (bytes, type(u''))):
+        # A bare string would be iterated one character at a time.
+        message = ('copyright text must be a list of paragraph strings, '
+                   'not {}').format(repr(text))
+        _LOG.error(message)
+        raise TypeError(message)
+
+    # Format into a new list; writing back into `text` would alter the
+    # caller's template.
+    paragraphs = []
+    for paragraph in text:
+        try:
+            paragraphs.append(paragraph % info)
+        except ValueError as e:
+            _LOG.error(
+                "{}: can't format {} with {}".format(e, paragraph, info))
+            raise
+        except TypeError as e:
+            _LOG.error(
+                ('{}: copyright text must be a list of paragraph strings, '
+                 'not {}').format(e, repr(text)))
+            raise
+
+    if wrap:
+        paragraphs = [_textwrap.fill(p, **wrap_kwargs) for p in paragraphs]
+    else:
+        # Unwrapped paragraphs are emitted as-is, so an indent cannot
+        # be honoured here.
+        assert wrap_kwargs['subsequent_indent'] == '', \
+            wrap_kwargs['subsequent_indent']
+    sep = '\n%s\n' % prefix.rstrip()
+    return sep.join(['\n'.join(lines)] + paragraphs)
+
+def tag_copyright(contents, tag=None):
+    """Replace each copyright blurb in `contents` with `tag`.
+
+    A blurb starts at a line beginning with ``# Copyright`` and runs
+    until the first following line that does not begin with ``#``.
+    The returned text always ends with a newline.
+
+    >>> contents = '''Some file
+    ... bla bla
+    ... # Copyright (copyright begins)
+    ... # (copyright continues)
+    ... # bla bla bla
+    ... (copyright ends)
+    ... bla bla bla
+    ... '''
+    >>> print(tag_copyright(contents, tag='-xyz-CR-zyx-'))
+    Some file
+    bla bla
+    -xyz-CR-zyx-
+    (copyright ends)
+    bla bla bla
+    <BLANKLINE>
+    """
+    kept = []
+    in_blurb = False
+    for line in contents.splitlines():
+        if in_blurb:
+            if line.startswith('#'):
+                continue  # still inside the blurb: drop the line
+            in_blurb = False
+        elif line.startswith('# Copyright'):
+            in_blurb = True
+            kept.append(tag)
+            continue
+        kept.append(line.rstrip('\n'))
+    return '\n'.join(kept) + '\n'
+
+def update_copyright(contents, tag=None, **kwargs):
+    """Return `contents` with its copyright blurb regenerated.
+
+    The blurb is built by `copyright_string` from `kwargs`, with the
+    current (UTC) year as the final year.  The old blurb is swapped
+    for `tag` by `tag_copyright`, and every occurrence of `tag` is
+    then replaced by the new blurb.
+
+    >>> contents = '''Some file
+    ... bla bla
+    ... # Copyright (copyright begins)
+    ... # (copyright continues)
+    ... # bla bla bla
+    ... (copyright ends)
+    ... bla bla bla
+    ... '''
+    >>> print(update_copyright(contents, original_year=2008,
+    ...                        authors=['Jack', 'Jill'],
+    ...                        text=['BLURB',], prefix='# ', tag='--tag--'
+    ...     )) # doctest: +ELLIPSIS, +REPORT_UDIFF
+    Some file
+    bla bla
+    # Copyright (C) 2008-... Jack
+    #                         Jill
+    #
+    # BLURB
+    (copyright ends)
+    bla bla bla
+    <BLANKLINE>
+    """
+    this_year = _time.gmtime()[0]
+    blurb = copyright_string(final_year=this_year, **kwargs)
+    tagged = tag_copyright(contents=contents, tag=tag)
+    return tagged.replace(tag, blurb)
+
+def get_contents(filename):
+    """Return the text of `filename`, or `None` if it is not a file."""
+    if not _os_path.isfile(filename):
+        return None
+    # `with` closes the handle even if `read` raises.
+    with open(filename, 'r') as f:
+        return f.read()
+
+def set_contents(filename, contents, original_contents=None, dry_run=False):
+    """Write `contents` to `filename` if they differ from what is there.
+
+    `original_contents` is the current text of the file; when it is
+    `None` the file is read with `get_contents`.  A unified diff of
+    the change is logged at debug level.  With `dry_run` set nothing
+    is written.
+    """
+    if original_contents is None:
+        original_contents = get_contents(filename=filename)
+    _LOG.debug('check contents of {}'.format(filename))
+    if contents == original_contents:
+        # Only report "no change" when that is actually the case.
+        _LOG.debug('no change in {}'.format(filename))
+        return
+    if original_contents is None:
+        _LOG.info('creating {}'.format(filename))
+    else:
+        _LOG.info('updating {}'.format(filename))
+        _LOG.debug('\n'.join(
+                _difflib.unified_diff(
+                    original_contents.splitlines(), contents.splitlines(),
+                    fromfile=_os_path.normpath(
+                        _os_path.join('a', filename)),
+                    tofile=_os_path.normpath(_os_path.join('b', filename)),
+                    n=3, lineterm='')))
+    if not dry_run:
+        with open(filename, 'w') as f:
+            f.write(contents)
+
+def list_files(root='.'):
+    """Yield the path of every file below `root`.
+
+    The paths start with `root`, exactly as `os.walk` reports them.
+    """
+    for dirpath, dirnames, filenames in _os.walk(root):
+        for filename in filenames:
+            # `dirpath` already begins with `root`; joining `root` in
+            # again would duplicate a relative root.
+            yield _os_path.join(dirpath, filename)
diff --git a/update_copyright/vcs/__init__.py b/update_copyright/vcs/__init__.py
new file mode 100644 (file)
index 0000000..bd9248b
--- /dev/null
@@ -0,0 +1,48 @@
+# Copyright
+
+"""Backends for version control systems."""
+
+from . import utils as _utils
+
+
+class VCSBackend (object):
+    """Base class for version control system backends.
+
+    Subclasses implement `_years`, `_authors` and `is_versioned`.
+    The public `original_year` and `authors` methods combine their
+    results with the manual corrections given to the constructor:
+
+    * `author_hacks`: dict mapping a path tuple to extra authors,
+    * `year_hacks`: dict mapping a path tuple to an earlier year,
+    * `aliases`: dict mapping a canonical author name to its aliases.
+    """
+    name = None
+
+    def __init__(self, author_hacks=None, year_hacks=None, aliases=None):
+        if author_hacks is None:
+            author_hacks = {}
+        self._author_hacks = author_hacks
+        if year_hacks is None:
+            year_hacks = {}
+        self._year_hacks = year_hacks
+        if aliases is None:
+            aliases = {}
+        self._aliases = aliases
+
+    def _years(self, filename=None):
+        """Return the set of years in which `filename` was changed."""
+        raise NotImplementedError()
+
+    def original_year(self, filename=None):
+        """Return the earliest year known for `filename`.
+
+        With `filename` left as `None` the whole repository is
+        considered, including every entry in `year_hacks`.
+        """
+        years = self._years(filename=filename)
+        if filename is None:
+            years.update(self._year_hacks.values())
+        else:
+            key = _utils.splitpath(filename)
+            if key in self._year_hacks:
+                # The hack is a single year, not an iterable of years.
+                years.add(self._year_hacks[key])
+        years = sorted(years)
+        return years[0]
+
+    def _authors(self, filename=None):
+        """Return the set of authors who changed `filename`."""
+        raise NotImplementedError()
+
+    def authors(self, filename=None, with_emails=True):
+        """Return the sorted, de-aliased authors of `filename`.
+
+        With `filename` left as `None` the whole repository is
+        considered, including every entry in `author_hacks`.  E-mail
+        addresses are stripped from the names unless `with_emails` is
+        true.
+        """
+        authors = self._authors(filename=filename)
+        if filename is None:
+            # A separate loop name keeps `authors` bound to the
+            # accumulated set.
+            for path, extra_authors in self._author_hacks.items():
+                authors.update(extra_authors)
+        else:
+            key = _utils.splitpath(filename)
+            if key in self._author_hacks:
+                authors.update(self._author_hacks[key])
+        return _utils.replace_aliases(
+            authors, with_email=with_emails, aliases=self._aliases)
+
+    def is_versioned(self, filename=None):
+        """Return `True` if `filename` is tracked by the VCS."""
+        raise NotImplementedError()
diff --git a/update_copyright/vcs/bazaar.py b/update_copyright/vcs/bazaar.py
new file mode 100644 (file)
index 0000000..e54eed5
--- /dev/null
@@ -0,0 +1,67 @@
+# Copyright
+
+import StringIO as _StringIO
+
+import bzrlib as _bzrlib
+import bzrlib.builtins as _bzrlib_builtins
+import bzrlib.log as _bzrlib_log
+
+from . import VCSBackend as _VCSBackend
+
+
+class _LogFormatter (_bzrlib_log.LogFormatter):
+    """Base class for the minimal log formatters used by this module.
+
+    The class attributes are the capability flags that
+    `bzrlib.log.LogFormatter` subclasses declare.  Subclasses
+    implement `log_revision` to write the one piece of information
+    they extract to `self.to_file`.
+    """
+    supports_merge_revisions = True
+    preferred_levels = 0
+    supports_delta = False
+    supports_tags = False
+    supports_diff = False
+
+    def log_revision(self, revision):
+        raise NotImplementedError
+
+
+class _YearLogFormatter (_LogFormatter):
+    """Log formatter that writes each revision's year on its own line."""
+    def log_revision(self, revision):
+        # Imported here because this module has no top-level `time`
+        # import.
+        import time as _time
+
+        self.to_file.write(
+            _time.strftime('%Y', _time.gmtime(revision.rev.timestamp))
+            +'\n')
+
+
+class _AuthorLogFormatter (_LogFormatter):
+    """Log formatter that writes each revision's authors, one per line."""
+    def log_revision(self, revision):
+        names = revision.rev.get_apparent_authors()
+        text = '\n'.join(names)
+        self.to_file.write(text + '\n')
+
+
+class BazaarBackend (_VCSBackend):
+    """VCS backend that queries Bazaar through `bzrlib`'s log command."""
+    name = 'Bazaar'
+
+    def __init__(self, **kwargs):
+        super(BazaarBackend, self).__init__(**kwargs)
+        self._version = _bzrlib.__version__
+
+    @staticmethod
+    def _log_lines(filename=None, **kwargs):
+        """Run ``bzr log`` in-process and return its output lines.
+
+        `kwargs` are passed to the log command; `filename`, when
+        given, restricts the log to that file.
+        """
+        cmd = _bzrlib_builtins.cmd_log()
+        cmd.outf = _StringIO.StringIO()
+        if filename is not None:
+            kwargs['file_list'] = [filename]
+        cmd.run(**kwargs)
+        return cmd.outf.getvalue().splitlines()
+
+    def _years(self, filename=None):
+        lines = self._log_lines(
+            filename=filename, log_format=_YearLogFormatter, levels=0)
+        return set(int(year) for year in lines)
+
+    def _authors(self, filename=None):
+        lines = self._log_lines(
+            filename=filename, log_format=_AuthorLogFormatter, levels=0)
+        return set(lines)
+
+    def is_versioned(self, filename):
+        # NOTE(review): the result is `True` whenever the log command
+        # returns; presumably it raises for paths Bazaar does not
+        # track -- confirm.
+        self._log_lines(filename=filename)
+        return True
diff --git a/update_copyright/vcs/git.py b/update_copyright/vcs/git.py
new file mode 100644 (file)
index 0000000..3cbbd5e
--- /dev/null
@@ -0,0 +1,51 @@
+# Copyright
+
+from . import VCSBackend as _VCSBackend
+from . import utils as _utils
+
+
+class GitBackend (_VCSBackend):
+    """VCS backend that queries Git through the ``git`` executable."""
+    name = 'Git'
+
+    @staticmethod
+    def _git_cmd(*args):
+        """Run ``git`` with `args`; return stdout minus trailing newlines."""
+        command = ['git']
+        command.extend(args)
+        stdout = _utils.invoke(command)[1]
+        return stdout.rstrip('\n')
+
+    def __init__(self, **kwargs):
+        super(GitBackend, self).__init__(**kwargs)
+        self._version = self._git_cmd('--version').split(' ')[-1]
+        if not self._version.startswith('1.5.'):
+            self._author_format = '--pretty=format:%aN <%aE>'
+            self._year_format = ['--pretty=format:%ad',  # Author date
+                                 '--date=short']         # YYYY-MM-DD
+        else:
+            # Author name <author email>
+            self._author_format = '--pretty=format:%an <%ae>'
+            self._year_format = ['--pretty=format:%ai']  # Author date
+            # YYYY-MM-DD HH:MM:SS Z
+            # Earlier versions of Git don't seem to recognize --date=short
+
+    def _years(self, filename=None):
+        args = ['log']
+        args.extend(self._year_format)
+        if filename is not None:
+            args.extend(['--follow', filename])
+        dates = self._git_cmd(*args).splitlines()
+        if self._version.startswith('1.5.'):
+            # Keep only the date part of "YYYY-MM-DD HH:MM:SS Z".
+            dates = [stamp.split()[0] for stamp in dates]
+        return set(int(date.split('-', 1)[0]) for date in dates)
+
+    def _authors(self, filename=None):
+        args = ['log', self._author_format]
+        if filename is not None:
+            args += ['--follow', filename]
+        return set(self._git_cmd(*args).splitlines())
+
+    def is_versioned(self, filename):
+        # A file with no log output has no history in the repository.
+        history = self._git_cmd('log', '--follow', filename)
+        return len(history) != 0
diff --git a/update_copyright/vcs/mercurial.py b/update_copyright/vcs/mercurial.py
new file mode 100644 (file)
index 0000000..d71ce9c
--- /dev/null
@@ -0,0 +1,65 @@
+# Copyright
+
+from __future__ import absolute_import
+
+import StringIO as _StringIO
+import os as _os
+import sys as _sys
+
+import mercurial as _mercurial
+from mercurial.__version__ import version as _version
+import mercurial.dispatch as _mercurial_dispatch
+
+from . import VCSBackend as _VCSBackend
+from . import utils as _utils
+
+
+class MercurialBackend (_VCSBackend):
+    """VCS backend that queries Mercurial through its Python API."""
+    name = 'Mercurial'
+
+    @staticmethod
+    def _hg_cmd(*args):
+        """Run ``hg`` with `args` in-process.
+
+        Returns a ``(stdout, stderr)`` tuple of the captured output
+        with trailing newlines removed.  The working directory and the
+        `sys.stdout`/`sys.stderr` streams are restored afterwards.
+        """
+        cwd = _os.getcwd()
+        stdout = _sys.stdout
+        stderr = _sys.stderr
+        tmp_stdout = _StringIO.StringIO()
+        tmp_stderr = _StringIO.StringIO()
+        _sys.stdout = tmp_stdout
+        _sys.stderr = tmp_stderr
+        try:
+            _mercurial_dispatch.dispatch(list(args))
+        finally:
+            _os.chdir(cwd)
+            _sys.stdout = stdout
+            _sys.stderr = stderr
+        return (tmp_stdout.getvalue().rstrip('\n'),
+                tmp_stderr.getvalue().rstrip('\n'))
+
+    def __init__(self, **kwargs):
+        super(MercurialBackend, self).__init__(**kwargs)
+        self._version = _version
+
+    def _years(self, filename=None):
+        args = [
+            '--template', '{date|shortdate}\n',
+            # shortdate filter: YEAR-MONTH-DAY
+            ]
+        if filename is not None:
+            args.extend(['--follow', filename])
+        output,error = self._hg_cmd('log', *args)
+        years = set(int(line.split('-', 1)[0]) for line in output.splitlines())
+        return years
+
+    def _authors(self, filename=None):
+        args = ['--template', '{author}\n']
+        if filename is not None:
+            args.extend(['--follow', filename])
+        output,error = self._hg_cmd('log', *args)
+        authors = set(output.splitlines())
+        return authors
+
+    def is_versioned(self, filename):
+        # Any output on stderr is taken to mean the file is not tracked.
+        output,error = self._hg_cmd('log', '--follow', filename)
+        if len(error) > 0:
+            return False
+        return True
diff --git a/update_copyright/vcs/utils.py b/update_copyright/vcs/utils.py
new file mode 100644 (file)
index 0000000..3c3c1ba
--- /dev/null
@@ -0,0 +1,141 @@
+# Copyright
+
+"""Useful utilities for backend classes."""
+
+import email.utils as _email_utils
+import os.path as _os_path
+import subprocess as _subprocess
+import sys as _sys
+
+
+_MSWINDOWS = _sys.platform == 'win32'
+_POSIX = not _MSWINDOWS
+
+
+def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
+           expect=(0,)):
+    """Run the command `args` and return ``(status, stdout, stderr)``.
+
+    `stdin` is the data sent to the process.  `expect` should be a
+    tuple of allowed exit codes.  Raises `ValueError` if the process
+    cannot be started or exits with a status that is not in `expect`.
+    """
+    popen_kwargs = {
+        'stdin': _subprocess.PIPE,
+        'stdout': stdout,
+        'stderr': stderr,
+        }
+    try:
+        if not _POSIX:
+            assert _MSWINDOWS == True, 'invalid platform'
+            # win32 don't have os.execvp() so run the command in a shell
+            popen_kwargs['shell'] = True
+        process = _subprocess.Popen(args, **popen_kwargs)
+    except OSError as e:
+        raise ValueError([args, e])
+    stdout, stderr = process.communicate(input=stdin)
+    status = process.wait()
+    if status not in expect:
+        raise ValueError([args, status, stdout, stderr])
+    return status, stdout, stderr
+
+def splitpath(path):
+    """Split a path into a tuple of its elements.
+
+    The path is normalized first, so a leading ``.`` is dropped.  For
+    an absolute path the first element is the root itself.
+
+    Examples
+    --------
+
+    >>> import os.path
+    >>> splitpath(os.path.join('a', 'b', 'c'))
+    ('a', 'b', 'c')
+    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
+    ('a', 'b', 'c')
+    >>> splitpath(os.path.join(os.path.sep, 'a')) == (os.path.sep, 'a')
+    True
+    """
+    path = _os_path.normpath(path)
+    elements = []
+    while True:
+        dirname,basename = _os_path.split(path)
+        if dirname == path:
+            # Reached a root, which splits into itself; without this
+            # stop the loop would never end for absolute paths.
+            elements.append(dirname)
+            break
+        elements.append(basename)
+        if dirname in ['', '.']:
+            break
+        path = dirname
+    # Elements were collected from the last one backwards.
+    return tuple(reversed(elements))
+
+def strip_email(*args):
+    """Return the given names as a list, without e-mail addresses.
+
+    Each name is parsed with `email.utils.parseaddr`.  A name with no
+    recognisable "real name" part is returned unchanged, as is `None`.
+
+    Examples
+    --------
+
+    >>> strip_email('J Doe')
+    ['J Doe']
+    >>> strip_email('J Doe <jdoe@a.com>')
+    ['J Doe']
+    >>> strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
+    ['J Doe', 'JJJ Smith']
+    """
+    names = []
+    for entry in args:
+        if entry is not None:
+            real_name = _email_utils.parseaddr(entry)[0]
+            if real_name != '':
+                entry = real_name
+        names.append(entry)
+    return names
+
+def reverse_aliases(aliases):
+    """Turn a canonical-name-to-aliases dict into an alias lookup table.
+
+    Input:   key: canonical name,  value: list of aliases
+    Output:  key: alias,           value: canonical name
+
+    Examples
+    --------
+
+    >>> aliases = {
+    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
+    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
+    ...     None:['Anonymous <a@a.com>'],
+    ...     }
+    >>> r = reverse_aliases(aliases)
+    >>> for item in sorted(r.items()):
+    ...     print(item)
+    ('Anonymous <a@a.com>', None)
+    ('J', 'J Doe <jdoe@a.com>')
+    ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
+    ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
+    """
+    return dict(
+        (alias, canonical_name)
+        for canonical_name, names in aliases.items()
+        for alias in names)
+
+def replace_aliases(authors, with_email=True, aliases=None):
+    """Consolidate and sort `authors`.
+
+    Make the replacements listed in the `aliases` dict (key: canonical
+    name, value: list of aliases).  If `aliases` is ``None``, no
+    replacements are made.  Authors whose canonical name is ``None``
+    are dropped.  With `with_email` false, e-mail addresses are
+    stripped from the result.
+
+    `authors` may be any iterable (list, set, ...) and is not modified;
+    a new sorted list is returned.
+
+    >>> aliases = {
+    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
+    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
+    ...     None:['Anonymous <a@a.com>'],
+    ...     }
+    >>> authors = [
+    ...     'JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
+    ...     'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>', 'Anonymous <a@a.com>']
+    >>> replace_aliases(authors, with_email=True, aliases=aliases)
+    ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
+    >>> replace_aliases(authors, with_email=False, aliases=aliases)
+    ['J Doe', 'JJJ Smith']
+    >>> replace_aliases(set(authors), with_email=False, aliases=aliases)
+    ['J Doe', 'JJJ Smith']
+    """
+    if aliases is None:
+        aliases = {}
+    rev_aliases = reverse_aliases(aliases)
+    # Build a new set instead of assigning by index: the VCS backends
+    # pass sets, which do not support item assignment.
+    canonical = set(rev_aliases.get(author, author) for author in authors)
+    # Drop `None` before sorting; it is the marker for authors to omit.
+    canonical.discard(None)
+    authors = sorted(canonical)
+    if not with_email:
+        authors = strip_email(*authors)
+    return authors