Added hooke.plugin.license and removed get-warranty info from short_license.
[update-copyright.git] / update_copyright.py
index f4e39dfcbbfe226b43c97679bde290e0306af192..ba82f5a2d37483ef78f8c52bd125f7dfb38398ba 100755 (executable)
@@ -1,6 +1,22 @@
 #!/usr/bin/python
 #
-# Copyright
+# Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
+#
+# This file is part of Hooke.
+#
+# Hooke is free software: you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation, either
+# version 3 of the License, or (at your option) any later version.
+#
+# Hooke is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with Hooke.  If not, see
+# <http://www.gnu.org/licenses/>.
 
 """Automatically update copyright boilerplate.
 
@@ -13,14 +29,10 @@ import difflib
 import email.utils
 import os
 import os.path
-import re
-import StringIO
 import sys
+import textwrap
 import time
 
-import mercurial
-import mercurial.dispatch
-
 
 PROJECT_INFO = {
     'project': 'Hooke',
@@ -28,7 +40,7 @@ PROJECT_INFO = {
     }
 
 # Break "copyright" into "copy" and "right" to avoid matching the
-# REGEXP.
+# REGEXP if we decide to go back to regexps.
 COPY_RIGHT_TEXT="""
 This file is part of %(project)s.
 
@@ -47,71 +59,315 @@ License along with %(project)s.  If not, see
 <http://www.gnu.org/licenses/>.
 """.strip()
 
+SHORT_COPY_RIGHT_TEXT="""
+%(project)s comes with ABSOLUTELY NO WARRANTY and is licensed
+under the GNU Lesser General Public License.  For details,
+%(get-details)s
+""".strip()
+
 COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
 
+# Convert author names to canonical forms.
+# ALIASES[<canonical name>] = <list of aliases>
+# for example,
+# ALIASES = {
+#     'John Doe <jdoe@a.com>':
+#         ['John Doe', 'jdoe', 'J. Doe <j@doe.net>'],
+#     }
+# Git-based projects are encouraged to use .mailmap instead of
+# ALIASES.  See git-shortlog(1) for details.
 ALIASES = {
+    'A. Seeholzer':
+        ['A. Seeholzer'],
     'Alberto Gomez-Casado':
         ['albertogomcas'],
     'Massimo Sandal <devicerandom@gmail.com>':
-        ['devicerandom',
+        ['Massimo Sandal',
+         'devicerandom',
          'unknown'],
-    'Fabrizio Benedetti':['fabrizio.benedetti'],
-    'il':['illysam'],
-    'Marco Brucale':['marcobrucale'],
-    'pp':['pancaldi.paolo'],
+    'Fabrizio Benedetti':
+        ['fabrizio.benedetti.82'],
+    'Richard Naud <richard.naud@epfl.ch>':
+        ['Richard Naud'],
+    'Rolf Schmidt <rschmidt@alcor.concordia.ca>':
+        ['Rolf Schmidt',
+         'illysam'],
+    'Marco Brucale':
+        ['marcobrucale'],
+    'Pancaldi Paolo':
+        ['pancaldi.paolo'],
     }
 
-IGNORED_PATHS = ['./.hg/', './doc/img', './test/data/',
-                 './build/', '/doc/build/']
+# List of paths that should not be scanned for copyright updates.
+# IGNORED_PATHS = ['./.git/']
+IGNORED_PATHS = ['./.hg/', './doc/img/', './test/data/',
+                 './build/', './doc/build/']
+# List of files that should not be scanned for copyright updates.
+# IGNORED_FILES = ['COPYING']
 IGNORED_FILES = ['COPYING', 'COPYING.LESSER']
 
+# Work around missing author holes in the VCS history.
+# AUTHOR_HACKS[<path tuple>] = [<missing authors>]
+# for example, if John Doe contributed to module.py but wasn't listed
+# in the VCS history of that file:
+# AUTHOR_HACKS = {
+#     ('path', 'to', 'module.py'):['John Doe'],
+#     }
+AUTHOR_HACKS = {
+    ('hooke','driver','hdf5.py'):['Massimo Sandal'],
+    ('hooke','driver','mcs.py'):['Allen Chen'],
+    ('hooke','driver','mfp3d.py'):['A. Seeholzer','Richard Naud','Rolf Schmidt',
+                                   'Alberto Gomez-Casado'],
+    ('hooke','util','peak.py'):['Fabrizio Benedetti'],
+    ('hooke','plugin','showconvoluted.py'):['Rolf Schmidt'],
+    ('hooke','ui','gui','formatter.py'):['Francesco Musiani','Massimo Sandal'],
+    ('hooke','ui','gui','prettyformat.py'):['Rolf Schmidt'],
+    }
+
+# Work around missing year holes in the VCS history.
+# YEAR_HACKS[<path tuple>] = <original year>
+# for example, if module.py was published in 2008 but the VCS history
+# only goes back to 2010:
+# YEAR_HACKS = {
+#     ('path', 'to', 'module.py'):2008,
+#     }
+YEAR_HACKS = {
+    ('hooke','driver','hdf5.py'):2009,
+    ('hooke','driver','mfp3d.py'):2008,
+    ('hooke','driver','picoforce.py'):2006,
+    ('hooke','driver','picoforcealt.py'):2006,
+    ('hooke','util','peak.py'):2007,
+    ('hooke','plugin','showconvoluted.py'):2009,
+    ('hooke','plugin','tutorial.py'):2007,
+    ('hooke','ui','gui','formatter.py'):2006,
+    ('hooke','ui','gui','prettyformat.py'):2009,
+    }
+
+# Helpers for VCS-specific commands
+
+def splitpath(path):
+    """Split a path into a tuple of its elements.
+
+    The path is normalized first, so a leading './' is dropped.  An
+    absolute path keeps its root (e.g. '/') as the first element.
+
+    >>> splitpath(os.path.join('a', 'b', 'c'))
+    ('a', 'b', 'c')
+    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
+    ('a', 'b', 'c')
+    """
+    path = os.path.normpath(path)
+    elements = []
+    while True:
+        dirname,basename = os.path.split(path)
+        elements.insert(0, basename or dirname)  # a root has no basename
+        if dirname in ['', '.', path]:  # dirname == path: split() hit the root
+            break
+        path = dirname
+    return tuple(elements)
 
 # VCS-specific commands
 
-def mercurial_cmd(*args):
-    cwd = os.getcwd()
-    stdout = sys.stdout
-    stderr = sys.stderr
-    tmp_stdout = StringIO.StringIO()
-    tmp_stderr = StringIO.StringIO()
-    sys.stdout = tmp_stdout
-    sys.stderr = tmp_stderr
-    try:
-        mercurial.dispatch.dispatch(list(args))
-    finally:
-        os.chdir(cwd)
-        sys.stdout = stdout
-        sys.stderr = stderr
-    return (tmp_stdout.getvalue().rstrip('\n'),
-            tmp_stderr.getvalue().rstrip('\n'))
-
-def original_year(filename):
-    # shortdate filter: YEAR-MONTH-DAY
-    output,error = mercurial_cmd('log', '--follow',
-                                 '--template', '{date|shortdate}\n',
-                                 filename)
-    years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
-    years.sort()
-    return years[0]
-
-def authors(filename):
-    output,error = mercurial_cmd('log', '--follow',
-                                 '--template', '{author}\n',
-                                 filename)
-    return list(set(output.splitlines()))
-
-def authors_list():
-    output,error = mercurial_cmd('log', '--follow',
-                                 '--template', '{author}\n')
-    return list(set(output.splitlines()))
-
-def is_versioned(filename):
-    output,error = mercurial_cmd('log', '--follow',
-                                 '--template', '{date|shortdate}\n',
-                                 filename)
-    if len(error) > 0:
-        return False
-    return True
+if PROJECT_INFO['vcs'] == 'Git':
+
+    import subprocess
+
+    _MSWINDOWS = sys.platform == 'win32'
+    _POSIX = not _MSWINDOWS
+
+    def invoke(args, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, expect=(0,)):
+        """
+        expect should be a tuple of allowed exit codes.
+        """
+        try :
+            if _POSIX:
+                q = subprocess.Popen(args, stdin=subprocess.PIPE,
+                                     stdout=stdout, stderr=stderr)
+            else:
+                assert _MSWINDOWS == True, 'invalid platform'
+                # win32 don't have os.execvp() so run the command in a shell
+                q = subprocess.Popen(args, stdin=subprocess.PIPE,
+                                     stdout=stdout, stderr=stderr, shell=True)
+        except OSError, e:
+            raise ValueError([args, e])
+        stdout,stderr = q.communicate(input=stdin)
+        status = q.wait()
+        if status not in expect:
+            raise ValueError([args, status, stdout, stderr])
+        return status, stdout, stderr
+
+    def git_cmd(*args):
+        status,stdout,stderr = invoke(['git'] + list(args))
+        return stdout.rstrip('\n')
+
+    def original_year(filename=None, year_hacks=YEAR_HACKS):
+        args = [
+            '--format=format:%ad',  # Author date
+            '--date=short',         # YYYY-MM-DD
+            ]
+        if filename != None:
+            args.extend(['--follow', filename])
+        output = git_cmd('log', *args)
+        years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
+        if filename == None:
+            years.extend(year_hacks.values())
+        elif splitpath(filename) in year_hacks:
+            years.append(year_hacks[splitpath(filename)])
+        years.sort()
+        return years[0]
+
+    def authors(filename, author_hacks=AUTHOR_HACKS):
+        output = git_cmd('log', '--follow', '--format=format:%aN <%aE>',
+                         filename)   # Author name <author email>
+        ret = list(set(output.splitlines()))
+        if splitpath(filename) in author_hacks:
+            ret.extend(author_hacks[splitpath(filename)])
+        return ret
+
+    def authors_list(author_hacks=AUTHOR_HACKS):
+        output = git_cmd('log', '--format=format:%aN <%aE>')
+        ret = list(set(output.splitlines()))
+        for path,authors in author_hacks.items():
+            ret.extend(authors)
+        return ret
+
+    def is_versioned(filename):
+        output = git_cmd('log', '--follow', filename)
+        if len(output) == 0:
+            return False
+        return True
+
+elif PROJECT_INFO['vcs'] == 'Mercurial':
+
+    import StringIO
+    import mercurial
+    import mercurial.dispatch
+
+    def mercurial_cmd(*args):
+        cwd = os.getcwd()
+        stdout = sys.stdout
+        stderr = sys.stderr
+        tmp_stdout = StringIO.StringIO()
+        tmp_stderr = StringIO.StringIO()
+        sys.stdout = tmp_stdout
+        sys.stderr = tmp_stderr
+        try:
+            mercurial.dispatch.dispatch(list(args))
+        finally:
+            os.chdir(cwd)
+            sys.stdout = stdout
+            sys.stderr = stderr
+        return (tmp_stdout.getvalue().rstrip('\n'),
+                tmp_stderr.getvalue().rstrip('\n'))
+
+    def original_year(filename=None, year_hacks=YEAR_HACKS):
+        args = [
+            '--template', '{date|shortdate}\n',
+            # shortdate filter: YEAR-MONTH-DAY
+            ]
+        if filename != None:
+            args.extend(['--follow', filename])
+        output,error = mercurial_cmd('log', *args)
+        years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
+        if filename == None:
+            years.extend(year_hacks.values())
+        elif splitpath(filename) in year_hacks:
+            years.append(year_hacks[splitpath(filename)])
+        years.sort()
+        return years[0]
+
+    def authors(filename, author_hacks=AUTHOR_HACKS):
+        output,error = mercurial_cmd('log', '--follow',
+                                     '--template', '{author}\n',
+                                     filename)
+        ret = list(set(output.splitlines()))
+        if splitpath(filename) in author_hacks:
+            ret.extend(author_hacks[splitpath(filename)])
+        return ret
+
+    def authors_list(author_hacks=AUTHOR_HACKS):
+        output,error = mercurial_cmd('log', '--template', '{author}\n')
+        ret = list(set(output.splitlines()))
+        for path,authors in author_hacks.items():
+            ret.extend(authors)
+        return ret
+
+    def is_versioned(filename):
+        output,error = mercurial_cmd('log', '--follow', filename)
+        if len(error) > 0:
+            return False
+        return True
+
+elif PROJECT_INFO['vcs'] == 'Bazaar':
+
+    import StringIO
+    import bzrlib
+    import bzrlib.builtins
+    import bzrlib.log
+
+    class LogFormatter (bzrlib.log.LogFormatter):  # base for the formatters below
+        supports_merge_revisions = True
+        preferred_levels = 0
+        supports_delta = False  # bzrlib's flag is spelled "supports_delta"
+        supports_tags = False
+        supports_diff = False
+
+        def log_revision(self, revision):
+            raise NotImplementedError  # overridden by the concrete formatters
+
+    class YearLogFormatter (LogFormatter):
+        def log_revision(self, revision):
+            self.to_file.write(
+                time.strftime('%Y', time.gmtime(revision.rev.timestamp))
+                +'\n')
+
+    class AuthorLogFormatter (LogFormatter):
+        def log_revision(self, revision):
+            authors = revision.rev.get_apparent_authors()
+            self.to_file.write('\n'.join(authors)+'\n')
+
+    def original_year(filename=None, year_hacks=YEAR_HACKS):
+        """Return the earliest year for `filename` (or the whole project)."""
+        cmd = bzrlib.builtins.cmd_log()
+        cmd.outf = StringIO.StringIO()  # capture the log output in memory
+        kwargs = {'log_format':YearLogFormatter, 'levels':0}
+        if filename is not None:
+            kwargs['file_list'] = [filename]  # restrict the log to one file
+        cmd.run(**kwargs)
+        years = [int(year) for year in set(cmd.outf.getvalue().splitlines())]
+        if filename is None:
+            years.extend(year_hacks.values())  # project-wide: use every hack
+        elif splitpath(filename) in year_hacks:
+            years.append(year_hacks[splitpath(filename)])
+        return sorted(years)[0]
+
+    def authors(filename, author_hacks=AUTHOR_HACKS):
+        cmd = bzrlib.builtins.cmd_log()
+        cmd.outf = StringIO.StringIO()
+        cmd.run(file_list=[filename], log_format=AuthorLogFormatter, levels=0)
+        ret = list(set(cmd.outf.getvalue().splitlines()))
+        if splitpath(filename) in author_hacks:
+            ret.extend(author_hacks[splitpath(filename)])
+        return ret
+
+    def authors_list(author_hacks=AUTHOR_HACKS):
+        cmd = bzrlib.builtins.cmd_log()
+        cmd.outf = StringIO.StringIO()  # capture the log output in memory
+        cmd.run(log_format=AuthorLogFormatter, levels=0)
+        output = cmd.outf.getvalue()
+        ret = list(set(output.splitlines()))  # unique authors from the log
+        for hacked_authors in author_hacks.values():
+            ret.extend(hacked_authors)  # appended as-is; may repeat log names
+        return ret
+
+    def is_versioned(filename):
+        cmd = bzrlib.builtins.cmd_log()
+        cmd.outf = StringIO.StringIO()
+        cmd.run(file_list=[filename])
+        return True
+
+else:
+    raise NotImplementedError('Unrecognized VCS: %(vcs)s' % PROJECT_INFO)
 
 # General utility commands
 
@@ -121,6 +377,8 @@ def _strip_email(*args):
     Examples
     --------
 
+    >>> _strip_email('J Doe')
+    ['J Doe']
     >>> _strip_email('J Doe <jdoe@a.com>')
     ['J Doe']
     >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
@@ -131,6 +389,8 @@ def _strip_email(*args):
         if arg == None:
             continue
         author,addr = email.utils.parseaddr(arg)
+        if author == '':
+            author = arg
         args[i] = author
     return args
 
@@ -174,23 +434,16 @@ def _replace_aliases(authors, with_email=True, aliases=None):
     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
     ...     None:['Anonymous <a@a.com>'],
     ...     }
-    >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
-    ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
-    ...                  with_email=True, aliases=aliases)
+    >>> authors = [
+    ...     'JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
+    ...     'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>', 'Anonymous <a@a.com>']
+    >>> _replace_aliases(authors, with_email=True, aliases=aliases)
     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
-    >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
-    ...                  with_email=False, aliases=aliases)
+    >>> _replace_aliases(authors, with_email=False, aliases=aliases)
     ['J Doe', 'JJJ Smith']
-    >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
-    ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>'],
-    ...                  with_email=True, aliases=aliases)
-    ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
     """
     if aliases == None:
         aliases = ALIASES
-    if with_email == False:
-        aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
-                        for author,_aliases in aliases.items()])
     rev_aliases = _reverse_aliases(aliases)
     for i,author in enumerate(authors):
         if author in rev_aliases:
@@ -198,9 +451,41 @@ def _replace_aliases(authors, with_email=True, aliases=None):
     authors = sorted(list(set(authors)))
     if None in authors:
         authors.remove(None)
+    if with_email == False:
+        authors = _strip_email(*authors)
     return authors
 
-def _copyright_string(original_year, final_year, authors, prefix=''):
+def _long_author_formatter(copyright_year_string, authors):
+    """
+    >>> print '\\n'.join(_long_author_formatter(
+    ...     copyright_year_string='Copyright (C) 1990-2010',
+    ...     authors=['Jack', 'Jill', 'John']))
+    Copyright (C) 1990-2010 Jack
+                            Jill
+                            John
+    """
+    lines = ['%s %s' % (copyright_year_string, authors[0])]
+    for author in authors[1:]:
+        lines.append(' '*(len(copyright_year_string)+1) + author)
+    return lines
+
+def _short_author_formatter(copyright_year_string, authors, **kwargs):
+    """
+    >>> print '\\n'.join(_short_author_formatter(
+    ...     copyright_year_string='Copyright (C) 1990-2010',
+    ...     authors=['Jack', 'Jill', 'John']*5,
+    ...     width=50))
+    Copyright (C) 1990-2010 Jack, Jill, John, Jack,
+    Jill, John, Jack, Jill, John, Jack, Jill, John,
+    Jack, Jill, John
+    """
+    blurb = '%s %s' % (copyright_year_string, ', '.join(authors))
+    return textwrap.wrap(blurb, **kwargs)
+
+def _copyright_string(original_year, final_year, authors, prefix='',
+                      text=COPY_RIGHT_TEXT, extra_info={},
+                      author_format_fn=_long_author_formatter,
+                      formatter_kwargs={}):
     """
     >>> print _copyright_string(original_year=2005,
     ...                         final_year=2005,
@@ -219,17 +504,33 @@ def _copyright_string(original_year, final_year, authors, prefix=''):
                             B <b@b.edu>
     <BLANKLINE>
     This file...
+    >>> print _copyright_string(original_year=2005,
+    ...                         final_year=2005,
+    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
+    ...                         prefix='',
+    ...                         text=SHORT_COPY_RIGHT_TEXT,
+    ...                         author_format_fn=_short_author_formatter,
+    ...                         extra_info={'get-details':'%(get-details)s'},
+    ...                         formatter_kwargs={'width': 50},
+    ...                        ) # doctest: +ELLIPSIS
+    Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
+    <BLANKLINE>
+    Hooke comes with ABSOLUTELY NO WARRANTY and is licensed
+    under the GNU Lesser General Public License.  For details,
+    %(get-details)s
     """
     if original_year == final_year:
         date_range = '%s' % original_year
     else:
         date_range = '%s-%s' % (original_year, final_year)
-    lines = ['Copyright (C) %s %s' % (date_range, authors[0])]
-    for author in authors[1:]:
-        lines.append(' '*(len('Copyright (C) ')+len(date_range)+1) +
-                     author)
+    copyright_year_string = 'Copyright (C) %s' % date_range
+    lines = author_format_fn(copyright_year_string, authors,
+                             **formatter_kwargs)
     lines.append('')
-    lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
+    info = dict(PROJECT_INFO)
+    for key,value in extra_info.items():
+        info[key] = value
+    lines.extend((text % info).splitlines())
     for i,line in enumerate(lines):
         lines[i] = (prefix + line).rstrip()
     return '\n'.join(lines)
@@ -354,9 +655,11 @@ def _set_contents(filename, contents, original_contents=None, dry_run=False,
 # Update commands
 
 def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
+    authors = authors_fn()
+    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
     new_contents = '%s was written by:\n%s\n' % (
         PROJECT_INFO['project'],
-        '\n'.join(authors_fn())
+        '\n'.join(authors)
         )
     _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
 
@@ -386,6 +689,31 @@ def update_files(files=None, dry_run=False, verbose=0):
             continue
         update_file(filename, dry_run=dry_run, verbose=verbose)
 
+def update_pyfile(path, original_year_fn=original_year,
+                  authors_fn=authors_list, dry_run=False, verbose=0):
+    original_year = original_year_fn()
+    current_year = time.gmtime()[0]
+    authors = authors_fn()
+    authors = _replace_aliases(authors, with_email=False, aliases=ALIASES)
+    lines = [
+        _copyright_string(original_year, current_year, authors, prefix='# '),
+        '',
+        'LICENSE = """',
+        _copyright_string(original_year, current_year, authors, prefix=''),
+        '""".strip()',
+        '',
+        'def short_license(extra_info):',
+        '    return """',
+        _copyright_string(original_year, current_year, authors, prefix='',
+                          text=SHORT_COPY_RIGHT_TEXT,
+                          author_format_fn=_short_author_formatter,
+                          extra_info={'get-details':'%(get-details)s'}),
+        '""".strip() % extra_info',
+        ]
+    new_contents = '\n'.join(lines)+'\n'
+    _set_contents(path, new_contents, dry_run=dry_run, verbose=verbose)
+
+
 def test():
     import doctest
     doctest.testmod()
@@ -409,6 +737,9 @@ If no files are given, a list of files to update is generated
 automatically.
 """ % PROJECT_INFO
     p = optparse.OptionParser(usage)
+    p.add_option('--pyfile', dest='pyfile', default='hooke/license.py',
+                 metavar='PATH',
+                 help='Write project license info to a Python module at PATH')
     p.add_option('--test', dest='test', default=False,
                  action='store_true', help='Run internal tests and exit')
     p.add_option('--dry-run', dest='dry_run', default=False,
@@ -423,3 +754,6 @@ automatically.
 
     update_authors(dry_run=options.dry_run, verbose=options.verbose)
     update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)
+    if options.pyfile != None:
+        update_pyfile(path=options.pyfile,
+                      dry_run=options.dry_run, verbose=options.verbose)