update_copyright.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright
   4
   5 """Automatically update copyright boilerplate.
   6
   7 This script is adapted from one written for `Bugs Everywhere`_.
   8
   9 .. _Bugs Everywhere: http://bugseverywhere.org/
  10 """
  11
  12 import difflib
  13 import email.utils
  14 import os
  15 import os.path
  16 import re
  17 import StringIO
  18 import sys
  19 import time
  20
  21 import mercurial
  22 import mercurial.dispatch
  23
  24
  25 PROJECT_INFO = {
  26     'project': 'Hooke',
  27     'vcs': 'Mercurial',
  28     }
  29
  30 # Break "copyright" into "copy" and "right" to avoid matching the
  31 # REGEXP.
  32 COPY_RIGHT_TEXT="""
  33 This file is part of %(project)s.
  34
  35 %(project)s is free software: you can redistribute it and/or
  36 modify it under the terms of the GNU Lesser General Public
  37 License as published by the Free Software Foundation, either
  38 version 3 of the License, or (at your option) any later version.
  39
  40 %(project)s is distributed in the hope that it will be useful,
  41 but WITHOUT ANY WARRANTY; without even the implied warranty of
  42 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  43 GNU Lesser General Public License for more details.
  44
  45 You should have received a copy of the GNU Lesser General Public
  46 License along with %(project)s.  If not, see
  47 <http://www.gnu.org/licenses/>.
  48 """.strip()
  49
  50 COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
  51
  52 ALIASES = {
  53     'A. Seeholzer':
  54         ['A. Seeholzer'],
  55     'Alberto Gomez-Casado':
  56         ['albertogomcas'],
  57     'Massimo Sandal <devicerandom@gmail.com>':
  58         ['Massimo Sandal',
  59          'devicerandom',
  60          'unknown'],
  61     'Fabrizio Benedetti':
  62         ['fabrizio.benedetti.82'],
  63     'Richard Naud <richard.naud@epfl.ch>':
  64         ['Richard Naud'],
  65     'Rolf Schmidt <rschmidt@alcor.concordia.ca>':
  66         ['Rolf Schmidt',
  67          'illysam'],
  68     'Marco Brucale':
  69         ['marcobrucale'],
  70     'Pancaldi Paolo':
  71         ['pancaldi.paolo'],
  72     }
  73
  74 IGNORED_PATHS = ['./.hg/', './doc/img', './test/data/',
  75                  './build/', '/doc/build/']
  76 IGNORED_FILES = ['COPYING', 'COPYING.LESSER']
  77
  78 # Work around missing author holes in the VCS history
  79 AUTHOR_HACKS = {
  80     ('hooke','driver','hdf5.py'):['Massimo Sandal'],
  81     ('hooke','driver','mcs.py'):['Allen Chen'],
  82     ('hooke','driver','mfp3d.py'):['A. Seeholzer','Richard Naud','Rolf Schmidt',
  83                                    'Alberto Gomez-Casado'],
  84     ('hooke','plugin','peakspot.py'):['Fabrizio Benedetti'],
  85     ('hooke','plugin','showconvoluted.py'):['Rolf Schmidt'],
  86     ('hooke','ui','gui','formatter.py'):['Francesco Musiani','Massimo Sandal'],
  87     ('hooke','ui','gui','prettyformat.py'):['Rolf Schmidt'],
  88     }
  89
  90 # Work around missing year holes in the VCS history
  91 YEAR_HACKS = {
  92     ('hooke','driver','hdf5.py'):2009,
  93     ('hooke','driver','mfp3d.py'):2008,
  94     ('hooke','driver','picoforce.py'):2006,
  95     ('hooke','driver','picoforcealt.py'):2006,
  96     ('hooke','plugin','peakspot.py'):2007,
  97     ('hooke','plugin','showconvoluted.py'):2009,
  98     ('hooke','plugin','tutorial.py'):2007,
  99     ('hooke','ui','gui','formatter.py'):2006,
 100     ('hooke','ui','gui','prettyformat.py'):2009,
 101     }
 102
 103 # Helpers for VCS-specific commands
 104
 105 def splitpath(path):
 106     """Recursively split a path into elements.
 107
 108     Examples
 109     --------
 110
 111     >>> splitpath(os.path.join('a', 'b', 'c'))
 112     ('a', 'b', 'c')
 113     >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
 114     ('a', 'b', 'c')
 115     """
 116     path = os.path.normpath(path)
 117     elements = []
 118     while True:
 119         dirname,basename = os.path.split(path)
 120         elements.insert(0,basename)
 121         if dirname in ['', '.']:
 122             break
 123         path = dirname
 124     return tuple(elements)
 125
 126 # VCS-specific commands
 127
 128 def mercurial_cmd(*args):
 129     cwd = os.getcwd()
 130     stdout = sys.stdout
 131     stderr = sys.stderr
 132     tmp_stdout = StringIO.StringIO()
 133     tmp_stderr = StringIO.StringIO()
 134     sys.stdout = tmp_stdout
 135     sys.stderr = tmp_stderr
 136     try:
 137         mercurial.dispatch.dispatch(list(args))
 138     finally:
 139         os.chdir(cwd)
 140         sys.stdout = stdout
 141         sys.stderr = stderr
 142     return (tmp_stdout.getvalue().rstrip('\n'),
 143             tmp_stderr.getvalue().rstrip('\n'))
 144
 145 def original_year(filename, year_hacks=YEAR_HACKS):
 146     # shortdate filter: YEAR-MONTH-DAY
 147     output,error = mercurial_cmd('log', '--follow',
 148                                  '--template', '{date|shortdate}\n',
 149                                  filename)
 150     years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
 151     if splitpath(filename) in year_hacks:
 152         years.append(year_hacks[splitpath(filename)])
 153     years.sort()
 154     return years[0]
 155
 156 def authors(filename, author_hacks=AUTHOR_HACKS):
 157     output,error = mercurial_cmd('log', '--follow',
 158                                  '--template', '{author}\n',
 159                                  filename)
 160     ret = list(set(output.splitlines()))
 161     if splitpath(filename) in author_hacks:
 162         ret.extend(author_hacks[splitpath(filename)])
 163     return ret
 164
 165 def authors_list():
 166     output,error = mercurial_cmd('log', '--follow',
 167                                  '--template', '{author}\n')
 168     return list(set(output.splitlines()))
 169
 170 def is_versioned(filename):
 171     output,error = mercurial_cmd('log', '--follow',
 172                                  '--template', '{date|shortdate}\n',
 173                                  filename)
 174     if len(error) > 0:
 175         return False
 176     return True
 177
 178 # General utility commands
 179
 180 def _strip_email(*args):
 181     """Remove email addresses from a series of names.
 182
 183     Examples
 184     --------
 185
 186     >>> _strip_email('J Doe <jdoe@a.com>')
 187     ['J Doe']
 188     >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
 189     ['J Doe', 'JJJ Smith']
 190     """
 191     args = list(args)
 192     for i,arg in enumerate(args):
 193         if arg == None:
 194             continue
 195         author,addr = email.utils.parseaddr(arg)
 196         args[i] = author
 197     return args
 198
 199 def _reverse_aliases(aliases):
 200     """Reverse an `aliases` dict.
 201
 202     Input:   key: canonical name,  value: list of aliases
 203     Output:  key: alias,           value: canonical name
 204
 205     Examples
 206     --------
 207
 208     >>> aliases = {
 209     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
 210     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 211     ...     None:['Anonymous <a@a.com>'],
 212     ...     }
 213     >>> r = _reverse_aliases(aliases)
 214     >>> for item in sorted(r.items()):
 215     ...     print item
 216     ('Anonymous <a@a.com>', None)
 217     ('J', 'J Doe <jdoe@a.com>')
 218     ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
 219     ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
 220     """
 221     output = {}
 222     for canonical_name,_aliases in aliases.items():
 223         for alias in _aliases:
 224             output[alias] = canonical_name
 225     return output
 226
 227 def _replace_aliases(authors, with_email=True, aliases=None):
 228     """Consolidate and sort `authors`.
 229
 230     Make the replacements listed in the `aliases` dict (key: canonical
 231     name, value: list of aliases).  If `aliases` is ``None``, default
 232     to ``ALIASES``.
 233
 234     >>> aliases = {
 235     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
 236     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 237     ...     None:['Anonymous <a@a.com>'],
 238     ...     }
 239     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 240     ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
 241     ...                  with_email=True, aliases=aliases)
 242     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 243     >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
 244     ...                  with_email=False, aliases=aliases)
 245     ['J Doe', 'JJJ Smith']
 246     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 247     ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>'],
 248     ...                  with_email=True, aliases=aliases)
 249     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 250     """
 251     if aliases == None:
 252         aliases = ALIASES
 253     if with_email == False:
 254         aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
 255                         for author,_aliases in aliases.items()])
 256     rev_aliases = _reverse_aliases(aliases)
 257     for i,author in enumerate(authors):
 258         if author in rev_aliases:
 259             authors[i] = rev_aliases[author]
 260     authors = sorted(list(set(authors)))
 261     if None in authors:
 262         authors.remove(None)
 263     return authors
 264
 265 def _copyright_string(original_year, final_year, authors, prefix=''):
 266     """
 267     >>> print _copyright_string(original_year=2005,
 268     ...                         final_year=2005,
 269     ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
 270     ...                         prefix='# '
 271     ...                        ) # doctest: +ELLIPSIS
 272     # Copyright (C) 2005 A <a@a.com>
 273     #                    B <b@b.edu>
 274     #
 275     # This file...
 276     >>> print _copyright_string(original_year=2005,
 277     ...                         final_year=2009,
 278     ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
 279     ...                        ) # doctest: +ELLIPSIS
 280     Copyright (C) 2005-2009 A <a@a.com>
 281                             B <b@b.edu>
 282     <BLANKLINE>
 283     This file...
 284     """
 285     if original_year == final_year:
 286         date_range = '%s' % original_year
 287     else:
 288         date_range = '%s-%s' % (original_year, final_year)
 289     lines = ['Copyright (C) %s %s' % (date_range, authors[0])]
 290     for author in authors[1:]:
 291         lines.append(' '*(len('Copyright (C) ')+len(date_range)+1) +
 292                      author)
 293     lines.append('')
 294     lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
 295     for i,line in enumerate(lines):
 296         lines[i] = (prefix + line).rstrip()
 297     return '\n'.join(lines)
 298
 299 def _tag_copyright(contents):
 300     """
 301     >>> contents = '''Some file
 302     ... bla bla
 303     ... # Copyright (copyright begins)
 304     ... # (copyright continues)
 305     ... # bla bla bla
 306     ... (copyright ends)
 307     ... bla bla bla
 308     ... '''
 309     >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
 310     Some file
 311     bla bla
 312     -xyz-CR-zyx-
 313     (copyright ends)
 314     bla bla bla
 315     <BLANKLINE>
 316     """
 317     lines = []
 318     incopy = False
 319     for line in contents.splitlines():
 320         if incopy == False and line.startswith('# Copyright'):
 321             incopy = True
 322             lines.append(COPY_RIGHT_TAG)
 323         elif incopy == True and not line.startswith('#'):
 324             incopy = False
 325         if incopy == False:
 326             lines.append(line.rstrip('\n'))
 327     return '\n'.join(lines)+'\n'
 328
 329 def _update_copyright(contents, original_year, authors):
 330     """
 331     >>> contents = '''Some file
 332     ... bla bla
 333     ... # Copyright (copyright begins)
 334     ... # (copyright continues)
 335     ... # bla bla bla
 336     ... (copyright ends)
 337     ... bla bla bla
 338     ... '''
 339     >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
 340     ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
 341     Some file
 342     bla bla
 343     # Copyright (C) 2008-... Jack
 344     #                         Jill
 345     #
 346     # This file...
 347     (copyright ends)
 348     bla bla bla
 349     <BLANKLINE>
 350     """
 351     current_year = time.gmtime()[0]
 352     copyright_string = _copyright_string(
 353         original_year, current_year, authors, prefix='# ')
 354     contents = _tag_copyright(contents)
 355     return contents.replace(COPY_RIGHT_TAG, copyright_string)
 356
 357 def ignored_file(filename, ignored_paths=None, ignored_files=None,
 358                  check_disk=True, check_vcs=True):
 359     """
 360     >>> ignored_paths = ['./a/', './b/']
 361     >>> ignored_files = ['x', 'y']
 362     >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
 363     True
 364     >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
 365     False
 366     >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
 367     True
 368     >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
 369     False
 370     >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
 371     False
 372     """
 373     if ignored_paths == None:
 374         ignored_paths = IGNORED_PATHS
 375     if ignored_files == None:
 376         ignored_files = IGNORED_FILES
 377     if check_disk == True and os.path.isfile(filename) == False:
 378         return True
 379     for path in ignored_paths:
 380         if filename.startswith(path):
 381             return True
 382     if os.path.basename(filename) in ignored_files:
 383         return True
 384     if check_vcs == True and is_versioned(filename) == False:
 385         return True
 386     return False
 387
 388 def _set_contents(filename, contents, original_contents=None, dry_run=False,
 389                   verbose=0):
 390     if original_contents == None and os.path.isfile(filename):
 391         f = open(filename, 'r')
 392         original_contents = f.read()
 393         f.close()
 394     if verbose > 0:
 395         print "checking %s ... " % filename,
 396     if contents != original_contents:
 397         if verbose > 0:
 398             if original_contents == None:
 399                 print "[creating]"
 400             else:
 401                 print "[updating]"
 402         if verbose > 1 and original_contents != None:
 403             print '\n'.join(
 404                 difflib.unified_diff(
 405                     original_contents.splitlines(), contents.splitlines(),
 406                     fromfile=os.path.normpath(os.path.join('a', filename)),
 407                     tofile=os.path.normpath(os.path.join('b', filename)),
 408                     n=3, lineterm=''))
 409         if dry_run == False:
 410             f = file(filename, 'w')
 411             f.write(contents)
 412             f.close()
 413     elif verbose > 0:
 414         print "[no change]"
 415
 416 # Update commands
 417
 418 def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
 419     new_contents = '%s was written by:\n%s\n' % (
 420         PROJECT_INFO['project'],
 421         '\n'.join(authors_fn())
 422         )
 423     _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
 424
 425 def update_file(filename, original_year_fn=original_year, authors_fn=authors,
 426                 dry_run=False, verbose=0):
 427     f = file(filename, 'r')
 428     contents = f.read()
 429     f.close()
 430
 431     original_year = original_year_fn(filename)
 432     authors = authors_fn(filename)
 433     authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
 434
 435     new_contents = _update_copyright(contents, original_year, authors)
 436     _set_contents(filename, contents=new_contents, original_contents=contents,
 437                   dry_run=dry_run, verbose=verbose)
 438
 439 def update_files(files=None, dry_run=False, verbose=0):
 440     if files == None or len(files) == 0:
 441         files = []
 442         for dirpath,dirnames,filenames in os.walk('.'):
 443             for filename in filenames:
 444                 files.append(os.path.join(dirpath, filename))
 445
 446     for filename in files:
 447         if ignored_file(filename) == True:
 448             continue
 449         update_file(filename, dry_run=dry_run, verbose=verbose)
 450
 451 def test():
 452     import doctest
 453     doctest.testmod()
 454
 455 if __name__ == '__main__':
 456     import optparse
 457     import sys
 458
 459     usage = """%%prog [options] [file ...]
 460
 461 Update copyright information in source code with information from
 462 the %(vcs)s repository.  Run from the %(project)s repository root.
 463
 464 Replaces every line starting with '^# Copyright' and continuing with
 465 '^#' with an auto-generated copyright blurb.  If you want to add
 466 #-commented material after a copyright blurb, please insert a blank
 467 line between the blurb and your comment, so the next run of
 468 ``update_copyright.py`` doesn't clobber your comment.
 469
 470 If no files are given, a list of files to update is generated
 471 automatically.
 472 """ % PROJECT_INFO
 473     p = optparse.OptionParser(usage)
 474     p.add_option('--test', dest='test', default=False,
 475                  action='store_true', help='Run internal tests and exit')
 476     p.add_option('--dry-run', dest='dry_run', default=False,
 477                  action='store_true', help="Don't make any changes")
 478     p.add_option('-v', '--verbose', dest='verbose', default=0,
 479                  action='count', help='Increment verbosity')
 480     options,args = p.parse_args()
 481
 482     if options.test == True:
 483         test()
 484         sys.exit(0)
 485
 486     update_authors(dry_run=options.dry_run, verbose=options.verbose)
 487     update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)