update_copyright.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
   4 #
   5 # This file is part of Hooke.
   6 #
   7 # Hooke is free software: you can redistribute it and/or
   8 # modify it under the terms of the GNU Lesser General Public
   9 # License as published by the Free Software Foundation, either
  10 # version 3 of the License, or (at your option) any later version.
  11 #
  12 # Hooke is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU Lesser General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU Lesser General Public
  18 # License along with Hooke.  If not, see
  19 # <http://www.gnu.org/licenses/>.
  20
  21 """Automatically update copyright boilerplate.
  22
  23 This script is adapted from one written for `Bugs Everywhere`_.
  24
  25 .. _Bugs Everywhere: http://bugseverywhere.org/
  26 """
  27
  28 import difflib
  29 import email.utils
  30 import os
  31 import os.path
  32 import re
  33 import StringIO
  34 import sys
  35 import time
  36
  37 import mercurial
  38 import mercurial.dispatch
  39
  40
  41 PROJECT_INFO = {
  42     'project': 'Hooke',
  43     'vcs': 'Mercurial',
  44     }
  45
  46 # Break "copyright" into "copy" and "right" to avoid matching the
  47 # REGEXP.
  48 COPY_RIGHT_TEXT="""
  49 This file is part of %(project)s.
  50
  51 %(project)s is free software: you can redistribute it and/or
  52 modify it under the terms of the GNU Lesser General Public
  53 License as published by the Free Software Foundation, either
  54 version 3 of the License, or (at your option) any later version.
  55
  56 %(project)s is distributed in the hope that it will be useful,
  57 but WITHOUT ANY WARRANTY; without even the implied warranty of
  58 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  59 GNU Lesser General Public License for more details.
  60
  61 You should have received a copy of the GNU Lesser General Public
  62 License along with %(project)s.  If not, see
  63 <http://www.gnu.org/licenses/>.
  64 """.strip()
  65
  66 COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
  67
  68 ALIASES = {
  69     'A. Seeholzer':
  70         ['A. Seeholzer'],
  71     'Alberto Gomez-Casado':
  72         ['albertogomcas'],
  73     'Massimo Sandal <devicerandom@gmail.com>':
  74         ['Massimo Sandal',
  75          'devicerandom',
  76          'unknown'],
  77     'Fabrizio Benedetti':
  78         ['fabrizio.benedetti.82'],
  79     'Richard Naud <richard.naud@epfl.ch>':
  80         ['Richard Naud'],
  81     'Rolf Schmidt <rschmidt@alcor.concordia.ca>':
  82         ['Rolf Schmidt',
  83          'illysam'],
  84     'Marco Brucale':
  85         ['marcobrucale'],
  86     'Pancaldi Paolo':
  87         ['pancaldi.paolo'],
  88     }
  89
  90 IGNORED_PATHS = ['./.hg/', './doc/img', './test/data/',
  91                  './build/', '/doc/build/']
  92 IGNORED_FILES = ['COPYING', 'COPYING.LESSER']
  93
  94 # Work around missing author holes in the VCS history
  95 AUTHOR_HACKS = {
  96     ('hooke','driver','hdf5.py'):['Massimo Sandal'],
  97     ('hooke','driver','mcs.py'):['Allen Chen'],
  98     ('hooke','driver','mfp3d.py'):['A. Seeholzer','Richard Naud','Rolf Schmidt',
  99                                    'Alberto Gomez-Casado'],
 100     ('hooke','plugin','peakspot.py'):['Fabrizio Benedetti'],
 101     ('hooke','plugin','showconvoluted.py'):['Rolf Schmidt'],
 102     ('hooke','ui','gui','formatter.py'):['Francesco Musiani','Massimo Sandal'],
 103     ('hooke','ui','gui','prettyformat.py'):['Rolf Schmidt'],
 104     }
 105
 106 # Work around missing year holes in the VCS history
 107 YEAR_HACKS = {
 108     ('hooke','driver','hdf5.py'):2009,
 109     ('hooke','driver','mfp3d.py'):2008,
 110     ('hooke','driver','picoforce.py'):2006,
 111     ('hooke','driver','picoforcealt.py'):2006,
 112     ('hooke','plugin','peakspot.py'):2007,
 113     ('hooke','plugin','showconvoluted.py'):2009,
 114     ('hooke','plugin','tutorial.py'):2007,
 115     ('hooke','ui','gui','formatter.py'):2006,
 116     ('hooke','ui','gui','prettyformat.py'):2009,
 117     }
 118
 119 # Helpers for VCS-specific commands
 120
 121 def splitpath(path):
 122     """Recursively split a path into elements.
 123
 124     Examples
 125     --------
 126
 127     >>> splitpath(os.path.join('a', 'b', 'c'))
 128     ('a', 'b', 'c')
 129     >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
 130     ('a', 'b', 'c')
 131     """
 132     path = os.path.normpath(path)
 133     elements = []
 134     while True:
 135         dirname,basename = os.path.split(path)
 136         elements.insert(0,basename)
 137         if dirname in ['', '.']:
 138             break
 139         path = dirname
 140     return tuple(elements)
 141
 142 # VCS-specific commands
 143
 144 def mercurial_cmd(*args):
 145     cwd = os.getcwd()
 146     stdout = sys.stdout
 147     stderr = sys.stderr
 148     tmp_stdout = StringIO.StringIO()
 149     tmp_stderr = StringIO.StringIO()
 150     sys.stdout = tmp_stdout
 151     sys.stderr = tmp_stderr
 152     try:
 153         mercurial.dispatch.dispatch(list(args))
 154     finally:
 155         os.chdir(cwd)
 156         sys.stdout = stdout
 157         sys.stderr = stderr
 158     return (tmp_stdout.getvalue().rstrip('\n'),
 159             tmp_stderr.getvalue().rstrip('\n'))
 160
 161 def original_year(filename, year_hacks=YEAR_HACKS):
 162     # shortdate filter: YEAR-MONTH-DAY
 163     output,error = mercurial_cmd('log', '--follow',
 164                                  '--template', '{date|shortdate}\n',
 165                                  filename)
 166     years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
 167     if splitpath(filename) in year_hacks:
 168         years.append(year_hacks[splitpath(filename)])
 169     years.sort()
 170     return years[0]
 171
 172 def authors(filename, author_hacks=AUTHOR_HACKS):
 173     output,error = mercurial_cmd('log', '--follow',
 174                                  '--template', '{author}\n',
 175                                  filename)
 176     ret = list(set(output.splitlines()))
 177     if splitpath(filename) in author_hacks:
 178         ret.extend(author_hacks[splitpath(filename)])
 179     return ret
 180
 181 def authors_list(author_hacks=AUTHOR_HACKS):
 182     output,error = mercurial_cmd('log', '--follow',
 183                                  '--template', '{author}\n')
 184     ret = list(set(output.splitlines()))
 185     for path,authors in author_hacks.items():
 186         ret.extend(authors)
 187     return ret
 188
 189 def is_versioned(filename):
 190     output,error = mercurial_cmd('log', '--follow',
 191                                  '--template', '{date|shortdate}\n',
 192                                  filename)
 193     if len(error) > 0:
 194         return False
 195     return True
 196
 197 # General utility commands
 198
 199 def _strip_email(*args):
 200     """Remove email addresses from a series of names.
 201
 202     Examples
 203     --------
 204
 205     >>> _strip_email('J Doe <jdoe@a.com>')
 206     ['J Doe']
 207     >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
 208     ['J Doe', 'JJJ Smith']
 209     """
 210     args = list(args)
 211     for i,arg in enumerate(args):
 212         if arg == None:
 213             continue
 214         author,addr = email.utils.parseaddr(arg)
 215         args[i] = author
 216     return args
 217
 218 def _reverse_aliases(aliases):
 219     """Reverse an `aliases` dict.
 220
 221     Input:   key: canonical name,  value: list of aliases
 222     Output:  key: alias,           value: canonical name
 223
 224     Examples
 225     --------
 226
 227     >>> aliases = {
 228     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
 229     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 230     ...     None:['Anonymous <a@a.com>'],
 231     ...     }
 232     >>> r = _reverse_aliases(aliases)
 233     >>> for item in sorted(r.items()):
 234     ...     print item
 235     ('Anonymous <a@a.com>', None)
 236     ('J', 'J Doe <jdoe@a.com>')
 237     ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
 238     ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
 239     """
 240     output = {}
 241     for canonical_name,_aliases in aliases.items():
 242         for alias in _aliases:
 243             output[alias] = canonical_name
 244     return output
 245
 246 def _replace_aliases(authors, with_email=True, aliases=None):
 247     """Consolidate and sort `authors`.
 248
 249     Make the replacements listed in the `aliases` dict (key: canonical
 250     name, value: list of aliases).  If `aliases` is ``None``, default
 251     to ``ALIASES``.
 252
 253     >>> aliases = {
 254     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
 255     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 256     ...     None:['Anonymous <a@a.com>'],
 257     ...     }
 258     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 259     ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
 260     ...                  with_email=True, aliases=aliases)
 261     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 262     >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
 263     ...                  with_email=False, aliases=aliases)
 264     ['J Doe', 'JJJ Smith']
 265     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 266     ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>'],
 267     ...                  with_email=True, aliases=aliases)
 268     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 269     """
 270     if aliases == None:
 271         aliases = ALIASES
 272     if with_email == False:
 273         aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
 274                         for author,_aliases in aliases.items()])
 275     rev_aliases = _reverse_aliases(aliases)
 276     for i,author in enumerate(authors):
 277         if author in rev_aliases:
 278             authors[i] = rev_aliases[author]
 279     authors = sorted(list(set(authors)))
 280     if None in authors:
 281         authors.remove(None)
 282     return authors
 283
 284 def _copyright_string(original_year, final_year, authors, prefix=''):
 285     """
 286     >>> print _copyright_string(original_year=2005,
 287     ...                         final_year=2005,
 288     ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
 289     ...                         prefix='# '
 290     ...                        ) # doctest: +ELLIPSIS
 291     # Copyright (C) 2005 A <a@a.com>
 292     #                    B <b@b.edu>
 293     #
 294     # This file...
 295     >>> print _copyright_string(original_year=2005,
 296     ...                         final_year=2009,
 297     ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
 298     ...                        ) # doctest: +ELLIPSIS
 299     Copyright (C) 2005-2009 A <a@a.com>
 300                             B <b@b.edu>
 301     <BLANKLINE>
 302     This file...
 303     """
 304     if original_year == final_year:
 305         date_range = '%s' % original_year
 306     else:
 307         date_range = '%s-%s' % (original_year, final_year)
 308     lines = ['Copyright (C) %s %s' % (date_range, authors[0])]
 309     for author in authors[1:]:
 310         lines.append(' '*(len('Copyright (C) ')+len(date_range)+1) +
 311                      author)
 312     lines.append('')
 313     lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
 314     for i,line in enumerate(lines):
 315         lines[i] = (prefix + line).rstrip()
 316     return '\n'.join(lines)
 317
 318 def _tag_copyright(contents):
 319     """
 320     >>> contents = '''Some file
 321     ... bla bla
 322     ... # Copyright (copyright begins)
 323     ... # (copyright continues)
 324     ... # bla bla bla
 325     ... (copyright ends)
 326     ... bla bla bla
 327     ... '''
 328     >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
 329     Some file
 330     bla bla
 331     -xyz-CR-zyx-
 332     (copyright ends)
 333     bla bla bla
 334     <BLANKLINE>
 335     """
 336     lines = []
 337     incopy = False
 338     for line in contents.splitlines():
 339         if incopy == False and line.startswith('# Copyright'):
 340             incopy = True
 341             lines.append(COPY_RIGHT_TAG)
 342         elif incopy == True and not line.startswith('#'):
 343             incopy = False
 344         if incopy == False:
 345             lines.append(line.rstrip('\n'))
 346     return '\n'.join(lines)+'\n'
 347
 348 def _update_copyright(contents, original_year, authors):
 349     """
 350     >>> contents = '''Some file
 351     ... bla bla
 352     ... # Copyright (copyright begins)
 353     ... # (copyright continues)
 354     ... # bla bla bla
 355     ... (copyright ends)
 356     ... bla bla bla
 357     ... '''
 358     >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
 359     ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
 360     Some file
 361     bla bla
 362     # Copyright (C) 2008-... Jack
 363     #                         Jill
 364     #
 365     # This file...
 366     (copyright ends)
 367     bla bla bla
 368     <BLANKLINE>
 369     """
 370     current_year = time.gmtime()[0]
 371     copyright_string = _copyright_string(
 372         original_year, current_year, authors, prefix='# ')
 373     contents = _tag_copyright(contents)
 374     return contents.replace(COPY_RIGHT_TAG, copyright_string)
 375
 376 def ignored_file(filename, ignored_paths=None, ignored_files=None,
 377                  check_disk=True, check_vcs=True):
 378     """
 379     >>> ignored_paths = ['./a/', './b/']
 380     >>> ignored_files = ['x', 'y']
 381     >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
 382     True
 383     >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
 384     False
 385     >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
 386     True
 387     >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
 388     False
 389     >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
 390     False
 391     """
 392     if ignored_paths == None:
 393         ignored_paths = IGNORED_PATHS
 394     if ignored_files == None:
 395         ignored_files = IGNORED_FILES
 396     if check_disk == True and os.path.isfile(filename) == False:
 397         return True
 398     for path in ignored_paths:
 399         if filename.startswith(path):
 400             return True
 401     if os.path.basename(filename) in ignored_files:
 402         return True
 403     if check_vcs == True and is_versioned(filename) == False:
 404         return True
 405     return False
 406
 407 def _set_contents(filename, contents, original_contents=None, dry_run=False,
 408                   verbose=0):
 409     if original_contents == None and os.path.isfile(filename):
 410         f = open(filename, 'r')
 411         original_contents = f.read()
 412         f.close()
 413     if verbose > 0:
 414         print "checking %s ... " % filename,
 415     if contents != original_contents:
 416         if verbose > 0:
 417             if original_contents == None:
 418                 print "[creating]"
 419             else:
 420                 print "[updating]"
 421         if verbose > 1 and original_contents != None:
 422             print '\n'.join(
 423                 difflib.unified_diff(
 424                     original_contents.splitlines(), contents.splitlines(),
 425                     fromfile=os.path.normpath(os.path.join('a', filename)),
 426                     tofile=os.path.normpath(os.path.join('b', filename)),
 427                     n=3, lineterm=''))
 428         if dry_run == False:
 429             f = file(filename, 'w')
 430             f.write(contents)
 431             f.close()
 432     elif verbose > 0:
 433         print "[no change]"
 434
 435 # Update commands
 436
 437 def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
 438     authors = authors_fn()
 439     authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
 440     new_contents = '%s was written by:\n%s\n' % (
 441         PROJECT_INFO['project'],
 442         '\n'.join(authors)
 443         )
 444     _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
 445
 446 def update_file(filename, original_year_fn=original_year, authors_fn=authors,
 447                 dry_run=False, verbose=0):
 448     f = file(filename, 'r')
 449     contents = f.read()
 450     f.close()
 451
 452     original_year = original_year_fn(filename)
 453     authors = authors_fn(filename)
 454     authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
 455
 456     new_contents = _update_copyright(contents, original_year, authors)
 457     _set_contents(filename, contents=new_contents, original_contents=contents,
 458                   dry_run=dry_run, verbose=verbose)
 459
 460 def update_files(files=None, dry_run=False, verbose=0):
 461     if files == None or len(files) == 0:
 462         files = []
 463         for dirpath,dirnames,filenames in os.walk('.'):
 464             for filename in filenames:
 465                 files.append(os.path.join(dirpath, filename))
 466
 467     for filename in files:
 468         if ignored_file(filename) == True:
 469             continue
 470         update_file(filename, dry_run=dry_run, verbose=verbose)
 471
 472 def test():
 473     import doctest
 474     doctest.testmod()
 475
 476 if __name__ == '__main__':
 477     import optparse
 478     import sys
 479
 480     usage = """%%prog [options] [file ...]
 481
 482 Update copyright information in source code with information from
 483 the %(vcs)s repository.  Run from the %(project)s repository root.
 484
 485 Replaces every line starting with '^# Copyright' and continuing with
 486 '^#' with an auto-generated copyright blurb.  If you want to add
 487 #-commented material after a copyright blurb, please insert a blank
 488 line between the blurb and your comment, so the next run of
 489 ``update_copyright.py`` doesn't clobber your comment.
 490
 491 If no files are given, a list of files to update is generated
 492 automatically.
 493 """ % PROJECT_INFO
 494     p = optparse.OptionParser(usage)
 495     p.add_option('--test', dest='test', default=False,
 496                  action='store_true', help='Run internal tests and exit')
 497     p.add_option('--dry-run', dest='dry_run', default=False,
 498                  action='store_true', help="Don't make any changes")
 499     p.add_option('-v', '--verbose', dest='verbose', default=0,
 500                  action='count', help='Increment verbosity')
 501     options,args = p.parse_args()
 502
 503     if options.test == True:
 504         test()
 505         sys.exit(0)
 506
 507     update_authors(dry_run=options.dry_run, verbose=options.verbose)
 508     update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)