update_copyright.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright
   4
   5 """Automatically update copyright boilerplate.
   6
   7 This script is adapted from one written for `Bugs Everywhere`_.
   8
   9 .. _Bugs Everywhere: http://bugseverywhere.org/
  10 """
  11
  12 import difflib
  13 import email.utils
  14 import os
  15 import os.path
  16 import re
  17 import StringIO
  18 import sys
  19 import time
  20
  21 import mercurial
  22 import mercurial.dispatch
  23
  24
  25 PROJECT_INFO = {
  26     'project': 'Hooke',
  27     'vcs': 'Mercurial',
  28     }
  29
  30 # Break "copyright" into "copy" and "right" to avoid matching the
  31 # REGEXP.
  32 COPY_RIGHT_TEXT="""
  33 This file is part of %(project)s.
  34
  35 %(project)s is free software: you can redistribute it and/or
  36 modify it under the terms of the GNU Lesser General Public
  37 License as published by the Free Software Foundation, either
  38 version 3 of the License, or (at your option) any later version.
  39
  40 %(project)s is distributed in the hope that it will be useful,
  41 but WITHOUT ANY WARRANTY; without even the implied warranty of
  42 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  43 GNU Lesser General Public License for more details.
  44
  45 You should have received a copy of the GNU Lesser General Public
  46 License along with %(project)s.  If not, see
  47 <http://www.gnu.org/licenses/>.
  48 """.strip()
  49
  50 COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
  51
  52 ALIASES = {
  53     'Alberto Gomez-Casado':
  54         ['albertogomcas'],
  55     'Massimo Sandal <devicerandom@gmail.com>':
  56         ['Massimo Sandal',
  57          'devicerandom',
  58          'unknown'],
  59     'Fabrizio Benedetti':['fabrizio.benedetti.82'],
  60     'Rolf Schmidt <rschmidt@alcor.concordia.ca>':
  61         ['Rolf Schmidt',
  62          'illysam'],
  63     'Marco Brucale':['marcobrucale'],
  64     'Pancaldi Paolo':['pancaldi.paolo'],
  65     }
  66
  67 IGNORED_PATHS = ['./.hg/', './doc/img', './test/data/',
  68                  './build/', '/doc/build/']
  69 IGNORED_FILES = ['COPYING', 'COPYING.LESSER']
  70
  71 # Work around missing author holes in the VCS history
  72 AUTHOR_HACKS = {
  73     ('hooke','driver','hdf5.py'):['Massimo Sandal'],
  74     ('hooke','driver','mcs.py'):['Allen Chen'],
  75     ('hooke','plugin','peakspot.py'):['Fabrizio Benedetti'],
  76     }
  77
  78 # Work around missing year holes in the VCS history
  79 YEAR_HACKS = {
  80     ('hooke','driver','hdf5.py'):2009,
  81     ('hooke','driver','picoforce.py'):2006,
  82     ('hooke','driver','picoforcealt.py'):2006,
  83     ('hooke','plugin','peakspot.py'):2007,
  84     ('hooke','plugin','tutorial.py'):2007,
  85     }
  86
  87 # Helpers for VCS-specific commands
  88
  89 def splitpath(path):
  90     """Recursively split a path into elements.
  91
  92     Examples
  93     --------
  94
  95     >>> splitpath(os.path.join('a', 'b', 'c'))
  96     ('a', 'b', 'c')
  97     >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
  98     ('a', 'b', 'c')
  99     """
 100     path = os.path.normpath(path)
 101     elements = []
 102     while True:
 103         dirname,basename = os.path.split(path)
 104         elements.insert(0,basename)
 105         if dirname in ['', '.']:
 106             break
 107         path = dirname
 108     return tuple(elements)
 109
 110 # VCS-specific commands
 111
 112 def mercurial_cmd(*args):
 113     cwd = os.getcwd()
 114     stdout = sys.stdout
 115     stderr = sys.stderr
 116     tmp_stdout = StringIO.StringIO()
 117     tmp_stderr = StringIO.StringIO()
 118     sys.stdout = tmp_stdout
 119     sys.stderr = tmp_stderr
 120     try:
 121         mercurial.dispatch.dispatch(list(args))
 122     finally:
 123         os.chdir(cwd)
 124         sys.stdout = stdout
 125         sys.stderr = stderr
 126     return (tmp_stdout.getvalue().rstrip('\n'),
 127             tmp_stderr.getvalue().rstrip('\n'))
 128
 129 def original_year(filename, year_hacks=YEAR_HACKS):
 130     # shortdate filter: YEAR-MONTH-DAY
 131     output,error = mercurial_cmd('log', '--follow',
 132                                  '--template', '{date|shortdate}\n',
 133                                  filename)
 134     years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
 135     if splitpath(filename) in year_hacks:
 136         years.append(year_hacks[splitpath(filename)])
 137     years.sort()
 138     return years[0]
 139
 140 def authors(filename, author_hacks=AUTHOR_HACKS):
 141     output,error = mercurial_cmd('log', '--follow',
 142                                  '--template', '{author}\n',
 143                                  filename)
 144     ret = list(set(output.splitlines()))
 145     if splitpath(filename) in author_hacks:
 146         ret.extend(author_hacks[splitpath(filename)])
 147     return ret
 148
 149 def authors_list():
 150     output,error = mercurial_cmd('log', '--follow',
 151                                  '--template', '{author}\n')
 152     return list(set(output.splitlines()))
 153
 154 def is_versioned(filename):
 155     output,error = mercurial_cmd('log', '--follow',
 156                                  '--template', '{date|shortdate}\n',
 157                                  filename)
 158     if len(error) > 0:
 159         return False
 160     return True
 161
 162 # General utility commands
 163
 164 def _strip_email(*args):
 165     """Remove email addresses from a series of names.
 166
 167     Examples
 168     --------
 169
 170     >>> _strip_email('J Doe <jdoe@a.com>')
 171     ['J Doe']
 172     >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
 173     ['J Doe', 'JJJ Smith']
 174     """
 175     args = list(args)
 176     for i,arg in enumerate(args):
 177         if arg == None:
 178             continue
 179         author,addr = email.utils.parseaddr(arg)
 180         args[i] = author
 181     return args
 182
 183 def _reverse_aliases(aliases):
 184     """Reverse an `aliases` dict.
 185
 186     Input:   key: canonical name,  value: list of aliases
 187     Output:  key: alias,           value: canonical name
 188
 189     Examples
 190     --------
 191
 192     >>> aliases = {
 193     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
 194     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 195     ...     None:['Anonymous <a@a.com>'],
 196     ...     }
 197     >>> r = _reverse_aliases(aliases)
 198     >>> for item in sorted(r.items()):
 199     ...     print item
 200     ('Anonymous <a@a.com>', None)
 201     ('J', 'J Doe <jdoe@a.com>')
 202     ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
 203     ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
 204     """
 205     output = {}
 206     for canonical_name,_aliases in aliases.items():
 207         for alias in _aliases:
 208             output[alias] = canonical_name
 209     return output
 210
 211 def _replace_aliases(authors, with_email=True, aliases=None):
 212     """Consolidate and sort `authors`.
 213
 214     Make the replacements listed in the `aliases` dict (key: canonical
 215     name, value: list of aliases).  If `aliases` is ``None``, default
 216     to ``ALIASES``.
 217
 218     >>> aliases = {
 219     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
 220     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 221     ...     None:['Anonymous <a@a.com>'],
 222     ...     }
 223     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 224     ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
 225     ...                  with_email=True, aliases=aliases)
 226     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 227     >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
 228     ...                  with_email=False, aliases=aliases)
 229     ['J Doe', 'JJJ Smith']
 230     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 231     ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>'],
 232     ...                  with_email=True, aliases=aliases)
 233     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 234     """
 235     if aliases == None:
 236         aliases = ALIASES
 237     if with_email == False:
 238         aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
 239                         for author,_aliases in aliases.items()])
 240     rev_aliases = _reverse_aliases(aliases)
 241     for i,author in enumerate(authors):
 242         if author in rev_aliases:
 243             authors[i] = rev_aliases[author]
 244     authors = sorted(list(set(authors)))
 245     if None in authors:
 246         authors.remove(None)
 247     return authors
 248
 249 def _copyright_string(original_year, final_year, authors, prefix=''):
 250     """
 251     >>> print _copyright_string(original_year=2005,
 252     ...                         final_year=2005,
 253     ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
 254     ...                         prefix='# '
 255     ...                        ) # doctest: +ELLIPSIS
 256     # Copyright (C) 2005 A <a@a.com>
 257     #                    B <b@b.edu>
 258     #
 259     # This file...
 260     >>> print _copyright_string(original_year=2005,
 261     ...                         final_year=2009,
 262     ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
 263     ...                        ) # doctest: +ELLIPSIS
 264     Copyright (C) 2005-2009 A <a@a.com>
 265                             B <b@b.edu>
 266     <BLANKLINE>
 267     This file...
 268     """
 269     if original_year == final_year:
 270         date_range = '%s' % original_year
 271     else:
 272         date_range = '%s-%s' % (original_year, final_year)
 273     lines = ['Copyright (C) %s %s' % (date_range, authors[0])]
 274     for author in authors[1:]:
 275         lines.append(' '*(len('Copyright (C) ')+len(date_range)+1) +
 276                      author)
 277     lines.append('')
 278     lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
 279     for i,line in enumerate(lines):
 280         lines[i] = (prefix + line).rstrip()
 281     return '\n'.join(lines)
 282
 283 def _tag_copyright(contents):
 284     """
 285     >>> contents = '''Some file
 286     ... bla bla
 287     ... # Copyright (copyright begins)
 288     ... # (copyright continues)
 289     ... # bla bla bla
 290     ... (copyright ends)
 291     ... bla bla bla
 292     ... '''
 293     >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
 294     Some file
 295     bla bla
 296     -xyz-CR-zyx-
 297     (copyright ends)
 298     bla bla bla
 299     <BLANKLINE>
 300     """
 301     lines = []
 302     incopy = False
 303     for line in contents.splitlines():
 304         if incopy == False and line.startswith('# Copyright'):
 305             incopy = True
 306             lines.append(COPY_RIGHT_TAG)
 307         elif incopy == True and not line.startswith('#'):
 308             incopy = False
 309         if incopy == False:
 310             lines.append(line.rstrip('\n'))
 311     return '\n'.join(lines)+'\n'
 312
 313 def _update_copyright(contents, original_year, authors):
 314     """
 315     >>> contents = '''Some file
 316     ... bla bla
 317     ... # Copyright (copyright begins)
 318     ... # (copyright continues)
 319     ... # bla bla bla
 320     ... (copyright ends)
 321     ... bla bla bla
 322     ... '''
 323     >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
 324     ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
 325     Some file
 326     bla bla
 327     # Copyright (C) 2008-... Jack
 328     #                         Jill
 329     #
 330     # This file...
 331     (copyright ends)
 332     bla bla bla
 333     <BLANKLINE>
 334     """
 335     current_year = time.gmtime()[0]
 336     copyright_string = _copyright_string(
 337         original_year, current_year, authors, prefix='# ')
 338     contents = _tag_copyright(contents)
 339     return contents.replace(COPY_RIGHT_TAG, copyright_string)
 340
 341 def ignored_file(filename, ignored_paths=None, ignored_files=None,
 342                  check_disk=True, check_vcs=True):
 343     """
 344     >>> ignored_paths = ['./a/', './b/']
 345     >>> ignored_files = ['x', 'y']
 346     >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
 347     True
 348     >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
 349     False
 350     >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
 351     True
 352     >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
 353     False
 354     >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
 355     False
 356     """
 357     if ignored_paths == None:
 358         ignored_paths = IGNORED_PATHS
 359     if ignored_files == None:
 360         ignored_files = IGNORED_FILES
 361     if check_disk == True and os.path.isfile(filename) == False:
 362         return True
 363     for path in ignored_paths:
 364         if filename.startswith(path):
 365             return True
 366     if os.path.basename(filename) in ignored_files:
 367         return True
 368     if check_vcs == True and is_versioned(filename) == False:
 369         return True
 370     return False
 371
 372 def _set_contents(filename, contents, original_contents=None, dry_run=False,
 373                   verbose=0):
 374     if original_contents == None and os.path.isfile(filename):
 375         f = open(filename, 'r')
 376         original_contents = f.read()
 377         f.close()
 378     if verbose > 0:
 379         print "checking %s ... " % filename,
 380     if contents != original_contents:
 381         if verbose > 0:
 382             if original_contents == None:
 383                 print "[creating]"
 384             else:
 385                 print "[updating]"
 386         if verbose > 1 and original_contents != None:
 387             print '\n'.join(
 388                 difflib.unified_diff(
 389                     original_contents.splitlines(), contents.splitlines(),
 390                     fromfile=os.path.normpath(os.path.join('a', filename)),
 391                     tofile=os.path.normpath(os.path.join('b', filename)),
 392                     n=3, lineterm=''))
 393         if dry_run == False:
 394             f = file(filename, 'w')
 395             f.write(contents)
 396             f.close()
 397     elif verbose > 0:
 398         print "[no change]"
 399
 400 # Update commands
 401
 402 def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
 403     new_contents = '%s was written by:\n%s\n' % (
 404         PROJECT_INFO['project'],
 405         '\n'.join(authors_fn())
 406         )
 407     _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
 408
 409 def update_file(filename, original_year_fn=original_year, authors_fn=authors,
 410                 dry_run=False, verbose=0):
 411     f = file(filename, 'r')
 412     contents = f.read()
 413     f.close()
 414
 415     original_year = original_year_fn(filename)
 416     authors = authors_fn(filename)
 417     authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
 418
 419     new_contents = _update_copyright(contents, original_year, authors)
 420     _set_contents(filename, contents=new_contents, original_contents=contents,
 421                   dry_run=dry_run, verbose=verbose)
 422
 423 def update_files(files=None, dry_run=False, verbose=0):
 424     if files == None or len(files) == 0:
 425         files = []
 426         for dirpath,dirnames,filenames in os.walk('.'):
 427             for filename in filenames:
 428                 files.append(os.path.join(dirpath, filename))
 429
 430     for filename in files:
 431         if ignored_file(filename) == True:
 432             continue
 433         update_file(filename, dry_run=dry_run, verbose=verbose)
 434
 435 def test():
 436     import doctest
 437     doctest.testmod()
 438
 439 if __name__ == '__main__':
 440     import optparse
 441     import sys
 442
 443     usage = """%%prog [options] [file ...]
 444
 445 Update copyright information in source code with information from
 446 the %(vcs)s repository.  Run from the %(project)s repository root.
 447
 448 Replaces every line starting with '^# Copyright' and continuing with
 449 '^#' with an auto-generated copyright blurb.  If you want to add
 450 #-commented material after a copyright blurb, please insert a blank
 451 line between the blurb and your comment, so the next run of
 452 ``update_copyright.py`` doesn't clobber your comment.
 453
 454 If no files are given, a list of files to update is generated
 455 automatically.
 456 """ % PROJECT_INFO
 457     p = optparse.OptionParser(usage)
 458     p.add_option('--test', dest='test', default=False,
 459                  action='store_true', help='Run internal tests and exit')
 460     p.add_option('--dry-run', dest='dry_run', default=False,
 461                  action='store_true', help="Don't make any changes")
 462     p.add_option('-v', '--verbose', dest='verbose', default=0,
 463                  action='count', help='Increment verbosity')
 464     options,args = p.parse_args()
 465
 466     if options.test == True:
 467         test()
 468         sys.exit(0)
 469
 470     update_authors(dry_run=options.dry_run, verbose=options.verbose)
 471     update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)