update_copyright.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright
   4
   5 """Automatically update copyright boilerplate.
   6
   7 This script is adapted from one written for `Bugs Everywhere`_.
   8
   9 .. _Bugs Everywhere: http://bugseverywhere.org/
  10 """
  11
  12 import difflib
  13 import email.utils
  14 import os
  15 import os.path
  16 import re
  17 import StringIO
  18 import sys
  19 import time
  20
  21 import mercurial
  22 import mercurial.dispatch
  23
  24
  25 PROJECT_INFO = {
  26     'project': 'Hooke',
  27     'vcs': 'Mercurial',
  28     }
  29
  30 # Break "copyright" into "copy" and "right" to avoid matching the
  31 # REGEXP.
  32 COPY_RIGHT_TEXT="""
  33 This file is part of %(project)s.
  34
  35 %(project)s is free software: you can redistribute it and/or
  36 modify it under the terms of the GNU Lesser General Public
  37 License as published by the Free Software Foundation, either
  38 version 3 of the License, or (at your option) any later version.
  39
  40 %(project)s is distributed in the hope that it will be useful,
  41 but WITHOUT ANY WARRANTY; without even the implied warranty of
  42 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  43 GNU Lesser General Public License for more details.
  44
  45 You should have received a copy of the GNU Lesser General Public
  46 License along with %(project)s.  If not, see
  47 <http://www.gnu.org/licenses/>.
  48 """.strip()
  49
  50 COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
  51
  52 ALIASES = {
  53     'Alberto Gomez-Casado':
  54         ['albertogomcas'],
  55     'Massimo Sandal <devicerandom@gmail.com>':
  56         ['devicerandom',
  57          'unknown'],
  58     'Fabrizio Benedetti':['fabrizio.benedetti'],
  59     'il':['illysam'],
  60     'Marco Brucale':['marcobrucale'],
  61     'pp':['pancaldi.paolo'],
  62     }
  63
  64 IGNORED_PATHS = ['./.hg/', './doc/img', './test/data/',
  65                  './build/', '/doc/build/']
  66 IGNORED_FILES = ['COPYING', 'COPYING.LESSER']
  67
  68
  69 # VCS-specific commands
  70
  71 def mercurial_cmd(*args):
  72     cwd = os.getcwd()
  73     stdout = sys.stdout
  74     stderr = sys.stderr
  75     tmp_stdout = StringIO.StringIO()
  76     tmp_stderr = StringIO.StringIO()
  77     sys.stdout = tmp_stdout
  78     sys.stderr = tmp_stderr
  79     try:
  80         mercurial.dispatch.dispatch(list(args))
  81     finally:
  82         os.chdir(cwd)
  83         sys.stdout = stdout
  84         sys.stderr = stderr
  85     return (tmp_stdout.getvalue().rstrip('\n'),
  86             tmp_stderr.getvalue().rstrip('\n'))
  87
  88 def original_year(filename):
  89     # shortdate filter: YEAR-MONTH-DAY
  90     output,error = mercurial_cmd('log', '--follow',
  91                                  '--template', '{date|shortdate}\n',
  92                                  filename)
  93     years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
  94     years.sort()
  95     return years[0]
  96
  97 def authors(filename):
  98     output,error = mercurial_cmd('log', '--follow',
  99                                  '--template', '{author}\n',
 100                                  filename)
 101     return list(set(output.splitlines()))
 102
 103 def authors_list():
 104     output,error = mercurial_cmd('log', '--follow',
 105                                  '--template', '{author}\n')
 106     return list(set(output.splitlines()))
 107
 108 def is_versioned(filename):
 109     output,error = mercurial_cmd('log', '--follow',
 110                                  '--template', '{date|shortdate}\n',
 111                                  filename)
 112     if len(error) > 0:
 113         return False
 114     return True
 115
 116 # General utility commands
 117
 118 def _strip_email(*args):
 119     """Remove email addresses from a series of names.
 120
 121     Examples
 122     --------
 123
 124     >>> _strip_email('J Doe <jdoe@a.com>')
 125     ['J Doe']
 126     >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
 127     ['J Doe', 'JJJ Smith']
 128     """
 129     args = list(args)
 130     for i,arg in enumerate(args):
 131         if arg == None:
 132             continue
 133         author,addr = email.utils.parseaddr(arg)
 134         args[i] = author
 135     return args
 136
 137 def _reverse_aliases(aliases):
 138     """Reverse an `aliases` dict.
 139
 140     Input:   key: canonical name,  value: list of aliases
 141     Output:  key: alias,           value: canonical name
 142
 143     Examples
 144     --------
 145
 146     >>> aliases = {
 147     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
 148     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 149     ...     None:['Anonymous <a@a.com>'],
 150     ...     }
 151     >>> r = _reverse_aliases(aliases)
 152     >>> for item in sorted(r.items()):
 153     ...     print item
 154     ('Anonymous <a@a.com>', None)
 155     ('J', 'J Doe <jdoe@a.com>')
 156     ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
 157     ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
 158     """
 159     output = {}
 160     for canonical_name,_aliases in aliases.items():
 161         for alias in _aliases:
 162             output[alias] = canonical_name
 163     return output
 164
 165 def _replace_aliases(authors, with_email=True, aliases=None):
 166     """Consolidate and sort `authors`.
 167
 168     Make the replacements listed in the `aliases` dict (key: canonical
 169     name, value: list of aliases).  If `aliases` is ``None``, default
 170     to ``ALIASES``.
 171
 172     >>> aliases = {
 173     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
 174     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 175     ...     None:['Anonymous <a@a.com>'],
 176     ...     }
 177     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 178     ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
 179     ...                  with_email=True, aliases=aliases)
 180     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 181     >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
 182     ...                  with_email=False, aliases=aliases)
 183     ['J Doe', 'JJJ Smith']
 184     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 185     ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>'],
 186     ...                  with_email=True, aliases=aliases)
 187     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 188     """
 189     if aliases == None:
 190         aliases = ALIASES
 191     if with_email == False:
 192         aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
 193                         for author,_aliases in aliases.items()])
 194     rev_aliases = _reverse_aliases(aliases)
 195     for i,author in enumerate(authors):
 196         if author in rev_aliases:
 197             authors[i] = rev_aliases[author]
 198     authors = sorted(list(set(authors)))
 199     if None in authors:
 200         authors.remove(None)
 201     return authors
 202
 203 def _copyright_string(original_year, final_year, authors, prefix=''):
 204     """
 205     >>> print _copyright_string(original_year=2005,
 206     ...                         final_year=2005,
 207     ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
 208     ...                         prefix='# '
 209     ...                        ) # doctest: +ELLIPSIS
 210     # Copyright (C) 2005 A <a@a.com>
 211     #                    B <b@b.edu>
 212     #
 213     # This file...
 214     >>> print _copyright_string(original_year=2005,
 215     ...                         final_year=2009,
 216     ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
 217     ...                        ) # doctest: +ELLIPSIS
 218     Copyright (C) 2005-2009 A <a@a.com>
 219                             B <b@b.edu>
 220     <BLANKLINE>
 221     This file...
 222     """
 223     if original_year == final_year:
 224         date_range = '%s' % original_year
 225     else:
 226         date_range = '%s-%s' % (original_year, final_year)
 227     lines = ['Copyright (C) %s %s' % (date_range, authors[0])]
 228     for author in authors[1:]:
 229         lines.append(' '*(len('Copyright (C) ')+len(date_range)+1) +
 230                      author)
 231     lines.append('')
 232     lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
 233     for i,line in enumerate(lines):
 234         lines[i] = (prefix + line).rstrip()
 235     return '\n'.join(lines)
 236
 237 def _tag_copyright(contents):
 238     """
 239     >>> contents = '''Some file
 240     ... bla bla
 241     ... # Copyright (copyright begins)
 242     ... # (copyright continues)
 243     ... # bla bla bla
 244     ... (copyright ends)
 245     ... bla bla bla
 246     ... '''
 247     >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
 248     Some file
 249     bla bla
 250     -xyz-CR-zyx-
 251     (copyright ends)
 252     bla bla bla
 253     <BLANKLINE>
 254     """
 255     lines = []
 256     incopy = False
 257     for line in contents.splitlines():
 258         if incopy == False and line.startswith('# Copyright'):
 259             incopy = True
 260             lines.append(COPY_RIGHT_TAG)
 261         elif incopy == True and not line.startswith('#'):
 262             incopy = False
 263         if incopy == False:
 264             lines.append(line.rstrip('\n'))
 265     return '\n'.join(lines)+'\n'
 266
 267 def _update_copyright(contents, original_year, authors):
 268     """
 269     >>> contents = '''Some file
 270     ... bla bla
 271     ... # Copyright (copyright begins)
 272     ... # (copyright continues)
 273     ... # bla bla bla
 274     ... (copyright ends)
 275     ... bla bla bla
 276     ... '''
 277     >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
 278     ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
 279     Some file
 280     bla bla
 281     # Copyright (C) 2008-... Jack
 282     #                         Jill
 283     #
 284     # This file...
 285     (copyright ends)
 286     bla bla bla
 287     <BLANKLINE>
 288     """
 289     current_year = time.gmtime()[0]
 290     copyright_string = _copyright_string(
 291         original_year, current_year, authors, prefix='# ')
 292     contents = _tag_copyright(contents)
 293     return contents.replace(COPY_RIGHT_TAG, copyright_string)
 294
 295 def ignored_file(filename, ignored_paths=None, ignored_files=None,
 296                  check_disk=True, check_vcs=True):
 297     """
 298     >>> ignored_paths = ['./a/', './b/']
 299     >>> ignored_files = ['x', 'y']
 300     >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
 301     True
 302     >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
 303     False
 304     >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
 305     True
 306     >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
 307     False
 308     >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
 309     False
 310     """
 311     if ignored_paths == None:
 312         ignored_paths = IGNORED_PATHS
 313     if ignored_files == None:
 314         ignored_files = IGNORED_FILES
 315     if check_disk == True and os.path.isfile(filename) == False:
 316         return True
 317     for path in ignored_paths:
 318         if filename.startswith(path):
 319             return True
 320     if os.path.basename(filename) in ignored_files:
 321         return True
 322     if check_vcs == True and is_versioned(filename) == False:
 323         return True
 324     return False
 325
 326 def _set_contents(filename, contents, original_contents=None, dry_run=False,
 327                   verbose=0):
 328     if original_contents == None and os.path.isfile(filename):
 329         f = open(filename, 'r')
 330         original_contents = f.read()
 331         f.close()
 332     if verbose > 0:
 333         print "checking %s ... " % filename,
 334     if contents != original_contents:
 335         if verbose > 0:
 336             if original_contents == None:
 337                 print "[creating]"
 338             else:
 339                 print "[updating]"
 340         if verbose > 1 and original_contents != None:
 341             print '\n'.join(
 342                 difflib.unified_diff(
 343                     original_contents.splitlines(), contents.splitlines(),
 344                     fromfile=os.path.normpath(os.path.join('a', filename)),
 345                     tofile=os.path.normpath(os.path.join('b', filename)),
 346                     n=3, lineterm=''))
 347         if dry_run == False:
 348             f = file(filename, 'w')
 349             f.write(contents)
 350             f.close()
 351     elif verbose > 0:
 352         print "[no change]"
 353
 354 # Update commands
 355
 356 def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
 357     new_contents = '%s was written by:\n%s\n' % (
 358         PROJECT_INFO['project'],
 359         '\n'.join(authors_fn())
 360         )
 361     _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
 362
 363 def update_file(filename, original_year_fn=original_year, authors_fn=authors,
 364                 dry_run=False, verbose=0):
 365     f = file(filename, 'r')
 366     contents = f.read()
 367     f.close()
 368
 369     original_year = original_year_fn(filename)
 370     authors = authors_fn(filename)
 371     authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
 372
 373     new_contents = _update_copyright(contents, original_year, authors)
 374     _set_contents(filename, contents=new_contents, original_contents=contents,
 375                   dry_run=dry_run, verbose=verbose)
 376
 377 def update_files(files=None, dry_run=False, verbose=0):
 378     if files == None or len(files) == 0:
 379         files = []
 380         for dirpath,dirnames,filenames in os.walk('.'):
 381             for filename in filenames:
 382                 files.append(os.path.join(dirpath, filename))
 383
 384     for filename in files:
 385         if ignored_file(filename) == True:
 386             continue
 387         update_file(filename, dry_run=dry_run, verbose=verbose)
 388
 389 def test():
 390     import doctest
 391     doctest.testmod()
 392
 393 if __name__ == '__main__':
 394     import optparse
 395     import sys
 396
 397     usage = """%%prog [options] [file ...]
 398
 399 Update copyright information in source code with information from
 400 the %(vcs)s repository.  Run from the %(project)s repository root.
 401
 402 Replaces every line starting with '^# Copyright' and continuing with
 403 '^#' with an auto-generated copyright blurb.  If you want to add
 404 #-commented material after a copyright blurb, please insert a blank
 405 line between the blurb and your comment, so the next run of
 406 ``update_copyright.py`` doesn't clobber your comment.
 407
 408 If no files are given, a list of files to update is generated
 409 automatically.
 410 """ % PROJECT_INFO
 411     p = optparse.OptionParser(usage)
 412     p.add_option('--test', dest='test', default=False,
 413                  action='store_true', help='Run internal tests and exit')
 414     p.add_option('--dry-run', dest='dry_run', default=False,
 415                  action='store_true', help="Don't make any changes")
 416     p.add_option('-v', '--verbose', dest='verbose', default=0,
 417                  action='count', help='Increment verbosity')
 418     options,args = p.parse_args()
 419
 420     if options.test == True:
 421         test()
 422         sys.exit(0)
 423
 424     update_authors(dry_run=options.dry_run, verbose=options.verbose)
 425     update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)