update_copyright.py

   1 #!/usr/bin/python
   2 #
   3 # COPYRIGHT
   4
   5 """Automatically update copyright boilerplate.
   6
   7 This script is adapted from one written for `Bugs Everywhere`_.
   8
   9 .. _Bugs Everywhere: http://bugseverywhere.org/
  10 """
  11
  12 import difflib
  13 import email.utils
  14 import os
  15 import os.path
  16 import re
  17 import StringIO
  18 import sys
  19 import time
  20
  21 import mercurial
  22 import mercurial.dispatch
  23
  24
  25 PROJECT_INFO = {
  26     'project': 'Hooke',
  27     'vcs': 'Mercurial',
  28     }
  29
  30 # Break "copyright" into "copy" and "right" to avoid matching the
  31 # REGEXP.
  32 COPY_RIGHT_TEXT="""
  33 This file is part of %(project)s.
  34
  35 %(project)s is free software: you can redistribute it and/or
  36 modify it under the terms of the GNU Lesser General Public
  37 License as published by the Free Software Foundation, either
  38 version 3 of the License, or (at your option) any later version.
  39
  40 %(project)s is distributed in the hope that it will be useful,
  41 but WITHOUT ANY WARRANTY; without even the implied warranty of
  42 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  43 GNU Lesser General Public License for more details.
  44
  45 You should have received a copy of the GNU Lesser General Public
  46 License along with %(project)s.  If not, see
  47 <http://www.gnu.org/licenses/>.
  48 """.strip()
  49
  50 COPY_RIGHT_TAG='-xyz-COPY-RIGHT-zyx-' # unlikely to occur in the wild :p
  51
  52 ALIASES = {
  53     'Alberto Gomez-Casado':
  54         ['albertogomcas'],
  55     'Massimo Sandal <devicerandom@gmail.com>':
  56         ['devicerandom',
  57          'unknown'],
  58     'Fabrizio Benedetti':['fabrizio.benedetti'],
  59     'il':['illysam'],
  60     'Marco Brucale':['marcobrucale'],
  61     'pp':['pancaldi.paolo'],
  62     }
  63
  64 IGNORED_PATHS = ['./.hg/', './doc/img', './test/data/',
  65                  './build/', '/doc/build/']
  66 IGNORED_FILES = ['COPYING', 'COPYING.LESSER']
  67
  68
  69 # VCS-specific commands
  70
  71 def mercurial_cmd(*args):
  72     cwd = os.getcwd()
  73     stdout = sys.stdout
  74     stderr = sys.stderr
  75     tmp_stdout = StringIO.StringIO()
  76     tmp_stderr = StringIO.StringIO()
  77     sys.stdout = tmp_stdout
  78     sys.stderr = tmp_stderr
  79     try:
  80         mercurial.dispatch.dispatch(list(args))
  81     finally:
  82         os.chdir(cwd)
  83         sys.stdout = stdout
  84         sys.stderr = stderr
  85     return (tmp_stdout.getvalue().rstrip('\n'),
  86             tmp_stderr.getvalue().rstrip('\n'))
  87
  88 def original_year(filename):
  89     # shortdate filter: YEAR-MONTH-DAY
  90     output,error = mercurial_cmd('log', '--follow',
  91                                  '--template', '{date|shortdate}\n',
  92                                  filename)
  93     years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
  94     years.sort()
  95     return years[0]
  96
  97 def authors(filename):
  98     output,error = mercurial_cmd('log', '--follow',
  99                                  '--template', '{author}\n',
 100                                  filename)
 101     return list(set(output.splitlines()))
 102
 103 def authors_list():
 104     output,error = mercurial_cmd('log', '--follow',
 105                                  '--template', '{author}\n')
 106     return list(set(output.splitlines()))
 107
 108 def is_versioned(filename):
 109     output,error = mercurial_cmd('log', '--follow',
 110                                  '--template', '{date|shortdate}\n',
 111                                  filename)
 112     if len(error) > 0:
 113         return False
 114     return True
 115
 116 # General utility commands
 117
 118 def _strip_email(*args):
 119     """Remove email addresses from a series of names.
 120
 121     Examples
 122     --------
 123
 124     >>> _strip_email('J Doe <jdoe@a.com>')
 125     ['J Doe']
 126     >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
 127     ['J Doe', 'JJJ Smith']
 128     """
 129     args = list(args)
 130     for i,arg in enumerate(args):
 131         if arg == None:
 132             continue
 133         author,addr = email.utils.parseaddr(arg)
 134         args[i] = author
 135     return args
 136
 137 def _reverse_aliases(aliases):
 138     """Reverse an `aliases` dict.
 139
 140     Input:   key: canonical name,  value: list of aliases
 141     Output:  key: alias,           value: canonical name
 142
 143     Examples
 144     --------
 145
 146     >>> aliases = {
 147     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
 148     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 149     ...     None:['Anonymous <a@a.com>'],
 150     ...     }
 151     >>> r = _reverse_aliases(aliases)
 152     >>> for item in sorted(r.items()):
 153     ...     print item
 154     ('Anonymous <a@a.com>', None)
 155     ('J', 'J Doe <jdoe@a.com>')
 156     ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
 157     ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
 158     """
 159     output = {}
 160     for canonical_name,_aliases in aliases.items():
 161         for alias in _aliases:
 162             output[alias] = canonical_name
 163     return output
 164
 165 def _replace_aliases(authors, with_email=True, aliases=None):
 166     """Consolidate and sort `authors`.
 167
 168     Make the replacements listed in the `aliases` dict (key: canonical
 169     name, value: list of aliases).  If `aliases` is ``None``, default
 170     to ``ALIASES``.
 171
 172     >>> aliases = {
 173     ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
 174     ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
 175     ...     None:['Anonymous <a@a.com>'],
 176     ...     }
 177     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 178     ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
 179     ...                  with_email=True, aliases=aliases)
 180     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 181     >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
 182     ...                  with_email=False, aliases=aliases)
 183     ['J Doe', 'JJJ Smith']
 184     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 185     ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>'],
 186     ...                  with_email=True, aliases=aliases)
 187     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 188     """
 189     if aliases == None:
 190         aliases = ALIASES
 191     if with_email == False:
 192         aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
 193                         for author,_aliases in aliases.items()])
 194     rev_aliases = _reverse_aliases(aliases)
 195     for i,author in enumerate(authors):
 196         if author in rev_aliases:
 197             authors[i] = rev_aliases[author]
 198     authors = sorted(list(set(authors)))
 199     if None in authors:
 200         authors.remove(None)
 201     return authors
 202
 203 def _copyright_string(original_year, final_year, authors, prefix=''):
 204     """
 205     >>> print _copyright_string(original_year=2005,
 206     ...                         final_year=2005,
 207     ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
 208     ...                         prefix='# '
 209     ...                        ) # doctest: +ELLIPSIS
 210     # Copyright (C) 2005 A <a@a.com>
 211     #                    B <b@b.edu>
 212     #
 213     # This file...
 214     >>> print _copyright_string(original_year=2005,
 215     ...                         final_year=2009,
 216     ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
 217     ...                        ) # doctest: +ELLIPSIS
 218     Copyright (C) 2005-2009 A <a@a.com>
 219                             B <b@b.edu>
 220     <BLANKLINE>
 221     This file...
 222     """
 223     if original_year == final_year:
 224         date_range = '%s' % original_year
 225     else:
 226         date_range = '%s-%s' % (original_year, final_year)
 227     lines = ['Copyright (C) %s %s' % (date_range, authors[0])]
 228     for author in authors[1:]:
 229         lines.append(' '*(len('Copyright (C) ')+len(date_range)+1) +
 230                      author)
 231     lines.append('')
 232     lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
 233     for i,line in enumerate(lines):
 234         lines[i] = (prefix + line).rstrip()
 235     return '\n'.join(lines)
 236
 237 def _tag_copyright(contents):
 238     """
 239     >>> contents = '''Some file
 240     ... bla bla
 241     ... # Copyright (copyright begins)
 242     ... # (copyright continues)
 243     ... # bla bla bla
 244     ... (copyright ends)
 245     ... bla bla bla
 246     ... '''
 247     >>> print _tag_copyright(contents),
 248     Some file
 249     bla bla
 250     -xyz-COPY-RIGHT-zyx-
 251     (copyright ends)
 252     bla bla bla
 253     """
 254     lines = []
 255     incopy = False
 256     for line in contents.splitlines():
 257         if incopy == False and line.startswith('# Copyright'):
 258             incopy = True
 259             lines.append(COPY_RIGHT_TAG)
 260         elif incopy == True and not line.startswith('#'):
 261             incopy = False
 262         if incopy == False:
 263             lines.append(line.rstrip('\n'))
 264     return '\n'.join(lines)+'\n'
 265
 266 def _update_copyright(contents, original_year, authors):
 267     """
 268     >>> contents = '''Some file
 269     ... bla bla
 270     ... # Copyright (copyright begins)
 271     ... # (copyright continues)
 272     ... # bla bla bla
 273     ... (copyright ends)
 274     ... bla bla bla
 275     ... '''
 276     >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
 277     ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
 278     Some file
 279     bla bla
 280     # Copyright (C) 2008-... Jack
 281     #                         Jill
 282     #
 283     # This file...
 284     (copyright ends)
 285     bla bla bla
 286     <BLANKLINE>
 287     """
 288     current_year = time.gmtime()[0]
 289     copyright_string = _copyright_string(
 290         original_year, current_year, authors, prefix='# ')
 291     contents = _tag_copyright(contents)
 292     return contents.replace(COPY_RIGHT_TAG, copyright_string)
 293
 294 def ignored_file(filename, ignored_paths=None, ignored_files=None):
 295     """
 296     >>> ignored_paths = ['./a/', './b/']
 297     >>> ignored_files = ['x', 'y']
 298     >>> ignored_file('./a/z', ignored_paths, ignored_files)
 299     True
 300     >>> ignored_file('./ab/z', ignored_paths, ignored_files)
 301     False
 302     >>> ignored_file('./ab/x', ignored_paths, ignored_files)
 303     True
 304     >>> ignored_file('./ab/xy', ignored_paths, ignored_files)
 305     False
 306     >>> ignored_file('./z', ignored_paths, ignored_files)
 307     False
 308     """
 309     if ignored_paths == None:
 310         ignored_paths = IGNORED_PATHS
 311     if ignored_files == None:
 312         ignored_files = IGNORED_FILES
 313     if os.path.isfile(filename) == False:
 314         return True
 315     for path in ignored_paths:
 316         if filename.startswith(path):
 317             return True
 318     if os.path.basename(filename) in ignored_files:
 319         return True
 320     if is_versioned(filename) == False:
 321         return True
 322     return False
 323
 324 def _set_contents(filename, contents, original_contents=None, dry_run=False,
 325                   verbose=0):
 326     if original_contents == None and os.path.isfile(filename):
 327         f = open(filename, 'r')
 328         original_contents = f.read()
 329         f.close()
 330     if verbose > 0:
 331         print "checking %s ... " % filename,
 332     if contents != original_contents:
 333         if verbose > 0:
 334             if original_contents == None:
 335                 print "[creating]"
 336             else:
 337                 print "[updating]"
 338         if verbose > 1 and original_contents != None:
 339             print '\n'.join(
 340                 difflib.unified_diff(
 341                     original_contents.splitlines(), contents.splitlines(),
 342                     fromfile=os.path.normpath(os.path.join('a', filename)),
 343                     tofile=os.path.normpath(os.path.join('b', filename)),
 344                     n=3, lineterm=''))
 345         if dry_run == False:
 346             f = file(filename, 'w')
 347             f.write(contents)
 348             f.close()
 349     elif verbose > 0:
 350         print "[no change]"
 351
 352 # Update commands
 353
 354 def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
 355     new_contents = '%s was written by:\n%s\n' % (
 356         PROJECT_INFO['project'],
 357         '\n'.join(authors_fn())
 358         )
 359     _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
 360
 361 def update_file(filename, original_year_fn=original_year, authors_fn=authors,
 362                 dry_run=False, verbose=0):
 363     f = file(filename, 'r')
 364     contents = f.read()
 365     f.close()
 366
 367     original_year = original_year_fn(filename)
 368     authors = authors_fn(filename)
 369     authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
 370
 371     new_contents = _update_copyright(contents, original_year, authors)
 372     _set_contents(filename, contents=new_contents, original_contents=contents,
 373                   dry_run=dry_run, verbose=verbose)
 374
 375 def update_files(files=None, dry_run=False, verbose=0):
 376     if files == None or len(files) == 0:
 377         files = []
 378         for dirpath,dirnames,filenames in os.walk('.'):
 379             for filename in filenames:
 380                 files.append(os.path.join(dirpath, filename))
 381
 382     for filename in files:
 383         if ignored_file(filename) == True:
 384             continue
 385         update_file(filename, dry_run=dry_run, verbose=verbose)
 386
 387 def test():
 388     import doctest
 389     doctest.testmod()
 390
 391 if __name__ == '__main__':
 392     import optparse
 393     import sys
 394
 395     usage = """%%prog [options] [file ...]
 396
 397 Update copyright information in source code with information from
 398 the %(vcs)s repository.  Run from the %(project)s repository root.
 399
 400 Replaces every line starting with '^# Copyright' and continuing with
 401 '^#' with an auto-generated copyright blurb.  If you want to add
 402 #-commented material after a copyright blurb, please insert a blank
 403 line between the blurb and your comment, so the next run of
 404 ``update_copyright.py`` doesn't clobber your comment.
 405
 406 If no files are given, a list of files to update is generated
 407 automatically.
 408 """ % PROJECT_INFO
 409     p = optparse.OptionParser(usage)
 410     p.add_option('--test', dest='test', default=False,
 411                  action='store_true', help='Run internal tests and exit')
 412     p.add_option('--dry-run', dest='dry_run', default=False,
 413                  action='store_true', help="Don't make any changes")
 414     p.add_option('-v', '--verbose', dest='verbose', default=0,
 415                  action='count', help='Increment verbosity')
 416     options,args = p.parse_args()
 417
 418     if options.test == True:
 419         test()
 420         sys.exit(0)
 421
 422     update_authors(dry_run=options.dry_run, verbose=options.verbose)
 423     update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)