#!/usr/bin/python
#
# Copyright
"""Automatically update copyright boilerplate.
This script is adapted from one written for `Bugs Everywhere`_.
.. _Bugs Everywhere: http://bugseverywhere.org/
"""
import difflib
import email.utils
import os
import os.path
import re
import StringIO
import sys
import time
import mercurial
import mercurial.dispatch
PROJECT_INFO = {
'project': 'Hooke',
'vcs': 'Mercurial',
}
# Break "copyright" into "copy" and "right" to avoid matching the
# REGEXP.
COPY_RIGHT_TEXT="""
This file is part of %(project)s.
%(project)s is free software: you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later version.
%(project)s is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with %(project)s. If not, see
.
""".strip()
COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
ALIASES = {
'Alberto Gomez-Casado':
['albertogomcas'],
'Massimo Sandal ':
['devicerandom',
'unknown'],
'Fabrizio Benedetti':['fabrizio.benedetti'],
'il':['illysam'],
'Marco Brucale':['marcobrucale'],
'pp':['pancaldi.paolo'],
}
IGNORED_PATHS = ['./.hg/', './doc/img', './test/data/',
'./build/', '/doc/build/']
IGNORED_FILES = ['COPYING', 'COPYING.LESSER']
# VCS-specific commands
def mercurial_cmd(*args):
cwd = os.getcwd()
stdout = sys.stdout
stderr = sys.stderr
tmp_stdout = StringIO.StringIO()
tmp_stderr = StringIO.StringIO()
sys.stdout = tmp_stdout
sys.stderr = tmp_stderr
try:
mercurial.dispatch.dispatch(list(args))
finally:
os.chdir(cwd)
sys.stdout = stdout
sys.stderr = stderr
return (tmp_stdout.getvalue().rstrip('\n'),
tmp_stderr.getvalue().rstrip('\n'))
def original_year(filename):
# shortdate filter: YEAR-MONTH-DAY
output,error = mercurial_cmd('log', '--follow',
'--template', '{date|shortdate}\n',
filename)
years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
years.sort()
return years[0]
def authors(filename):
output,error = mercurial_cmd('log', '--follow',
'--template', '{author}\n',
filename)
return list(set(output.splitlines()))
def authors_list():
output,error = mercurial_cmd('log', '--follow',
'--template', '{author}\n')
return list(set(output.splitlines()))
def is_versioned(filename):
output,error = mercurial_cmd('log', '--follow',
'--template', '{date|shortdate}\n',
filename)
if len(error) > 0:
return False
return True
# General utility commands
def _strip_email(*args):
"""Remove email addresses from a series of names.
Examples
--------
>>> _strip_email('J Doe ')
['J Doe']
>>> _strip_email('J Doe ', 'JJJ Smith ')
['J Doe', 'JJJ Smith']
"""
args = list(args)
for i,arg in enumerate(args):
if arg == None:
continue
author,addr = email.utils.parseaddr(arg)
args[i] = author
return args
def _reverse_aliases(aliases):
"""Reverse an `aliases` dict.
Input: key: canonical name, value: list of aliases
Output: key: alias, value: canonical name
Examples
--------
>>> aliases = {
... 'J Doe ':['Johnny ', 'J'],
... 'JJJ Smith ':['Jingly '],
... None:['Anonymous '],
... }
>>> r = _reverse_aliases(aliases)
>>> for item in sorted(r.items()):
... print item
('Anonymous ', None)
('J', 'J Doe ')
('Jingly ', 'JJJ Smith ')
('Johnny ', 'J Doe ')
"""
output = {}
for canonical_name,_aliases in aliases.items():
for alias in _aliases:
output[alias] = canonical_name
return output
def _replace_aliases(authors, with_email=True, aliases=None):
"""Consolidate and sort `authors`.
Make the replacements listed in the `aliases` dict (key: canonical
name, value: list of aliases). If `aliases` is ``None``, default
to ``ALIASES``.
>>> aliases = {
... 'J Doe ':['Johnny '],
... 'JJJ Smith ':['Jingly '],
... None:['Anonymous '],
... }
>>> _replace_aliases(['JJJ Smith ', 'Johnny ',
... 'Jingly ', 'Anonymous '],
... with_email=True, aliases=aliases)
['J Doe ', 'JJJ Smith ']
>>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
... with_email=False, aliases=aliases)
['J Doe', 'JJJ Smith']
>>> _replace_aliases(['JJJ Smith ', 'Johnny ',
... 'Jingly ', 'J Doe '],
... with_email=True, aliases=aliases)
['J Doe ', 'JJJ Smith ']
"""
if aliases == None:
aliases = ALIASES
if with_email == False:
aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
for author,_aliases in aliases.items()])
rev_aliases = _reverse_aliases(aliases)
for i,author in enumerate(authors):
if author in rev_aliases:
authors[i] = rev_aliases[author]
authors = sorted(list(set(authors)))
if None in authors:
authors.remove(None)
return authors
def _copyright_string(original_year, final_year, authors, prefix=''):
"""
>>> print _copyright_string(original_year=2005,
... final_year=2005,
... authors=['A ', 'B '],
... prefix='# '
... ) # doctest: +ELLIPSIS
# Copyright (C) 2005 A
# B
#
# This file...
>>> print _copyright_string(original_year=2005,
... final_year=2009,
... authors=['A ', 'B ']
... ) # doctest: +ELLIPSIS
Copyright (C) 2005-2009 A
B
This file...
"""
if original_year == final_year:
date_range = '%s' % original_year
else:
date_range = '%s-%s' % (original_year, final_year)
lines = ['Copyright (C) %s %s' % (date_range, authors[0])]
for author in authors[1:]:
lines.append(' '*(len('Copyright (C) ')+len(date_range)+1) +
author)
lines.append('')
lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
for i,line in enumerate(lines):
lines[i] = (prefix + line).rstrip()
return '\n'.join(lines)
def _tag_copyright(contents):
"""
>>> contents = '''Some file
... bla bla
... # Copyright (copyright begins)
... # (copyright continues)
... # bla bla bla
... (copyright ends)
... bla bla bla
... '''
>>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
Some file
bla bla
-xyz-CR-zyx-
(copyright ends)
bla bla bla
"""
lines = []
incopy = False
for line in contents.splitlines():
if incopy == False and line.startswith('# Copyright'):
incopy = True
lines.append(COPY_RIGHT_TAG)
elif incopy == True and not line.startswith('#'):
incopy = False
if incopy == False:
lines.append(line.rstrip('\n'))
return '\n'.join(lines)+'\n'
def _update_copyright(contents, original_year, authors):
"""
>>> contents = '''Some file
... bla bla
... # Copyright (copyright begins)
... # (copyright continues)
... # bla bla bla
... (copyright ends)
... bla bla bla
... '''
>>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
... ) # doctest: +ELLIPSIS, +REPORT_UDIFF
Some file
bla bla
# Copyright (C) 2008-... Jack
# Jill
#
# This file...
(copyright ends)
bla bla bla
"""
current_year = time.gmtime()[0]
copyright_string = _copyright_string(
original_year, current_year, authors, prefix='# ')
contents = _tag_copyright(contents)
return contents.replace(COPY_RIGHT_TAG, copyright_string)
def ignored_file(filename, ignored_paths=None, ignored_files=None,
check_disk=True, check_vcs=True):
"""
>>> ignored_paths = ['./a/', './b/']
>>> ignored_files = ['x', 'y']
>>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
True
>>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
False
>>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
True
>>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
False
>>> ignored_file('./z', ignored_paths, ignored_files, False, False)
False
"""
if ignored_paths == None:
ignored_paths = IGNORED_PATHS
if ignored_files == None:
ignored_files = IGNORED_FILES
if check_disk == True and os.path.isfile(filename) == False:
return True
for path in ignored_paths:
if filename.startswith(path):
return True
if os.path.basename(filename) in ignored_files:
return True
if check_vcs == True and is_versioned(filename) == False:
return True
return False
def _set_contents(filename, contents, original_contents=None, dry_run=False,
verbose=0):
if original_contents == None and os.path.isfile(filename):
f = open(filename, 'r')
original_contents = f.read()
f.close()
if verbose > 0:
print "checking %s ... " % filename,
if contents != original_contents:
if verbose > 0:
if original_contents == None:
print "[creating]"
else:
print "[updating]"
if verbose > 1 and original_contents != None:
print '\n'.join(
difflib.unified_diff(
original_contents.splitlines(), contents.splitlines(),
fromfile=os.path.normpath(os.path.join('a', filename)),
tofile=os.path.normpath(os.path.join('b', filename)),
n=3, lineterm=''))
if dry_run == False:
f = file(filename, 'w')
f.write(contents)
f.close()
elif verbose > 0:
print "[no change]"
# Update commands
def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
new_contents = '%s was written by:\n%s\n' % (
PROJECT_INFO['project'],
'\n'.join(authors_fn())
)
_set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
def update_file(filename, original_year_fn=original_year, authors_fn=authors,
dry_run=False, verbose=0):
f = file(filename, 'r')
contents = f.read()
f.close()
original_year = original_year_fn(filename)
authors = authors_fn(filename)
authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
new_contents = _update_copyright(contents, original_year, authors)
_set_contents(filename, contents=new_contents, original_contents=contents,
dry_run=dry_run, verbose=verbose)
def update_files(files=None, dry_run=False, verbose=0):
if files == None or len(files) == 0:
files = []
for dirpath,dirnames,filenames in os.walk('.'):
for filename in filenames:
files.append(os.path.join(dirpath, filename))
for filename in files:
if ignored_file(filename) == True:
continue
update_file(filename, dry_run=dry_run, verbose=verbose)
def test():
import doctest
doctest.testmod()
if __name__ == '__main__':
import optparse
import sys
usage = """%%prog [options] [file ...]
Update copyright information in source code with information from
the %(vcs)s repository. Run from the %(project)s repository root.
Replaces every line starting with '^# Copyright' and continuing with
'^#' with an auto-generated copyright blurb. If you want to add
#-commented material after a copyright blurb, please insert a blank
line between the blurb and your comment, so the next run of
``update_copyright.py`` doesn't clobber your comment.
If no files are given, a list of files to update is generated
automatically.
""" % PROJECT_INFO
p = optparse.OptionParser(usage)
p.add_option('--test', dest='test', default=False,
action='store_true', help='Run internal tests and exit')
p.add_option('--dry-run', dest='dry_run', default=False,
action='store_true', help="Don't make any changes")
p.add_option('-v', '--verbose', dest='verbose', default=0,
action='count', help='Increment verbosity')
options,args = p.parse_args()
if options.test == True:
test()
sys.exit(0)
update_authors(dry_run=options.dry_run, verbose=options.verbose)
update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)