3 # Copyright (C) 2009-2012 W. Trevor King <wking@drexel.edu>
5 # This file is part of update-copyright.
7 # update-copyright is free software: you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # update-copyright is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with update-copyright. If not, see
19 # <http://www.gnu.org/licenses/>.
21 """Automatically update copyright boilerplate.
This script is adapted from one written for `Bugs Everywhere`_ and
24 later modified for `Hooke`_ before returning to `Bugs Everywhere`_. I
25 finally gave up on maintaining separate versions, so here it is as a
28 .. _Bugs Everywhere: http://bugseverywhere.org/
29 .. _Hooke: http://code.google.com/p/hooke/
42 'project': 'update-copyright',
# Break "copyright" into "copy" and "right" to avoid matching the
# REGEXP if we decide to go back to regexps.
#
# One entry per paragraph of the long license blurb.  The ``%(...)s``
# placeholders are filled in by ``_copyright_string`` from PROJECT_INFO
# (plus any caller-supplied extra_info).
COPY_RIGHT_TEXT = [
    'This file is part of %(project)s.',
    '%(project)s is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.',
    '%(project)s is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.',
    'You should have received a copy of the GNU General Public License along with %(project)s. If not, see <http://www.gnu.org/licenses/>.'
    ]

# Condensed blurb; ``%(get-details)s`` must be supplied through
# extra_info because PROJECT_INFO does not define it.
SHORT_COPY_RIGHT_TEXT = [
    '%(project)s comes with ABSOLUTELY NO WARRANTY and is licensed under the GNU General Public License. For details, %(get-details)s.'
    ]

# Placeholder that temporarily marks where a copyright blurb was removed.
COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
61 # Convert author names to canonical forms.
62 # ALIASES[<canonical name>] = <list of aliases>
65 # 'John Doe <jdoe@a.com>':
66 # ['John Doe', 'jdoe', 'J. Doe <j@doe.net>'],
68 # Git-based projects are encouraged to use .mailmap instead of
69 # ALIASES. See git-shortlog(1) for details.
# List of paths that should not be scanned for copyright updates.
# Entries are compared against the start of each candidate filename, so
# keep the trailing slash: a bare './.git' would also swallow
# './.gitignore' and './.github/...'.
#  IGNORED_PATHS = ['./.git/']
IGNORED_PATHS = ['./.git/']
# List of files that should not be scanned for copyright updates.
# Entries are compared against the basename of each candidate filename.
#  IGNORED_FILES = ['COPYING']
IGNORED_FILES = ['COPYING']
79 # Work around missing author holes in the VCS history.
# AUTHOR_HACKS[<path tuple>] = [<missing authors>]
81 # for example, if John Doe contributed to module.py but wasn't listed
82 # in the VCS history of that file:
84 # ('path', 'to', 'module.py'):['John Doe'],
88 # Work around missing year holes in the VCS history.
89 # YEAR_HACKS[<path tuple>] = <original year>
90 # for example, if module.py was published in 2008 but the VCS history
91 # only goes back to 2010:
93 # ('path', 'to', 'module.py'):2008,
97 # Helpers for VCS-specific commands
def splitpath(path):
    """Recursively split a path into elements.

    The path is normalized with ``os.path.normpath`` first, so a leading
    ``'.'`` component and redundant separators do not show up in the
    result.  For a rooted path the root itself is returned as the first
    element.

    Returns a tuple of path components.

    Examples
    --------

    >>> splitpath(os.path.join('a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join(os.sep, 'a', 'b')) == (os.sep, 'a', 'b')
    True
    """
    path = os.path.normpath(path)
    elements = []
    while True:
        dirname, basename = os.path.split(path)
        if basename:
            elements.append(basename)
        if dirname in ('', '.'):
            break
        if dirname == path:
            # os.path.split() has stopped making progress, i.e. we hit
            # a root such as '/'.  Without this check the loop would
            # never terminate for rooted paths.
            elements.append(dirname)
            break
        path = dirname
    elements.reverse()
    return tuple(elements)
120 # VCS-specific commands
# Each branch below defines the same four entry points for one VCS:
#   original_year(filename)  -> earliest year found in the history
#   authors(filename)        -> authors who touched `filename`
#   authors_list()           -> every author in the repository history
#   is_versioned(filename)   -> whether the VCS knows about `filename`
if PROJECT_INFO['vcs'] == 'Git':

    import subprocess

    _MSWINDOWS = sys.platform == 'win32'
    _POSIX = not _MSWINDOWS

    def invoke(args, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, expect=(0,)):
        """Run the command `args` and return ``(status, stdout, stderr)``.

        `stdin` is the text sent to the child's standard input (or
        ``None``); `stdout` and `stderr` are handed to
        ``subprocess.Popen`` unchanged.  expect should be a tuple of
        allowed exit codes.

        Raises ``ValueError`` if the command cannot be started or if it
        exits with a status that is not in `expect`.
        """
        try:
            # win32 doesn't have os.execvp(), so run the command in a
            # shell there.  universal_newlines=True makes the pipes
            # text-mode, so callers get str (not bytes) on Python 3.
            q = subprocess.Popen(args, stdin=subprocess.PIPE,
                                 stdout=stdout, stderr=stderr,
                                 shell=_MSWINDOWS,
                                 universal_newlines=True)
        except OSError as e:
            raise ValueError([args, e])
        stdout,stderr = q.communicate(input=stdin)
        status = q.returncode
        if status not in expect:
            raise ValueError([args, status, stdout, stderr])
        return status, stdout, stderr

    def git_cmd(*args):
        """Run ``git`` with `args`; return its stdout without trailing newlines."""
        status,stdout,stderr = invoke(['git'] + list(args))
        return stdout.rstrip('\n')

    version = git_cmd('--version').split(' ')[-1]
    if version.startswith('1.5.'):
        # Author name <author email>
        author_format = '--pretty=format:%an <%ae>'
        year_format = ['--pretty=format:%ai']  # Author date
        # YYYY-MM-DD HH:MM:SS Z
        # Earlier versions of Git don't seem to recognize --date=short
    else:
        author_format = '--pretty=format:%aN <%aE>'
        year_format = ['--pretty=format:%ad',  # Author date
                       '--date=short']         # YYYY-MM-DD

    def original_year(filename, year_hacks=YEAR_HACKS):
        """Return the earliest author year of `filename` (or its year hack)."""
        output = git_cmd(*(['log', '--follow'] + year_format + [filename]))
        if version.startswith('1.5.'):
            # Keep only the date part of "YYYY-MM-DD HH:MM:SS Z".
            output = '\n'.join([x.split()[0] for x in output.splitlines()])
        years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
        if splitpath(filename) in year_hacks:
            years.append(year_hacks[splitpath(filename)])
        return sorted(years)[0]

    def authors(filename, author_hacks=AUTHOR_HACKS):
        """Return the distinct authors of `filename` plus any author hacks."""
        output = git_cmd('log', '--follow', author_format,
                         filename)
        ret = list(set(output.splitlines()))
        if splitpath(filename) in author_hacks:
            ret.extend(author_hacks[splitpath(filename)])
        return ret

    def authors_list(author_hacks=AUTHOR_HACKS):
        """Return the distinct authors of the whole history plus all hacks."""
        output = git_cmd('log', author_format)
        ret = list(set(output.splitlines()))
        for extra in author_hacks.values():
            ret.extend(extra)
        return ret

    def is_versioned(filename):
        """Return whether ``git log`` reports any history for `filename`."""
        output = git_cmd('log', '--follow', filename)
        # NOTE(review): the return logic is reconstructed -- an empty log
        # is taken to mean "not versioned"; confirm against upstream.
        return len(output) > 0

elif PROJECT_INFO['vcs'] == 'Mercurial':

    import StringIO
    import mercurial
    import mercurial.dispatch

    def mercurial_cmd(*args):
        """Run a Mercurial command in-process; return ``(stdout, stderr)``.

        ``sys.stdout``/``sys.stderr`` are swapped for in-memory buffers
        while the command runs, and restored (together with the working
        directory) afterwards, even if the command raises.
        """
        cwd = os.getcwd()
        stdout = sys.stdout
        stderr = sys.stderr
        tmp_stdout = StringIO.StringIO()
        tmp_stderr = StringIO.StringIO()
        sys.stdout = tmp_stdout
        sys.stderr = tmp_stderr
        try:
            mercurial.dispatch.dispatch(list(args))
        finally:
            os.chdir(cwd)
            sys.stdout = stdout
            sys.stderr = stderr
        return (tmp_stdout.getvalue().rstrip('\n'),
                tmp_stderr.getvalue().rstrip('\n'))

    def original_year(filename=None, year_hacks=YEAR_HACKS):
        """Return the earliest year for `filename`, or for the whole
        repository when `filename` is ``None``."""
        args = [
            '--template', '{date|shortdate}\n',
            # shortdate filter: YEAR-MONTH-DAY
            ]
        if filename is not None:
            args.extend(['--follow', filename])
        output,error = mercurial_cmd('log', *args)
        years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
        if filename is None:
            years.extend(year_hacks.values())
        elif splitpath(filename) in year_hacks:
            years.append(year_hacks[splitpath(filename)])
        return sorted(years)[0]

    def authors(filename, author_hacks=AUTHOR_HACKS):
        """Return the distinct authors of `filename` plus any author hacks."""
        output,error = mercurial_cmd('log', '--follow',
                                     '--template', '{author}\n',
                                     filename)
        ret = list(set(output.splitlines()))
        if splitpath(filename) in author_hacks:
            ret.extend(author_hacks[splitpath(filename)])
        return ret

    def authors_list(author_hacks=AUTHOR_HACKS):
        """Return the distinct authors of the whole history plus all hacks."""
        output,error = mercurial_cmd('log', '--template', '{author}\n')
        ret = list(set(output.splitlines()))
        for extra in author_hacks.values():
            ret.extend(extra)
        return ret

    def is_versioned(filename):
        """Return whether ``hg log`` ran without complaint for `filename`."""
        output,error = mercurial_cmd('log', '--follow', filename)
        # NOTE(review): the return logic is reconstructed -- any stderr
        # output is taken to mean "not versioned"; confirm against upstream.
        return len(error) == 0

elif PROJECT_INFO['vcs'] == 'Bazaar':

    import StringIO
    import bzrlib
    import bzrlib.builtins
    import bzrlib.log

    class LogFormatter (bzrlib.log.LogFormatter):
        """Base formatter: subclasses write one fact per logged revision."""
        supports_merge_revisions = True
        preferred_levels = 0
        supports_delta = False  # was misspelled "supports_deta"
        supports_tags = False
        supports_diff = False

        def log_revision(self, revision):
            raise NotImplementedError

    class YearLogFormatter (LogFormatter):
        """Write the (UTC) year of each revision, one per line."""
        def log_revision(self, revision):
            self.to_file.write(
                time.strftime('%Y', time.gmtime(revision.rev.timestamp))
                +'\n')

    class AuthorLogFormatter (LogFormatter):
        """Write the apparent authors of each revision, one per line."""
        def log_revision(self, revision):
            authors = revision.rev.get_apparent_authors()
            self.to_file.write('\n'.join(authors)+'\n')

    def original_year(filename=None, year_hacks=YEAR_HACKS):
        """Return the earliest year for `filename`, or for the whole
        branch when `filename` is ``None``."""
        cmd = bzrlib.builtins.cmd_log()
        cmd.outf = StringIO.StringIO()
        kwargs = {'log_format':YearLogFormatter, 'levels':0}
        if filename is not None:
            kwargs['file_list'] = [filename]
        cmd.run(**kwargs)
        years = [int(year) for year in set(cmd.outf.getvalue().splitlines())]
        if filename is None:
            # extend, not append: the hack years must be compared one by
            # one with the logged years.
            years.extend(year_hacks.values())
        elif splitpath(filename) in year_hacks:
            years.append(year_hacks[splitpath(filename)])
        return sorted(years)[0]

    def authors(filename, author_hacks=AUTHOR_HACKS):
        """Return the distinct authors of `filename` plus any author hacks."""
        cmd = bzrlib.builtins.cmd_log()
        cmd.outf = StringIO.StringIO()
        cmd.run(file_list=[filename], log_format=AuthorLogFormatter, levels=0)
        ret = list(set(cmd.outf.getvalue().splitlines()))
        if splitpath(filename) in author_hacks:
            ret.extend(author_hacks[splitpath(filename)])
        return ret

    def authors_list(author_hacks=AUTHOR_HACKS):
        """Return the distinct authors of the whole history plus all hacks."""
        cmd = bzrlib.builtins.cmd_log()
        cmd.outf = StringIO.StringIO()
        cmd.run(log_format=AuthorLogFormatter, levels=0)
        ret = list(set(cmd.outf.getvalue().splitlines()))
        for extra in author_hacks.values():
            ret.extend(extra)
        return ret

    def is_versioned(filename):
        """Return ``True`` if ``bzr log`` can be run on `filename`."""
        cmd = bzrlib.builtins.cmd_log()
        cmd.outf = StringIO.StringIO()
        cmd.run(file_list=[filename])
        # NOTE(review): the tail of this function is reconstructed;
        # presumably cmd.run() raises for unversioned paths -- confirm.
        return True

else:
    raise NotImplementedError('Unrecognized VCS: %(vcs)s' % PROJECT_INFO)
331 # General utility commands
333 def _strip_email(*args):
334 """Remove email addresses from a series of names.
339 >>> _strip_email('J Doe')
341 >>> _strip_email('J Doe <jdoe@a.com>')
343 >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
344 ['J Doe', 'JJJ Smith']
347 for i,arg in enumerate(args):
350 author,addr = email.utils.parseaddr(arg)
356 def _reverse_aliases(aliases):
357 """Reverse an `aliases` dict.
359 Input: key: canonical name, value: list of aliases
360 Output: key: alias, value: canonical name
366 ... 'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
367 ... 'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
368 ... None:['Anonymous <a@a.com>'],
370 >>> r = _reverse_aliases(aliases)
371 >>> for item in sorted(r.items()):
373 ('Anonymous <a@a.com>', None)
374 ('J', 'J Doe <jdoe@a.com>')
375 ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
376 ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
379 for canonical_name,_aliases in aliases.items():
380 for alias in _aliases:
381 output[alias] = canonical_name
def _replace_aliases(authors, with_email=True, aliases=None):
    """Consolidate and sort `authors`.

    Make the replacements listed in the `aliases` dict (key: canonical
    name, value: list of aliases).  If `aliases` is ``None``, default
    to ``ALIASES``.  Authors whose canonical name is ``None`` are
    dropped.  With `with_email` false, email addresses are stripped
    from the result.

    Returns a new sorted list; the `authors` argument is not modified.

    >>> aliases = {
    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
    ...     None:['Anonymous <a@a.com>'],
    ...     }
    >>> authors = [
    ...     'JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
    ...     'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>', 'Anonymous <a@a.com>']
    >>> _replace_aliases(authors, with_email=True, aliases=aliases)
    ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
    >>> _replace_aliases(authors, with_email=False, aliases=aliases)
    ['J Doe', 'JJJ Smith']
    """
    if aliases is None:
        aliases = ALIASES
    rev_aliases = _reverse_aliases(aliases)
    canonical = set(rev_aliases.get(author, author) for author in authors)
    # Drop the "anonymous" marker *before* sorting: None cannot be
    # ordered against strings on Python 3.
    canonical.discard(None)
    authors = sorted(canonical)
    if not with_email:
        authors = _strip_email(*authors)
    return authors
417 def _long_author_formatter(copyright_year_string, authors):
419 >>> print '\\n'.join(_long_author_formatter(
420 ... copyright_year_string='Copyright (C) 1990-2010',
421 ... authors=['Jack', 'Jill', 'John']))
422 Copyright (C) 1990-2010 Jack
426 lines = ['%s %s' % (copyright_year_string, authors[0])]
427 for author in authors[1:]:
428 lines.append(' '*(len(copyright_year_string)+1) + author)
431 def _short_author_formatter(copyright_year_string, authors):
433 >>> print '\\n'.join(_short_author_formatter(
434 ... copyright_year_string='Copyright (C) 1990-2010',
435 ... authors=['Jack', 'Jill', 'John']*5))
436 Copyright (C) 1990-2010 Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John
438 blurb = '%s %s' % (copyright_year_string, ', '.join(authors))
def _copyright_string(original_year, final_year, authors,
                      text=COPY_RIGHT_TEXT, extra_info=None,
                      author_format_fn=_long_author_formatter,
                      formatter_kwargs=None, prefix='', wrap=True,
                      **wrap_kwargs):
    """Build a complete copyright blurb.

    The blurb is the author block produced by `author_format_fn`
    followed by the paragraphs of `text`, separated by lines holding
    only the (right-stripped) `prefix`.

    * `original_year`, `final_year`: collapsed to one year when equal,
      otherwise shown as a range.
    * `text`: list of paragraphs; each is %-formatted with PROJECT_INFO
      updated by `extra_info`.
    * `formatter_kwargs`: extra keyword arguments for `author_format_fn`.
    * `prefix`: prepended to every author line and, unless overridden
      in `wrap_kwargs`, used as the indent of wrapped paragraphs.
    * `wrap`: if true, each paragraph goes through ``textwrap.fill``
      with `wrap_kwargs`.  If false, paragraphs are left alone, which
      only makes sense without an indent (checked with an assert).

    >>> print(_copyright_string(original_year=2005,
    ...                         final_year=2005,
    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
    ...                         prefix='# '
    ...                        )) # doctest: +ELLIPSIS
    # Copyright (C) 2005 A <a@a.com>
    #                    B <b@b.edu>
    #
    # This file...
    >>> print(_copyright_string(original_year=2005,
    ...                         final_year=2009,
    ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
    ...                        )) # doctest: +ELLIPSIS
    Copyright (C) 2005-2009 A <a@a.com>
                            B <b@b.edu>
    <BLANKLINE>
    This file...
    >>> print(_copyright_string(original_year=2005,
    ...                         final_year=2005,
    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
    ...                         text=SHORT_COPY_RIGHT_TEXT,
    ...                         author_format_fn=_short_author_formatter,
    ...                         extra_info={'get-details':'%(get-details)s'},
    ...                         width=50,
    ...                        ))
    Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
    <BLANKLINE>
    update-copyright comes with ABSOLUTELY NO WARRANTY
    and is licensed under the GNU General Public
    License. For details, %(get-details)s.
    >>> print(_copyright_string(original_year=2005,
    ...                         final_year=2005,
    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
    ...                         text=SHORT_COPY_RIGHT_TEXT,
    ...                         extra_info={'get-details':'%(get-details)s'},
    ...                         author_format_fn=_short_author_formatter,
    ...                         wrap=False,
    ...                        ))
    Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
    <BLANKLINE>
    update-copyright comes with ABSOLUTELY NO WARRANTY and is licensed under the GNU General Public License. For details, %(get-details)s.
    """
    if extra_info is None:
        extra_info = {}
    if formatter_kwargs is None:
        formatter_kwargs = {}
    for key in ['initial_indent', 'subsequent_indent']:
        if key not in wrap_kwargs:
            wrap_kwargs[key] = prefix

    if original_year == final_year:
        date_range = '%s' % original_year
    else:
        date_range = '%s-%s' % (original_year, final_year)
    copyright_year_string = 'Copyright (C) %s' % date_range

    lines = [prefix + line
             for line in author_format_fn(copyright_year_string, authors,
                                          **formatter_kwargs)]

    # Caller-supplied values override the project-wide ones.
    info = dict(PROJECT_INFO)
    info.update(extra_info)
    text = [paragraph % info for paragraph in text]

    if wrap:
        text = [textwrap.fill(p, **wrap_kwargs) for p in text]
    else:
        # Unwrapped paragraphs are emitted verbatim, so an indent could
        # not be honoured.
        assert wrap_kwargs['subsequent_indent'] == '', \
            wrap_kwargs['subsequent_indent']
    sep = '\n%s\n' % prefix.rstrip()
    return sep.join(['\n'.join(lines)] + text)
520 def _tag_copyright(contents):
522 >>> contents = '''Some file
524 ... # Copyright (copyright begins)
525 ... # (copyright continues)
530 >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
540 for line in contents.splitlines():
541 if incopy == False and line.startswith('# Copyright'):
543 lines.append(COPY_RIGHT_TAG)
544 elif incopy == True and not line.startswith('#'):
547 lines.append(line.rstrip('\n'))
548 return '\n'.join(lines)+'\n'
def _update_copyright(contents, original_year, authors):
    """Return `contents` with every copyright blurb regenerated.

    Existing blurbs are located by ``_tag_copyright`` and replaced with
    a '# '-prefixed blurb running from `original_year` to the current
    (UTC) year and listing `authors`.

    >>> contents = '''Some file
    ... bla bla
    ... # Copyright (copyright begins)
    ... # (copyright continues)
    ... # bla bla bla
    ... (copyright ends)
    ... bla bla bla
    ... '''
    >>> print(_update_copyright(contents, 2008, ['Jack', 'Jill']
    ...     )) # doctest: +ELLIPSIS, +REPORT_UDIFF
    Some file
    bla bla
    # Copyright (C) 2008-... Jack
    #                         Jill
    #
    # This file...
    (copyright ends)
    bla bla bla
    <BLANKLINE>
    """
    current_year = time.gmtime()[0]
    copyright_string = _copyright_string(
        original_year, current_year, authors, prefix='# ')
    tagged = _tag_copyright(contents)
    return tagged.replace(COPY_RIGHT_TAG, copyright_string)
def ignored_file(filename, ignored_paths=None, ignored_files=None,
                 check_disk=True, check_vcs=True):
    """Return ``True`` if `filename` should be left alone.

    A file is ignored when any of the following holds:

    * `check_disk` is true and `filename` is not a regular file,
    * `filename` is one of `ignored_paths` or lies below one of them,
    * its basename is listed in `ignored_files`,
    * `check_vcs` is true and ``is_versioned`` rejects it.

    `ignored_paths` and `ignored_files` default to ``IGNORED_PATHS``
    and ``IGNORED_FILES``.  Paths are matched on directory boundaries,
    so ``'./.git'`` covers ``'./.git/config'`` but not
    ``'./.gitignore'``.

    >>> ignored_paths = ['./a/', './b/']
    >>> ignored_files = ['x', 'y']
    >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./.gitignore', ['./.git'], [], False, False)
    False
    """
    if ignored_paths is None:
        ignored_paths = IGNORED_PATHS
    if ignored_files is None:
        ignored_files = IGNORED_FILES
    if check_disk and not os.path.isfile(filename):
        return True
    for path in ignored_paths:
        if filename == path:
            return True
        stem = path.rstrip('/' + os.sep)
        if not stem:
            # Nothing but separators (or empty): there is no directory
            # name to anchor on, so fall back to a plain prefix test.
            if filename.startswith(path):
                return True
        elif filename.startswith((stem + '/', stem + os.sep)):
            return True
    if os.path.basename(filename) in ignored_files:
        return True
    if check_vcs and not is_versioned(filename):
        return True
    return False
609 def _set_contents(filename, contents, original_contents=None, dry_run=False,
611 if original_contents == None and os.path.isfile(filename):
612 f = open(filename, 'r')
613 original_contents = f.read()
616 print "checking %s ... " % filename,
617 if contents != original_contents:
619 if original_contents == None:
623 if verbose > 1 and original_contents != None:
625 difflib.unified_diff(
626 original_contents.splitlines(), contents.splitlines(),
627 fromfile=os.path.normpath(os.path.join('a', filename)),
628 tofile=os.path.normpath(os.path.join('b', filename)),
631 f = file(filename, 'w')
def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
    """Regenerate the ``AUTHORS`` file in the current directory.

    The names come from `authors_fn()`, are canonicalized through
    ``ALIASES`` (email addresses kept), and are written one per line
    below a "<project> was written by:" heading.  `dry_run` and
    `verbose` are passed on to ``_set_contents``.
    """
    names = _replace_aliases(authors_fn(), with_email=True, aliases=ALIASES)
    new_contents = '%s was written by:\n%s\n' % (
        PROJECT_INFO['project'],
        '\n'.join(names),
        )
    _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
def update_file(filename, original_year_fn=original_year, authors_fn=authors,
                dry_run=False, verbose=0):
    """Refresh the copyright blurb(s) inside `filename`.

    The first year and the author list are looked up per file through
    `original_year_fn(filename)` and `authors_fn(filename)`; authors
    are canonicalized through ``ALIASES``.  `dry_run` and `verbose`
    are passed on to ``_set_contents``.
    """
    with open(filename, 'r') as f:
        contents = f.read()

    original_year = original_year_fn(filename)
    authors = authors_fn(filename)
    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)

    new_contents = _update_copyright(contents, original_year, authors)
    _set_contents(filename, contents=new_contents, original_contents=contents,
                  dry_run=dry_run, verbose=verbose)
def update_files(files=None, dry_run=False, verbose=0):
    """Run ``update_file`` on every file that is not ignored.

    If `files` is ``None`` or empty, every file below the current
    directory is considered.  Files rejected by ``ignored_file`` are
    skipped.  `dry_run` and `verbose` are passed on to ``update_file``.
    """
    if not files:
        files = [os.path.join(dirpath, filename)
                 for dirpath, dirnames, filenames in os.walk('.')
                 for filename in filenames]

    for filename in files:
        if ignored_file(filename):
            continue
        update_file(filename, dry_run=dry_run, verbose=verbose)
def update_pyfile(path, original_year_fn=original_year,
                  authors_fn=authors_list, dry_run=False, verbose=0):
    """Write a Python module at `path` that carries the license text.

    The generated module starts with a '# '-prefixed copyright blurb
    and defines ``short_license(extra_info, wrap=True, **kwargs)``,
    which %-formats the short blurb's paragraphs with `extra_info` and
    optionally wraps them with ``textwrap.fill``.  Years and authors
    come from `original_year_fn()` and `authors_fn()`; authors are
    canonicalized through ``ALIASES`` with email addresses stripped.
    `dry_run` and `verbose` are passed on to ``_set_contents``.

    NOTE(review): a few of the emitted template lines (the ``LICENSE``
    assignment and the list brackets) were not visible in the source
    this was restored from -- confirm against a generated module.
    """
    original_year = original_year_fn()
    current_year = time.gmtime()[0]
    authors = authors_fn()
    authors = _replace_aliases(authors, with_email=False, aliases=ALIASES)
    # Keep '%(get-details)s' as a literal placeholder, so it is still
    # there for short_license() to fill in at run time.
    paragraphs = _copyright_string(
        original_year, current_year, authors,
        text=SHORT_COPY_RIGHT_TEXT,
        extra_info={'get-details':'%(get-details)s'},
        author_format_fn=_short_author_formatter, wrap=False,
        ).split('\n\n')
    lines = [
        _copyright_string(original_year, current_year, authors, prefix='# '),
        '', 'import textwrap', '', '',
        'LICENSE = """',
        _copyright_string(original_year, current_year, authors, prefix=''),
        '""".strip()',
        '',
        'def short_license(extra_info, wrap=True, **kwargs):',
        '    paragraphs = [',
        ]
    for p in paragraphs:
        lines.append("        '%s' %% extra_info," % p.replace("'", r"\'"))
    lines.extend([
            '        ]',
            '    if wrap == True:',
            '        for i,p in enumerate(paragraphs):',
            '            paragraphs[i] = textwrap.fill(p, **kwargs)',
            r"    return '\n\n'.join(paragraphs)",
            ])
    new_contents = '\n'.join(lines)+'\n'
    _set_contents(path, new_contents, dry_run=dry_run, verbose=verbose)
if __name__ == '__main__':
    # Command-line entry point: refresh AUTHORS, the per-file copyright
    # blurbs and (unless --pyfile is unset) the generated license module.
    import optparse
    import sys

    usage = """%%prog [options] [file ...]

Update copyright information in source code with information from
the %(vcs)s repository. Run from the %(project)s repository root.

Replaces every line starting with '^# Copyright' and continuing with
'^#' with an auto-generated copyright blurb. If you want to add
#-commented material after a copyright blurb, please insert a blank
line between the blurb and your comment, so the next run of
``update_copyright.py`` doesn't clobber your comment.

If no files are given, a list of files to update is generated
automatically.
""" % PROJECT_INFO
    p = optparse.OptionParser(usage)
    p.add_option('--pyfile', dest='pyfile', default='hooke/license.py',
                 metavar='PATH',
                 help='Write project license info to a Python module at PATH')
    p.add_option('--test', dest='test', default=False,
                 action='store_true', help='Run internal tests and exit')
    p.add_option('--dry-run', dest='dry_run', default=False,
                 action='store_true', help="Don't make any changes")
    p.add_option('-v', '--verbose', dest='verbose', default=0,
                 action='count', help='Increment verbosity')
    options,args = p.parse_args()

    if options.test:
        # NOTE(review): this branch is reconstructed; it assumes a
        # module-level test() helper -- confirm against upstream.
        test()
        sys.exit(0)

    update_authors(dry_run=options.dry_run, verbose=options.verbose)
    update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)
    if options.pyfile is not None:
        update_pyfile(path=options.pyfile,
                      dry_run=options.dry_run, verbose=options.verbose)