3 # Copyright (C) 2009-2012 W. Trevor King <wking@drexel.edu>
5 # This file is part of update-copyright.
7 # update-copyright is free software: you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # update-copyright is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with update-copyright. If not, see
19 # <http://www.gnu.org/licenses/>.
21 """Automatically update copyright boilerplate.
23 This script is adapted from one written for `Bugs Everywhere`_ and
24 later modified for `Hooke`_ before returning to `Bugs Everywhere`_. I
25 finally gave up on maintaining separate versions, so here it is as a
28 .. _Bugs Everywhere: http://bugseverywhere.org/
29 .. _Hooke: http://code.google.com/p/hooke/
42 'project': 'update-copyright',
46 # Break "copyright" into "copy" and "right" to avoid matching the
47 # REGEXP if we decide to go back to regexps.
49 'This file is part of %(project)s.',
50 '%(project)s is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.',
51 '%(project)s is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.',
52 'You should have received a copy of the GNU General Public License along with %(project)s. If not, see <http://www.gnu.org/licenses/>.'
55 SHORT_COPY_RIGHT_TEXT = [
56 '%(project)s comes with ABSOLUTELY NO WARRANTY and is licensed under the GNU General Public License. For details, %(get-details)s.'
59 COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
61 # Convert author names to canonical forms.
62 # ALIASES[<canonical name>] = <list of aliases>
65 # 'John Doe <jdoe@a.com>':
66 # ['John Doe', 'jdoe', 'J. Doe <j@doe.net>'],
68 # Git-based projects are encouraged to use .mailmap instead of
69 # ALIASES. See git-shortlog(1) for details.
72 # List of paths that should not be scanned for copyright updates.
73 # IGNORED_PATHS = ['./.git/']
74 IGNORED_PATHS = ['./.git']
75 # List of files that should not be scanned for copyright updates.
76 # IGNORED_FILES = ['COPYING']
77 IGNORED_FILES = ['COPYING']
79 # Work around holes (missing authors) in the VCS history.
80 # AUTHOR_HACKS[<path tuple>] = [<missing authors>]
81 # for example, if John Doe contributed to module.py but wasn't listed
82 # in the VCS history of that file:
84 # ('path', 'to', 'module.py'):['John Doe'],
88 # Work around holes (missing years) in the VCS history.
89 # YEAR_HACKS[<path tuple>] = <original year>
90 # for example, if module.py was published in 2008 but the VCS history
91 # only goes back to 2010:
93 # ('path', 'to', 'module.py'):2008,
97 # Helpers for VCS-specific commands
def splitpath(path):
    """Recursively split a path into elements.

    The path is first normalized with ``os.path.normpath``, so a leading
    ``./`` contributes no element.  For an absolute path the root
    (e.g. ``/``) is returned as the first element.

    Returns a tuple of path elements, outermost directory first.

    Examples
    --------

    >>> splitpath(os.path.join('a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
    ('a', 'b', 'c')
    """
    path = os.path.normpath(path)
    elements = []
    while True:
        dirname, basename = os.path.split(path)
        if dirname == path:
            # Nothing left to split off: `path` is a filesystem root.
            # Without this check the loop would never terminate, since
            # splitting a root keeps returning the root as `dirname`.
            elements.insert(0, dirname)
            break
        elements.insert(0, basename)
        if dirname in ['', '.']:
            break
        path = dirname
    return tuple(elements)
120 # VCS-specific commands
122 if PROJECT_INFO['vcs'] == 'Git':
126 _MSWINDOWS = sys.platform == 'win32'
127 _POSIX = not _MSWINDOWS
129 def invoke(args, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, expect=(0,)):
131 expect should be a tuple of allowed exit codes.
135 q = subprocess.Popen(args, stdin=subprocess.PIPE,
136 stdout=stdout, stderr=stderr)
138 assert _MSWINDOWS == True, 'invalid platform'
139 # win32 don't have os.execvp() so run the command in a shell
140 q = subprocess.Popen(args, stdin=subprocess.PIPE,
141 stdout=stdout, stderr=stderr, shell=True)
143 raise ValueError([args, e])
144 stdout,stderr = q.communicate(input=stdin)
146 if status not in expect:
147 raise ValueError([args, status, stdout, stderr])
148 return status, stdout, stderr
151 status,stdout,stderr = invoke(['git'] + list(args))
152 return stdout.rstrip('\n')
154 version = git_cmd('--version').split(' ')[-1]
155 if version.startswith('1.5.'):
156 # Author name <author email>
157 author_format = '--pretty=format:%an <%ae>'
158 year_format = ['--pretty=format:%ai'] # Author date
159 # YYYY-MM-DD HH:MM:SS Z
160 # Earlier versions of Git don't seem to recognize --date=short
162 author_format = '--pretty=format:%aN <%aE>'
163 year_format = ['--pretty=format:%ad', # Author date
164 '--date=short'] # YYYY-MM-DD
166 def original_year(filename=None, year_hacks=YEAR_HACKS):
167 args = ['log'] + year_format
168 if filename is not None:
169 args.extend(['--follow'] + [filename])
170 output = git_cmd(*args)
171 if version.startswith('1.5.'):
172 output = '\n'.join([x.split()[0] for x in output.splitlines()])
173 years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
175 years.extend(year_hacks.values())
176 elif splitpath(filename) in year_hacks:
177 years.append(year_hacks[splitpath(filename)])
181 def authors(filename, author_hacks=AUTHOR_HACKS):
182 output = git_cmd('log', '--follow', author_format,
184 ret = list(set(output.splitlines()))
185 if splitpath(filename) in author_hacks:
186 ret.extend(author_hacks[splitpath(filename)])
189 def authors_list(author_hacks=AUTHOR_HACKS):
190 output = git_cmd('log', author_format)
191 ret = list(set(output.splitlines()))
192 for path,authors in author_hacks.items():
196 def is_versioned(filename):
197 output = git_cmd('log', '--follow', filename)
202 elif PROJECT_INFO['vcs'] == 'Mercurial':
206 import mercurial.dispatch
208 def mercurial_cmd(*args):
212 tmp_stdout = StringIO.StringIO()
213 tmp_stderr = StringIO.StringIO()
214 sys.stdout = tmp_stdout
215 sys.stderr = tmp_stderr
217 mercurial.dispatch.dispatch(list(args))
222 return (tmp_stdout.getvalue().rstrip('\n'),
223 tmp_stderr.getvalue().rstrip('\n'))
225 def original_year(filename=None, year_hacks=YEAR_HACKS):
227 '--template', '{date|shortdate}\n',
228 # shortdate filter: YEAR-MONTH-DAY
231 args.extend(['--follow', filename])
232 output,error = mercurial_cmd('log', *args)
233 years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
235 years.extend(year_hacks.values())
236 elif splitpath(filename) in year_hacks:
237 years.append(year_hacks[splitpath(filename)])
241 def authors(filename, author_hacks=AUTHOR_HACKS):
242 output,error = mercurial_cmd('log', '--follow',
243 '--template', '{author}\n',
245 ret = list(set(output.splitlines()))
246 if splitpath(filename) in author_hacks:
247 ret.extend(author_hacks[splitpath(filename)])
250 def authors_list(author_hacks=AUTHOR_HACKS):
251 output,error = mercurial_cmd('log', '--template', '{author}\n')
252 ret = list(set(output.splitlines()))
253 for path,authors in author_hacks.items():
257 def is_versioned(filename):
258 output,error = mercurial_cmd('log', '--follow', filename)
263 elif PROJECT_INFO['vcs'] == 'Bazaar':
267 import bzrlib.builtins
class LogFormatter (bzrlib.log.LogFormatter):
    """Common base for the single-purpose Bazaar log formatters.

    Subclasses (`YearLogFormatter`, `AuthorLogFormatter`) override
    `log_revision` to write one piece of information per revision.
    """
    # Capability flags declared for bzrlib's log machinery.
    # NOTE(review): one short line between `supports_merge_revisions`
    # and the next flag is absent from the reviewed text -- confirm
    # against the repository copy before applying.
    supports_merge_revisions = True
    # Was misspelled `supports_deta`, which set an unrelated attribute.
    supports_delta = False
    supports_tags = False
    supports_diff = False

    def log_revision(self, revision):
        """Format a single revision; must be provided by subclasses."""
        raise NotImplementedError
280 class YearLogFormatter (LogFormatter):
281 def log_revision(self, revision):
283 time.strftime('%Y', time.gmtime(revision.rev.timestamp))
286 class AuthorLogFormatter (LogFormatter):
287 def log_revision(self, revision):
288 authors = revision.rev.get_apparent_authors()
289 self.to_file.write('\n'.join(authors)+'\n')
def original_year(filename=None, year_hacks=YEAR_HACKS):
    """Return the earliest year recorded for `filename` (Bazaar).

    Runs ``bzr log`` through `bzrlib.builtins.cmd_log` with
    `YearLogFormatter`, restricted to `filename` when one is given and
    covering the whole branch otherwise.

    `year_hacks` maps path tuples (see `splitpath`) to years missing
    from the VCS history.  With no `filename`, every hack year is
    considered; otherwise only the entry for `filename`, if any.
    """
    cmd = bzrlib.builtins.cmd_log()
    cmd.outf = StringIO.StringIO()
    kwargs = {'log_format':YearLogFormatter, 'levels':0}
    if filename is not None:
        kwargs['file_list'] = [filename]
    cmd.run(**kwargs)
    years = [int(year) for year in set(cmd.outf.getvalue().splitlines())]
    if filename is None:
        # extend, not append: appending would nest the whole list of
        # hack years as a single element instead of adding each year.
        years.extend(year_hacks.values())
    elif splitpath(filename) in year_hacks:
        years.append(year_hacks[splitpath(filename)])
    return min(years)
306 def authors(filename, author_hacks=AUTHOR_HACKS):
307 cmd = bzrlib.builtins.cmd_log()
308 cmd.outf = StringIO.StringIO()
309 cmd.run(file_list=[filename], log_format=AuthorLogFormatter, levels=0)
310 ret = list(set(cmd.outf.getvalue().splitlines()))
311 if splitpath(filename) in author_hacks:
312 ret.extend(author_hacks[splitpath(filename)])
315 def authors_list(author_hacks=AUTHOR_HACKS):
316 cmd = bzrlib.builtins.cmd_log()
317 cmd.outf = StringIO.StringIO()
318 cmd.run(log_format=AuthorLogFormatter, levels=0)
319 output = cmd.outf.getvalue()
320 ret = list(set(cmd.outf.getvalue().splitlines()))
321 for path,authors in author_hacks.items():
325 def is_versioned(filename):
326 cmd = bzrlib.builtins.cmd_log()
327 cmd.outf = StringIO.StringIO()
328 cmd.run(file_list=[filename])
332 raise NotImplementedError('Unrecognized VCS: %(vcs)s' % PROJECT_INFO)
334 # General utility commands
336 def _strip_email(*args):
337 """Remove email addresses from a series of names.
342 >>> _strip_email('J Doe')
344 >>> _strip_email('J Doe <jdoe@a.com>')
346 >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
347 ['J Doe', 'JJJ Smith']
350 for i,arg in enumerate(args):
353 author,addr = email.utils.parseaddr(arg)
359 def _reverse_aliases(aliases):
360 """Reverse an `aliases` dict.
362 Input: key: canonical name, value: list of aliases
363 Output: key: alias, value: canonical name
369 ... 'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
370 ... 'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
371 ... None:['Anonymous <a@a.com>'],
373 >>> r = _reverse_aliases(aliases)
374 >>> for item in sorted(r.items()):
376 ('Anonymous <a@a.com>', None)
377 ('J', 'J Doe <jdoe@a.com>')
378 ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
379 ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
382 for canonical_name,_aliases in aliases.items():
383 for alias in _aliases:
384 output[alias] = canonical_name
def _replace_aliases(authors, with_email=True, aliases=None):
    """Consolidate and sort `authors`.

    Make the replacements listed in the `aliases` dict (key: canonical
    name, value: list of aliases).  If `aliases` is ``None``, default
    to ``ALIASES``.  Authors whose canonical name is ``None`` are
    dropped.  When `with_email` is false, email addresses are removed
    from the result with `_strip_email`.

    The `authors` argument itself is not modified; a new sorted list
    is returned.

    >>> aliases = {
    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
    ...     None:['Anonymous <a@a.com>'],
    ...     }
    >>> authors = [
    ...     'JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
    ...     'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>', 'Anonymous <a@a.com>']
    >>> _replace_aliases(authors, with_email=True, aliases=aliases)
    ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
    >>> _replace_aliases(authors, with_email=False, aliases=aliases)
    ['J Doe', 'JJJ Smith']
    """
    if aliases is None:
        aliases = ALIASES
    rev_aliases = _reverse_aliases(aliases)
    # Build a fresh set instead of assigning into `authors`, so the
    # caller's list is left untouched.
    canonical = set(rev_aliases.get(author, author) for author in authors)
    # Drop ``None`` before sorting so it is never compared to a string.
    canonical.discard(None)
    authors = sorted(canonical)
    if not with_email:
        authors = _strip_email(*authors)
    return authors
420 def _long_author_formatter(copyright_year_string, authors):
422 >>> print '\\n'.join(_long_author_formatter(
423 ... copyright_year_string='Copyright (C) 1990-2010',
424 ... authors=['Jack', 'Jill', 'John']))
425 Copyright (C) 1990-2010 Jack
429 lines = ['%s %s' % (copyright_year_string, authors[0])]
430 for author in authors[1:]:
431 lines.append(' '*(len(copyright_year_string)+1) + author)
434 def _short_author_formatter(copyright_year_string, authors):
436 >>> print '\\n'.join(_short_author_formatter(
437 ... copyright_year_string='Copyright (C) 1990-2010',
438 ... authors=['Jack', 'Jill', 'John']*5))
439 Copyright (C) 1990-2010 Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John
441 blurb = '%s %s' % (copyright_year_string, ', '.join(authors))
def _copyright_string(original_year, final_year, authors,
                      text=COPY_RIGHT_TEXT, extra_info=None,
                      author_format_fn=_long_author_formatter,
                      formatter_kwargs=None, prefix='', wrap=True,
                      **wrap_kwargs):
    """Build the full copyright blurb as a single string.

    The blurb is an author section followed by the license paragraphs
    in `text`, separated by lines holding only the stripped `prefix`.

    original_year, final_year -- bounds of the date range; when they
        are equal a single year is shown instead of ``first-last``.
    authors -- author names passed on to `author_format_fn`.
    text -- list of paragraph templates, %-formatted with
        ``PROJECT_INFO`` updated by `extra_info`.
    extra_info -- extra template values (default: none).
    author_format_fn -- callable taking the ``Copyright (C) ...``
        string and `authors` (plus `formatter_kwargs`) and returning
        the author lines.
    formatter_kwargs -- extra keyword arguments for `author_format_fn`
        (default: none).
    prefix -- string prepended to every author line, and used as the
        default ``initial_indent`` / ``subsequent_indent``.
    wrap -- when true, each paragraph is wrapped with
        ``textwrap.fill(paragraph, **wrap_kwargs)``; when false the
        paragraphs are left unwrapped and unprefixed, which requires
        an empty ``subsequent_indent``.

    >>> print(_copyright_string(original_year=2005, final_year=2005,
    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
    ...                         prefix='# ')
    ...       ) # doctest: +ELLIPSIS
    # Copyright (C) 2005 A <a@a.com>
    #                    B <b@b.edu>
    #
    # This file...
    """
    # Fresh containers per call; avoids sharing mutable defaults.
    if extra_info is None:
        extra_info = {}
    if formatter_kwargs is None:
        formatter_kwargs = {}

    for key in ['initial_indent', 'subsequent_indent']:
        if key not in wrap_kwargs:
            wrap_kwargs[key] = prefix

    if original_year == final_year:
        date_range = '%s' % original_year
    else:
        date_range = '%s-%s' % (original_year, final_year)
    copyright_year_string = 'Copyright (C) %s' % date_range

    lines = author_format_fn(copyright_year_string, authors,
                             **formatter_kwargs)
    lines = [prefix + line for line in lines]

    info = dict(PROJECT_INFO)
    info.update(extra_info)
    text = [paragraph % info for paragraph in text]

    if wrap:
        text = [textwrap.fill(p, **wrap_kwargs) for p in text]
    else:
        # Unwrapped paragraphs never receive the indent, so a non-empty
        # one would be silently lost.
        assert wrap_kwargs['subsequent_indent'] == '', \
            wrap_kwargs['subsequent_indent']
    sep = '\n%s\n' % prefix.rstrip()
    return sep.join(['\n'.join(lines)] + text)
def _tag_copyright(contents):
    """Replace each copyright blurb in `contents` with ``COPY_RIGHT_TAG``.

    A blurb starts at a line beginning with ``# Copyright`` and runs
    through every following line that begins with ``#``.  The whole
    blurb is replaced by one line holding ``COPY_RIGHT_TAG``; all other
    lines are kept.

    Returns the tagged text, always terminated by a newline.
    """
    lines = []
    in_blurb = False
    for line in contents.splitlines():
        if not in_blurb and line.startswith('# Copyright'):
            in_blurb = True
            lines.append(COPY_RIGHT_TAG)
        elif in_blurb and not line.startswith('#'):
            # First non-comment line ends the blurb; it is kept below.
            in_blurb = False
        if not in_blurb:
            lines.append(line.rstrip('\n'))
    return '\n'.join(lines)+'\n'
def _update_copyright(contents, original_year, authors):
    """Return `contents` with its copyright blurbs regenerated.

    Existing blurbs are located with `_tag_copyright` and replaced by a
    new ``# ``-prefixed blurb from `_copyright_string`, covering
    `original_year` through the current (UTC) year for `authors`.
    """
    current_year = time.gmtime()[0]
    copyright_string = _copyright_string(
        original_year, current_year, authors, prefix='# ')
    tagged = _tag_copyright(contents)
    return tagged.replace(COPY_RIGHT_TAG, copyright_string)
def ignored_file(filename, ignored_paths=None, ignored_files=None,
                 check_disk=True, check_vcs=True):
    """Return True if `filename` should be skipped by the updater.

    A file is ignored when any of the following holds:

    * `check_disk` is true and `filename` is not an existing file;
    * `filename` starts with one of `ignored_paths`
      (default: ``IGNORED_PATHS``);
    * its basename is listed in `ignored_files`
      (default: ``IGNORED_FILES``);
    * `check_vcs` is true and `is_versioned(filename)` is false.

    >>> ignored_paths = ['./a/', './b/']
    >>> ignored_files = ['x', 'y']
    >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
    False
    """
    if ignored_paths is None:
        ignored_paths = IGNORED_PATHS
    if ignored_files is None:
        ignored_files = IGNORED_FILES
    if check_disk and not os.path.isfile(filename):
        return True
    # Plain string-prefix match, so './a/' does not match './ab/z'.
    if any(filename.startswith(path) for path in ignored_paths):
        return True
    if os.path.basename(filename) in ignored_files:
        return True
    if check_vcs and not is_versioned(filename):
        return True
    return False
612 def _set_contents(filename, contents, original_contents=None, dry_run=False,
614 if original_contents == None and os.path.isfile(filename):
615 f = open(filename, 'r')
616 original_contents = f.read()
619 print "checking %s ... " % filename,
620 if contents != original_contents:
622 if original_contents == None:
626 if verbose > 1 and original_contents != None:
628 difflib.unified_diff(
629 original_contents.splitlines(), contents.splitlines(),
630 fromfile=os.path.normpath(os.path.join('a', filename)),
631 tofile=os.path.normpath(os.path.join('b', filename)),
634 f = file(filename, 'w')
def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
    """Regenerate the ``AUTHORS`` file.

    The author list comes from `authors_fn`, is consolidated through
    `_replace_aliases` (keeping email addresses) using ``ALIASES``, and
    is written with `_set_contents`, to which `dry_run` and `verbose`
    are passed through.
    """
    authors = authors_fn()
    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
    # NOTE(review): the second format argument was missing from the
    # reviewed text; one author per line is assumed -- confirm.
    new_contents = '%s was written by:\n%s\n' % (
        PROJECT_INFO['project'],
        '\n'.join(authors)
        )
    _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
def update_file(filename, original_year_fn=original_year, authors_fn=authors,
                dry_run=False, verbose=0):
    """Regenerate the copyright blurb(s) of a single file.

    The file is read, its first year and authors are looked up with
    `original_year_fn(filename)` and `authors_fn(filename)`, the
    authors are consolidated with `_replace_aliases` using ``ALIASES``,
    and the updated text is handed to `_set_contents` together with the
    original text.  `dry_run` and `verbose` are passed through to
    `_set_contents`.
    """
    # open() inside `with` (not the Python-2-only file() builtin), so
    # the handle is closed even if reading fails.
    with open(filename, 'r') as f:
        contents = f.read()

    original_year = original_year_fn(filename)
    authors = authors_fn(filename)
    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)

    new_contents = _update_copyright(contents, original_year, authors)
    _set_contents(filename, contents=new_contents, original_contents=contents,
                  dry_run=dry_run, verbose=verbose)
def update_files(files=None, dry_run=False, verbose=0):
    """Update the copyright blurbs of several files.

    If `files` is ``None`` or empty, every file found by walking the
    current directory is considered.  Files for which `ignored_file`
    returns true are skipped; the rest are passed to `update_file`
    along with `dry_run` and `verbose`.
    """
    if not files:
        files = [os.path.join(dirpath, filename)
                 for dirpath, dirnames, filenames in os.walk('.')
                 for filename in filenames]

    for filename in files:
        if ignored_file(filename):
            continue
        update_file(filename, dry_run=dry_run, verbose=verbose)
677 def update_pyfile(path, original_year_fn=original_year,
678 authors_fn=authors_list, dry_run=False, verbose=0):
679 original_year = original_year_fn()
680 current_year = time.gmtime()[0]
681 authors = authors_fn()
682 authors = _replace_aliases(authors, with_email=False, aliases=ALIASES)
683 paragraphs = _copyright_string(
684 original_year, current_year, authors,
685 text=SHORT_COPY_RIGHT_TEXT,
686 extra_info={'get-details':'%(get-details)s'},
687 author_format_fn=_short_author_formatter, wrap=False,
690 _copyright_string(original_year, current_year, authors, prefix='# '),
691 '', 'import textwrap', '', '',
693 _copyright_string(original_year, current_year, authors, prefix=''),
696 'def short_license(extra_info, wrap=True, **kwargs):',
700 lines.append(" '%s' %% extra_info," % p.replace("'", r"\'"))
704 ' for i,p in enumerate(paragraphs):',
705 ' paragraphs[i] = textwrap.fill(p, **kwargs)',
706 r" return '\n\n'.join(paragraphs)",
708 new_contents = '\n'.join(lines)+'\n'
709 _set_contents(path, new_contents, dry_run=dry_run, verbose=verbose)
716 if __name__ == '__main__':
720 usage = """%%prog [options] [file ...]
722 Update copyright information in source code with information from
723 the %(vcs)s repository. Run from the %(project)s repository root.
725 Replaces every line starting with '^# Copyright' and continuing with
726 '^#' with an auto-generated copyright blurb. If you want to add
727 #-commented material after a copyright blurb, please insert a blank
728 line between the blurb and your comment, so the next run of
729 ``update_copyright.py`` doesn't clobber your comment.
731 If no files are given, a list of files to update is generated
734 p = optparse.OptionParser(usage)
735 p.add_option('--pyfile', dest='pyfile', default='update_copyright/license.py',
737 help='Write project license info to a Python module at PATH')
738 p.add_option('--test', dest='test', default=False,
739 action='store_true', help='Run internal tests and exit')
740 p.add_option('--dry-run', dest='dry_run', default=False,
741 action='store_true', help="Don't make any changes")
742 p.add_option('-v', '--verbose', dest='verbose', default=0,
743 action='count', help='Increment verbosity')
744 options,args = p.parse_args()
746 if options.test == True:
750 update_authors(dry_run=options.dry_run, verbose=options.verbose)
751 update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)
752 if options.pyfile != None:
753 update_pyfile(path=options.pyfile,
754 dry_run=options.dry_run, verbose=options.verbose)