3 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
5 # This file is part of ChemDB.
7 # ChemDB is free software: you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by the
9 # Free Software Foundation, either version 3 of the License, or (at your
10 # option) any later version.
12 # ChemDB is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with ChemDB. If not, see <http://www.gnu.org/licenses/>.
20 """Automatically update copyright boilerplate.
22 This script is adapted from one written for `Bugs Everywhere`_.
24 .. _Bugs Everywhere: http://bugseverywhere.org/
41 # Break "copyright" into "copy" and "right" to avoid matching the
44 This file is part of %(project)s.
46 %(project)s is free software: you can redistribute it and/or modify it
47 under the terms of the GNU General Public License as published by the
48 Free Software Foundation, either version 3 of the License, or (at your
49 option) any later version.
51 %(project)s is distributed in the hope that it will be useful,
52 but WITHOUT ANY WARRANTY; without even the implied warranty of
53 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
54 GNU General Public License for more details.
56 You should have received a copy of the GNU General Public License
57 along with %(project)s. If not, see <http://www.gnu.org/licenses/>.
# Marker line written in place of an existing copyright blurb before the new
# blurb is generated.  The literal is assembled from two pieces, presumably so
# that the finished tag never appears verbatim in this file -- confirm.
60 COPY_RIGHT_TAG='-xyz-COPY' + '-RIGHT-zyx-' # unlikely to occur in the wild :p
# A line only opens a copyright blurb when it starts with one of these
# characters followed by " Copyright".
61 COMMENT_CHARS=['#', '%'] # allowed comment characters
63 # Convert author names to canonical forms.
64 # ALIASES[<canonical name>] = <list of aliases>
67 # 'John Doe <jdoe@a.com>':
68 # ['John Doe', 'jdoe', 'J. Doe <j@doe.net>'],
70 # Git-based projects are encouraged to use .mailmap instead of
71 # ALIASES. See git-shortlog(1) for details.
74 # List of paths that should not be scanned for copyright updates.
75 # IGNORED_PATHS = ['./.git/']
76 IGNORED_PATHS = ['./.git/', './template/']
77 # List of files that should not be scanned for copyright updates.
78 # IGNORED_FILES = ['COPYING']
79 IGNORED_FILES = ['COPYING', 'certgen.py', 'mk_simple_certs.py',
82 # Work around missing author holes in the VCS history.
83 # AUTHOR_HACKS[<path tuple>] = [<missing authors>]
84 # for example, if John Doe contributed to module.py but wasn't listed
85 # in the VCS history of that file:
87 # ('path', 'to', 'module.py'):['John Doe'],
91 # Work around missing year holes in the VCS history.
92 # YEAR_HACKS[<path tuple>] = <original year>
93 # for example, if module.py was published in 2008 but the VCS history
94 # only goes back to 2010:
96 # ('path', 'to', 'module.py'):2008,
100 # Helpers for VCS-specific commands
def splitpath(path):
    """Split a path into a tuple of its elements.

    The path is normalized first, so a leading ``./`` and redundant
    separators do not produce elements of their own.  For an absolute
    path the first element is the root itself.

    Examples
    --------

    >>> import os.path
    >>> splitpath(os.path.join('a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join(os.sep, 'a')) == (os.sep, 'a')
    True
    """
    path = os.path.normpath(path)
    elements = []
    while True:
        dirname, basename = os.path.split(path)
        if dirname == path:
            # os.path.split() cannot shorten the path any further: we have
            # reached the root.  Without this check an absolute path never
            # terminates, because its dirname never becomes '' or '.'.
            elements.append(dirname)
            break
        elements.append(basename)
        if dirname in ('', '.'):
            break
        path = dirname
    # Elements were collected from the tail of the path towards its head.
    elements.reverse()
    return tuple(elements)
123 # VCS-specific commands
125 if PROJECT_INFO['vcs'] == 'Git':
_MSWINDOWS = sys.platform == 'win32'
_POSIX = not _MSWINDOWS

def invoke(args, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, expect=(0,)):
    """Run `args` as a subprocess and return ``(status, stdout, stderr)``.

    `stdin`, if given, is sent to the child's standard input.  `stdout`
    and `stderr` are handed to ``subprocess.Popen``; with the default
    pipes the captured output is returned, otherwise ``None`` is
    returned in their place.

    expect should be a tuple of allowed exit codes.

    Raises ``ValueError`` if the command cannot be started or if it
    exits with a status that is not listed in `expect`.
    """
    try:
        # win32 doesn't have os.execvp(), so the command is run in a shell
        # there; everywhere else it is executed directly.
        q = subprocess.Popen(args, stdin=subprocess.PIPE,
                             stdout=stdout, stderr=stderr,
                             shell=_MSWINDOWS)
    except OSError as e:
        raise ValueError([args, e])
    stdout, stderr = q.communicate(input=stdin)
    status = q.wait()
    if status not in expect:
        raise ValueError([args, status, stdout, stderr])
    return status, stdout, stderr
def git_cmd(*args):
    """Run ``git`` with `args` and return what it wrote to standard output.

    Trailing newlines are removed from the output.  The command is run
    through ``invoke`` with its default `expect`, so a git exit status
    other than 0 raises ``ValueError``.
    """
    _status, output, _error = invoke(['git'] + list(args))
    return output.rstrip('\n')
def original_year(filename, year_hacks=YEAR_HACKS):
    """Return the earliest year in which `filename` was changed.

    Years are taken from the author dates reported by
    ``git log --follow``.  If `year_hacks` has an entry for
    ``splitpath(filename)``, that year is considered as well.

    Raises ``ValueError`` if neither source provides a year.
    """
    output = git_cmd('log', '--follow',
                     '--format=format:%ad',  # Author date
                     '--date=short',         # YYYY-MM-DD
                     filename)
    years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
    key = splitpath(filename)
    if key in year_hacks:
        years.append(year_hacks[key])
    return min(years)
def authors(filename, author_hacks=AUTHOR_HACKS):
    """Return the authors of `filename` as ``Name <email>`` strings.

    Authors reported by ``git log --follow`` are de-duplicated; any
    names listed in `author_hacks` under ``splitpath(filename)`` are
    appended afterwards (without further de-duplication).  The order of
    the result is not defined.
    """
    output = git_cmd('log', '--follow', '--format=format:%aN <%aE>',
                     filename)  # Author name <author email>
    ret = list(set(output.splitlines()))
    key = splitpath(filename)
    if key in author_hacks:
        ret.extend(author_hacks[key])
    return ret
def authors_list(author_hacks=AUTHOR_HACKS):
    """Return every author of the repository as ``Name <email>`` strings.

    Authors reported by ``git log`` are de-duplicated; all names listed
    in `author_hacks`, for any path, are appended afterwards (without
    further de-duplication).
    """
    output = git_cmd('log', '--format=format:%aN <%aE>')
    ret = list(set(output.splitlines()))
    # Only the author lists matter here, not the paths they belong to.
    for hacked_authors in author_hacks.values():
        ret.extend(hacked_authors)
    return ret
# Decide whether `filename` is tracked by git, based on the output of
# ``git log --follow`` for that file (run through git_cmd()).
# NOTE(review): the lines that turn `output` into the return value are not
# visible in this excerpt -- presumably empty output means "not versioned";
# confirm against the full file.
183 def is_versioned(filename):
184 output = git_cmd('log', '--follow', filename)
189 elif PROJECT_INFO['vcs'] == 'Mercurial':
193 import mercurial.dispatch
# Run a Mercurial command in-process through mercurial.dispatch and return
# ``(stdout_text, stderr_text)``, each with trailing newlines removed.
# NOTE(review): the statements that save and later restore the real
# sys.stdout / sys.stderr are not visible in this excerpt -- confirm that the
# restore happens even when dispatch() raises.
195 def mercurial_cmd(*args):
# Temporary buffers that collect whatever the command prints.
199 tmp_stdout = StringIO.StringIO()
200 tmp_stderr = StringIO.StringIO()
# Redirect the process-wide streams so the in-process command writes into the
# buffers instead of the terminal.
201 sys.stdout = tmp_stdout
202 sys.stderr = tmp_stderr
# dispatch() receives the argument list (a copy of `args` as a list).
204 mercurial.dispatch.dispatch(list(args))
209 return (tmp_stdout.getvalue().rstrip('\n'),
210 tmp_stderr.getvalue().rstrip('\n'))
def original_year(filename, year_hacks=YEAR_HACKS):
    """Return the earliest year in which `filename` was changed.

    Years are taken from the revision dates reported by
    ``hg log --follow``.  If `year_hacks` has an entry for
    ``splitpath(filename)``, that year is considered as well.

    Raises ``ValueError`` if neither source provides a year.
    """
    # shortdate filter: YEAR-MONTH-DAY
    output, _error = mercurial_cmd('log', '--follow',
                                   '--template', '{date|shortdate}\n',
                                   filename)
    years = [int(line.split('-', 1)[0]) for line in output.splitlines()]
    key = splitpath(filename)
    if key in year_hacks:
        years.append(year_hacks[key])
    return min(years)
def authors(filename, author_hacks=AUTHOR_HACKS):
    """Return the authors of `filename` as recorded by Mercurial.

    Authors reported by ``hg log --follow`` are de-duplicated; any
    names listed in `author_hacks` under ``splitpath(filename)`` are
    appended afterwards (without further de-duplication).  The order of
    the result is not defined.
    """
    output, _error = mercurial_cmd('log', '--follow',
                                   '--template', '{author}\n',
                                   filename)
    ret = list(set(output.splitlines()))
    key = splitpath(filename)
    if key in author_hacks:
        ret.extend(author_hacks[key])
    return ret
def authors_list(author_hacks=AUTHOR_HACKS):
    """Return every author of the repository as recorded by Mercurial.

    Authors reported by ``hg log`` are de-duplicated; all names listed
    in `author_hacks`, for any path, are appended afterwards (without
    further de-duplication).
    """
    output, _error = mercurial_cmd('log', '--template', '{author}\n')
    ret = list(set(output.splitlines()))
    # Only the author lists matter here, not the paths they belong to.
    for hacked_authors in author_hacks.values():
        ret.extend(hacked_authors)
    return ret
# Decide whether `filename` is tracked by Mercurial, based on the result of
# ``hg log --follow`` for that file (run through mercurial_cmd(), which
# returns the captured stdout and stderr text).
# NOTE(review): the lines that turn `output` / `error` into the return value
# are not visible in this excerpt -- confirm which of the two is tested.
239 def is_versioned(filename):
240 output,error = mercurial_cmd('log', '--follow', filename)
245 elif PROJECT_INFO['vcs'] == 'Bazaar':
249 import bzrlib.builtins
class LogFormatter (bzrlib.log.LogFormatter):
    """Common base for this script's single-purpose Bazaar log formatters.

    Subclasses override `log_revision` to write one piece of
    information per revision to ``self.to_file``.
    """
    supports_merge_revisions = True
    # NOTE(review): this attribute is restored from a line missing in the
    # extracted listing (between supports_merge_revisions and the delta
    # flag) -- confirm against the original file.
    preferred_levels = 0
    # Was misspelled ``supports_deta``, which bzrlib never reads.
    supports_delta = False
    supports_tags = False
    supports_diff = False

    def log_revision(self, revision):
        """Write the log entry for `revision`; must be overridden."""
        raise NotImplementedError
class YearLogFormatter (LogFormatter):
    """Log formatter that writes the year of each revision, one per line."""
    def log_revision(self, revision):
        """Write the four-digit year (UTC) of `revision` and a newline."""
        year = time.strftime('%Y', time.gmtime(revision.rev.timestamp))
        self.to_file.write(year + '\n')
class AuthorLogFormatter (LogFormatter):
    """Log formatter that writes the authors of each revision, one per line."""
    def log_revision(self, revision):
        """Write the apparent authors of `revision`, newline-terminated."""
        names = revision.rev.get_apparent_authors()
        text = '\n'.join(names) + '\n'
        self.to_file.write(text)
def original_year(filename, year_hacks=YEAR_HACKS):
    """Return the earliest year in which `filename` was changed.

    Years are collected by running ``bzr log`` on the file with
    `YearLogFormatter`.  If `year_hacks` has an entry for
    ``splitpath(filename)``, that year is considered as well.

    Raises ``ValueError`` if neither source provides a year.
    """
    cmd = bzrlib.builtins.cmd_log()
    cmd.outf = StringIO.StringIO()
    cmd.run(file_list=[filename], log_format=YearLogFormatter, levels=0)
    years = [int(year) for year in set(cmd.outf.getvalue().splitlines())]
    key = splitpath(filename)
    if key in year_hacks:
        years.append(year_hacks[key])
    return min(years)
def authors(filename, author_hacks=AUTHOR_HACKS):
    """Return the authors of `filename` as recorded by Bazaar.

    Authors are collected by running ``bzr log`` on the file with
    `AuthorLogFormatter` and de-duplicated; any names listed in
    `author_hacks` under ``splitpath(filename)`` are appended afterwards
    (without further de-duplication).  The order of the result is not
    defined.
    """
    cmd = bzrlib.builtins.cmd_log()
    cmd.outf = StringIO.StringIO()
    cmd.run(file_list=[filename], log_format=AuthorLogFormatter, levels=0)
    ret = list(set(cmd.outf.getvalue().splitlines()))
    key = splitpath(filename)
    if key in author_hacks:
        ret.extend(author_hacks[key])
    return ret
def authors_list(author_hacks=AUTHOR_HACKS):
    """Return every author of the repository as recorded by Bazaar.

    Authors are collected by running ``bzr log`` with
    `AuthorLogFormatter` and de-duplicated; all names listed in
    `author_hacks`, for any path, are appended afterwards (without
    further de-duplication).
    """
    cmd = bzrlib.builtins.cmd_log()
    cmd.outf = StringIO.StringIO()
    cmd.run(log_format=AuthorLogFormatter, levels=0)
    ret = list(set(cmd.outf.getvalue().splitlines()))
    # Only the author lists matter here, not the paths they belong to.
    for hacked_authors in author_hacks.values():
        ret.extend(hacked_authors)
    return ret
# Decide whether `filename` is tracked by Bazaar by running ``bzr log`` on it
# in-process, with the command's output collected in a StringIO buffer.
# NOTE(review): the lines that produce the return value are not visible in
# this excerpt -- confirm how an unversioned file is detected.
302 def is_versioned(filename):
303 cmd = bzrlib.builtins.cmd_log()
304 cmd.outf = StringIO.StringIO()
305 cmd.run(file_list=[filename])
309 raise NotImplementedError('Unrecognized VCS: %(vcs)s' % PROJECT_INFO)
311 # General utility commands
def _strip_email(*args):
    """Remove email addresses from a series of names.

    Each argument is parsed with ``email.utils.parseaddr`` and replaced
    by the name part of the result.  ``None`` arguments are passed
    through unchanged.  Returns a new list, one entry per argument.

    Examples
    --------

    >>> _strip_email('J Doe <jdoe@a.com>')
    ['J Doe']
    >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
    ['J Doe', 'JJJ Smith']
    >>> _strip_email(None, 'J Doe <jdoe@a.com>')
    [None, 'J Doe']
    """
    names = []
    for arg in args:
        if arg is None:
            names.append(None)
        else:
            author, _addr = email.utils.parseaddr(arg)
            names.append(author)
    return names
def _reverse_aliases(aliases):
    """Reverse an `aliases` dict.

    Input:  key: canonical name,  value: list of aliases
    Output: key: alias,           value: canonical name

    If an alias is listed under more than one canonical name, the entry
    visited last while iterating `aliases` wins.

    Examples
    --------

    >>> aliases = {
    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>', 'J'],
    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
    ...     None:['Anonymous <a@a.com>'],
    ...     }
    >>> r = _reverse_aliases(aliases)
    >>> for item in sorted(r.items()):
    ...     print item
    ('Anonymous <a@a.com>', None)
    ('J', 'J Doe <jdoe@a.com>')
    ('Jingly <jjjs@b.edu>', 'JJJ Smith <jjjs@a.com>')
    ('Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>')
    """
    return dict((alias, canonical_name)
                for canonical_name, _aliases in aliases.items()
                for alias in _aliases)
def _replace_aliases(authors, with_email=True, aliases=None):
    """Consolidate and sort `authors`.

    Make the replacements listed in the `aliases` dict (key: canonical
    name, value: list of aliases).  If `aliases` is ``None``, default
    to ``ALIASES``.  With `with_email` false, email addresses are
    stripped from `aliases` before matching.  Authors whose canonical
    name is ``None`` are dropped.

    Returns a new sorted list without duplicates; `authors` itself is
    left unmodified.

    >>> aliases = {
    ...     'J Doe <jdoe@a.com>':['Johnny <jdoe@b.edu>'],
    ...     'JJJ Smith <jjjs@a.com>':['Jingly <jjjs@b.edu>'],
    ...     None:['Anonymous <a@a.com>'],
    ...     }
    >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
    ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
    ...                  with_email=True, aliases=aliases)
    ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
    >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
    ...                  with_email=False, aliases=aliases)
    ['J Doe', 'JJJ Smith']
    >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
    ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@a.com>'],
    ...                  with_email=True, aliases=aliases)
    ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
    """
    if aliases is None:
        aliases = ALIASES
    if not with_email:
        aliases = dict([(_strip_email(author)[0], _strip_email(*_aliases))
                        for author, _aliases in aliases.items()])
    rev_aliases = _reverse_aliases(aliases)
    # Build a fresh set instead of rewriting the caller's list in place.
    canonical = set(rev_aliases.get(author, author) for author in authors)
    # None marks authors that should not be listed.  Drop it before
    # sorting, since None cannot be ordered against strings on Python 3.
    canonical.discard(None)
    return sorted(canonical)
def _copyright_string(original_year, final_year, authors, prefix=''):
    """Return the copyright blurb for the given years and authors.

    The first line names the year (or year range) and the first
    author; further authors follow on lines of their own, aligned with
    the first.  A blank line and the license text (``COPY_RIGHT_TEXT``
    filled in from ``PROJECT_INFO``) come after that.  Every line is
    prefixed with `prefix` and stripped of trailing whitespace.  An
    empty `authors` list yields a first line without any name.

    >>> print _copyright_string(original_year=2005,
    ...                         final_year=2005,
    ...                         authors=['A <a@a.com>', 'B <b@b.edu>'],
    ...                         prefix='# '
    ...                        ) # doctest: +ELLIPSIS
    # Copyright (C) 2005 A <a@a.com>
    #                    B <b@b.edu>
    #
    # This file...
    >>> print _copyright_string(original_year=2005,
    ...                         final_year=2009,
    ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
    ...                        ) # doctest: +ELLIPSIS
    Copyright (C) 2005-2009 A <a@a.com>
                            B <b@b.edu>
    <BLANKLINE>
    This file...
    """
    if original_year == final_year:
        date_range = '%s' % original_year
    else:
        date_range = '%s-%s' % (original_year, final_year)
    header = 'Copyright (C) %s ' % date_range
    # Later authors are indented so they line up under the first one.
    indent = ' ' * len(header)
    lines = [header + (authors[0] if authors else '')]
    lines.extend(indent + author for author in authors[1:])
    lines.append('')
    lines.extend((COPY_RIGHT_TEXT % PROJECT_INFO).splitlines())
    return '\n'.join((prefix + line).rstrip() for line in lines)
def _tag_copyright(contents):
    """Replace each copyright blurb in `contents` with a one-line tag.

    A blurb starts at a line beginning with a comment character from
    ``COMMENT_CHARS`` followed by `` Copyright`` and extends over all
    directly following lines that begin with the same comment
    character.  The whole blurb is replaced by ``COPY_RIGHT_TAG``
    followed by that comment character.  The result always ends with a
    newline.

    >>> contents = '''Some file
    ... bla bla
    ... # Copyright (copyright begins)
    ... # (copyright continues)
    ... # bla bla bla
    ... (copyright ends)
    ... bla bla bla
    ... '''
    >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
    Some file
    bla bla
    -xyz-CR-zyx-#
    (copyright ends)
    bla bla bla
    <BLANKLINE>
    >>> contents = contents.replace('#', '%')
    >>> print _tag_copyright(contents).replace('COPY-RIGHT', 'CR')
    Some file
    bla bla
    -xyz-CR-zyx-%
    (copyright ends)
    bla bla bla
    <BLANKLINE>
    """
    lines = []
    # Comment character of the blurb being skipped; None while outside one.
    comment_char = None
    for line in contents.splitlines():
        if comment_char is None:
            for c in COMMENT_CHARS:
                if line.startswith('%s Copyright' % c):
                    comment_char = c
                    lines.append(COPY_RIGHT_TAG + c)
                    break
        elif not line.startswith(comment_char):
            # First line that is no longer part of the blurb.
            comment_char = None
        if comment_char is None:
            lines.append(line.rstrip('\n'))
    return '\n'.join(lines)+'\n'
def _update_copyright(contents, original_year, authors):
    """Return `contents` with every copyright blurb regenerated.

    Existing blurbs are first collapsed to tag lines by
    ``_tag_copyright``; each tag line is then replaced by a fresh blurb
    from ``_copyright_string`` running from `original_year` to the
    current year (UTC), using the comment character recorded in the
    tag.  The result always ends with a newline.

    >>> contents = '''Some file
    ... bla bla
    ... # Copyright (copyright begins)
    ... # (copyright continues)
    ... # bla bla bla
    ... (copyright ends)
    ... bla bla bla
    ... '''
    >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
    ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
    Some file
    bla bla
    # Copyright (C) 2008-... Jack
    #                         Jill
    #
    # This file...
    (copyright ends)
    bla bla bla
    <BLANKLINE>
    >>> contents = contents.replace('#', '%')
    >>> print _update_copyright(contents, 2008, ['Jack', 'Jill']
    ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
    Some file
    bla bla
    % Copyright (C) 2008-... Jack
    %                         Jill
    %
    % This file...
    (copyright ends)
    bla bla bla
    <BLANKLINE>
    """
    current_year = time.gmtime()[0]
    tagged = _tag_copyright(contents)
    lines = []
    for line in tagged.splitlines():
        if line.startswith(COPY_RIGHT_TAG):
            # Whatever follows the tag is the blurb's comment character.
            comment_char = line[len(COPY_RIGHT_TAG):]
            copyright_string = _copyright_string(
                original_year, current_year, authors,
                prefix='%s ' % comment_char)
            lines.append(copyright_string.rstrip('\n'))
        else:
            lines.append(line.rstrip('\n'))
    return '\n'.join(lines)+'\n'
def ignored_file(filename, ignored_paths=None, ignored_files=None,
                 check_disk=True, check_vcs=True):
    """Return ``True`` if `filename` should be skipped by the updater.

    A file is ignored when any of the following holds:

    * `check_disk` is true and `filename` is not a regular file,
    * `filename` starts with one of `ignored_paths`
      (default ``IGNORED_PATHS``),
    * its basename is listed in `ignored_files`
      (default ``IGNORED_FILES``),
    * `check_vcs` is true and ``is_versioned(filename)`` is false.

    >>> ignored_paths = ['./a/', './b/']
    >>> ignored_files = ['x', 'y']
    >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
    False
    """
    if ignored_paths is None:
        ignored_paths = IGNORED_PATHS
    if ignored_files is None:
        ignored_files = IGNORED_FILES
    if check_disk and not os.path.isfile(filename):
        return True
    if any(filename.startswith(path) for path in ignored_paths):
        return True
    if os.path.basename(filename) in ignored_files:
        return True
    # The VCS query is the most expensive check, so it runs last.
    if check_vcs and not is_versioned(filename):
        return True
    return False
# Compare `contents` with what `filename` currently holds and rewrite the
# file when they differ.  `original_contents` may be supplied by the caller;
# otherwise it is read from disk if the file exists (and stays None if it
# does not).
# NOTE(review): several short lines of this function (the remaining
# parameters, the progress messages, the dry-run test and the actual write)
# are not visible in this excerpt; the comments here cover only what is shown.
556 def _set_contents(filename, contents, original_contents=None, dry_run=False,
# Load the current contents only when the caller did not pass them in.
558 if original_contents == None and os.path.isfile(filename):
559 f = open(filename, 'r')
560 original_contents = f.read()
# The trailing comma keeps the cursor on the same line (Python 2 print
# statement), so a later print can complete the line.
563 print "checking %s ... " % filename,
564 if contents != original_contents:
# original_contents is still None here when the file did not exist before.
566 if original_contents == None:
# At higher verbosity a unified diff between the old and the new contents is
# built, labelled a/<filename> and b/<filename>.
570 if verbose > 1 and original_contents != None:
572 difflib.unified_diff(
573 original_contents.splitlines(), contents.splitlines(),
574 fromfile=os.path.normpath(os.path.join('a', filename)),
575 tofile=os.path.normpath(os.path.join('b', filename)),
# NOTE(review): file() exists only on Python 2; the read above uses open().
578 f = file(filename, 'w')
# Regenerate the AUTHORS file: collect the author list from `authors_fn`,
# canonicalize it through ALIASES, and hand the resulting text to
# _set_contents(), which honours `dry_run` and `verbose`.
586 def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
587 authors = authors_fn()
# _replace_aliases() also sorts and de-duplicates the names.
588 authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
# NOTE(review): the second value substituted into this template is not
# visible in this excerpt -- presumably the authors, one per line; confirm.
589 new_contents = '%s was written by:\n%s\n' % (
590 PROJECT_INFO['project'],
593 _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)
def update_file(filename, original_year_fn=original_year, authors_fn=authors,
                dry_run=False, verbose=0):
    """Regenerate the copyright blurb(s) of a single file.

    The first year and the author list come from `original_year_fn`
    and `authors_fn`, both called with `filename`; authors are
    canonicalized through ``ALIASES``.  The updated text is passed to
    ``_set_contents`` together with the text that was read, along with
    `dry_run` and `verbose`.
    """
    f = open(filename, 'r')
    try:
        contents = f.read()
    finally:
        # Close the handle even if reading fails.
        f.close()

    first_year = original_year_fn(filename)
    names = authors_fn(filename)
    names = _replace_aliases(names, with_email=True, aliases=ALIASES)

    new_contents = _update_copyright(contents, first_year, names)
    _set_contents(filename, contents=new_contents, original_contents=contents,
                  dry_run=dry_run, verbose=verbose)
def update_files(files=None, dry_run=False, verbose=0):
    """Update the copyright blurbs of `files`.

    If `files` is ``None`` or empty, every file found below the current
    directory is considered instead.  Files for which ``ignored_file``
    returns true are skipped; the rest are passed to ``update_file``
    along with `dry_run` and `verbose`.
    """
    if not files:
        files = []
        for dirpath, dirnames, filenames in os.walk('.'):
            for filename in filenames:
                files.append(os.path.join(dirpath, filename))

    for filename in files:
        if ignored_file(filename):
            continue
        update_file(filename, dry_run=dry_run, verbose=verbose)
# Command-line entry point: parse the options, then refresh the AUTHORS file
# and the copyright blurbs of the files named on the command line.
# NOTE(review): some short lines of this block (imports, the end of the usage
# text, and the body of the --test branch) are not visible in this excerpt.
625 if __name__ == '__main__':
629 usage = """%%prog [options] [file ...]
631 Update copyright information in source code with information from
632 the %(vcs)s repository. Run from the %(project)s repository root.
634 Replaces every line starting with '^# Copyright' and continuing with
635 '^#' with an auto-generated copyright blurb. If you want to add
636 #-commented material after a copyright blurb, please insert a blank
637 line between the blurb and your comment, so the next run of
638 ``update_copyright.py`` doesn't clobber your comment.
640 If no files are given, a list of files to update is generated
# Options: --test (run the internal tests), --dry-run (make no changes) and
# -v/--verbose (may be repeated; each occurrence raises the verbosity).
643 p = optparse.OptionParser(usage)
644 p.add_option('--test', dest='test', default=False,
645 action='store_true', help='Run internal tests and exit')
646 p.add_option('--dry-run', dest='dry_run', default=False,
647 action='store_true', help="Don't make any changes")
648 p.add_option('-v', '--verbose', dest='verbose', default=0,
649 action='count', help='Increment verbosity')
650 options,args = p.parse_args()
652 if options.test == True:
# The positional arguments are the files to update; update_files() scans the
# current directory tree itself when none are given.
656 update_authors(dry_run=options.dry_run, verbose=options.verbose)
657 update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)