#!/usr/bin/env python
#
# Copyright

"""Update copyright information with information from the VCS repository.

Run from the project's repository root.

Replaces every line starting with ``^# Copyright`` and continuing with
``^#`` with an auto-generated copyright blurb.  If you want to add
``#``-commented material after a copyright blurb, please insert a blank
line between the blurb and your comment, so the next run of
``update_copyright.py`` doesn't clobber your comment.

If no files are given, a list of files to update is generated
automatically.
"""

import logging as _logging

from update_copyright import LOG as _LOG
from update_copyright.project import Project


def main(argv=None):
    """Parse the command line and run the requested update steps.

    `argv` is the list of arguments to parse, without the program
    name.  ``None`` lets `optparse` fall back to ``sys.argv[1:]``.
    """
    import optparse

    # optparse substitutes "%prog" itself.  The string is not
    # %-formatted here, so a doubled "%%" would be printed literally.
    usage = '%prog [options] [file ...]'

    p = optparse.OptionParser(usage=usage, description=__doc__)
    p.add_option('--config', dest='config', default='.update-copyright.conf',
                 metavar='PATH', help='path to project config file (%default)')
    p.add_option('--no-authors', dest='authors', default=True,
                 action='store_false', help="Don't generate AUTHORS")
    p.add_option('--no-files', dest='files', default=True,
                 action='store_false', help="Don't update file copyrights")
    p.add_option('--no-pyfile', dest='pyfile', default=True,
                 action='store_false', help="Don't update the pyfile")
    p.add_option('--dry-run', dest='dry_run', default=False,
                 action='store_true', help="Don't make any changes")
    p.add_option('-v', '--verbose', dest='verbose', default=0,
                 action='count', help='Increment verbosity')
    options, args = p.parse_args(argv)

    # Each -v lowers the threshold by one level, but never below DEBUG.
    # Level 0 is NOTSET, which makes the logger defer to its parent, so
    # a fourth -v would make the output *less* verbose.
    _LOG.setLevel(max(_logging.DEBUG, _logging.ERROR - 10*options.verbose))

    project = Project()
    # Close the config file even if parsing it fails.
    with open(options.config, 'r') as config:
        project.load_config(config)
    if options.authors:
        project.update_authors(dry_run=options.dry_run)
    if options.files:
        project.update_files(files=args, dry_run=options.dry_run)
    if options.pyfile:
        project.update_pyfile(dry_run=options.dry_run)


if __name__ == '__main__':
    main()
#
# update-copyright is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with update-copyright.  If not, see
# <http://www.gnu.org/licenses/>.

"""Automatically update copyright boilerplate.

This script is adapted from one written for `Bugs Everywhere`_ and
later modified for `Hooke`_ before returning to `Bugs Everywhere`_.  I
finally gave up on maintaining separate versions, so here it is as a
stand-alone module.

.. _Bugs Everywhere: http://bugseverywhere.org/
.. _Hooke: http://code.google.com/p/hooke/
"""

import difflib
import email.utils
import os
import os.path
import sys
import textwrap
import time


PROJECT_INFO = {
    'project': 'update-copyright',
    'vcs': 'Git',
    }

# Break "copyright" into "copy" and "right" to avoid matching the
# REGEXP if we decide to go back to regexps.
COPY_RIGHT_TEXT = [
    'This file is part of %(project)s.',
    ('%(project)s is free software: you can redistribute it and/or '
     'modify it under the terms of the GNU General Public License as '
     'published by the Free Software Foundation, either version 3 of the '
     'License, or (at your option) any later version.'),
    ('%(project)s is distributed in the hope that it will be useful, but '
     'WITHOUT ANY WARRANTY; without even the implied warranty of '
     'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU '
     'General Public License for more details.'),
    ('You should have received a copy of the GNU General Public License '
     'along with %(project)s. If not, see <http://www.gnu.org/licenses/>.'),
    ]

SHORT_COPY_RIGHT_TEXT = [
    ('%(project)s comes with ABSOLUTELY NO WARRANTY and is licensed under '
     'the GNU General Public License. For details, %(get-details)s.'),
    ]

COPY_RIGHT_TAG = '-xyz-COPY' + '-RIGHT-zyx-'  # unlikely to occur in the wild

# Convert author names to canonical forms.
# ALIASES[<canonical name>] = <list of aliases>
# for example,
# ALIASES = {
#     'John Doe <jdoe@a.com>':
#         ['John Doe', 'jdoe', 'J. Doe <j@doe.net>'],
#     }
# Git-based projects are encouraged to use .mailmap instead of
# ALIASES.  See git-shortlog(1) for details.
ALIASES = {}

# List of paths that should not be scanned for copyright updates.
# IGNORED_PATHS = ['./.git/']
IGNORED_PATHS = ['./.git']
# List of files that should not be scanned for copyright updates.
# IGNORED_FILES = ['COPYING']
IGNORED_FILES = ['COPYING']

# Work around missing author holes in the VCS history.
# AUTHOR_HACKS[<path tuple>] = [<missing authors>]
# NOTE(review): the VCS helpers use AUTHOR_HACKS as a default argument,
# so it must be defined here; the example below is a reconstruction.
# AUTHOR_HACKS = {
#     ('path', 'to', 'module.py'): ['John Doe'],
#     }
AUTHOR_HACKS = {}

# Work around missing year holes in the VCS history.
# YEAR_HACKS[<path tuple>] = <original year>
# for example, if module.py was published in 2008 but the VCS history
# only goes back to 2010:
# YEAR_HACKS = {
#     ('path', 'to', 'module.py'):2008,
#     }
YEAR_HACKS = {}

# Helpers for VCS-specific commands

def splitpath(path):
    """Split a path into a tuple of its elements.

    The path is normalized first, so a leading ``./`` disappears.  The
    root of an absolute path is dropped rather than returned as an
    element.

    Examples
    --------

    >>> splitpath(os.path.join('a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join(os.sep, 'a', 'b'))
    ('a', 'b')
    """
    path = os.path.normpath(path)
    elements = []
    while True:
        dirname, basename = os.path.split(path)
        if basename:
            elements.append(basename)
        # `dirname == path` means we reached a filesystem root, where
        # os.path.split() stops making progress.
        if dirname in ('', '.') or dirname == path:
            break
        path = dirname
    elements.reverse()
    return tuple(elements)
- """ - try : - if _POSIX: - q = subprocess.Popen(args, stdin=subprocess.PIPE, - stdout=stdout, stderr=stderr) - else: - assert _MSWINDOWS == True, 'invalid platform' - # win32 don't have os.execvp() so run the command in a shell - q = subprocess.Popen(args, stdin=subprocess.PIPE, - stdout=stdout, stderr=stderr, shell=True) - except OSError, e: - raise ValueError([args, e]) - stdout,stderr = q.communicate(input=stdin) - status = q.wait() - if status not in expect: - raise ValueError([args, status, stdout, stderr]) - return status, stdout, stderr - - def git_cmd(*args): - status,stdout,stderr = invoke(['git'] + list(args)) - return stdout.rstrip('\n') - - version = git_cmd('--version').split(' ')[-1] - if version.startswith('1.5.'): - # Author name - author_format = '--pretty=format:%an <%ae>' - year_format = ['--pretty=format:%ai'] # Author date - # YYYY-MM-DD HH:MM:SS Z - # Earlier versions of Git don't seem to recognize --date=short - else: - author_format = '--pretty=format:%aN <%aE>' - year_format = ['--pretty=format:%ad', # Author date - '--date=short'] # YYYY-MM-DD - - def original_year(filename=None, year_hacks=YEAR_HACKS): - args = ['log'] + year_format - if filename is not None: - args.extend(['--follow'] + [filename]) - output = git_cmd(*args) - if version.startswith('1.5.'): - output = '\n'.join([x.split()[0] for x in output.splitlines()]) - years = [int(line.split('-', 1)[0]) for line in output.splitlines()] - if filename is None: - years.extend(year_hacks.values()) - elif splitpath(filename) in year_hacks: - years.append(year_hacks[splitpath(filename)]) - years.sort() - return years[0] - - def authors(filename, author_hacks=AUTHOR_HACKS): - output = git_cmd('log', '--follow', author_format, - filename) - ret = list(set(output.splitlines())) - if splitpath(filename) in author_hacks: - ret.extend(author_hacks[splitpath(filename)]) - return ret - - def authors_list(author_hacks=AUTHOR_HACKS): - output = git_cmd('log', author_format) - ret = 
list(set(output.splitlines())) - for path,authors in author_hacks.items(): - ret.extend(authors) - return ret - - def is_versioned(filename): - output = git_cmd('log', '--follow', filename) - if len(output) == 0: - return False - return True - -elif PROJECT_INFO['vcs'] == 'Mercurial': - - import StringIO - import mercurial - import mercurial.dispatch - - def mercurial_cmd(*args): - cwd = os.getcwd() - stdout = sys.stdout - stderr = sys.stderr - tmp_stdout = StringIO.StringIO() - tmp_stderr = StringIO.StringIO() - sys.stdout = tmp_stdout - sys.stderr = tmp_stderr - try: - mercurial.dispatch.dispatch(list(args)) - finally: - os.chdir(cwd) - sys.stdout = stdout - sys.stderr = stderr - return (tmp_stdout.getvalue().rstrip('\n'), - tmp_stderr.getvalue().rstrip('\n')) - - def original_year(filename=None, year_hacks=YEAR_HACKS): - args = [ - '--template', '{date|shortdate}\n', - # shortdate filter: YEAR-MONTH-DAY - ] - if filename != None: - args.extend(['--follow', filename]) - output,error = mercurial_cmd('log', *args) - years = [int(line.split('-', 1)[0]) for line in output.splitlines()] - if filename == None: - years.extend(year_hacks.values()) - elif splitpath(filename) in year_hacks: - years.append(year_hacks[splitpath(filename)]) - years.sort() - return years[0] - - def authors(filename, author_hacks=AUTHOR_HACKS): - output,error = mercurial_cmd('log', '--follow', - '--template', '{author}\n', - filename) - ret = list(set(output.splitlines())) - if splitpath(filename) in author_hacks: - ret.extend(author_hacks[splitpath(filename)]) - return ret - - def authors_list(author_hacks=AUTHOR_HACKS): - output,error = mercurial_cmd('log', '--template', '{author}\n') - ret = list(set(output.splitlines())) - for path,authors in author_hacks.items(): - ret.extend(authors) - return ret - - def is_versioned(filename): - output,error = mercurial_cmd('log', '--follow', filename) - if len(error) > 0: - return False - return True - -elif PROJECT_INFO['vcs'] == 'Bazaar': - - import 
StringIO - import bzrlib - import bzrlib.builtins - import bzrlib.log - - class LogFormatter (bzrlib.log.LogFormatter): - supports_merge_revisions = True - preferred_levels = 0 - supports_deta = False - supports_tags = False - supports_diff = False - - def log_revision(self, revision): - raise NotImplementedError - - class YearLogFormatter (LogFormatter): - def log_revision(self, revision): - self.to_file.write( - time.strftime('%Y', time.gmtime(revision.rev.timestamp)) - +'\n') - - class AuthorLogFormatter (LogFormatter): - def log_revision(self, revision): - authors = revision.rev.get_apparent_authors() - self.to_file.write('\n'.join(authors)+'\n') - - def original_year(filename=None, year_hacks=YEAR_HACKS): - cmd = bzrlib.builtins.cmd_log() - cmd.outf = StringIO.StringIO() - kwargs = {'log_format':YearLogFormatter, 'levels':0} - if filename != None: - kwargs['file_list'] = [filename] - cmd.run(**kwargs) - years = [int(year) for year in set(cmd.outf.getvalue().splitlines())] - if filename == None: - years.append(year_hacks.values()) - elif splitpath(filename) in year_hacks: - years.append(year_hacks[splitpath(filename)]) - years.sort() - return years[0] - - def authors(filename, author_hacks=AUTHOR_HACKS): - cmd = bzrlib.builtins.cmd_log() - cmd.outf = StringIO.StringIO() - cmd.run(file_list=[filename], log_format=AuthorLogFormatter, levels=0) - ret = list(set(cmd.outf.getvalue().splitlines())) - if splitpath(filename) in author_hacks: - ret.extend(author_hacks[splitpath(filename)]) - return ret - - def authors_list(author_hacks=AUTHOR_HACKS): - cmd = bzrlib.builtins.cmd_log() - cmd.outf = StringIO.StringIO() - cmd.run(log_format=AuthorLogFormatter, levels=0) - output = cmd.outf.getvalue() - ret = list(set(cmd.outf.getvalue().splitlines())) - for path,authors in author_hacks.items(): - ret.extend(authors) - return ret - - def is_versioned(filename): - cmd = bzrlib.builtins.cmd_log() - cmd.outf = StringIO.StringIO() - cmd.run(file_list=[filename]) - return True - 
-else: - raise NotImplementedError('Unrecognized VCS: %(vcs)s' % PROJECT_INFO) - -# General utility commands - -def _strip_email(*args): - """Remove email addresses from a series of names. - - Examples - -------- - - >>> _strip_email('J Doe') - ['J Doe'] - >>> _strip_email('J Doe ') - ['J Doe'] - >>> _strip_email('J Doe ', 'JJJ Smith ') - ['J Doe', 'JJJ Smith'] - """ - args = list(args) - for i,arg in enumerate(args): - if arg == None: - continue - author,addr = email.utils.parseaddr(arg) - if author == '': - author = arg - args[i] = author - return args - -def _reverse_aliases(aliases): - """Reverse an `aliases` dict. - - Input: key: canonical name, value: list of aliases - Output: key: alias, value: canonical name - - Examples - -------- - - >>> aliases = { - ... 'J Doe ':['Johnny ', 'J'], - ... 'JJJ Smith ':['Jingly '], - ... None:['Anonymous '], - ... } - >>> r = _reverse_aliases(aliases) - >>> for item in sorted(r.items()): - ... print item - ('Anonymous ', None) - ('J', 'J Doe ') - ('Jingly ', 'JJJ Smith ') - ('Johnny ', 'J Doe ') - """ - output = {} - for canonical_name,_aliases in aliases.items(): - for alias in _aliases: - output[alias] = canonical_name - return output - -def _replace_aliases(authors, with_email=True, aliases=None): - """Consolidate and sort `authors`. - - Make the replacements listed in the `aliases` dict (key: canonical - name, value: list of aliases). If `aliases` is ``None``, default - to ``ALIASES``. - - >>> aliases = { - ... 'J Doe ':['Johnny '], - ... 'JJJ Smith ':['Jingly '], - ... None:['Anonymous '], - ... } - >>> authors = [ - ... 'JJJ Smith ', 'Johnny ', - ... 
'Jingly ', 'J Doe ', 'Anonymous '] - >>> _replace_aliases(authors, with_email=True, aliases=aliases) - ['J Doe ', 'JJJ Smith '] - >>> _replace_aliases(authors, with_email=False, aliases=aliases) - ['J Doe', 'JJJ Smith'] - """ - if aliases == None: - aliases = ALIASES - rev_aliases = _reverse_aliases(aliases) - for i,author in enumerate(authors): - if author in rev_aliases: - authors[i] = rev_aliases[author] - authors = sorted(list(set(authors))) - if None in authors: - authors.remove(None) - if with_email == False: - authors = _strip_email(*authors) - return authors - -def _long_author_formatter(copyright_year_string, authors): - """ - >>> print '\\n'.join(_long_author_formatter( - ... copyright_year_string='Copyright (C) 1990-2010', - ... authors=['Jack', 'Jill', 'John'])) - Copyright (C) 1990-2010 Jack - Jill - John - """ - lines = ['%s %s' % (copyright_year_string, authors[0])] - for author in authors[1:]: - lines.append(' '*(len(copyright_year_string)+1) + author) - return lines - -def _short_author_formatter(copyright_year_string, authors): - """ - >>> print '\\n'.join(_short_author_formatter( - ... copyright_year_string='Copyright (C) 1990-2010', - ... authors=['Jack', 'Jill', 'John']*5)) - Copyright (C) 1990-2010 Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John - """ - blurb = '%s %s' % (copyright_year_string, ', '.join(authors)) - return [blurb] - -def _copyright_string(original_year, final_year, authors, - text=COPY_RIGHT_TEXT, extra_info={}, - author_format_fn=_long_author_formatter, - formatter_kwargs={}, prefix='', wrap=True, - **wrap_kwargs): - """ - >>> print _copyright_string(original_year=2005, - ... final_year=2005, - ... authors=['A ', 'B '], - ... prefix='# ' - ... ) # doctest: +ELLIPSIS - # Copyright (C) 2005 A - # B - # - # This file... - >>> print _copyright_string(original_year=2005, - ... final_year=2009, - ... authors=['A ', 'B '] - ... 
def _copyright_string(original_year, final_year, authors,
                      text=COPY_RIGHT_TEXT, extra_info=None,
                      author_format_fn=_long_author_formatter,
                      formatter_kwargs=None, prefix='', wrap=True,
                      **wrap_kwargs):
    """Build a complete copyright blurb.

    The blurb is the author lines produced by `author_format_fn`
    followed by the `text` paragraphs, each interpolated with
    ``PROJECT_INFO`` updated by `extra_info`.  Every line starts with
    `prefix`.  When `wrap` is true the paragraphs are wrapped with
    `textwrap.fill` (extra keyword arguments are passed through);
    otherwise each paragraph stays on one line.

    >>> print(_copyright_string(original_year=2005, final_year=2005,
    ...                         authors=['A <a@a.com>', 'B <b@b.com>'],
    ...                         prefix='# '))  # doctest: +ELLIPSIS
    # Copyright (C) 2005 A <a@a.com>
    #                    B <b@b.com>
    #
    # This file...
    >>> print(_copyright_string(original_year=2005, final_year=2009,
    ...                         authors=['A <a@a.com>', 'B <b@b.com>']
    ...                         ))  # doctest: +ELLIPSIS
    Copyright (C) 2005-2009 A <a@a.com>
                            B <b@b.com>
    <BLANKLINE>
    This file...
    >>> print(_copyright_string(original_year=2005, final_year=2005,
    ...                         authors=['A <a@a.com>', 'B <b@b.com>'],
    ...                         text=SHORT_COPY_RIGHT_TEXT,
    ...                         author_format_fn=_short_author_formatter,
    ...                         extra_info={'get-details':'%(get-details)s'},
    ...                         prefix='', width=50))
    Copyright (C) 2005 A <a@a.com>, B <b@b.com>
    <BLANKLINE>
    update-copyright comes with ABSOLUTELY NO WARRANTY
    and is licensed under the GNU General Public
    License. For details, %(get-details)s.
    """
    # None instead of {} avoids sharing one dict between calls.
    if extra_info is None:
        extra_info = {}
    if formatter_kwargs is None:
        formatter_kwargs = {}
    for key in ('initial_indent', 'subsequent_indent'):
        wrap_kwargs.setdefault(key, prefix)

    if original_year == final_year:
        date_range = '%s' % original_year
    else:
        date_range = '%s-%s' % (original_year, final_year)
    copyright_year_string = 'Copyright (C) %s' % date_range

    lines = [prefix + line
             for line in author_format_fn(copyright_year_string, authors,
                                          **formatter_kwargs)]

    info = dict(PROJECT_INFO)
    info.update(extra_info)
    text = [paragraph % info for paragraph in text]

    if wrap:
        text = [textwrap.fill(p, **wrap_kwargs) for p in text]
    else:
        # An unwrapped paragraph is a single line, so only the initial
        # indent applies.  Previously a non-empty prefix tripped an
        # assert here.
        text = [wrap_kwargs['initial_indent'] + p for p in text]
    sep = '\n%s\n' % prefix.rstrip()
    return sep.join(['\n'.join(lines)] + text)

def _tag_copyright(contents):
    """Replace each copyright blurb in `contents` with ``COPY_RIGHT_TAG``.

    A blurb starts at a line beginning with ``# Copyright`` and runs
    until the first line that does not begin with ``#``.  A trailing
    newline is only emitted if `contents` had one, so files without a
    blurb come back unchanged.

    >>> contents = '''Some file
    ... bla bla
    ... # Copyright (copyright begins)
    ... # (copyright continues)
    ... # bla bla bla
    ... (copyright ends)
    ... bla bla bla
    ... '''
    >>> print(_tag_copyright(contents).replace('COPY-RIGHT', 'CR'))
    Some file
    bla bla
    -xyz-CR-zyx-
    (copyright ends)
    bla bla bla
    <BLANKLINE>
    """
    lines = []
    in_blurb = False
    for line in contents.splitlines():
        if not in_blurb and line.startswith('# Copyright'):
            in_blurb = True
            lines.append(COPY_RIGHT_TAG)
        elif in_blurb and not line.startswith('#'):
            in_blurb = False
        if not in_blurb:
            lines.append(line)
    ending = '\n' if contents.endswith(('\n', '\r')) else ''
    return '\n'.join(lines) + ending

def _update_copyright(contents, original_year, authors):
    """Return `contents` with every copyright blurb regenerated.

    The new blurb runs from `original_year` to the current (UTC) year
    and uses a ``# `` prefix.

    >>> contents = '''Some file
    ... bla bla
    ... # Copyright (copyright begins)
    ... # (copyright continues)
    ... # bla bla bla
    ... (copyright ends)
    ... bla bla bla
    ... '''
    >>> print(_update_copyright(contents, 2008, ['Jack', 'Jill']
    ...     ))  # doctest: +ELLIPSIS, +REPORT_UDIFF
    Some file
    bla bla
    # Copyright (C) 2008-... Jack
    #                         Jill
    #
    # This file...
    (copyright ends)
    bla bla bla
    <BLANKLINE>
    """
    current_year = time.gmtime().tm_year
    copyright_string = _copyright_string(
        original_year, current_year, authors, prefix='# ')
    contents = _tag_copyright(contents)
    return contents.replace(COPY_RIGHT_TAG, copyright_string)
def ignored_file(filename, ignored_paths=None, ignored_files=None,
                 check_disk=True, check_vcs=True):
    """Return ``True`` if `filename` should not be scanned.

    A file is ignored when it is missing on disk (`check_disk`), lies
    inside one of `ignored_paths`, has a basename listed in
    `ignored_files`, or is not versioned (`check_vcs`).  Paths are
    matched on whole components, so ``./.git`` does not also match
    ``./.gitignore``.

    >>> ignored_paths = ['./a/', './b/']
    >>> ignored_files = ['x', 'y']
    >>> ignored_file('./a/z', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/z', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./ab/x', ignored_paths, ignored_files, False, False)
    True
    >>> ignored_file('./ab/xy', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./z', ignored_paths, ignored_files, False, False)
    False
    >>> ignored_file('./.gitignore', ['./.git'], [], False, False)
    False
    """
    if ignored_paths is None:
        ignored_paths = IGNORED_PATHS
    if ignored_files is None:
        ignored_files = IGNORED_FILES
    if check_disk and not os.path.isfile(filename):
        return True
    for path in ignored_paths:
        root = path.rstrip('/')
        if filename == root or filename.startswith(root + '/'):
            return True
    if os.path.basename(filename) in ignored_files:
        return True
    if check_vcs and not is_versioned(filename):
        return True
    return False

def _set_contents(filename, contents, original_contents=None, dry_run=False,
                  verbose=0):
    """Write `contents` to `filename` if they differ from what is there.

    `original_contents` is read from disk when not supplied.  With
    `verbose` > 0 a status line is printed, and with `verbose` > 1 a
    unified diff as well.  Nothing is written when `dry_run` is true.
    """
    if original_contents is None and os.path.isfile(filename):
        with open(filename, 'r') as f:
            original_contents = f.read()
    changed = contents != original_contents
    if verbose > 0:
        if not changed:
            status = '[no change]'
        elif original_contents is None:
            status = '[creating]'
        else:
            status = '[updating]'
        print('checking %s ...  %s' % (filename, status))
    if not changed:
        return
    if verbose > 1 and original_contents is not None:
        print('\n'.join(
            difflib.unified_diff(
                original_contents.splitlines(), contents.splitlines(),
                fromfile=os.path.normpath(os.path.join('a', filename)),
                tofile=os.path.normpath(os.path.join('b', filename)),
                n=3, lineterm='')))
    if not dry_run:
        with open(filename, 'w') as f:
            f.write(contents)

# Update commands

def update_authors(authors_fn=authors_list, dry_run=False, verbose=0):
    """Regenerate the ``AUTHORS`` file from the VCS author list."""
    authors = authors_fn()
    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)
    new_contents = '%s was written by:\n%s\n' % (
        PROJECT_INFO['project'],
        '\n'.join(authors)
        )
    _set_contents('AUTHORS', new_contents, dry_run=dry_run, verbose=verbose)

def update_file(filename, original_year_fn=original_year, authors_fn=authors,
                dry_run=False, verbose=0):
    """Regenerate the copyright blurb(s) of a single file."""
    with open(filename, 'r') as f:
        contents = f.read()

    original_year = original_year_fn(filename)
    authors = authors_fn(filename)
    authors = _replace_aliases(authors, with_email=True, aliases=ALIASES)

    new_contents = _update_copyright(contents, original_year, authors)
    _set_contents(filename, contents=new_contents, original_contents=contents,
                  dry_run=dry_run, verbose=verbose)

def update_files(files=None, dry_run=False, verbose=0):
    """Update every file in `files` that is not ignored.

    With no `files`, walk the current directory to build the list.
    """
    if not files:
        files = []
        for dirpath, dirnames, filenames in os.walk('.'):
            for filename in filenames:
                files.append(os.path.join(dirpath, filename))

    for filename in files:
        if ignored_file(filename):
            continue
        update_file(filename, dry_run=dry_run, verbose=verbose)

def update_pyfile(path, original_year_fn=original_year,
                  authors_fn=authors_list, dry_run=False, verbose=0):
    """Write a Python module at `path` exposing the project license.

    The generated module defines ``LICENSE`` and ``short_license()``.
    """
    original_year = original_year_fn()
    current_year = time.gmtime().tm_year
    authors = authors_fn()
    authors = _replace_aliases(authors, with_email=False, aliases=ALIASES)
    paragraphs = _copyright_string(
        original_year, current_year, authors,
        text=SHORT_COPY_RIGHT_TEXT,
        extra_info={'get-details':'%(get-details)s'},
        author_format_fn=_short_author_formatter, wrap=False,
        ).split('\n\n')
    lines = [
        _copyright_string(original_year, current_year, authors, prefix='# '),
        '', 'import textwrap', '', '',
        'LICENSE = """',
        _copyright_string(original_year, current_year, authors, prefix=''),
        '""".strip()',
        '',
        'def short_license(extra_info, wrap=True, **kwargs):',
        '    paragraphs = [',
        ]
    for p in paragraphs:
        lines.append("        '%s' %% extra_info," % p.replace("'", r"\'"))
    lines.extend([
            '        ]',
            '    if wrap == True:',
            '        for i,p in enumerate(paragraphs):',
            '            paragraphs[i] = textwrap.fill(p, **kwargs)',
            r"    return '\n\n'.join(paragraphs)",
            ])
    new_contents = '\n'.join(lines)+'\n'
    _set_contents(path, new_contents, dry_run=dry_run, verbose=verbose)


def test():
    """Run the module's doctests."""
    import doctest
    doctest.testmod()

if __name__ == '__main__':
    import optparse
    import sys

    usage = """%%prog [options] [file ...]

Update copyright information in source code with information from
the %(vcs)s repository. Run from the %(project)s repository root.

Replaces every line starting with '^# Copyright' and continuing with
'^#' with an auto-generated copyright blurb. If you want to add
#-commented material after a copyright blurb, please insert a blank
line between the blurb and your comment, so the next run of
``update_copyright.py`` doesn't clobber your comment.

If no files are given, a list of files to update is generated
automatically.
""" % PROJECT_INFO
    p = optparse.OptionParser(usage)
    p.add_option('--pyfile', dest='pyfile',
                 default='update_copyright/license.py', metavar='PATH',
                 help='Write project license info to a Python module at PATH')
    p.add_option('--test', dest='test', default=False,
                 action='store_true', help='Run internal tests and exit')
    p.add_option('--dry-run', dest='dry_run', default=False,
                 action='store_true', help="Don't make any changes")
    p.add_option('-v', '--verbose', dest='verbose', default=0,
                 action='count', help='Increment verbosity')
    options, args = p.parse_args()

    if options.test:
        test()
        sys.exit(0)

    update_authors(dry_run=options.dry_run, verbose=options.verbose)
    update_files(files=args, dry_run=options.dry_run, verbose=options.verbose)
    if options.pyfile is not None:
        update_pyfile(path=options.pyfile,
                      dry_run=options.dry_run, verbose=options.verbose)
# Copyright

"""Tools for setting up package logging.

This module is separate from `tools` to avoid module dependency
cycles.  This module has no internal dependencies, while `tools`
depends on many of the other modules.  With this module separate, the
other internal modules have access to the default logger before the
package configuration is built up enough to configure it according to
your external specifications.
"""

import logging as _logging


def get_basic_logger(name, level=_logging.WARNING):
    """Create and return a basic logger.

    The logger called `name` is set to `level` and gets one stream
    handler that passes everything from ``DEBUG`` up, so filtering is
    controlled by the logger's own level.  Calling this again with
    the same `name` updates the level but reuses the existing handler,
    so messages are never emitted twice.
    """
    log = _logging.getLogger(name)
    log.setLevel(level)
    # Cache handlers for easy swapping depending on config settings.
    # The cache also tells us whether this logger is already set up.
    cache = log.__dict__.setdefault('_handler_cache', {})
    if 'stream' not in cache:
        formatter = _logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        stream_handler = _logging.StreamHandler()
        stream_handler.setLevel(_logging.DEBUG)
        stream_handler.setFormatter(formatter)
        log.addHandler(stream_handler)
        cache['stream'] = stream_handler
    return log
+# +# update-copyright is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with update-copyright. If not, see +# . + +"""Project-specific configuration. + +# Convert author names to canonical forms. +# ALIASES[] = +# for example, +# ALIASES = { +# 'John Doe ': +# ['John Doe', 'jdoe', 'J. Doe '], +# } +# Git-based projects are encouraged to use .mailmap instead of +# ALIASES. See git-shortlog(1) for details. + +# List of paths that should not be scanned for copyright updates. +# IGNORED_PATHS = ['./.git/'] +IGNORED_PATHS = ['./.git'] +# List of files that should not be scanned for copyright updates. +# IGNORED_FILES = ['COPYING'] +IGNORED_FILES = ['COPYING'] + +# Work around missing author holes in the VCS history. +# AUTHOR_HACKS[] = [] = +# for example, if module.py was published in 2008 but the VCS history +# only goes back to 2010: +# YEAR_HACKS = { +# ('path', 'to', 'module.py'):2008, +# } +YEAR_HACKS = {} +""" + +import ConfigParser as _configparser +import fnmatch as _fnmatch +import os.path as _os_path +import sys +import time as _time + +from . import LOG as _LOG +from . 
class Project (object):
    """Copyright settings for one project and the updates that use them.

    A project carries a name, a version-control backend, the long and
    short copyright blurbs (lists of paragraph strings), a list of
    ignored path patterns and the optional location of a generated
    license module.  Settings can be passed to the constructor or read
    from a config file with `load_config`.
    """
    def __init__(self, name=None, vcs=None, copyright=None,
                 short_copyright=None):
        self._name = name
        self._vcs = vcs
        self._copyright = copyright
        self._short_copyright = short_copyright
        self.with_authors = False
        self.with_files = False
        self._ignored_paths = None
        self._pyfile = None

        # unlikely to occur in the wild :p
        self._copyright_tag = '-xyz-COPY' + '-RIGHT-zyx-'

    @staticmethod
    def _load_backend(vcs):
        """Instantiate the backend selected by the config value `vcs`.

        Raises NotImplementedError for an unknown name, or for a
        backend whose module failed to import (its class is then None).
        """
        if vcs == 'Git':
            backend = _GitBackend
        elif vcs == 'Bazaar':
            backend = _BazaarBackend
        elif vcs == 'Mercurial':
            backend = _MercurialBackend
        else:
            raise NotImplementedError('vcs: {}'.format(vcs))
        if backend is None:
            raise NotImplementedError(
                'vcs: {} (backend module could not be imported)'.format(vcs))
        return backend()

    def load_config(self, stream):
        """Read project settings from the config-file object `stream`.

        Recognized options are ``[project] name, vcs``, ``[copyright]
        long, short`` and ``[files] authors, files, ignored, pyfile``.
        Options (or whole sections) that are absent leave the current
        setting untouched.  ``authors`` and ``files`` are parsed as
        booleans; ``ignored`` is a comma-separated list of `fnmatch`
        patterns.

        Raises NotImplementedError for an unusable ``vcs`` value and
        ValueError for a malformed boolean.
        """
        p = _configparser.RawConfigParser()
        # `readfp` was renamed `read_file` in Python 3.2 and later removed
        reader = getattr(p, 'read_file', None) or p.readfp
        reader(stream)
        missing = (_configparser.NoSectionError,
                   _configparser.NoOptionError)

        def get(section, option):
            try:
                return p.get(section, option)
            except missing:
                return None

        name = get('project', 'name')
        if name is not None:
            self._name = name
        vcs = get('project', 'vcs')
        if vcs is not None:
            self._vcs = self._load_backend(vcs)
        long_text = get('copyright', 'long')
        if long_text is not None:
            self._copyright = long_text.splitlines()
        short_text = get('copyright', 'short')
        if short_text is not None:
            self._short_copyright = short_text.splitlines()
        # a raw string such as 'no' would be truthy, so parse booleans
        if get('files', 'authors') is not None:
            self.with_authors = p.getboolean('files', 'authors')
        if get('files', 'files') is not None:
            self.with_files = p.getboolean('files', 'files')
        ignored = get('files', 'ignored')
        if ignored is not None:
            # NOTE(review): comma is assumed as the list separator;
            # confirm against the documented config format.
            self._ignored_paths = [
                pattern.strip() for pattern in ignored.split(',')
                if pattern.strip()]
        pyfile = get('files', 'pyfile')
        if pyfile is not None:
            self._pyfile = pyfile

    def _info(self):
        """Return the substitution dict used to format copyright text."""
        return {
            'project': self._name,
            'vcs': self._vcs.name,
            }

    def update_authors(self, dry_run=False):
        """Rewrite ``AUTHORS`` from the authors reported by the VCS."""
        _LOG.info('update AUTHORS')
        authors = self._vcs.authors()
        new_contents = u'{} was written by:\n{}\n'.format(
            self._name, u'\n'.join(authors))
        _utils.set_contents('AUTHORS', new_contents, dry_run=dry_run)

    def update_file(self, filename, dry_run=False):
        """Regenerate the copyright blurb in the single file `filename`."""
        _LOG.info('update {}'.format(filename))
        contents = _utils.get_contents(filename=filename)
        original_year = self._vcs.original_year(filename=filename)
        authors = self._vcs.authors(filename=filename)
        new_contents = _utils.update_copyright(
            contents=contents, original_year=original_year, authors=authors,
            text=self._copyright, info=self._info(), prefix='# ',
            tag=self._copyright_tag)
        _utils.set_contents(
            filename=filename, contents=new_contents,
            original_contents=contents, dry_run=dry_run)

    def update_files(self, files=None, dry_run=False):
        """Update every file in `files` that is not ignored.

        When `files` is None or empty, all files below the current
        directory are considered.
        """
        if not files:
            files = _utils.list_files(root='.')
        for filename in files:
            if self._ignored_file(filename=filename):
                continue
            self.update_file(filename=filename, dry_run=dry_run)

    def update_pyfile(self, dry_run=False):
        """Write the generated license module to the configured pyfile.

        The module defines ``LICENSE`` (the long blurb) and a
        ``short_license(info, wrap=True, **kwargs)`` function built
        from the short blurb.  Nothing happens when no pyfile location
        is configured.
        """
        if self._pyfile is None:
            _LOG.info('no pyfile location configured, skip `update_pyfile`')
            return
        _LOG.info('update pyfile at {}'.format(self._pyfile))
        current_year = _time.gmtime()[0]
        original_year = self._vcs.original_year()
        authors = self._vcs.authors()
        lines = [
            _utils.copyright_string(
                original_year=original_year, final_year=current_year,
                authors=authors, text=self._copyright, info=self._info(),
                prefix='# '),
            '', 'import textwrap as _textwrap', '', '',
            'LICENSE = """',
            _utils.copyright_string(
                original_year=original_year, final_year=current_year,
                authors=authors, text=self._copyright, info=self._info(),
                prefix=''),
            '""".strip()',
            '',
            'def short_license(info, wrap=True, **kwargs):',
            '    paragraphs = [',
            ]
        paragraphs = _utils.copyright_string(
            original_year=original_year, final_year=current_year,
            authors=authors, text=self._short_copyright, info=self._info(),
            author_format_fn=_utils.short_author_formatter, wrap=False,
            ).split('\n\n')
        for p in paragraphs:
            # each paragraph becomes a single-quoted literal in the output
            lines.append("        '{}' % info,".format(
                    p.replace("'", r"\'")))
        lines.extend([
                '    ]',
                '    if wrap:',
                '        for i,p in enumerate(paragraphs):',
                '            paragraphs[i] = _textwrap.fill(p, **kwargs)',
                r"    return '\n\n'.join(paragraphs)",
                '',  # for terminal endline
                ])
        new_contents = '\n'.join(lines)
        _utils.set_contents(
            filename=self._pyfile, contents=new_contents, dry_run=dry_run)

    def _ignored_file(self, filename):
        """Return True if `filename` should be skipped.

        A file is skipped when its path as given, its normalized path
        or its basename matches one of the ignored `fnmatch` patterns,
        or when a VCS backend is configured and reports the file as
        unversioned.
        """
        if self._ignored_paths is not None:
            normalized = _os_path.normpath(filename)
            candidates = (
                filename, normalized, _os_path.basename(normalized))
            for pattern in self._ignored_paths:
                if any(_fnmatch.fnmatch(c, pattern) for c in candidates):
                    return True
        if self._vcs and not self._vcs.is_versioned(filename):
            return True
        return False
def long_author_formatter(copyright_year_string, authors):
    """Return one line per author, aligned under the first author.

    >>> for line in long_author_formatter('Copyright (C) 1990', ['Jack', 'Jill']):
    ...     print(line)
    Copyright (C) 1990 Jack
                       Jill
    """
    lines = ['%s %s' % (copyright_year_string, authors[0])]
    indent = ' ' * (len(copyright_year_string) + 1)
    lines.extend(indent + author for author in authors[1:])
    return lines

def short_author_formatter(copyright_year_string, authors):
    """Return a single-line, comma-separated author blurb (in a list).

    >>> short_author_formatter('Copyright (C) 1990', ['Jack', 'Jill'])
    ['Copyright (C) 1990 Jack, Jill']
    """
    blurb = '%s %s' % (copyright_year_string, ', '.join(authors))
    return [blurb]

def copyright_string(original_year, final_year, authors, text, info=None,
                     author_format_fn=long_author_formatter,
                     formatter_kwargs=None, prefix='', wrap=True,
                     **wrap_kwargs):
    """Build a complete copyright blurb.

    Parameters
    ----------
    original_year, final_year
        First and last copyright year; a single year is printed when
        they are equal.
    authors : list of str
    text : list of str
        Paragraphs following the author lines.  Each is ``%``-formatted
        with `info`; the list itself is left unmodified.
    info : dict, optional
        Substitution values for `text` (default: empty).
    author_format_fn : callable
        Called as ``fn(copyright_year_string, authors,
        **formatter_kwargs)``; returns the list of author lines.
    prefix : str
        Prepended to every line (e.g. ``'# '``).
    wrap : bool
        Wrap paragraphs with `textwrap.fill`, passing `wrap_kwargs`.

    Raises ValueError when ``wrap`` is false but a non-empty subsequent
    indent (by default, `prefix`) is requested, because unwrapped
    paragraphs cannot carry a prefix.

    >>> print(copyright_string(original_year=2005, final_year=2009,
    ...     authors=['A', 'B'], text=['BLURB'], prefix='# '))
    # Copyright (C) 2005-2009 A
    #                         B
    #
    # BLURB
    """
    if info is None:
        info = {}
    if formatter_kwargs is None:
        formatter_kwargs = {}
    for key in ('initial_indent', 'subsequent_indent'):
        wrap_kwargs.setdefault(key, prefix)

    if original_year == final_year:
        date_range = '%s' % original_year
    else:
        date_range = '%s-%s' % (original_year, final_year)
    copyright_year_string = 'Copyright (C) %s' % date_range

    lines = [
        prefix + line
        for line in author_format_fn(
            copyright_year_string, authors, **formatter_kwargs)]

    # Format into a new list: callers reuse `text` across several calls.
    paragraphs = []
    for paragraph in text:
        try:
            paragraphs.append(paragraph % info)
        except ValueError as e:
            _LOG.error(
                "{}: can't format {} with {}".format(e, paragraph, info))
            raise
        except TypeError as e:
            _LOG.error(
                ('{}: copyright text must be a list of paragraph strings, '
                 'not {}').format(e, repr(text)))
            raise

    if wrap:
        paragraphs = [_textwrap.fill(p, **wrap_kwargs) for p in paragraphs]
    elif wrap_kwargs['subsequent_indent'] != '':
        raise ValueError(
            'cannot indent unwrapped text with {!r}'.format(
                wrap_kwargs['subsequent_indent']))
    sep = '\n%s\n' % prefix.rstrip()
    return sep.join(['\n'.join(lines)] + paragraphs)

def tag_copyright(contents, tag=None):
    r"""Replace each copyright blurb in `contents` with `tag`.

    A blurb starts at a line beginning with ``# Copyright`` and extends
    over the following lines that begin with ``#``.  The result always
    ends with a newline.  `tag` must be a string.

    >>> tag_copyright('a\n# Copyright x\n# more\nb\n', tag='-xyz-')
    'a\n-xyz-\nb\n'
    """
    lines = []
    incopy = False
    for line in contents.splitlines():
        if not incopy and line.startswith('# Copyright'):
            incopy = True
            lines.append(tag)
        elif incopy and not line.startswith('#'):
            incopy = False
        if not incopy:
            lines.append(line)
    return '\n'.join(lines) + '\n'

def update_copyright(contents, tag=None, **kwargs):
    """Return `contents` with its copyright blurbs regenerated.

    Existing blurbs are first replaced by `tag` (see `tag_copyright`),
    then every occurrence of `tag` is replaced with a fresh
    `copyright_string` whose final year is the current (UTC) year.
    `kwargs` are passed to `copyright_string`.  `tag` must be a string
    that does not otherwise occur in `contents`.
    """
    current_year = _time.gmtime()[0]
    string = copyright_string(final_year=current_year, **kwargs)
    contents = tag_copyright(contents=contents, tag=tag)
    return contents.replace(tag, string)

def get_contents(filename):
    """Return the text of `filename`, or None if it is not a file."""
    if not _os_path.isfile(filename):
        return None
    with open(filename, 'r') as f:
        return f.read()

def set_contents(filename, contents, original_contents=None, dry_run=False):
    """Write `contents` to `filename` if they differ from what is there.

    `original_contents` may be supplied to avoid re-reading the file.
    With `dry_run` set, the change is logged but nothing is written.
    """
    if original_contents is None:
        original_contents = get_contents(filename=filename)
    _LOG.debug('check contents of {}'.format(filename))
    if contents == original_contents:
        _LOG.debug('no change in {}'.format(filename))
        return
    if original_contents is None:
        _LOG.info('creating {}'.format(filename))
    else:
        _LOG.info('updating {}'.format(filename))
        _LOG.debug('\n'.join(
                _difflib.unified_diff(
                    original_contents.splitlines(), contents.splitlines(),
                    fromfile=_os_path.normpath(
                        _os_path.join('a', filename)),
                    tofile=_os_path.normpath(_os_path.join('b', filename)),
                    n=3, lineterm='')))
    if not dry_run:
        with open(filename, 'w') as f:
            f.write(contents)

def list_files(root='.'):
    """Yield the path of every file below `root`.

    `os.walk` already reports directories prefixed with `root`, so
    `root` must not be joined on a second time.
    """
    for dirpath,dirnames,filenames in _os.walk(root):
        for filename in filenames:
            yield _os_path.join(dirpath, filename)
class VCSBackend (object):
    """Base class for version-control backends.

    Subclasses implement `_years`, `_authors` and `is_versioned`.  The
    public `original_year` and `authors` methods combine the VCS
    history with manual overrides:

    * `author_hacks`: dict mapping split paths (see `splitpath` in the
      backend utilities) to extra authors for that file.
    * `year_hacks`: dict mapping split paths to an extra year for that
      file.
    * `aliases`: dict mapping canonical author names to lists of
      aliases.
    """
    name = None

    def __init__(self, author_hacks=None, year_hacks=None, aliases=None):
        self._author_hacks = {} if author_hacks is None else author_hacks
        self._year_hacks = {} if year_hacks is None else year_hacks
        self._aliases = {} if aliases is None else aliases

    def _years(self, filename=None):
        """Return the set of years with commits (to `filename`, if given)."""
        raise NotImplementedError()

    def original_year(self, filename=None):
        """Return the earliest year on record.

        With `filename` None the whole project is considered, including
        every year hack; otherwise only the file's own history and its
        own year hack.
        """
        years = set(self._years(filename=filename))
        if filename is None:
            years.update(self._year_hacks.values())
        else:
            key = _utils.splitpath(filename)
            if key in self._year_hacks:
                # each hack is a single year, not a collection of years
                years.add(self._year_hacks[key])
        return sorted(years)[0]

    def _authors(self, filename=None):
        """Return the set of commit authors (of `filename`, if given)."""
        raise NotImplementedError()

    def authors(self, filename=None, with_emails=True):
        """Return the sorted, alias-consolidated list of authors.

        With `filename` None the whole project is considered, including
        every author hack; otherwise only the file's own history and
        its own author hack.  Email addresses are stripped when
        `with_emails` is false.
        """
        authors = set(self._authors(filename=filename))
        if filename is None:
            for hack_authors in self._author_hacks.values():
                authors.update(hack_authors)
        else:
            key = _utils.splitpath(filename)
            if key in self._author_hacks:
                authors.update(self._author_hacks[key])
        return _utils.replace_aliases(
            list(authors), with_email=with_emails, aliases=self._aliases)

    def is_versioned(self, filename=None):
        """Return True if `filename` is tracked by the VCS."""
        raise NotImplementedError()
class _LogFormatter (_bzrlib_log.LogFormatter):
    """Common settings for the single-purpose log formatters below."""
    supports_merge_revisions = True
    preferred_levels = 0
    # NOTE(review): the original spelled this `supports_deta`; the
    # bzrlib attribute is presumably `supports_delta` -- confirm.
    supports_delta = False
    supports_tags = False
    supports_diff = False

    def log_revision(self, revision):
        raise NotImplementedError


class _YearLogFormatter (_LogFormatter):
    """Write the (UTC) year of each revision, one per line."""
    def log_revision(self, revision):
        # imported here because the module does not import `time`
        import time as _time
        self.to_file.write(
            _time.strftime('%Y', _time.gmtime(revision.rev.timestamp))
            +'\n')


class _AuthorLogFormatter (_LogFormatter):
    """Write the apparent authors of each revision, one per line."""
    def log_revision(self, revision):
        authors = revision.rev.get_apparent_authors()
        self.to_file.write('\n'.join(authors)+'\n')


class BazaarBackend (_VCSBackend):
    """Backend that reads history through bzrlib's ``log`` command."""
    name = 'Bazaar'

    def __init__(self, **kwargs):
        super(BazaarBackend, self).__init__(**kwargs)
        self._version = _bzrlib.__version__

    def _log_lines(self, log_format, filename=None):
        """Run ``bzr log`` with `log_format`; return its output lines."""
        cmd = _bzrlib_builtins.cmd_log()
        cmd.outf = _StringIO.StringIO()
        kwargs = {'log_format':log_format, 'levels':0}
        if filename is not None:
            kwargs['file_list'] = [filename]
        cmd.run(**kwargs)
        return cmd.outf.getvalue().splitlines()

    def _years(self, filename=None):
        lines = self._log_lines(_YearLogFormatter, filename=filename)
        return set(int(year) for year in lines)

    def _authors(self, filename=None):
        return set(self._log_lines(_AuthorLogFormatter, filename=filename))

    def is_versioned(self, filename):
        # NOTE(review): returns True whenever `bzr log` does not raise;
        # presumably an unversioned file makes it raise -- confirm.
        cmd = _bzrlib_builtins.cmd_log()
        cmd.outf = _StringIO.StringIO()
        cmd.run(file_list=[filename])
        return True
class GitBackend (_VCSBackend):
    """Backend that reads history by running the ``git`` executable."""
    name = 'Git'

    @staticmethod
    def _git_cmd(*args):
        """Run ``git`` with `args`; return stdout without trailing newlines."""
        status,stdout,stderr = _utils.invoke(['git'] + list(args))
        return stdout.rstrip('\n')

    def __init__(self, **kwargs):
        super(GitBackend, self).__init__(**kwargs)
        self._version = self._git_cmd('--version').split(' ')[-1]
        if self._version.startswith('1.5.'):
            # Author name <author email>
            self._author_format = '--pretty=format:%an <%ae>'
            self._year_format = ['--pretty=format:%ai']  # Author date
            # YYYY-MM-DD HH:MM:SS Z
            # Earlier versions of Git don't seem to recognize --date=short
        else:
            self._author_format = '--pretty=format:%aN <%aE>'
            self._year_format = ['--pretty=format:%ad',  # Author date
                                 '--date=short']         # YYYY-MM-DD

    @staticmethod
    def _file_args(filename):
        """Return the ``git log`` arguments restricting history to a file.

        The ``--`` separator keeps a filename that looks like a
        revision or an option from being misinterpreted.
        """
        if filename is None:
            return []
        return ['--follow', '--', filename]

    def _years(self, filename=None):
        args = ['log'] + self._year_format + self._file_args(filename)
        output = self._git_cmd(*args)
        # Both date formats start with YYYY-, so the year is the text
        # before the first dash; blank lines carry no date.
        return set(
            int(line.split('-', 1)[0])
            for line in output.splitlines() if line.strip())

    def _authors(self, filename=None):
        args = ['log', self._author_format] + self._file_args(filename)
        output = self._git_cmd(*args)
        return set(output.splitlines())

    def is_versioned(self, filename):
        """Return True if ``git log`` reports any history for `filename`."""
        output = self._git_cmd('log', *self._file_args(filename))
        return len(output) > 0
class MercurialBackend (_VCSBackend):
    """Backend that reads history through Mercurial's Python dispatcher."""
    name = 'Mercurial'

    @staticmethod
    def _hg_cmd(*args):
        """Run ``hg`` with `args` in-process.

        Returns a ``(stdout, stderr)`` tuple of the captured output,
        each without trailing newlines.  ``sys.stdout``, ``sys.stderr``
        and the working directory are restored afterwards, even if the
        command raises.
        """
        cwd = _os.getcwd()
        stdout = _sys.stdout
        stderr = _sys.stderr
        tmp_stdout = _StringIO.StringIO()
        tmp_stderr = _StringIO.StringIO()
        _sys.stdout = tmp_stdout
        _sys.stderr = tmp_stderr
        try:
            _mercurial_dispatch.dispatch(list(args))
        finally:
            _os.chdir(cwd)
            _sys.stdout = stdout
            _sys.stderr = stderr
        return (tmp_stdout.getvalue().rstrip('\n'),
                tmp_stderr.getvalue().rstrip('\n'))

    def __init__(self, **kwargs):
        super(MercurialBackend, self).__init__(**kwargs)
        self._version = _version

    def _years(self, filename=None):
        args = [
            '--template', '{date|shortdate}\n',
            # shortdate filter: YEAR-MONTH-DAY
            ]
        if filename is not None:
            args.extend(['--follow', filename])
        output,error = self._hg_cmd('log', *args)
        years = set(int(line.split('-', 1)[0]) for line in output.splitlines())
        return years

    def _authors(self, filename=None):
        args = ['--template', '{author}\n']
        if filename is not None:
            args.extend(['--follow', filename])
        output,error = self._hg_cmd('log', *args)
        authors = set(output.splitlines())
        return authors

    def is_versioned(self, filename):
        """Return False if ``hg log`` wrote anything to stderr for `filename`."""
        output,error = self._hg_cmd('log', '--follow', filename)
        if len(error) > 0:
            return False
        return True
def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
           expect=(0,)):
    """Run the command `args` and wait for it to finish.

    `stdin` is the data sent to the process' standard input (or None);
    `stdout` and `stderr` are passed to `subprocess.Popen`.  `expect`
    should be a tuple of allowed exit codes.

    Returns a ``(status, stdout, stderr)`` tuple.  Raises ValueError if
    the command cannot be started or exits with a status that is not
    in `expect`.
    """
    # win32 doesn't have os.execvp(), so run the command in a shell there
    use_shell = _sys.platform == 'win32'
    try:
        q = _subprocess.Popen(args, stdin=_subprocess.PIPE,
                              stdout=stdout, stderr=stderr, shell=use_shell)
    except OSError as e:
        raise ValueError([args, e])
    stdout,stderr = q.communicate(input=stdin)
    status = q.returncode
    if status not in expect:
        raise ValueError([args, status, stdout, stderr])
    return status, stdout, stderr

def splitpath(path):
    """Split a path into a tuple of its elements.

    The path is normalized first, so a leading ``.`` is dropped.  For
    an absolute path the first element is the filesystem root.

    Examples
    --------

    >>> import os.path
    >>> splitpath(os.path.join('a', 'b', 'c'))
    ('a', 'b', 'c')
    >>> splitpath(os.path.join('.', 'a', 'b', 'c'))
    ('a', 'b', 'c')
    """
    path = _os_path.normpath(path)
    elements = []
    while True:
        dirname,basename = _os_path.split(path)
        if dirname == path:
            # filesystem root: splitting again would loop forever
            elements.insert(0, dirname)
            break
        elements.insert(0, basename)
        if dirname in ['', '.']:
            break
        path = dirname
    return tuple(elements)

def strip_email(*args):
    """Remove email addresses from a series of names.

    Names without an address, and None entries, pass through unchanged.

    Examples
    --------

    >>> strip_email('J Doe')
    ['J Doe']
    >>> strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
    ['J Doe', 'JJJ Smith']
    """
    names = []
    for arg in args:
        if arg is not None:
            author,addr = _email_utils.parseaddr(arg)
            if author != '':
                arg = author
        names.append(arg)
    return names

def reverse_aliases(aliases):
    """Reverse an `aliases` dict.

    Input:  key: canonical name, value: list of aliases
    Output: key: alias,          value: canonical name

    Examples
    --------

    >>> r = reverse_aliases({'J Doe': ['Johnny', 'J'], None: ['Anonymous']})
    >>> for item in sorted(r.items()):
    ...     print(item)
    ('Anonymous', None)
    ('J', 'J Doe')
    ('Johnny', 'J Doe')
    """
    output = {}
    for canonical_name,_aliases in aliases.items():
        for alias in _aliases:
            output[alias] = canonical_name
    return output

def replace_aliases(authors, with_email=True, aliases=None):
    """Consolidate and sort `authors`.

    Make the replacements listed in the `aliases` dict (key: canonical
    name, value: list of aliases); with `aliases` None no replacements
    are made.  Authors whose canonical name is None are dropped.
    `authors` may be any iterable and is not modified; a new sorted
    list is returned, with email addresses stripped when `with_email`
    is false.

    >>> aliases = {'J Doe <jdoe@a.com>': ['Johnny <jdoe@b.edu>'],
    ...            None: ['Anonymous <a@a.com>']}
    >>> authors = ['Johnny <jdoe@b.edu>', 'J Doe <jdoe@a.com>',
    ...            'Anonymous <a@a.com>']
    >>> replace_aliases(authors, with_email=True, aliases=aliases)
    ['J Doe <jdoe@a.com>']
    >>> replace_aliases(authors, with_email=False, aliases=aliases)
    ['J Doe']
    """
    if aliases is None:
        aliases = {}
    rev_aliases = reverse_aliases(aliases)
    canonical = set(rev_aliases.get(author, author) for author in authors)
    # drop anonymous authors before sorting: None does not order with str
    canonical.discard(None)
    result = sorted(canonical)
    if not with_email:
        result = strip_email(*result)
    return result