update_copyright/utils.py

   1 # Copyright (C) 2012 W. Trevor King <wking@tremily.us>
   2 #
   3 # This file is part of update-copyright.
   4 #
   5 # update-copyright is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License as published by the Free
   7 # Software Foundation, either version 3 of the License, or (at your option) any
   8 # later version.
   9 #
  10 # update-copyright is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 # more details.
  14 #
  15 # You should have received a copy of the GNU General Public License along with
  16 # update-copyright.  If not, see <http://www.gnu.org/licenses/>.
  17
  18 import codecs as _codecs
  19 import difflib as _difflib
  20 import locale as _locale
  21 import os as _os
  22 import os.path as _os_path
  23 import sys as _sys
  24 import textwrap as _textwrap
  25 import time as _time
  26
  27 from . import LOG as _LOG
  28
  29
  30 ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding()
  31
  32
  33 def long_author_formatter(copyright_year_string, authors):
  34     """
  35     >>> print '\\n'.join(long_author_formatter(
  36     ...     copyright_year_string='Copyright (C) 1990-2010',
  37     ...     authors=['Jack', 'Jill', 'John']))
  38     Copyright (C) 1990-2010 Jack
  39                             Jill
  40                             John
  41     """
  42     lines = ['%s %s' % (copyright_year_string, authors[0])]
  43     for author in authors[1:]:
  44         lines.append(' '*(len(copyright_year_string)+1) + author)
  45     return lines
  46
  47 def short_author_formatter(copyright_year_string, authors):
  48     """
  49     >>> print '\\n'.join(short_author_formatter(
  50     ...     copyright_year_string='Copyright (C) 1990-2010',
  51     ...     authors=['Jack', 'Jill', 'John']*5))
  52     Copyright (C) 1990-2010 Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John
  53     """
  54     blurb = '%s %s' % (copyright_year_string, ', '.join(authors))
  55     return [blurb]
  56
  57 def copyright_string(original_year, final_year, authors, text, info={},
  58                      author_format_fn=long_author_formatter,
  59                      formatter_kwargs={}, prefix=('', '', None), wrap=True,
  60                      **wrap_kwargs):
  61     """
  62     >>> print(copyright_string(original_year=2005, final_year=2005,
  63     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  64     ...                        text=['BLURB',], prefix=('# ', '# ', None),
  65     ...                        )) # doctest: +REPORT_UDIFF
  66     # Copyright (C) 2005 A <a@a.com>
  67     #                    B <b@b.edu>
  68     #
  69     # BLURB
  70     >>> print(copyright_string(original_year=2005, final_year=2009,
  71     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  72     ...                        text=['BLURB',], prefix=('/* ', ' * ', ' */'),
  73     ...                        )) # doctest: +REPORT_UDIFF
  74     /* Copyright (C) 2005-2009 A <a@a.com>
  75      *                         B <b@b.edu>
  76      *
  77      * BLURB
  78      */
  79     >>> print(copyright_string(original_year=2005, final_year=2009,
  80     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  81     ...                        text=['BLURB',]
  82     ...                        )) # doctest: +REPORT_UDIFF
  83     Copyright (C) 2005-2009 A <a@a.com>
  84                             B <b@b.edu>
  85     <BLANKLINE>
  86     BLURB
  87     >>> print(copyright_string(original_year=2005, final_year=2005,
  88     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  89     ...                        text=['This file is part of %(program)s.',],
  90     ...                        author_format_fn=short_author_formatter,
  91     ...                        info={'program':'update-copyright'},
  92     ...                        width=25,
  93     ...                        )) # doctest: +REPORT_UDIFF
  94     Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
  95     <BLANKLINE>
  96     This file is part of
  97     update-copyright.
  98     >>> print(copyright_string(original_year=2005, final_year=2005,
  99     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
 100     ...                        text=[('This file is part of %(program)s.  '*3
 101     ...                               ).strip(),],
 102     ...                        info={'program':'update-copyright'},
 103     ...                        author_format_fn=short_author_formatter,
 104     ...                        wrap=False,
 105     ...                        )) # doctest: +REPORT_UDIFF
 106     Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
 107     <BLANKLINE>
 108     This file is part of update-copyright.  This file is part of update-copyright.  This file is part of update-copyright.
 109     """
 110     for key in ['initial_indent', 'subsequent_indent']:
 111         if key not in wrap_kwargs:
 112             wrap_kwargs[key] = prefix[1]
 113
 114     if original_year == final_year:
 115         date_range = '%s' % original_year
 116     else:
 117         date_range = '%s-%s' % (original_year, final_year)
 118     copyright_year_string = 'Copyright (C) %s' % date_range
 119
 120     lines = author_format_fn(copyright_year_string, authors,
 121                              **formatter_kwargs)
 122     for i,line in enumerate(lines):
 123         if i == 0:
 124             lines[i] = prefix[0] + line
 125         else:
 126             lines[i] = prefix[1] + line
 127
 128     for i,paragraph in enumerate(text):
 129         try:
 130             text[i] = paragraph % info
 131         except ValueError, e:
 132             _LOG.error(
 133                 "{}: can't format {} with {}".format(e, paragraph, info))
 134             raise
 135         except TypeError, e:
 136             _LOG.error(
 137                 ('{}: copright text must be a list of paragraph strings, '
 138                  'not {}').format(e, repr(text)))
 139             raise
 140
 141     if wrap == True:
 142         text = [_textwrap.fill(p, **wrap_kwargs) for p in text]
 143     else:
 144         assert wrap_kwargs['subsequent_indent'] == '', \
 145             wrap_kwargs['subsequent_indent']
 146     sep = '\n{}\n'.format(prefix[1].rstrip())
 147     ret = sep.join(['\n'.join(lines)] + text)
 148     if prefix[2]:
 149         ret += ('\n{}'.format(prefix[2]))
 150     return ret
 151
 152 def tag_copyright(contents, prefix=('# ', '# ', None), tag=None):
 153     """
 154     >>> contents = '''Some file
 155     ... bla bla
 156     ... # Copyright (copyright begins)
 157     ... # (copyright continues)
 158     ... # bla bla bla
 159     ... (copyright ends)
 160     ... bla bla bla
 161     ... '''
 162     >>> print tag_copyright(contents, tag='-xyz-CR-zyx-')
 163     Some file
 164     bla bla
 165     -xyz-CR-zyx-
 166     (copyright ends)
 167     bla bla bla
 168     <BLANKLINE>
 169     >>> contents = '''Some file
 170     ... bla bla
 171     ... /* Copyright (copyright begins)
 172     ...  * (copyright continues)
 173     ...  *
 174     ...  * bla bla bla
 175     ...  */
 176     ... (copyright ends)
 177     ... bla bla bla
 178     ... '''
 179     >>> print tag_copyright(
 180     ...     contents, prefix=('/* ', ' * ', ' */'), tag='-xyz-CR-zyx-')
 181     Some file
 182     bla bla
 183     -xyz-CR-zyx-
 184     (copyright ends)
 185     bla bla bla
 186     <BLANKLINE>
 187     """
 188     lines = []
 189     incopy = False
 190     start = prefix[0] + 'Copyright'
 191     middle = prefix[1].rstrip()
 192     end = prefix[2]
 193     for line in contents.splitlines():
 194         if not incopy and line.startswith(start):
 195             incopy = True
 196             lines.append(tag)
 197         elif incopy and not line.startswith(middle):
 198             if end:
 199                 assert line.startswith(end), line
 200             incopy = False
 201         if not incopy:
 202             lines.append(line.rstrip('\n'))
 203         if incopy and end and line.startswith(end):
 204             incopy = False
 205     return '\n'.join(lines)+'\n'
 206
 207 def update_copyright(contents, prefix=('# ', '# ', None), tag=None, **kwargs):
 208     """
 209     >>> contents = '''Some file
 210     ... bla bla
 211     ... # Copyright (copyright begins)
 212     ... # (copyright continues)
 213     ... # bla bla bla
 214     ... (copyright ends)
 215     ... bla bla bla
 216     ... '''
 217     >>> print update_copyright(
 218     ...     contents, original_year=2008, authors=['Jack', 'Jill'],
 219     ...     text=['BLURB',], prefix=('# ', '# ', None), tag='--tag--'
 220     ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
 221     Some file
 222     bla bla
 223     # Copyright (C) 2008-... Jack
 224     #                         Jill
 225     #
 226     # BLURB
 227     (copyright ends)
 228     bla bla bla
 229     <BLANKLINE>
 230     """
 231     current_year = _time.gmtime()[0]
 232     string = copyright_string(final_year=current_year, prefix=prefix, **kwargs)
 233     contents = tag_copyright(contents=contents, prefix=prefix, tag=tag)
 234     return contents.replace(tag, string)
 235
 236 def get_contents(filename, unicode=False, encoding=None):
 237     if _os_path.isfile(filename):
 238         if unicode:
 239             if encoding is None:
 240                 encoding = ENCODING
 241             f = _codecs.open(filename, 'r', encoding=encoding)
 242         else:
 243             f = open(filename, 'r')
 244         contents = f.read()
 245         f.close()
 246         return contents
 247     return None
 248
 249 def set_contents(filename, contents, original_contents=None, unicode=False,
 250                  encoding=None, dry_run=False):
 251     if original_contents is None:
 252         original_contents = get_contents(
 253             filename=filename, unicode=unicode, encoding=encoding)
 254     _LOG.debug('check contents of {}'.format(filename))
 255     if contents != original_contents:
 256         if original_contents is None:
 257             _LOG.info('creating {}'.format(filename))
 258         else:
 259             _LOG.info('updating {}'.format(filename))
 260             _LOG.debug(u'\n'.join(
 261                     _difflib.unified_diff(
 262                         original_contents.splitlines(), contents.splitlines(),
 263                         fromfile=_os_path.normpath(
 264                             _os_path.join('a', filename)),
 265                         tofile=_os_path.normpath(_os_path.join('b', filename)),
 266                         n=3, lineterm='')))
 267         if dry_run == False:
 268             if unicode:
 269                 if encoding is None:
 270                     encoding = ENCODING
 271                 f = _codecs.open(filename, 'w', encoding=encoding)
 272             else:
 273                 f = file(filename, 'w')
 274             f.write(contents)
 275             f.close()
 276     _LOG.debug('no change in {}'.format(filename))
 277
 278 def list_files(root='.'):
 279     for dirpath,dirnames,filenames in _os.walk(root):
 280         for filename in filenames:
 281             yield _os_path.normpath(_os_path.join(root, dirpath, filename))