update_copyright/utils.py

   1 # Copyright (C) 2012 W. Trevor King
   2 #
   3 # This file is part of update-copyright.
   4 #
   5 # update-copyright is free software: you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License as
   7 # published by the Free Software Foundation, either version 3 of the
   8 # License, or (at your option) any later version.
   9 #
  10 # update-copyright is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 # General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with update-copyright.  If not, see
  17 # <http://www.gnu.org/licenses/>.
  18
  19 import codecs as _codecs
  20 import difflib as _difflib
  21 import locale as _locale
  22 import os as _os
  23 import os.path as _os_path
  24 import sys as _sys
  25 import textwrap as _textwrap
  26 import time as _time
  27
  28 from . import LOG as _LOG
  29
  30
# Default text encoding for get_contents()/set_contents() when the
# caller passes unicode=True without an explicit encoding: the locale's
# preferred encoding, or the interpreter default if that is empty.
ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding()
  32
  33
  34 def long_author_formatter(copyright_year_string, authors):
  35     """
  36     >>> print '\\n'.join(long_author_formatter(
  37     ...     copyright_year_string='Copyright (C) 1990-2010',
  38     ...     authors=['Jack', 'Jill', 'John']))
  39     Copyright (C) 1990-2010 Jack
  40                             Jill
  41                             John
  42     """
  43     lines = ['%s %s' % (copyright_year_string, authors[0])]
  44     for author in authors[1:]:
  45         lines.append(' '*(len(copyright_year_string)+1) + author)
  46     return lines
  47
  48 def short_author_formatter(copyright_year_string, authors):
  49     """
  50     >>> print '\\n'.join(short_author_formatter(
  51     ...     copyright_year_string='Copyright (C) 1990-2010',
  52     ...     authors=['Jack', 'Jill', 'John']*5))
  53     Copyright (C) 1990-2010 Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John, Jack, Jill, John
  54     """
  55     blurb = '%s %s' % (copyright_year_string, ', '.join(authors))
  56     return [blurb]
  57
  58 def copyright_string(original_year, final_year, authors, text, info={},
  59                      author_format_fn=long_author_formatter,
  60                      formatter_kwargs={}, prefix=('', '', None), wrap=True,
  61                      **wrap_kwargs):
  62     """
  63     >>> print(copyright_string(original_year=2005, final_year=2005,
  64     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  65     ...                        text=['BLURB',], prefix=('# ', '# ', None),
  66     ...                        )) # doctest: +REPORT_UDIFF
  67     # Copyright (C) 2005 A <a@a.com>
  68     #                    B <b@b.edu>
  69     #
  70     # BLURB
  71     >>> print(copyright_string(original_year=2005, final_year=2009,
  72     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  73     ...                        text=['BLURB',], prefix=('/* ', ' * ', ' */'),
  74     ...                        )) # doctest: +REPORT_UDIFF
  75     /* Copyright (C) 2005-2009 A <a@a.com>
  76      *                         B <b@b.edu>
  77      *
  78      * BLURB
  79      */
  80     >>> print(copyright_string(original_year=2005, final_year=2009,
  81     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  82     ...                        text=['BLURB',]
  83     ...                        )) # doctest: +REPORT_UDIFF
  84     Copyright (C) 2005-2009 A <a@a.com>
  85                             B <b@b.edu>
  86     <BLANKLINE>
  87     BLURB
  88     >>> print(copyright_string(original_year=2005, final_year=2005,
  89     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
  90     ...                        text=['This file is part of %(program)s.',],
  91     ...                        author_format_fn=short_author_formatter,
  92     ...                        info={'program':'update-copyright'},
  93     ...                        width=25,
  94     ...                        )) # doctest: +REPORT_UDIFF
  95     Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
  96     <BLANKLINE>
  97     This file is part of
  98     update-copyright.
  99     >>> print(copyright_string(original_year=2005, final_year=2005,
 100     ...                        authors=['A <a@a.com>', 'B <b@b.edu>'],
 101     ...                        text=[('This file is part of %(program)s.  '*3
 102     ...                               ).strip(),],
 103     ...                        info={'program':'update-copyright'},
 104     ...                        author_format_fn=short_author_formatter,
 105     ...                        wrap=False,
 106     ...                        )) # doctest: +REPORT_UDIFF
 107     Copyright (C) 2005 A <a@a.com>, B <b@b.edu>
 108     <BLANKLINE>
 109     This file is part of update-copyright.  This file is part of update-copyright.  This file is part of update-copyright.
 110     """
 111     for key in ['initial_indent', 'subsequent_indent']:
 112         if key not in wrap_kwargs:
 113             wrap_kwargs[key] = prefix[1]
 114
 115     if original_year == final_year:
 116         date_range = '%s' % original_year
 117     else:
 118         date_range = '%s-%s' % (original_year, final_year)
 119     copyright_year_string = 'Copyright (C) %s' % date_range
 120
 121     lines = author_format_fn(copyright_year_string, authors,
 122                              **formatter_kwargs)
 123     for i,line in enumerate(lines):
 124         if i == 0:
 125             lines[i] = prefix[0] + line
 126         else:
 127             lines[i] = prefix[1] + line
 128
 129     for i,paragraph in enumerate(text):
 130         try:
 131             text[i] = paragraph % info
 132         except ValueError, e:
 133             _LOG.error(
 134                 "{}: can't format {} with {}".format(e, paragraph, info))
 135             raise
 136         except TypeError, e:
 137             _LOG.error(
 138                 ('{}: copright text must be a list of paragraph strings, '
 139                  'not {}').format(e, repr(text)))
 140             raise
 141
 142     if wrap == True:
 143         text = [_textwrap.fill(p, **wrap_kwargs) for p in text]
 144     else:
 145         assert wrap_kwargs['subsequent_indent'] == '', \
 146             wrap_kwargs['subsequent_indent']
 147     sep = '\n{}\n'.format(prefix[1].rstrip())
 148     ret = sep.join(['\n'.join(lines)] + text)
 149     if prefix[2]:
 150         ret += ('\n{}'.format(prefix[2]))
 151     return ret
 152
 153 def tag_copyright(contents, prefix=('# ', '# ', None), tag=None):
 154     """
 155     >>> contents = '''Some file
 156     ... bla bla
 157     ... # Copyright (copyright begins)
 158     ... # (copyright continues)
 159     ... # bla bla bla
 160     ... (copyright ends)
 161     ... bla bla bla
 162     ... '''
 163     >>> print tag_copyright(contents, tag='-xyz-CR-zyx-')
 164     Some file
 165     bla bla
 166     -xyz-CR-zyx-
 167     (copyright ends)
 168     bla bla bla
 169     <BLANKLINE>
 170     >>> contents = '''Some file
 171     ... bla bla
 172     ... /* Copyright (copyright begins)
 173     ...  * (copyright continues)
 174     ...  *
 175     ...  * bla bla bla
 176     ...  */
 177     ... (copyright ends)
 178     ... bla bla bla
 179     ... '''
 180     >>> print tag_copyright(
 181     ...     contents, prefix=('/* ', ' * ', ' */'), tag='-xyz-CR-zyx-')
 182     Some file
 183     bla bla
 184     -xyz-CR-zyx-
 185     (copyright ends)
 186     bla bla bla
 187     <BLANKLINE>
 188     """
 189     lines = []
 190     incopy = False
 191     start = prefix[0] + 'Copyright'
 192     middle = prefix[1].rstrip()
 193     end = prefix[2]
 194     for line in contents.splitlines():
 195         if not incopy and line.startswith(start):
 196             incopy = True
 197             lines.append(tag)
 198         elif incopy and not line.startswith(middle):
 199             if end:
 200                 assert line.startswith(end), line
 201             incopy = False
 202         if not incopy:
 203             lines.append(line.rstrip('\n'))
 204         if incopy and end and line.startswith(end):
 205             incopy = False
 206     return '\n'.join(lines)+'\n'
 207
 208 def update_copyright(contents, prefix=('# ', '# ', None), tag=None, **kwargs):
 209     """
 210     >>> contents = '''Some file
 211     ... bla bla
 212     ... # Copyright (copyright begins)
 213     ... # (copyright continues)
 214     ... # bla bla bla
 215     ... (copyright ends)
 216     ... bla bla bla
 217     ... '''
 218     >>> print update_copyright(
 219     ...     contents, original_year=2008, authors=['Jack', 'Jill'],
 220     ...     text=['BLURB',], prefix=('# ', '# ', None), tag='--tag--'
 221     ...     ) # doctest: +ELLIPSIS, +REPORT_UDIFF
 222     Some file
 223     bla bla
 224     # Copyright (C) 2008-... Jack
 225     #                         Jill
 226     #
 227     # BLURB
 228     (copyright ends)
 229     bla bla bla
 230     <BLANKLINE>
 231     """
 232     current_year = _time.gmtime()[0]
 233     string = copyright_string(final_year=current_year, prefix=prefix, **kwargs)
 234     contents = tag_copyright(contents=contents, prefix=prefix, tag=tag)
 235     return contents.replace(tag, string)
 236
 237 def get_contents(filename, unicode=False, encoding=None):
 238     if _os_path.isfile(filename):
 239         if unicode:
 240             if encoding is None:
 241                 encoding = ENCODING
 242             f = _codecs.open(filename, 'r', encoding=encoding)
 243         else:
 244             f = open(filename, 'r')
 245         contents = f.read()
 246         f.close()
 247         return contents
 248     return None
 249
 250 def set_contents(filename, contents, original_contents=None, unicode=False,
 251                  encoding=None, dry_run=False):
 252     if original_contents is None:
 253         original_contents = get_contents(
 254             filename=filename, unicode=unicode, encoding=encoding)
 255     _LOG.debug('check contents of {}'.format(filename))
 256     if contents != original_contents:
 257         if original_contents is None:
 258             _LOG.info('creating {}'.format(filename))
 259         else:
 260             _LOG.info('updating {}'.format(filename))
 261             _LOG.debug(u'\n'.join(
 262                     _difflib.unified_diff(
 263                         original_contents.splitlines(), contents.splitlines(),
 264                         fromfile=_os_path.normpath(
 265                             _os_path.join('a', filename)),
 266                         tofile=_os_path.normpath(_os_path.join('b', filename)),
 267                         n=3, lineterm='')))
 268         if dry_run == False:
 269             if unicode:
 270                 if encoding is None:
 271                     encoding = ENCODING
 272                 f = _codecs.open(filename, 'w', encoding=encoding)
 273             else:
 274                 f = file(filename, 'w')
 275             f.write(contents)
 276             f.close()
 277     _LOG.debug('no change in {}'.format(filename))
 278
 279 def list_files(root='.'):
 280     for dirpath,dirnames,filenames in _os.walk(root):
 281         for filename in filenames:
 282             yield _os_path.normpath(_os_path.join(root, dirpath, filename))