update_copyright.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright (C) 2009 W. Trevor King <wking@drexel.edu>
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 2 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License along
  16 # with this program; if not, write to the Free Software Foundation, Inc.,
  17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18
  19 import os.path
  20 import re
  21 import sys
  22 import time
  23
  24 import os
  25 import sys
  26 import select
  27 from subprocess import Popen, PIPE, mswindows
  28 from threading import Thread
  29
# License notice that _copyright_string() appends after the generated
# "# Copyright (C) ..." lines.
COPYRIGHT_TEXT="""#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA."""

# Placeholder that _tag_copyright() substitutes for an existing blurb and
# that _update_copyright() then replaces with the freshly generated one.
COPYRIGHT_TAG='-xyz-COPYRIGHT-zyx-' # unlikely to occur in the wild :p

# Each entry is [canonical name, alias, alias, ...].  _replace_aliases()
# rewrites any author matching one of the aliases to the canonical name.
# A canonical name of None removes the author from the result entirely.
ALIASES = [
    ['Ben Finney <benf@cybersource.com.au>',
     'Ben Finney <ben+python@benfinney.id.au>',
     'John Doe <jdoe@example.com>'],
    ['Chris Ball <cjb@laptop.org>',
     'Chris Ball <cjb@thunk.printf.net>'],
    ['Gianluca Montecchi <gian@grys.it>',
     'gian <gian@li82-39>',
     'gianluca <gian@galactica>'],
    ['W. Trevor King <wking@drexel.edu>',
     'wking <wking@mjolnir>'],
    [None,
     'j^ <j@oil21.org>'],
    ]
# Extra alias entries (same format as ALIASES) that update_file() adds
# when building copyright blurbs; authors_list() does not use them.
COPYRIGHT_ALIASES = [
    ['Aaron Bentley and Panometrics, Inc.',
     'Aaron Bentley <abentley@panoramicfeedback.com>'],
    ]
# Each entry is [name, excluded, excluded, ...].  _replace_aliases() drops
# an excluded author whenever `name` is also present in the author list.
EXCLUDES = [
    ['Aaron Bentley and Panometrics, Inc.',
     'Aaron Bentley <aaron.bentley@utoronto.ca>',]
    ]


# ignored_file() skips any filename that starts with one of these prefixes...
IGNORED_PATHS = ['./.be/', './.bzr/', './build/']
# ...or whose basename is one of these names.
IGNORED_FILES = ['COPYING', 'update_copyright.py', 'catmutt']
  73
  74 class Pipe (object):
  75     """
  76     Simple interface for executing POSIX-style pipes based on the
  77     subprocess module.  The only complication is the adaptation of
  78     subprocess.Popen._comminucate to listen to the stderrs of all
  79     processes involved in the pipe, as well as the terminal process'
  80     stdout.  There are two implementations of Pipe._communicate, one
  81     for MS Windows, and one for POSIX systems.  The MS Windows
  82     implementation is currently untested.
  83
  84     >>> p = Pipe([['find', '/etc/'], ['grep', '^/etc/ssh$']])
  85     >>> p.stdout
  86     '/etc/ssh\\n'
  87     >>> p.status
  88     1
  89     >>> p.statuses
  90     [1, 0]
  91     >>> p.stderrs # doctest: +ELLIPSIS
  92     ["find: `...': Permission denied\\n...", '']
  93     """
  94     def __init__(self, cmds, stdin=None):
  95         # spawn processes
  96         self._procs = []
  97         for cmd in cmds:
  98             if len(self._procs) != 0:
  99                 stdin = self._procs[-1].stdout
 100             self._procs.append(Popen(cmd, stdin=stdin, stdout=PIPE, stderr=PIPE))
 101
 102         self.stdout,self.stderrs = self._communicate(input=None)
 103
 104         # collect process statuses
 105         self.statuses = []
 106         self.status = 0
 107         for proc in self._procs:
 108             self.statuses.append(proc.wait())
 109             if self.statuses[-1] != 0:
 110                 self.status = self.statuses[-1]
 111
 112     # Code excerpted from subprocess.Popen._communicate()
 113     if mswindows == True:
 114         def _communicate(self, input=None):
 115             assert input == None, "stdin != None not yet supported"
 116             # listen to each process' stderr
 117             threads = []
 118             std_X_arrays = []
 119             for proc in self._procs:
 120                 stderr_array = []
 121                 thread = Thread(target=proc._readerthread,
 122                                 args=(proc.stderr, stderr_array))
 123                 thread.setDaemon(True)
 124                 thread.start()
 125                 threads.append(thread)
 126                 std_X_arrays.append(stderr_array)
 127
 128             # also listen to the last processes stdout
 129             stdout_array = []
 130             thread = Thread(target=proc._readerthread,
 131                             args=(proc.stdout, stdout_array))
 132             thread.setDaemon(True)
 133             thread.start()
 134             threads.append(thread)
 135             std_X_arrays.append(stdout_array)
 136
 137             # join threads as they die
 138             for thread in threads:
 139                 thread.join()
 140
 141             # read output from reader threads
 142             std_X_strings = []
 143             for std_X_array in std_X_arrays:
 144                 std_X_strings.append(std_X_array[0])
 145
 146             stdout = std_X_strings.pop(-1)
 147             stderrs = std_X_strings
 148             return (stdout, stderrs)
 149     else: # POSIX
 150         def _communicate(self, input=None):
 151             read_set = []
 152             write_set = []
 153             read_arrays = []
 154             stdout = None # Return
 155             stderr = None # Return
 156
 157             if self._procs[0].stdin:
 158                 # Flush stdio buffer.  This might block, if the user has
 159                 # been writing to .stdin in an uncontrolled fashion.
 160                 self._procs[0].stdin.flush()
 161                 if input:
 162                     write_set.append(self._procs[0].stdin)
 163                 else:
 164                     self._procs[0].stdin.close()
 165             for proc in self._procs:
 166                 read_set.append(proc.stderr)
 167                 read_arrays.append([])
 168             read_set.append(self._procs[-1].stdout)
 169             read_arrays.append([])
 170
 171             input_offset = 0
 172             while read_set or write_set:
 173                 try:
 174                     rlist, wlist, xlist = select.select(read_set, write_set, [])
 175                 except select.error, e:
 176                     if e.args[0] == errno.EINTR:
 177                         continue
 178                     raise
 179                 if self._procs[0].stdin in wlist:
 180                     # When select has indicated that the file is writable,
 181                     # we can write up to PIPE_BUF bytes without risk
 182                     # blocking.  POSIX defines PIPE_BUF >= 512
 183                     chunk = input[input_offset : input_offset + 512]
 184                     bytes_written = os.write(self.stdin.fileno(), chunk)
 185                     input_offset += bytes_written
 186                     if input_offset >= len(input):
 187                         self._procs[0].stdin.close()
 188                         write_set.remove(self._procs[0].stdin)
 189                 if self._procs[-1].stdout in rlist:
 190                     data = os.read(self._procs[-1].stdout.fileno(), 1024)
 191                     if data == "":
 192                         self._procs[-1].stdout.close()
 193                         read_set.remove(self._procs[-1].stdout)
 194                     read_arrays[-1].append(data)
 195                 for i,proc in enumerate(self._procs):
 196                     if proc.stderr in rlist:
 197                         data = os.read(proc.stderr.fileno(), 1024)
 198                         if data == "":
 199                             proc.stderr.close()
 200                             read_set.remove(proc.stderr)
 201                         read_arrays[i].append(data)
 202
 203             # All data exchanged.  Translate lists into strings.
 204             read_strings = []
 205             for read_array in read_arrays:
 206                 read_strings.append(''.join(read_array))
 207
 208             stdout = read_strings.pop(-1)
 209             stderrs = read_strings
 210             return (stdout, stderrs)
 211
 212 def _strip_email(*args):
 213     """
 214     >>> _strip_email('J Doe <jdoe@a.com>')
 215     ['J Doe']
 216     >>> _strip_email('J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>')
 217     ['J Doe', 'JJJ Smith']
 218     """
 219     args = list(args)
 220     for i,arg in enumerate(args):
 221         if arg == None:
 222             continue
 223         index = arg.find('<')
 224         if index > 0:
 225             args[i] = arg[:index].rstrip()
 226     return args
 227
 228 def _replace_aliases(authors, with_email=True, aliases=None,
 229                      excludes=None):
 230     """
 231     >>> aliases = [['J Doe and C, Inc.', 'J Doe <jdoe@c.com>'],
 232     ...            ['J Doe <jdoe@a.com>', 'Johnny <jdoe@b.edu>'],
 233     ...            ['JJJ Smith <jjjs@a.com>', 'Jingly <jjjs@b.edu>'],
 234     ...            [None, 'Anonymous <a@a.com>']]
 235     >>> excludes = [['J Doe and C, Inc.', 'J Doe <jdoe@a.com>']]
 236     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 237     ...                   'Jingly <jjjs@b.edu>', 'Anonymous <a@a.com>'],
 238     ...                  with_email=True, aliases=aliases, excludes=excludes)
 239     ['J Doe <jdoe@a.com>', 'JJJ Smith <jjjs@a.com>']
 240     >>> _replace_aliases(['JJJ Smith', 'Johnny', 'Jingly', 'Anonymous'],
 241     ...                  with_email=False, aliases=aliases, excludes=excludes)
 242     ['J Doe', 'JJJ Smith']
 243     >>> _replace_aliases(['JJJ Smith <jjjs@a.com>', 'Johnny <jdoe@b.edu>',
 244     ...                   'Jingly <jjjs@b.edu>', 'J Doe <jdoe@c.com>'],
 245     ...                  with_email=True, aliases=aliases, excludes=excludes)
 246     ['J Doe and C, Inc.', 'JJJ Smith <jjjs@a.com>']
 247     """
 248     if aliases == None:
 249         aliases = ALIASES
 250     if excludes == None:
 251         excludes = EXCLUDES
 252     if with_email == False:
 253         aliases = [_strip_email(*alias) for alias in aliases]
 254         exclude = [_strip_email(*exclude) for exclude in excludes]
 255     for i,author in enumerate(authors):
 256         for alias in aliases:
 257             if author in alias[1:]:
 258                 authors[i] = alias[0]
 259                 break
 260     for i,author in enumerate(authors):
 261         for exclude in excludes:
 262             if author in exclude[1:] and exclude[0] in authors:
 263                 authors[i] = None
 264     authors = sorted(set(authors))
 265     if None in authors:
 266         authors.remove(None)
 267     return authors
 268
 269 def authors_list():
 270     p = Pipe([['bzr', 'log', '-n0'],
 271               ['grep', '^ *committer\|^ *author'],
 272               ['cut', '-d:', '-f2'],
 273               ['sed', 's/ <.*//;s/^ *//'],
 274               ['sort'],
 275               ['uniq']])
 276     assert p.status == 0, p.statuses
 277     authors = p.stdout.rstrip().split('\n')
 278     return _replace_aliases(authors, with_email=False)
 279
 280 def update_authors(verbose=True):
 281     print "updating AUTHORS"
 282     f = file('AUTHORS', 'w')
 283     authors_text = 'Bugs Everywhere was written by:\n%s\n' % '\n'.join(authors_list())
 284     f.write(authors_text)
 285     f.close()
 286
 287 def ignored_file(filename, ignored_paths=None, ignored_files=None):
 288     """
 289     >>> ignored_paths = ['./a/', './b/']
 290     >>> ignored_files = ['x', 'y']
 291     >>> ignored_file('./a/z', ignored_paths, ignored_files)
 292     True
 293     >>> ignored_file('./ab/z', ignored_paths, ignored_files)
 294     False
 295     >>> ignored_file('./ab/x', ignored_paths, ignored_files)
 296     True
 297     >>> ignored_file('./ab/xy', ignored_paths, ignored_files)
 298     False
 299     >>> ignored_file('./z', ignored_paths, ignored_files)
 300     False
 301     """
 302     if ignored_paths == None:
 303         ignored_paths = IGNORED_PATHS
 304     if ignored_files == None:
 305         ignored_files = IGNORED_FILES
 306     for path in ignored_paths:
 307         if filename.startswith(path):
 308             return True
 309     if os.path.basename(filename) in ignored_files:
 310         return True
 311     if os.path.abspath(filename) != os.path.realpath(filename):
 312         return True # symink somewhere in path...
 313     return False
 314
 315 def _copyright_string(orig_year, final_year, authors):
 316     """
 317     >>> print _copyright_string(orig_year=2005,
 318     ...                         final_year=2005,
 319     ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
 320     ...                        ) # doctest: +ELLIPSIS
 321     # Copyright (C) 2005 A <a@a.com>
 322     #                    B <b@b.edu>
 323     #
 324     # This program...
 325     >>> print _copyright_string(orig_year=2005,
 326     ...                         final_year=2009,
 327     ...                         authors=['A <a@a.com>', 'B <b@b.edu>']
 328     ...                        ) # doctest: +ELLIPSIS
 329     # Copyright (C) 2005-2009 A <a@a.com>
 330     #                         B <b@b.edu>
 331     #
 332     # This program...
 333     """
 334     if orig_year == final_year:
 335         date_range = '%s' % orig_year
 336     else:
 337         date_range = '%s-%s' % (orig_year, final_year)
 338     lines = ['# Copyright (C) %s %s' % (date_range, authors[0])]
 339     for author in authors[1:]:
 340         lines.append('#' +
 341                      ' '*(len(' Copyright (C) ')+len(date_range)+1) +
 342                      author)
 343     return '%s\n%s' % ('\n'.join(lines), COPYRIGHT_TEXT)
 344
 345 def _tag_copyright(contents):
 346     """
 347     >>> contents = '''Some file
 348     ... bla bla
 349     ... # Copyright (copyright begins)
 350     ... # (copyright continues)
 351     ... # bla bla bla
 352     ... (copyright ends)
 353     ... bla bla bla
 354     ... '''
 355     >>> print _tag_copyright(contents),
 356     Some file
 357     bla bla
 358     -xyz-COPYRIGHT-zyx-
 359     (copyright ends)
 360     bla bla bla
 361     """
 362     lines = []
 363     incopy = False
 364     for line in contents.splitlines():
 365         if incopy == False and line.startswith('# Copyright'):
 366             incopy = True
 367             lines.append(COPYRIGHT_TAG)
 368         elif incopy == True and not line.startswith('#'):
 369             incopy = False
 370         if incopy == False:
 371             lines.append(line.rstrip('\n'))
 372     return '\n'.join(lines)+'\n'
 373
 374 def _update_copyright(contents, orig_year, authors):
 375     current_year = time.gmtime()[0]
 376     copyright_string = _copyright_string(orig_year, current_year, authors)
 377     contents = _tag_copyright(contents)
 378     return contents.replace(COPYRIGHT_TAG, copyright_string)
 379
 380 def update_file(filename, verbose=True):
 381     if verbose == True:
 382         print "updating", filename
 383     contents = file(filename, 'r').read()
 384
 385     p = Pipe([['bzr', 'log', '-n0', filename],
 386               ['grep', '^ *timestamp: '],
 387               ['tail', '-n1'],
 388               ['sed', 's/^ *//;'],
 389               ['cut', '-b', '16-19']])
 390     if p.status != 0:
 391         assert p.statuses[0] == 3, p.statuses
 392         return # bzr doesn't version that file
 393     assert p.status == 0, p.statuses
 394     orig_year = int(p.stdout.strip())
 395
 396     p = Pipe([['bzr', 'log', '-n0', filename],
 397               ['grep', '^ *author: \|^ *committer: '],
 398               ['cut', '-d:', '-f2'],
 399               ['sed', 's/^ *//;s/ *$//'],
 400               ['sort'],
 401               ['uniq']])
 402     assert p.status == 0, p.statuses
 403     authors = p.stdout.rstrip().split('\n')
 404     authors = _replace_aliases(authors, with_email=True,
 405                                aliases=ALIASES+COPYRIGHT_ALIASES)
 406
 407     contents = _update_copyright(contents, orig_year, authors)
 408     f = file(filename, 'w')
 409     f.write(contents)
 410     f.close()
 411
 412 def test():
 413     import doctest
 414     doctest.testmod()
 415
 416 if __name__ == '__main__':
 417     import optparse
 418     usage = """%prog [options] [file ...]
 419
 420 Update copyright information in source code with information from
 421 the bzr repository.  Run from the BE repository root.
 422
 423 Replaces every line starting with '^# Copyright' and continuing with
 424 '^#' with an auto-generated copyright blurb.  If you want to add
 425 #-commented material after a copyright blurb, please insert a blank
 426 line between the blurb and your comment (as in this file), so the
 427 next run of update_copyright.py doesn't clobber your comment.
 428
 429 If no files are given, a list of files to update is generated
 430 automatically.
 431 """
 432     p = optparse.OptionParser(usage)
 433     p.add_option('--test', dest='test', default=False,
 434                  action='store_true', help='Run internal tests and exit')
 435     options,args = p.parse_args()
 436
 437     if options.test == True:
 438         test()
 439         sys.exit(0)
 440
 441     update_authors()
 442
 443     files = args
 444     if len(files) == 0:
 445         p = Pipe([['grep', '-rc', '# Copyright', '.'],
 446                   ['grep', '-v', ':0$'],
 447                   ['cut', '-d:', '-f1']])
 448         assert p.status == 0
 449         files = p.stdout.rstrip().split('\n')
 450
 451     for filename in files:
 452         if ignored_file(filename) == True:
 453             continue
 454         update_file(filename)