update_copyright: process files as raw bytes

author Zac Medico <zmedico@gentoo.org>

Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)

committer Zac Medico <zmedico@gentoo.org>

Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)
author Zac Medico <zmedico@gentoo.org>
Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)
committer Zac Medico <zmedico@gentoo.org>
Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)
diff --git a/pym/repoman/utilities.py b/pym/repoman/utilities.py

index eec6fdfeec0c10e89a7b97f3e6472dbeffdc77fe..9be69011b043c38430e4465b3ab9781b12e07823 100644 (file)
--- a/pym/repoman/utilities.py
+++ b/pym/repoman/utilities.py
@@ -524,8 +524,8 @@ def FindVCS():
  
         return outvcs
  
-_copyright_re1 = re.compile(r'^(# Copyright \d\d\d\d)-\d\d\d\d ')
-_copyright_re2 = re.compile(r'^(# Copyright )(\d\d\d\d) ')
+_copyright_re1 = re.compile(br'^(# Copyright \d\d\d\d)-\d\d\d\d ')
+_copyright_re2 = re.compile(br'^(# Copyright )(\d\d\d\d) ')
  
  
  class _copyright_repl(object):
@@ -536,8 +536,8 @@ class _copyright_repl(object):
                 if matchobj.group(2) == self.year:
                         return matchobj.group(0)
                 else:
-                       return '%s%s-%s ' % \
-                               (matchobj.group(1), matchobj.group(2), self.year)
+                       return matchobj.group(1) + matchobj.group(2) + \
+                               b'-' + self.year + b' '
  
  def _update_copyright_year(year, line):
         """
@@ -545,8 +545,14 @@ def _update_copyright_year(year, line):
         update_copyright(), except that we don't hardcode
         1999 here (in order to be more generic).
         """
-       line = _copyright_re1.sub(r'\1-%s ' % year, line)
+       is_bytes = isinstance(line, bytes)
+       year = _unicode_encode(year)
+       line = _unicode_encode(line)
+       
+       line = _copyright_re1.sub(br'\1-' + year + b' ', line)
         line = _copyright_re2.sub(_copyright_repl(year), line)
+       if not is_bytes:
+               line = _unicode_decode(line)
         return line
  
  def update_copyright(fn_path, year, pretend):
@@ -555,12 +561,15 @@ def update_copyright(fn_path, year, pretend):
         patterns used for replacing copyrights are taken from echangelog.
         Only the first lines of each file that start with a hash ('#') are
         considered, until a line is found that doesn't start with a hash.
+       Files are read and written in binary mode, so that this function
+       will work correctly with files encoded in any character set, as
+       long as the copyright statements consist of plain ASCII.
         """
  
         try:
                 fn_hdl = io.open(_unicode_encode(fn_path,
                         encoding=_encodings['fs'], errors='strict'),
-                       mode='r', encoding=_encodings['repo.content'], errors='replace')
+                       mode='rb')
         except EnvironmentError:
                 return
  
@@ -570,7 +579,7 @@ def update_copyright(fn_path, year, pretend):
         for line in fn_hdl:
                 line_strip = line.strip()
                 orig_header.append(line)
-               if not line_strip or line_strip[:1] != '#':
+               if not line_strip or line_strip[:1] != b'#':
                         new_header.append(line)
                         break
  
@@ -578,7 +587,9 @@ def update_copyright(fn_path, year, pretend):
                 new_header.append(line)
  
         difflines = 0
-       for line in difflib.unified_diff(orig_header, new_header,
+       for line in difflib.unified_diff(
+               [_unicode_decode(line) for line in orig_header],
+               [_unicode_decode(line) for line in new_header],
                         fromfile=fn_path, tofile=fn_path, n=0):
                 util.writemsg_stdout(line, noiselevel=-1)
                 difflines += 1
@@ -588,8 +599,7 @@ def update_copyright(fn_path, year, pretend):
         if difflines > 3 and not pretend:
                 # write new file with changed header
                 f, fnnew_path = mkstemp()
-               f = io.open(f, mode='w', encoding=_encodings['repo.content'],
-                       errors='backslashreplace')
+               f = io.open(f, mode='wb')
                 for line in new_header:
                         f.write(line)
                 for line in fn_hdl:
author	Zac Medico <zmedico@gentoo.org>
	Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)
committer	Zac Medico <zmedico@gentoo.org>
	Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)