update_copyright: process files as raw bytes
authorZac Medico <zmedico@gentoo.org>
Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)
committerZac Medico <zmedico@gentoo.org>
Fri, 21 Oct 2011 07:06:32 +0000 (00:06 -0700)
This function will work correctly with files encoded in any character
set, as long as the copyright statements consist of plain ASCII.

pym/repoman/utilities.py

index eec6fdfeec0c10e89a7b97f3e6472dbeffdc77fe..9be69011b043c38430e4465b3ab9781b12e07823 100644 (file)
@@ -524,8 +524,8 @@ def FindVCS():
 
        return outvcs
 
-_copyright_re1 = re.compile(r'^(# Copyright \d\d\d\d)-\d\d\d\d ')
-_copyright_re2 = re.compile(r'^(# Copyright )(\d\d\d\d) ')
+_copyright_re1 = re.compile(br'^(# Copyright \d\d\d\d)-\d\d\d\d ')
+_copyright_re2 = re.compile(br'^(# Copyright )(\d\d\d\d) ')
 
 
 class _copyright_repl(object):
@@ -536,8 +536,8 @@ class _copyright_repl(object):
                if matchobj.group(2) == self.year:
                        return matchobj.group(0)
                else:
-                       return '%s%s-%s ' % \
-                               (matchobj.group(1), matchobj.group(2), self.year)
+                       return matchobj.group(1) + matchobj.group(2) + \
+                               b'-' + self.year + b' '
 
 def _update_copyright_year(year, line):
        """
@@ -545,8 +545,14 @@ def _update_copyright_year(year, line):
        update_copyright(), except that we don't hardcode
        1999 here (in order to be more generic).
        """
-       line = _copyright_re1.sub(r'\1-%s ' % year, line)
+       is_bytes = isinstance(line, bytes)
+       year = _unicode_encode(year)
+       line = _unicode_encode(line)
+       
+       line = _copyright_re1.sub(br'\1-' + year + b' ', line)
        line = _copyright_re2.sub(_copyright_repl(year), line)
+       if not is_bytes:
+               line = _unicode_decode(line)
        return line
 
 def update_copyright(fn_path, year, pretend):
@@ -555,12 +561,15 @@ def update_copyright(fn_path, year, pretend):
        patterns used for replacing copyrights are taken from echangelog.
        Only the first lines of each file that start with a hash ('#') are
        considered, until a line is found that doesn't start with a hash.
+       Files are read and written in binary mode, so that this function
+       will work correctly with files encoded in any character set, as
+       long as the copyright statements consist of plain ASCII.
        """
 
        try:
                fn_hdl = io.open(_unicode_encode(fn_path,
                        encoding=_encodings['fs'], errors='strict'),
-                       mode='r', encoding=_encodings['repo.content'], errors='replace')
+                       mode='rb')
        except EnvironmentError:
                return
 
@@ -570,7 +579,7 @@ def update_copyright(fn_path, year, pretend):
        for line in fn_hdl:
                line_strip = line.strip()
                orig_header.append(line)
-               if not line_strip or line_strip[:1] != '#':
+               if not line_strip or line_strip[:1] != b'#':
                        new_header.append(line)
                        break
 
@@ -578,7 +587,9 @@ def update_copyright(fn_path, year, pretend):
                new_header.append(line)
 
        difflines = 0
-       for line in difflib.unified_diff(orig_header, new_header,
+       for line in difflib.unified_diff(
+               [_unicode_decode(line) for line in orig_header],
+               [_unicode_decode(line) for line in new_header],
                        fromfile=fn_path, tofile=fn_path, n=0):
                util.writemsg_stdout(line, noiselevel=-1)
                difflines += 1
@@ -588,8 +599,7 @@ def update_copyright(fn_path, year, pretend):
        if difflines > 3 and not pretend:
                # write new file with changed header
                f, fnnew_path = mkstemp()
-               f = io.open(f, mode='w', encoding=_encodings['repo.content'],
-                       errors='backslashreplace')
+               f = io.open(f, mode='wb')
                for line in new_header:
                        f.write(line)
                for line in fn_hdl: