fix code annotation for unicode source
author Stefan Behnel <scoder@users.berlios.de>
Fri, 9 May 2008 06:15:35 +0000 (08:15 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Fri, 9 May 2008 06:15:35 +0000 (08:15 +0200)
Cython/Compiler/Annotate.py

index 21ad7b3da689666b0201e819ab6a0059b90791b7..d88ac7bf74fc6d493bb64ee217063fe7ee2db647 100644 (file)
@@ -3,15 +3,17 @@
 import os
 import re
 import time
+import codecs
 from StringIO import StringIO
 
 import Version
 from Code import CCodeWriter
+from Cython import Utils
 
 # need one-characters subsitutions (for now) so offsets aren't off
-special_chars = [('<', '\xF0', '&lt;'),
-                 ('>', '\xF1', '&gt;'), 
-                 ('&', '\xF2', '&amp;')]
+special_chars = [(u'<', u'\xF0', u'&lt;'),
+                 (u'>', u'\xF1', u'&gt;'), 
+                 (u'&', u'\xF2', u'&amp;')]
 
 class AnnotationCCodeWriter(CCodeWriter):
 
@@ -48,7 +50,7 @@ class AnnotationCCodeWriter(CCodeWriter):
         
     def save_annotation(self, filename):
         self.mark_pos(None)
-        f = open(filename)
+        f = Utils.open_source_file(filename)
         lines = f.readlines()
         for k in range(len(lines)):
             line = lines[k]
@@ -76,10 +78,11 @@ class AnnotationCCodeWriter(CCodeWriter):
             line = lines[line_no]
             lines[line_no] = line[:col] + item + line[col:]
         
-        f = open("%s.html" % filename, "w")
-        f.write('<html>\n')
-        f.write("""
+        f = codecs.open("%s.html" % filename, "w", encoding="UTF-8")
+        f.write(u'<html>\n')
+        f.write(u"""
 <head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 <style type="text/css">
 
 body { font-family: courier; font-size: 12; }
@@ -112,16 +115,16 @@ function toggleDiv(id) {
 </script>
 </head>
         """)
-        f.write('<body>\n')
-        f.write('<p>Generated by Cython %s on %s\n' % (Version.version, time.asctime()))
-        c_file = os.path.basename(filename)[:-3] + 'c'
-        f.write('<p>Raw output: <a href="%s">%s</a>\n' % (c_file, c_file))
+        f.write(u'<body>\n')
+        f.write(u'<p>Generated by Cython %s on %s\n' % (Version.version, time.asctime()))
+        c_file = Utils.encode_filename(os.path.basename(filename)[:-3] + 'c')
+        f.write(u'<p>Raw output: <a href="%s">%s</a>\n' % (c_file, c_file))
         k = 0
         
-        py_c_api = re.compile('(Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]+)')
-        pyx_api = re.compile('(__Pyx[A-Za-z_]+)\(')
-        py_marco_api = re.compile('(Py[A-Za-z]*_[A-Z][A-Z_]+)')
-        error_goto = re.compile(r'((; *if .*)? \{__pyx_filename = .*goto __pyx_L\w+;\})')
+        py_c_api = re.compile(u'(Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]+)')
+        pyx_api = re.compile(u'(__Pyx[A-Za-z_]+)\(')
+        py_marco_api = re.compile(u'(Py[A-Za-z]*_[A-Z][A-Z_]+)')
+        error_goto = re.compile(ur'((; *if .*)? \{__pyx_filename = .*goto __pyx_L\w+;\})')
         
         for line in lines:
 
@@ -131,33 +134,33 @@ function toggleDiv(id) {
             except KeyError:
                 code = ''
                 
-            code, c_api_calls = py_c_api.subn(r"<span class='py_api'>\1</span>", code)
-            code, pyx_api_calls = pyx_api.subn(r"<span class='pyx_api'>\1</span>(", code)
-            code, macro_api_calls = py_marco_api.subn(r"<span class='py_macro_api'>\1</span>", code)
-            code, error_goto_calls = error_goto.subn(r"<span class='error_goto'>\1</span>", code)
+            code, c_api_calls = py_c_api.subn(ur"<span class='py_api'>\1</span>", code)
+            code, pyx_api_calls = pyx_api.subn(ur"<span class='pyx_api'>\1</span>(", code)
+            code, macro_api_calls = py_marco_api.subn(ur"<span class='py_macro_api'>\1</span>", code)
+            code, error_goto_calls = error_goto.subn(ur"<span class='error_goto'>\1</span>", code)
             
-            code = code.replace("<span class='error_goto'>;", ";<span class='error_goto'>")
+            code = code.replace(u"<span class='error_goto'>;", u";<span class='error_goto'>")
             
-            color = "FFFF%02x" % int(255/(1+(5*c_api_calls+2*pyx_api_calls+macro_api_calls)/10.0))
-            f.write("<pre class='line' style='background-color: #%s' onclick='toggleDiv(\"line%s\")'>" % (color, k))
+            color = u"FFFF%02x" % int(255/(1+(5*c_api_calls+2*pyx_api_calls+macro_api_calls)/10.0))
+            f.write(u"<pre class='line' style='background-color: #%s' onclick='toggleDiv(\"line%s\")'>" % (color, k))
 
-            f.write(" %d: " % k)
+            f.write(u" %d: " % k)
             for c, cc, html in special_chars:
-                line = str(line).replace(cc, html)
+                line = line.replace(cc, html)
             f.write(line.rstrip())
                 
-            f.write('</pre>\n')
-            f.write("<pre id='line%s' class='code' style='background-color: #%s'>%s</pre>" % (k, color, code))
-        f.write('</body></html>\n')
+            f.write(u'</pre>\n')
+            f.write(u"<pre id='line%s' class='code' style='background-color: #%s'>%s</pre>" % (k, color, code))
+        f.write(u'</body></html>\n')
         f.close()
         
 
 # TODO: make this cleaner
 def escape(raw_string):
-    raw_string = raw_string.replace("\'", r"&#146;")
-    raw_string = raw_string.replace('\"', r'&quot;')
-    raw_string = raw_string.replace('\n', r'<br>\n')
-    raw_string = raw_string.replace('\t', r'\t')
+    raw_string = raw_string.replace(u"\'", ur"&#146;")
+    raw_string = raw_string.replace(u'\"', ur'&quot;')
+    raw_string = raw_string.replace(u'\n', ur'<br>\n')
+    raw_string = raw_string.replace(u'\t', ur'\t')
     return raw_string
 
 
@@ -170,7 +173,7 @@ class AnnotationItem:
         self.size = size
         
     def start(self):
-        return "<span class='tag %s' title='%s'>%s" % (self.style, self.text, self.tag)
+        return u"<span class='tag %s' title='%s'>%s" % (self.style, self.text, self.tag)
     
     def end(self):
-        return self.size, "</span>"
+        return self.size, u"</span>"