script-publish.py: Add DOCTYPE and <meta charset="..." />
author W. Trevor King <wking@tremily.us>
Sun, 13 Jan 2013 15:18:19 +0000 (10:18 -0500)
committer W. Trevor King <wking@tremily.us>
Sun, 13 Jan 2013 15:18:19 +0000 (10:18 -0500)
<meta charset="..." /> is new in HTML5.

posts/script/script-publish.py

index 622098d18436cb1de0148bd462f5fa2e416cd650..ea47b2cff2f98ae949564017fe7af933c456d592 100755 (executable)
@@ -661,6 +661,7 @@ class ScriptParser (object):
     def __init__(self, encoding='utf-8', css=None, term=None):
         self._control_parser = ControlParser(encoding=encoding, term=term)
         self._encoding = encoding
+        self._doctype = '<!DOCTYPE html>'
         if css is None:
             css = self._get_css()
         self.css = css
@@ -669,6 +670,8 @@ class ScriptParser (object):
         html = _etree.Element('html')
         head = _etree.Element('head')
         html.append(head)
+        charset = _etree.Element('meta', charset=self._encoding.upper())
+        head.append(charset)
         style = _etree.Element('style', type='text/css')
         head.append(style)
         style.text = self.css
@@ -678,6 +681,7 @@ class ScriptParser (object):
         body.append(pre)
         tree = _etree.ElementTree(element=html)
         html.text = html.tail = head.text = head.tail = '\n'
+        charset.tail = '\n'
         body.text = body.tail = pre.tail = '\n'
         return tree
 
@@ -698,12 +702,18 @@ class ScriptParser (object):
         return text
 
     def _serialize(self, tree):
-        return _etree.tostring(tree.getroot(), 'unicode')
+        return '\n'.join([
+                self._doctype,
+                _etree.tostring(tree.getroot(), encoding='unicode'),
+                ])
 
     def _write_etree(self, tree, filename=None):
         if filename:
             _LOG.info('write {}'.format(filename))
-            tree.write(filename, encoding=self._encoding)
+            with open(filename, 'wb') as f:
+                f.write(self._doctype.encode(self._encoding))
+                f.write('\n'.encode(self._encoding))
+                tree.write(f, encoding=self._encoding)
         else:
             _LOG.info('write to stdout')
             result = self._serialize(tree)