script-publish.py: Add my terminal-escape-to-HTML conversion script

author W. Trevor King <wking@tremily.us>

Sun, 13 Jan 2013 01:10:51 +0000 (20:10 -0500)

committer W. Trevor King <wking@tremily.us>

Sun, 13 Jan 2013 01:47:56 +0000 (20:47 -0500)
author W. Trevor King <wking@tremily.us>
Sun, 13 Jan 2013 01:10:51 +0000 (20:10 -0500)
committer W. Trevor King <wking@tremily.us>
Sun, 13 Jan 2013 01:47:56 +0000 (20:47 -0500)
diff --git a/posts/script.mdwn b/posts/script.mdwn

index 459d35680d756863063cc8ec5c95f4c77322dfba..a2e15436fa817e6b66caced5655909d03f2bca85 100644 (file)
--- a/posts/script.mdwn
+++ b/posts/script.mdwn
@@ -105,8 +105,13 @@ The user can compare with their current terminal:
  
  It would be nice if there was an `iconv`-style converter to translate
  between terminal operation encodings, but I haven't found one yet.
-[[Screen]] may do something like this internally, but I do sometimes
-run into terminfo-related problems when using `screen`, so perhaps it
-does not.
+[[Screen]] [does something like this internally][screen], and their
+[list of control sequences][seq] is a useful reference.  I've started
+work on an [[escape-sequence-to-HTML
+converter|script/script-publish.py]], in case you want to play around
+with these conversions in [[Python]].
+
  
  [68556]: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=68556
+[screen]: http://www.gnu.org/software/screen/manual/html_node/Virtual-Terminal.html
+[seq]: http://www.gnu.org/software/screen/manual/html_node/Control-Sequences.html#Control-Sequences
diff --git a/posts/script/script-publish.py b/posts/script/script-publish.py

new file mode 100755 (executable)

index 0000000..eeb602b
--- /dev/null
+++ b/posts/script/script-publish.py
@@ -0,0 +1,781 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2012-2013 W. Trevor King <wking@tremily.us>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Publish a terminal session recorded with script to HTML
+
+Since 3.0BSD, the ``script`` utility has made it easy to record a
+typescript of everything printed on your terminal.  While this is
+useful for the technically savvy, it is hard to share the raw script
+output with programming newbies.  This script makes it easy to render
+the typescript as HTML, which you can then serve with any webserver.
+
+Both single shot and continuous (polled) publishing are possible,
+which makes for a cheap way to share a log of the terminal you're
+using to teach a class.  Students can easily scroll back and see what
+happened earlier, as well as archive the class log in a format that
+they may be more comfortable with.
+
+If you aren't expecting much load, you can use Python's built in HTTP
+server to serve the typescript live::
+
+  shell-a $ script -f /tmp/script.raw
+  shell-a $ …hack hack hack…
+
+  shell-b $ script-publish.py -s /tmp/script.raw --serve 8080 -vv
+  serving on host.example.net:8080
+  address: host.example.net ('192.168.0.6', 8080)
+
+  shell-c $ echo "The typescript is at http://192.168.0.6:8080/" | write
+"""
+
+import copy as _copy
+import curses as _curses
+import logging as _logging
+import os as _os
+import re as _re
+import select as _select
+import socket as _socket
+import sys as _sys
+import time as _time
+import wsgiref.simple_server as _wsgiref_simple_server
+import xml.etree.ElementTree as _etree
+
+
+__version__ = '0.1'
+
+# Module-wide logger.  Only errors are shown by default; the command
+# line entry point lowers the level once per -v flag.
+_LOG = _logging.getLogger('script-publish')
+_LOG.addHandler(_logging.StreamHandler())
+_LOG.setLevel(_logging.ERROR)
+
+
+# Colors assumed when the typescript has not set one explicitly.  They
+# are used as fallbacks for reverse video in SelectGraphicRendition and
+# to build the default CSS in ScriptParser.
+DEFAULT_STYLE = {
+    'foreground': 'black',
+    'background': 'white',
+    }
+
+
+class ControlCodeMatch (object):
+    """A control code located in a byte stream
+
+    Pairs the ``ControlCode`` that matched with the regular expression
+    match object recording where it matched and which parameters (if
+    any) were captured.
+    """
+    def __init__(self, control_code, match):
+        self.match = match
+        self.control_code = control_code
+
+    def _fields(self):
+        "Return the pieces shared by ``__str__`` and ``__repr__``"
+        fields = [type(self).__name__, str(self.control_code.name)]
+        # only show captured parameters when the pattern has groups
+        if self.match and self.match.groups():
+            fields.append(str(self.match.groups()))
+        return fields
+
+    def __str__(self):
+        return '<{}>'.format(' '.join(self._fields()))
+
+    def __repr__(self):
+        fields = self._fields()
+        fields.append('0x{:x}'.format(id(self)))
+        return '<{}>'.format(' '.join(fields))
+
+
+class ControlCode (object):
+    """Base class for defining control code handlers
+
+    Parameters
+    ----------
+    name : str
+        Human readable name, used when printing the code and when the
+        parser selects codes by name.
+    capname : str
+        terminfo capability name (cf. terminfo(5)).  When ``code`` is
+        not given, the byte sequence is looked up with
+        ``curses.tigetstr(capname)``.
+    code : bytes
+        Raw (possibly parameterized) terminfo string for the code.
+    regexp : compiled bytes pattern
+        Pattern used by ``search``.  When not given, it is generated
+        from ``code``; it stays ``None`` if there is nothing to
+        translate.
+
+    Notes
+    -----
+
+    One common format for representing control characters is with
+    caret notation.  ASCII control codes are represented by::
+
+    >>> def caret_notation(code):
+    ...     return '^{}'.format(chr(code + 64))
+    >>> caret_notation(0)
+    '^@'
+    >>> caret_notation(8)
+    '^H'
+    >>> def caret_escape(representation):
+    ...     return ord(representation[-1]) - 64
+    >>> caret_escape('^H')
+    8
+    """
+    def __init__(self, name=None, capname=None, code=None, regexp=None):
+        self.name = name
+        self.capname = capname  # terminfo capability name (cf. terminfo(5))
+        if code is None:
+            code = self._get_code()
+        self.code = code
+        if regexp is None:
+            regexp = self._get_regexp()
+            # A missing or empty code leaves nothing to search for.
+            # Storing None (rather than b'') lets callers skip the
+            # code with a simple ``regexp is None`` test.
+            regexp = _re.compile(regexp) if regexp else None
+        self.regexp = regexp
+
+    def __str__(self):
+        return '<{} {}>'.format(type(self).__name__, self.name)
+
+    def __repr__(self):
+        return '<{} {} 0x{:x}>'.format(
+            type(self).__name__, self.name, id(self))
+
+    def _get_code(self):
+        """Look up the terminfo string for ``self.capname``
+
+        Returns ``None`` when no capability name was given, or when
+        the terminal doesn't support the capability.
+        """
+        if self.capname is None:
+            return None
+        try:
+            code = _curses.tigetstr(self.capname)
+        except _curses.error:
+            # setupterm() is required for curses.tiget*().  Environment
+            # variable names are case sensitive, so look up TERM.
+            term = _os.environ.get('TERM', 'xterm')
+            with open(_os.devnull, 'w') as f:
+                _curses.setupterm(term, f.fileno())
+            code = _curses.tigetstr(self.capname)
+        # code may be None if your terminal doesn't support the capability
+        return code
+
+    def _get_regexp(self, code=None):
+        r"""Return a Python regular expression matching a terminfo code
+
+        ``code`` defaults to ``self.code``.  Returns ``None`` if there
+        is no code to translate, and raises ``NotImplementedError`` for
+        parameterized-string operations that are not supported.
+
+        >>> cc = ControlCode()
+        >>> cc._get_regexp(b'\b')
+        b'\x08'
+        >>> cc._get_regexp(b'\x1b[K')
+        b'\x1b\\[K'
+        >>> cc._get_regexp(b'\\')
+        b'\\\\'
+        >>> cc._get_regexp(b'%9.4d')
+        b'([0-9.]+)'
+        >>> cc._get_regexp(b'%9.4x')
+        b'([0-9a-f.]+)'
+        >>> cc._get_regexp(b'100%%')
+        b'100%'
+        >>> sgr = (b'%?%p9%t\x1b(0%e\x1b(B%;\x1b[0%?%p6%t;1%;%?%p2'
+        ...        b'%t;4%;%?%p1%p3%|%t;7%;%?%p4%t;5%;%?%p7%t;8%;m')
+        >>> cc._get_regexp(sgr)  # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+          ...
+        NotImplementedError: untranslated formatting codes
+        """
+        # see 'Parameterized Strings' in terminfo(5) for the parameter
+        # string syntax (for formatting the codes).  It can get pretty
+        # funky, so we only parse a subset here.
+        if code is None:
+            code = self.code
+            if code is None:
+                return None
+        regexp = code
+        for r,replacement in [
+                # re.sub() processes backslash escapes in the
+                # replacement, so four backslashes here produce the
+                # two needed to match one literal backslash.
+                (_re.compile(b'\\\\'), b'\\\\\\\\'),
+                (_re.compile(b'\\['), b'\\['),
+                (_re.compile(b'\\]'), b'\\]'),
+                # %? (if you can figure it out) should go here (before '?')
+                (_re.compile(b'\\?'), b'\\?'),
+                (_re.compile(b'%(:?[-+# ]+)?([0-9]+([.][0-9]+)?)?([doxXs])'),
+                 self._get_regexp_integer),
+                (_re.compile(b'%p[1-9]'), b''
+                 ),  # we don't care what parameter you're pushing
+                (_re.compile(b'%i'), b''
+                 ),  # we don't care if you added one to your parameter
+                ]:
+            regexp = r.sub(replacement, regexp)
+        pp = _re.compile(b'%%')
+        doubles = len(pp.findall(regexp))
+        singles = regexp.count(b'%')
+        # Anything still starting with '%' is an operation we did not
+        # translate, unless it is half of a '%%' (literal percent) pair.
+        if singles != 2*doubles:
+            raise NotImplementedError(
+                'untranslated formatting codes for {} ({!r}): {!r}'.format(
+                    self.capname, self.code, regexp))
+        return pp.sub(b'%', regexp)
+
+    def _get_regexp_integer(self, match):
+        """Translate a printf-style integer format into a regexp group
+
+        ``match`` is a match for the ``%[flags][width[.precision]]fmt``
+        pattern used in ``_get_regexp``.  Flags and the ``o`` and ``s``
+        formats raise ``NotImplementedError``.
+        """
+        flags,width,precision,fmt = match.groups()
+        if flags:
+            raise NotImplementedError(flags)
+        if fmt == b'd':
+            return b'([0-9.]+)'
+        elif fmt == b'x':
+            return b'([0-9a-f.]+)'
+        elif fmt == b'X':
+            return b'([0-9A-F.]+)'
+        else:
+            raise NotImplementedError(fmt)
+
+    def search(self, text):
+        """Return a ``ControlCodeMatch`` for the first match in ``text``
+
+        Returns ``None`` if the code does not occur in ``text`` or if
+        this code has no regular expression to search with.
+        """
+        if self.regexp is None:
+            return None
+        match = self.regexp.search(text)
+        if match:
+            return ControlCodeMatch(self, match)
+        return None
+
+
+class SelectGraphicRendition (ControlCode):
+    """Select graphic rendition (SGR) codes, which set text attributes
+
+    Matches ``ESC [ n1 ; n2 m`` with up to two optional numeric
+    parameters and translates them into a style dictionary with
+    optional ``weight``, ``foreground``, and ``background`` keys.
+    """
+    # see 'Color Handling' in terminfo(5)
+    colors = [
+        'black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white']
+
+    def __init__(self):
+        super(SelectGraphicRendition, self).__init__(
+            'select graphic rendition', 'sgr',
+            regexp=_re.compile(b'\x1b[[]([0-9]+)?;?([0-9]+)?m'))
+
+    def style(self, match, style=None):
+        """Return ``style`` updated by the parameters captured in ``match``
+
+        A ``style`` dictionary passed in may be modified in place.
+        ``None`` is returned when no styling remains (for example,
+        after a reset).  A missing first parameter means reset.
+        """
+        if not style:
+            style = {}
+        n1,n2 = match.groups()
+        ns = [n1]
+        if n2:
+            ns.append(n2)
+        for n in ns:
+            if n is not None:
+                n = int(n)
+            style = self._style(n, style)
+        return style
+
+    def _style(self, code=None, style=None):
+        """Apply a single SGR parameter ``code`` to ``style``
+
+        Returns the updated dictionary, or ``None`` if it ends up
+        empty.  Raises ``NotImplementedError`` for parameters that are
+        not handled.
+        """
+        if style is None:
+            style = {}
+        if code in [None, 0]:
+            style = None  # reset
+        elif code == 1:
+            style['weight'] = 'bold'
+        elif code == 7:  # inverse (reverse video)
+            f = style.get('background', DEFAULT_STYLE['background'])
+            style['background'] = style.get(
+                'foreground', DEFAULT_STYLE['foreground'])
+            style['foreground'] = f
+        elif code == 27:
+            _LOG.debug('skipping {}'.format(self))
+        elif 30 <= code <= 37:
+            style['foreground'] = self.colors[code - 30]
+        elif code == 39:  # default text color
+            style.pop('foreground', None)
+        elif 40 <= code <= 47:  # all eight colors, including white (47)
+            style['background'] = self.colors[code - 40]
+        elif code == 49:  # default background color
+            style.pop('background', None)
+        else:
+            raise NotImplementedError('{} code {}'.format(self.name, code))
+        if not style:
+            return None
+        return style
+
+
+# Control codes recognized by ControlParser.  Entries created with a
+# terminfo capability name get their byte sequence (and from it their
+# regular expression) from curses when this module is loaded; entries
+# created with an explicit regexp do not consult terminfo.
+# ControlParser._replace_control_codes applies the entries in list
+# order and skips any entry whose regexp is None.
+CONTROL_CODES = [  # see terminfo(5)
+    # Safe to ignore
+    ControlCode('bell', 'bel'),
+    ControlCode('meta on', 'smm'),  # eight bit mode
+    # Clearing the screen
+    ControlCode('backspace key', 'kbs'),
+    ControlCode('delete one character', 'dch1'),
+    ControlCode('delete characters', 'dch'),
+    ControlCode('clear screen', 'clear'),
+    ControlCode('clear to beginning of line', 'el1'),
+    ControlCode('clear to end of line', 'el'),
+    ControlCode('clear to end of screen', 'ed'),
+    # Cursor motion
+    ControlCode('carriage return', 'cr'),
+    ControlCode('scroll forward', 'ind'),
+    ControlCode('cursor to upper left', 'home'),
+    ControlCode('enter ca mode', 'smcup'),  # start program using 'cup'
+    ControlCode('exit ca mode', 'rmcup'),  # end program using 'cup'
+    ControlCode('cursor address', 'cup'),
+    ControlCode('cursor up', 'cuu1'),
+    ControlCode('cursor down', 'cud1'),
+    ControlCode('cursor left', 'cub1'),
+    ControlCode('cursor right', 'cuf1'),
+    ControlCode('cursor row address', 'vpa'),
+    # Cursor visibility
+    ControlCode('cursor visible', 'cvvis'),
+    ControlCode('cursor invisible', 'civis'),
+    ControlCode('cursor normal visibility', 'cnorm'),
+    # Funky stuff
+    ControlCode('horizontal tab', 'ht'),
+    ControlCode('insert line', 'il1'),
+    ControlCode('insert line', 'il'),
+    ControlCode('erase characters', 'ech'),
+    ControlCode('operating system command', regexp=_re.compile(b'\x1b]')),  # [1]
+    ControlCode('application keypad mode', regexp=_re.compile(b'\x1b=')),  # [1]
+    ControlCode('numeric keypad mode', regexp=_re.compile(b'\x1b>')),  # [1]
+    ControlCode('set application cursor keys', regexp=_re.compile(b'\x1b[[][?]1h')),  # [1]
+    ControlCode('reset application cursor keys', regexp=_re.compile(b'\x1b[[][?]1l')),  # [1]
+    ControlCode('set scrolling region', regexp=_re.compile(b'\x1b[[]([0-9]+);([0-9]+)r')),  # [1]
+    ControlCode('send secondary device attributes', regexp=_re.compile(b'\x1b[[]>0c')),  # [1], i.e. "what kind of device are you?" [2]
+    SelectGraphicRendition(),
+    ]
+# [1]: http://www.gnu.org/software/screen/manual/html_node/Control-Sequences.html
+# [2]: http://vt100.net/docs/vt220-rm/chapter4.html#S4.17.1.2
+
+
+class StyledText (object):
+    """A run of text together with the style it should be shown in
+
+    ``style`` is either ``None`` (unstyled) or a dictionary of style
+    settings.
+    """
+    def __init__(self, text, style=None):
+        self.style = style
+        self.text = text
+
+    def __str__(self):
+        style = self.style
+        if style is not None:
+            # list the keys in sorted order for easy doctests
+            items = ', '.join(
+                '{!r}: {!r}'.format(key, style[key])
+                for key in sorted(style))
+            style = '{' + items + '}'
+        return '<{} {!r}>'.format(style, self.text)
+
+
+def CLASS(*args):  # class is a reserved word in Python
+    """Return a ``{'class': ...}`` dict with ``args`` joined by spaces
+
+    The result can be expanded with ``**`` where a literal ``class``
+    keyword argument is not allowed.
+    """
+    attributes = {}
+    attributes['class'] = ' '.join(args)
+    return attributes
+
+
+class ControlParser (object):
+    r"""Control code parser
+
+    >>> cp = ControlParser()
+
+    C0 codes:
+
+    >>> cp._replace_control_codes(b'\a')  # doctest: +ELLIPSIS
+    [<ControlCodeMatch bell 0x...>]
+
+    C1 codes are also supported.  For example, here is a prompt
+    generated with
+    ``PS1='\[\033[01;32m\]\u@\h\[\033[01;34m\] \w \$\[\033[00m\] '``:
+
+    >>> cp._replace_control_codes(b'\x1b]')  # doctest: +ELLIPSIS
+    [<ControlCodeMatch operating system command 0x...>]
+    >>> text = (
+    ...     b'\x1b]0;user@host:~\a\x1b[?1034h'
+    ...     b'\x1b[01;32muser@host\x1b[01;34m ~ $'
+    ...     b'\x1b[00m echo hello\r')
+    >>> for chunk in cp._replace_control_codes(text):
+    ...     if isinstance(chunk, bytes):
+    ...         print(repr(chunk))
+    ...     else:
+    ...         print(chunk)
+    ... # doctest: +REPORT_UDIFF
+    <ControlCodeMatch operating system command>
+    b'0;user@host:~'
+    <ControlCodeMatch bell>
+    <ControlCodeMatch meta on>
+    <ControlCodeMatch select graphic rendition (b'01', b'32')>
+    b'user@host'
+    <ControlCodeMatch select graphic rendition (b'01', b'34')>
+    b' ~ $'
+    <ControlCodeMatch select graphic rendition (b'00', None)>
+    b' echo hello'
+    <ControlCodeMatch carriage return>
+
+    >>> for chunk in cp._style_chunks(
+    ...         cp._decode_text(cp._replace_control_codes(text))):
+    ...     print(chunk)
+    ... # doctest: +REPORT_UDIFF
+    <ControlCodeMatch operating system command>
+    <None '0;user@host:~'>
+    <ControlCodeMatch bell>
+    <ControlCodeMatch meta on>
+    <{'foreground': 'green', 'weight': 'bold'} 'user@host'>
+    <{'foreground': 'blue', 'weight': 'bold'} ' ~ $'>
+    <None ' echo hello'>
+    <ControlCodeMatch carriage return>
+
+    >>> e = _etree.Element('pre')
+    >>> e = cp.parse(e, text)
+    >>> print(str(_etree.tostring(e), 'ascii'))
+    <pre><span style="font-weight: bold; color: green">user@host</span><span style="font-weight: bold; color: blue"> ~ $</span> echo hello</pre>
+
+    Select graphic rendition codes (a subset of C1):
+
+    >>> cp._replace_control_codes(b'\x1b[01;32m')  # doctest: +ELLIPSIS
+    [<ControlCodeMatch select graphic rendition (b'01', b'32') 0x...>]
+    >>> cp._replace_control_codes(b'\x1b[01m')  # doctest: +ELLIPSIS
+    [<ControlCodeMatch select graphic rendition (b'01', None) 0x...>]
+    >>> cp._replace_control_codes(b'\x1b[m')  # doctest: +ELLIPSIS
+    [<ControlCodeMatch select graphic rendition (None, None) 0x...>]
+
+    >> e = _etree.Element('span', **CLASS('outside'))
+    >> text = b'\x1b[36m@@ -8,6 +8,10 @@\x1b[m \x1b[midea'
+    >> cp._style_chunks(text)
+    [({'foreground': 'cyan'}, b'@@ -8,6 +8,10 @@'), (None, ' idea')]
+    >> e = cp.parse(e, text)
+    >> print(str(_etree.tostring(e), 'ascii'))
+    <span class="outside"><span style="color: cyan">@@ -8,6 +8,10 @@</span> idea</span>
+
+    """
+    def __init__(self, encoding='utf-8'):
+        "Store the encoding that ``_decode_text`` uses for byte chunks"
+        self._encoding = encoding
+
+    def _replace_control_codes(self, text):
+        """Split ``text`` into a list of chunks
+
+        Some chunks will be unaltered bytes, and the rest will be
+        ``ControlCodeMatch`` instances.  The codes in ``CONTROL_CODES``
+        are applied one after another, each one splitting the byte
+        chunks left over by the codes before it.  Codes without a
+        regular expression are skipped.
+        """
+        chunks = [text]
+        for cc in CONTROL_CODES:
+            if cc.regexp is None:
+                continue
+            stream = []
+            for chunk in chunks:
+                if not isinstance(chunk, bytes):  # control code
+                    stream.append(chunk)
+                    continue
+                # peel matches off the front of the chunk until none remain
+                rest = chunk
+                while True:
+                    match = cc.search(rest)
+                    if not match:  # no control code in the remainder
+                        stream.append(rest)
+                        break
+                    before = rest[:match.match.start()]
+                    if before:
+                        stream.append(before)
+                    stream.append(match)
+                    rest = rest[match.match.end():]
+                    if not rest:
+                        break
+            chunks = stream
+        # returning ``chunks`` (not ``stream``) keeps this working even
+        # if every control code was skipped
+        return chunks
+
+    def _decode_text(self, chunks):
+        """Decode the byte chunks in ``chunks`` to text, in place
+
+        Chunks that are not bytes are left alone.  The same list is
+        returned for convenience.
+        """
+        encoding = self._encoding
+        chunks[:] = [
+            str(chunk, encoding) if isinstance(chunk, bytes) else chunk
+            for chunk in chunks]
+        return chunks
+
+    def _merge_adjacent_text(self, chunks):
+        """Join neighboring text chunks in ``chunks``, in place
+
+        The same list is returned for convenience.
+        """
+        merged = []
+        for chunk in chunks:
+            if (merged and isinstance(chunk, str) and
+                    isinstance(merged[-1], str)):
+                merged[-1] = merged[-1] + chunk
+            else:
+                merged.append(chunk)
+        chunks[:] = merged
+        return chunks
+
+    def _remove_operating_system_commands(self, chunks):
+        """Strip operating system commands from ``chunks``, in place
+
+        Each 'operating system command' match is removed together with
+        everything up to and including the next text chunk.  The same
+        list is returned for convenience.
+        """
+        start = 0
+        while start < len(chunks):
+            chunk = chunks[start]
+            if not (isinstance(chunk, ControlCodeMatch) and
+                    chunk.control_code.name == 'operating system command'):
+                start += 1
+                continue
+            # I haven't been able to dig up docs for this, but it is
+            # involved with fancy PS1 prompts.
+            stop = start
+            while stop < len(chunks) and not isinstance(chunks[stop], str):
+                stop += 1  # strip to next text
+            if stop < len(chunks):
+                stop += 1  # strip next text
+            del chunks[start:stop]
+        return chunks
+
+    def _process_backspaces(self, chunks):
+        """Apply backspaces to the text chunk preceding them
+
+        Each 'backspace key' match removes the last character of the
+        most recent text chunk, and is dropped along with a 'clear to
+        end of line' match that directly follows it.  A backspace with
+        no text left to erase is kept as it is.  Returns a new list
+        without the text chunks that ended up empty.
+        """
+        # without cursor tracking, this is a bit ad hoc
+        last_text = None  # index of the most recent text chunk, if any
+        i = 0
+        while i < len(chunks):
+            chunk = chunks[i]
+            if isinstance(chunk, str):
+                last_text = i
+            elif (isinstance(chunk, ControlCodeMatch) and
+                  chunk.control_code.name == 'backspace key' and
+                  last_text is not None and chunks[last_text]):
+                _LOG.debug('backspace {!r} -> {!r}'.format(
+                        chunks[last_text], chunks[last_text][:-1]))
+                chunks[last_text] = chunks[last_text][:-1]
+                chunks.pop(i)
+                if (i < len(chunks) and
+                        isinstance(chunks[i], ControlCodeMatch) and
+                        chunks[i].control_code.name == 'clear to end of line'):
+                    chunks.pop(i)
+                continue
+            i += 1
+        # the text has been decoded by now, so look for empty str (and
+        # not only empty bytes) chunks
+        return [chunk for chunk in chunks
+                if not (isinstance(chunk, (str, bytes)) and not chunk)]
+
+    def _drop_no_op(self, chunks):
+        "Return a copy of ``chunks`` without codes that can be ignored"
+        ignorable = (
+            'bell',
+            'meta on',
+            'enter ca mode',
+            'exit ca mode',
+            'application keypad mode',
+            'numeric keypad mode',
+            )
+        kept = []
+        for chunk in chunks:
+            if (isinstance(chunk, ControlCodeMatch) and
+                    chunk.control_code.name in ignorable):
+                continue
+            kept.append(chunk)
+        return kept
+
+    def _drop_cursors(self, chunks):
+        "Return a copy of ``chunks`` without the 'cursor ...' codes"
+        kept = []
+        for chunk in chunks:
+            if (isinstance(chunk, ControlCodeMatch) and
+                    chunk.control_code.name.startswith('cursor ')):
+                continue
+            kept.append(chunk)
+        return kept
+
+    def _drop_carriage_return(self, chunks):
+        "Return a copy of ``chunks`` without 'carriage return' codes"
+        kept = []
+        for chunk in chunks:
+            if (isinstance(chunk, ControlCodeMatch) and
+                    chunk.control_code.name == 'carriage return'):
+                continue
+            kept.append(chunk)
+        return kept
+
+    def _drop_clear_line(self, chunks):
+        """Return a copy of ``chunks`` without partial 'clear ...' codes
+
+        'clear screen' is kept, since ``parse`` renders it.
+        """
+        kept = []
+        for chunk in chunks:
+            if isinstance(chunk, ControlCodeMatch):
+                name = chunk.control_code.name
+                if name.startswith('clear ') and name != 'clear screen':
+                    continue
+            kept.append(chunk)
+        return kept
+
+    def _collapse_adjacent_newlines(self, chunks):
+        """Reduce runs of 'scroll forward' codes to a single code, in place
+
+        The same list is returned for convenience.
+        """
+        def is_newline(chunk):
+            return (isinstance(chunk, ControlCodeMatch) and
+                    chunk.control_code.name == 'scroll forward')
+
+        collapsed = []
+        for chunk in chunks:
+            if collapsed and is_newline(chunk) and is_newline(collapsed[-1]):
+                continue  # the previous chunk already scrolls forward
+            collapsed.append(chunk)
+        chunks[:] = collapsed
+        return chunks
+
+    def _style_chunks(self, chunks):
+        """Return ``chunks`` with text wrapped in ``StyledText``
+
+        'select graphic rendition' codes update the current style and
+        are removed; every text chunk gets a copy of the style that is
+        current when it is reached.  Other chunks pass through.
+        """
+        current = None
+        result = []
+        for chunk in chunks:
+            if (isinstance(chunk, ControlCodeMatch) and
+                    chunk.control_code.name == 'select graphic rendition'):
+                # consumed here, so there is nothing to append
+                current = chunk.control_code.style(chunk.match, style=current)
+            elif isinstance(chunk, str):
+                result.append(
+                    StyledText(text=chunk, style=_copy.copy(current)))
+            else:
+                result.append(chunk)
+        return result
+
+    def parse(self, element, text):
+        """Render the byte stream ``text`` into ``element``
+
+        The control codes in ``text`` are interpreted (or dropped) and
+        the remaining text is added to ``element``: styled text as
+        ``span`` children, unstyled text as element text or as the
+        tail of the preceding child, and 'scroll forward' and 'clear
+        screen' codes as ``br`` children.  Other control codes are
+        logged and ignored.  Returns ``element``.
+
+        A ``ValueError`` raised while storing text is re-raised with
+        the ``repr`` of the offending text as its message.
+        """
+        chunks = self._replace_control_codes(text)
+        chunks = self._decode_text(chunks)
+        chunks = self._merge_adjacent_text(chunks)
+        chunks = self._remove_operating_system_commands(chunks)
+        chunks = self._merge_adjacent_text(chunks)
+        chunks = self._process_backspaces(chunks)
+        chunks = self._drop_no_op(chunks)
+        chunks = self._drop_cursors(chunks)
+        chunks = self._drop_carriage_return(chunks)
+        chunks = self._drop_clear_line(chunks)
+        chunks = self._collapse_adjacent_newlines(chunks)
+        chunks = self._merge_adjacent_text(chunks)
+        chunks = self._style_chunks(chunks)
+        previous = element
+        # True once unstyled text has been stored after ``previous``.
+        # More unstyled text (separated only by ignored control codes)
+        # must then be appended rather than overwrite what is there.
+        appending = False
+        for chunk in chunks:
+            if isinstance(chunk, StyledText):
+                if chunk.style:
+                    styles = []
+                    for src,tgt in [
+                            ('weight', 'font-weight'),
+                            ('foreground', 'color'),
+                            ('background', 'background-color'),
+                            ]:
+                        if src in chunk.style:
+                            styles.append(
+                                '{}: {}'.format(tgt, chunk.style[src]))
+                    style = '; '.join(styles)
+                    span = _etree.Element('span', style=style)
+                    try:
+                        span.text = chunk.text
+                    except ValueError as error:
+                        raise ValueError(repr(chunk.text)) from error
+                    element.append(span)
+                    previous = span
+                    appending = False
+                else:
+                    # text before the first child belongs to the
+                    # element itself, later text trails a child
+                    if previous is element:
+                        slot = 'text'
+                    else:
+                        slot = 'tail'
+                    content = chunk.text
+                    if appending:
+                        content = (getattr(previous, slot) or '') + content
+                    try:
+                        setattr(previous, slot, content)
+                    except ValueError as error:
+                        raise ValueError(repr(chunk.text)) from error
+                    appending = True
+            elif isinstance(chunk, ControlCodeMatch):
+                cc = chunk.control_code
+                if cc.name in ['scroll forward', 'clear screen']:
+                    br = _etree.Element('br')
+                    # for readable HTML; replaced by any text that follows
+                    br.tail = '\n'
+                    element.append(br)
+                    previous = br
+                    appending = False
+                else:
+                    _LOG.info('ignoring {}'.format(cc))
+        return element
+
+
+class ScriptParser (object):
+    """Convert a raw typescript into an HTML document
+
+    ``encoding`` is used both to decode the typescript and to encode
+    the HTML.  ``css`` is the stylesheet embedded in the document; a
+    default built from ``DEFAULT_STYLE`` is used when it is ``None``.
+    """
+    def __init__(self, encoding='utf-8', css=None):
+        self._encoding = encoding
+        self._control_parser = ControlParser(encoding=encoding)
+        self.css = self._get_css() if css is None else css
+
+    def render(self, text):
+        "Return an ``ElementTree`` with the HTML for the bytes in ``text``"
+        html = _etree.Element('html')
+        head = _etree.SubElement(html, 'head')
+        style = _etree.SubElement(head, 'style', type='text/css')
+        style.text = self.css
+        body = _etree.SubElement(html, 'body')
+        body.append(
+            self._control_parser.parse(_etree.Element('pre'), text))
+        # put the structural tags on lines of their own
+        for node in (html, head, body):
+            node.text = node.tail = '\n'
+        return _etree.ElementTree(element=html)
+
+    def process(self, source, target=None):
+        "Read the typescript ``source`` and write the HTML to ``target``"
+        rendered = self.render(
+            self._read_file(filename=source))  # byte stream
+        self._write_etree(tree=rendered, filename=target)
+
+    def _read_file(self, filename=None):
+        "Return the bytes in ``filename`` (or stdin, if it is not given)"
+        if not filename:
+            _LOG.info('read from stdin')
+            return _sys.stdin.buffer.read()
+        _LOG.info('read {}'.format(filename))
+        with open(filename, 'rb') as stream:
+            return stream.read()
+
+    def _serialize(self, tree):
+        "Return the root of ``tree`` serialized to a string"
+        root = tree.getroot()
+        return _etree.tostring(root, 'unicode')
+
+    def _write_etree(self, tree, filename=None):
+        "Write ``tree`` to ``filename`` (or stdout, if it is not given)"
+        if not filename:
+            _LOG.info('write to stdout')
+            payload = self._serialize(tree).encode(self._encoding)
+            _sys.stdout.buffer.write(payload)
+            return
+        _LOG.info('write {}'.format(filename))
+        tree.write(filename, encoding=self._encoding)
+
+    def _get_css(self):
+        "Return the default stylesheet, built from ``DEFAULT_STYLE``"
+        lines = [
+            'body {',
+            '  color: {};'.format(DEFAULT_STYLE['foreground']),
+            '  background: {}'.format(DEFAULT_STYLE['background']),
+            '}',
+            '',  # for a trailing newline
+            ]
+        return '\n'.join(lines)
+
+class ScriptServerApp (ScriptParser):
+    """WSGI application serving the HTML typescript
+
+    Subclasses ``ScriptParser`` to easily access the serialized
+    typescript.  The typescript is rendered again at most once every
+    ``delay`` seconds; requests in between get the cached page.
+    Other keyword arguments are passed on to ``ScriptParser``.
+
+    For details on WSGI, see `PEP 333`_.
+
+    .. _PEP 333: http://www.python.org/dev/peps/pep-0333/
+    """
+    def __init__(self, source=None, target=None, delay=2, **kwargs):
+        super(ScriptServerApp, self).__init__(**kwargs)
+        self._source = source
+        self._target = target
+        self._delay = delay
+        self._next_render = 0  # time.time() after which to render again
+        self._content = None  # serialized HTML from the last render
+
+    def process(self):
+        "Render the typescript again, unless the cached page is fresh"
+        if _time.time() > self._next_render:
+            super(ScriptServerApp, self).process(
+                source=self._source, target=self._target)
+            # Schedule relative to the current time.  Incrementing the
+            # initial zero would never catch up with the clock, and
+            # every request would trigger a new render.
+            self._next_render = _time.time() + self._delay
+
+    def _write_etree(self, tree, **kwargs):
+        "Cache the serialized ``tree`` instead of writing it to a file"
+        self._content = self._serialize(tree)
+
+    def __call__(self, environ, start_response):
+        "WSGI entry point"
+        self.process()
+        status = '200 OK'
+        headers = [
+            ('Content-type', 'text/html; charset={}'.format(self._encoding)),
+            ]
+        start_response(status, headers)
+        return [self._content.encode(self._encoding)]
+
+    def run(self, host='', port=8000):
+        "Serve the typescript on ``host``:``port`` until interrupted"
+        server = _wsgiref_simple_server.make_server(
+            host=host, port=port, app=self)
+        self._log_start(host=host, port=port)
+        try:
+            server.serve_forever()
+        except _select.error as e:
+            if len(e.args) == 2 and e.args[1] == 'Interrupted system call':
+                pass
+            else:
+                raise
+
+    def _log_start(self, host, port):
+        "Log the names and addresses that the server can be reached at"
+        if not host:
+            host = _socket.getfqdn()
+        _LOG.info('serving on {}:{}'.format(host, port))
+        try:
+            addrs = _socket.getaddrinfo(host=host, port=port)
+        except _socket.gaierror as e:
+            _LOG.warning(e)
+        else:
+            seen = set()  # getaddrinfo repeats addresses per socket type
+            for family,type_,proto,canonname,sockaddr in addrs:
+                c = canonname or host
+                if (c, sockaddr) not in seen:
+                    _LOG.info('address: {} {}'.format(c, sockaddr))
+                    seen.add((c, sockaddr))
+
+
+if __name__ == '__main__':
+    import argparse as _argparse
+
+    # the first docstring line is the description, the rest the epilog
+    parser = _argparse.ArgumentParser(
+        description=__doc__.splitlines()[0],
+        epilog='\n'.join(__doc__.splitlines()[2:]),
+        formatter_class=_argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        '--version', action='version',
+        version='%(prog)s {}'.format(__version__))
+    parser.add_argument(
+        '-v', '--verbose', default=0, action='count',
+        help='increment verbosity')
+    parser.add_argument(
+        '-s', '--source', metavar='SOURCE',
+        help='source script file (defaults to stdin)')
+    parser.add_argument(
+        '-t', '--target', metavar='TARGET',
+        help='target HTML file (defaults to stdout)')
+    parser.add_argument(
+        '-e', '--encoding', metavar='ENCODING', default='utf-8',
+        help='file content encoding (defaults to utf-8)')
+    parser.add_argument(
+        '-p', '--poll', metavar='SECONDS', type=float,
+        help='poll frequency in seconds')
+    parser.add_argument(
+        '--serve', metavar='PORT', type=int,
+        help='serve the HTTP typescript on PORT')
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        # each -v lowers the threshold by one level, down to DEBUG
+        _LOG.setLevel(max(_logging.DEBUG, _LOG.level - 10*args.verbose))
+
+    if args.serve:
+        # pass the encoding along, so --encoding also applies when serving
+        server = ScriptServerApp(
+            source=args.source, target=args.target, delay=args.poll or 2,
+            encoding=args.encoding)
+        server.run(port=args.serve)
+    else:
+        sp = ScriptParser(encoding=args.encoding)
+        if args.poll:
+            while True:
+                sp.process(source=args.source, target=args.target)
+                _time.sleep(args.poll)
+        else:  # single shot
+            sp.process(source=args.source, target=args.target)
author	W. Trevor King <wking@tremily.us>
	Sun, 13 Jan 2013 01:10:51 +0000 (20:10 -0500)
committer	W. Trevor King <wking@tremily.us>
	Sun, 13 Jan 2013 01:47:56 +0000 (20:47 -0500)
posts/script.mdwn		patch \| blob \| history
posts/script/script-publish.py	[new file with mode: 0755]	patch \| blob