gallery.py: Pass page data through .index()
[blog.git] / posts / gallery / gallery.py
index 7d44b774d96329da49f24713005814902e6b527e..a7ec9442515173ae1d610e01046ed7a34dc1a2f2 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Copyright (C) 2010-2012 W. Trevor King <wking@drexel.edu>
+# Copyright (C) 2010-2013 W. Trevor King <wking@tremily.us>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""
-CGI gallery server for a picture directory organized along::
+"""Gallery server for a picture directory organized along::
 
   pics
   |-- some_directory
@@ -37,12 +35,15 @@ With::
 Note that you can store a caption for ``<PICTURE>`` as plain text in
 ``<PICTURE>.txt``.
 
-See RFC 3875 for more details on the the Common Gateway Interface.
+See RFC 3875 for more details on the Common Gateway Interface (CGI).
+
+Besides the CGI interface, this script can also be run as:
 
-This script can also be run as a Simple Common Gateway Interface
-(SCGI) with the ``--scgi`` option.
+* a Simple Common Gateway Interface (SCGI) with the ``--mode=scgi`` option
+* a stand-alone server with the ``--mode=wsgi`` option
 """
 
+import collections as _collections
 import logging as _logging
 import logging.handlers as _logging_handlers
 import math as _math
@@ -52,6 +53,11 @@ import os.path as _os_path
 import random as _random
 import re as _re
 import subprocess as _subprocess
+try:  # Python 3
+    import urllib.parse as _urllib_parse
+except ImportError:  # Python 2
+    import urlparse as _urllib_parse
+import xml.sax.saxutils as _xml_sax_saxutils
 
 
 __version__ = '0.5'
@@ -62,7 +68,9 @@ VIDEO_EXTENSIONS = ['.mov', '.mp4', '.ogv']
 STREAMING_TYPES = ['video/ogg']
 RESPONSES = {  # httplib takes half a second to load
     200: 'OK',
+    400: 'Bad Request',
     404: 'Not Found',
+    500: 'Internal Server Error',
     }
 
 LOG = _logging.getLogger('gallery.py')
@@ -89,6 +97,8 @@ class HTTPError(Exception):
     def __init__(self, status, message=None, content=None):
         if message is None:
             message = RESPONSES[status]
+        if content is None:
+            content = message
         super(HTTPError, self).__init__('{} {}'.format(status, message))
         self.status = status
         self.message = message
@@ -96,7 +106,8 @@ class HTTPError(Exception):
 
 
 class ProcessingComplete(Exception):
-    pass
+    def __init__(self, headers=None):
+        self.headers = headers
 
 
 def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
@@ -113,7 +124,7 @@ def invoke(args, stdin=None, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE,
     try :
         q = _subprocess.Popen(args, stdin=_subprocess.PIPE, stdout=stdout,
                               stderr=stderr, cwd=cwd)
-    except OSError, e:
+    except OSError as e:
         raise CommandError(args, status=e.args[0], stderr=e)
     stdout,stderr = q.communicate(input=stdin)
     status = q.wait()
@@ -144,56 +155,78 @@ class CGIGalleryServer (object):
     def __init__(self, base_path='.',
                  base_url='/',
                  cache_path='/tmp/gallery-cache/',
-                 serve_originals=True):
+                 serve_originals=True,
+                 write_http_headers=True):
         self._base_path = _os_path.abspath(base_path)
         self._base_url = base_url
         self._cache_path = cache_path
         self._serve_originals = serve_originals
+        self._write_http_headers = write_http_headers
+        self._text_charset = 'UTF-8'
         self._url_regexp = _re.compile('^[a-zA-Z0-9._/-]*$')
         self._rows = 3
         self._columns = 3
         self.header = []
         self.footer = []
 
-    def _http_header(self, mime='text/html', status=200):
+    def _get_charset(self, headers):
+        content_type = headers.get('Content-type', '')
+        if 'charset=' in content_type:
+            return content_type.split('charset=', 1)[-1]
+
+    def _http_headers(self, mime='text/html', status=200):
         msg = RESPONSES[status]
-        header = ['Status: {:d} {}'.format(status, msg)]
+        headers = _collections.OrderedDict()
+        headers['Status'] = '{:d} {}'.format(status, msg)
         if mime.startswith('text/'):
-            charset = '; charset=UTF-8'
+            charset = '; charset={}'.format(self._text_charset)
         else:
             charset = ''
-        header.append('Content-type: {}{}'.format(mime, charset))
-        return '\n'.join(header)
+        headers['Content-type'] = '{}{}'.format(mime, charset)
+        return headers
 
-    def _response(self, header=None, content='<h1>It works!</h1>',
+    def _add_header(self, headers=None, stream=None):
+        if headers and self._write_http_headers:
+            for key, value in headers.items():
+                stream.write(
+                    '{}: {}\r\n'.format(key, value).encode('US-ASCII'))
+            stream.write('\r\n'.encode('US-ASCII'))
+
+    def _response(self, headers=None, content='<h1>It works!</h1>',
                   stream=None):
-        if header is None:
-            header = self._http_header()
-        stream.write(header)
-        stream.write('\n\n')
+        if headers is None:
+            headers = self._http_headers()
+        self._add_header(headers=headers, stream=stream)
+        charset = self._get_charset(headers=headers)
+        if charset:
+            content = content.encode(charset)
         stream.write(content)
-        raise ProcessingComplete()
+        raise ProcessingComplete(headers=headers)
 
-    def _response_stream(self, header=None, content=None, stream=None,
+    def _response_stream(self, headers=None, content=None, stream=None,
                          chunk_size=1024):
         LOG.debug('streaming response')
-        if header is None:
-            header = self._http_header()
-        stream.write(header)
-        stream.write('\n\n')
+        if headers is None:
+            headers = self._http_headers()
+        charset = self._get_charset(headers=headers)
+        if charset:
+            raise HTTPError(
+                500,
+                content='charset {} set for streamed response'.format(charset))
+        self._add_header(headers=headers, stream=stream)
         stream.flush()  # flush headers
         while True:
             chunk = content.read(chunk_size)
             if not chunk:
                 break
             stream.write(chunk)
-        raise ProcessingComplete()
+        raise ProcessingComplete(headers=headers)
 
     def _error(self, status=404, content=None, stream=None):
-        header = self._http_header(status=status)
+        headers = self._http_headers(status=status)
         if content is None:
             content = RESPONSES[status]
-        self._response(header=header, content=content, stream=stream)
+        self._response(headers=headers, content=content, stream=stream)
 
     def validate_url(self, url, exists=True, directory=False):
         LOG.debug('validating {} (exists={}, directory={})'.format(
@@ -214,14 +247,14 @@ class CGIGalleryServer (object):
                     raise HTTPError(404)
             else:
                 if not _os_path.isfile(path):
-                    raise HTTPError(404, 'nonexistent file')
+                    raise HTTPError(404, content='nonexistent file')
 
     def serve(self, url=None, page=0, stream=None):
         LOG.info('serving url {} (page {})'.format(url, page))
         try:
             try:
                 if url is None:
-                    self.index(stream=stream)
+                    self.index(stream=stream, page=page)
                 elif url.endswith('random'):
                     self.random(
                         url=url, stream=stream, max_width=500, max_height=500)
@@ -231,31 +264,52 @@ class CGIGalleryServer (object):
                 else:
                     self.validate_url(url=url, exists=False, directory=True)
                     self.page(url=url, page=page, stream=stream)
-                raise HTTPError(404, 'unexpected URL type')
+                raise HTTPError(404, content='unexpected URL type')
             except HTTPError as e:
                 LOG.error(e.message)
                 self._error(e.status, content=e.content, stream=stream)
-        except ProcessingComplete:
-            pass
+        except ProcessingComplete as e:
+            return e
+
+    def page_from_query(self, query=None, query_string=None):
+        """Extract the requested page from a query string
+
+        This is a helper method for CGIGalleryServer consumers.
+        Specify either query or query_string, but not both.
+        """
+        if query is None:
+            query = _urllib_parse.parse_qs(query_string)
+        page = 0
+        if 'pp' in query:
+            pp = query['pp']
+            if isinstance(pp, list):
+                pp = pp[0]
+            try:
+                page = int(pp) - 1
+            except ValueError:
+                pass
+        return page
 
     def relative_url(self, url):
         if url is None:
             return url
         if not url.startswith(self._base_url):
-            message = 'cannot convert {} to a relative URL of {}'.format(
+            content = 'cannot convert {} to a relative URL of {}'.format(
                 url, self._base_url)
-            raise HTTPError(404, message)
+            raise HTTPError(404, content=content)
         if url == self._base_url:
             return None
         return url[len(self._base_url):]
 
-    def _url(self, path):
+    def _url(self, path, query=None):
         relpath = _os_path.relpath(
             _os_path.join(self._base_path, path), self._base_path)
         if relpath == '.':
             relpath = ''
         elif path.endswith('/'):
             relpath += '/'
+        if query:
+            relpath = '{}?{}'.format(relpath, _urllib_parse.urlencode(query))
         return '{}{}'.format(self._base_url, relpath)
 
     def _label(self, path):
@@ -264,10 +318,11 @@ class CGIGalleryServer (object):
             dirname,base = _os_path.split(dirname)
         return base.replace('_', ' ').title()
 
-    def _link(self, path, text=None):
+    def _link(self, path, query=None, text=None):
         if text is None:
             text = self._label(path)
-        return '<a href="{}">{}</a>'.format(self._url(path), text)
+        return '<a href="{}">{}</a>'.format(
+            self._url(path=path, query=query), text)
 
     def _subdirs(self, path):
         try:
@@ -294,9 +349,9 @@ class CGIGalleryServer (object):
             if is_image(picture_path):
                 yield picture_path
 
-    def index(self, stream=None):
+    def index(self, page=0, stream=None):
         LOG.debug('index page')
-        return self._directory(self._base_path, stream=stream)
+        return self._directory(path=self._base_path, page=page, stream=stream)
 
     def _original_url(self, url):
         """Reverse thumbnail URL mapping
@@ -308,12 +363,14 @@ class CGIGalleryServer (object):
             try:
                 root,width,height = base.rsplit('-', 2)
             except ValueError:
-                raise HTTPError(404, 'missing width/height in {}'.format(base))
+                raise HTTPError(
+                    400, content='missing width/height in {}'.format(base))
             try:
                 width = int(width)
                 height = int(height)
             except ValueError as e:
-                raise HTTPError(404, 'invalid width/height: {}'.format(e))
+                raise HTTPError(
+                    400, content='invalid width/height: {}'.format(e))
             return (
                 root + '.jpg',
                 self._thumb, 
@@ -326,7 +383,7 @@ class CGIGalleryServer (object):
                 getattr(self, '_{}'.format(extension), None),
                 {},
                 )
-        raise HTTPError(404, 'no original URL for {}'.format(url))
+        raise HTTPError(400, content='no original URL for {}'.format(url))
 
     def _thumb(self, image, max_width=None, max_height=None):
         if not _os_path.exists(self._cache_path):
@@ -342,7 +399,8 @@ class CGIGalleryServer (object):
         thumb_path = _os_path.join(cache_dir, thumb_filename)
         image_path = _os_path.join(self._base_path, image)
         if not _os_path.isfile(image_path):
-            raise HTTPError(404, 'image path for thumbnail does not exist')
+            raise HTTPError(
+                404, content='image path for thumbnail does not exist')
         if (not _os_path.isfile(thumb_path)
             or _os_path.getmtime(image_path) > _os_path.getmtime(thumb_path)):
             invoke(['convert', '-format', 'png', '-strip', '-quality', '95',
@@ -351,9 +409,10 @@ class CGIGalleryServer (object):
                     thumb_path])
         return (thumb_path, self._url(thumb_url))
 
-    def _mp4(self, video):
+    def _mp4(self, video, *args):
         if not video.endswith('.mov'):
-            raise HTTPError(404, "can't translate {} to MPEGv4".format(video))
+            raise HTTPError(
+                500, content="can't translate {} to MPEGv4".format(video))
         dirname,filename = _os_path.split(video)
         mp4_filename = image_base(filename) + '.mp4'
         reldir = _os_path.relpath(dirname, self._base_path)
@@ -363,7 +422,7 @@ class CGIGalleryServer (object):
         mp4_url = _os_path.join(dirname, mp4_filename)
         mp4_path = _os_path.join(cache_dir, mp4_filename)
         if not _os_path.isfile(video):
-            raise HTTPError(404, 'source video path does not exist')
+            raise HTTPError(404, content='source video path does not exist')
         if (not _os_path.isfile(mp4_path)
             or _os_path.getmtime(video) > _os_path.getmtime(mp4_path)):
             arg = ['ffmpeg', '-i', video, '-acodec', 'libfaac', '-aq', '200',
@@ -375,7 +434,7 @@ class CGIGalleryServer (object):
             invoke(arg)
         return (mp4_path, self._url(mp4_url))
 
-    def _ogv(self, video, stream=None):
+    def _ogv(self, video, *args):
         if not video.endswith('.mov'):
             LOG.error("can't translate {} to Ogg Video".format(video))
         dirname,filename = _os_path.split(video)
@@ -426,6 +485,7 @@ class CGIGalleryServer (object):
         else:
             content.append(img)
         if caption:
+            caption = _xml_sax_saxutils.escape(caption)
             content.append('<p>{}</p>'.format(caption))
         return content
 
@@ -461,7 +521,19 @@ class CGIGalleryServer (object):
     def _image(self, image, **kwargs):
         if kwargs:
             image_path,image_url = self._thumb(image, **kwargs)
-        return '<img src="{}" />'.format(image_url)
+        else:
+            image_url = image
+        sections = ['<img src="{}"'.format(image_url)]
+        caption = self._get_image_caption(path=image)
+        if caption:
+            caption = _xml_sax_saxutils.quoteattr(
+                caption.replace('\n', ' ').strip())
+            sections.extend([
+                    'title={}'.format(caption),
+                    'alt={}'.format(caption),
+                    ])
+        sections.append('/>')
+        return ' '.join(sections)
 
     def _image_page(self, image):
         return image_base(image) + '/'
@@ -497,7 +569,8 @@ class CGIGalleryServer (object):
         LOG.debug('retrieving possibly cached item')
         mime = _mimetypes.guess_type(url)[0]
         if mime is None:
-            raise HTTPError(404, 'unknown mime type for {}'.format(url))
+            raise HTTPError(
+                500, content='unknown mime type for {}'.format(url))
         cache_path = _os_path.join(self._cache_path, url)
         original_path = _os_path.join(self._base_path, url)
         path = None
@@ -519,18 +592,19 @@ class CGIGalleryServer (object):
             content = open(path, 'rb')
         except IOError as e:
             LOG.error(e)
-            raise HTTPError(404, 'item not found {}'.format(url))
-        header = self._http_header(mime=mime)
+            raise HTTPError(404, content='item not found {}'.format(url))
+        headers = self._http_headers(mime=mime)
         if mime in STREAMING_TYPES:
             self._response_stream(
-                header=header, content=content, stream=stream)
+                headers=headers, content=content, stream=stream)
         content = content.read()
-        self._response(header=header, content=content, stream=stream)
+        self._response(headers=headers, content=content, stream=stream)
 
     def page(self, url, page=0, stream=None):
         LOG.debug('HTML page {} {}'.format(url, page))
         if not url.endswith('/'):
-            raise HTTPError(404, 'HTML page URLs must end with a slash')
+            raise HTTPError(
+                404, content='HTML page URLs must end with a slash')
         abspath = _os_path.join(self._base_path, url)
         if _os_path.isdir(abspath):
             self._directory(path=abspath, page=page, stream=stream)
@@ -538,7 +612,7 @@ class CGIGalleryServer (object):
             file_path = abspath[:-1] + extension
             if _os_path.isfile(file_path):
                 self._page(path=file_path, stream=stream)
-        raise HTTPError(404, 'unknown HTML page {}'.format(url))
+        raise HTTPError(404, content='unknown HTML page {}'.format(url))
 
     def _directory_header(self, path):
         relpath = _os_path.relpath(path, self._base_path)
@@ -569,14 +643,14 @@ class CGIGalleryServer (object):
     def _directory_page_navigation(self, path, page, pages):
         if pages <= 1:
             return []
-        prev_page = path + '?pp={:d}'.format((page - 1) % pages + 1)
-        next_page = path + '?pp={:d}'.format((page + 1) % pages + 1)
+        prev_query = {'pp': (page - 1) % pages + 1}
+        next_query = {'pp': (page + 1) % pages + 1}
         return [
             '<div style="text-align: center;">',
             '<p>',
-            self._link(prev_page, 'previous'),
+            self._link(path=path, query=prev_query, text='previous'),
             '({:d} of {:d})'.format(page+1, pages),
-            self._link(next_page, 'next'),
+            self._link(path=path, query=next_query, text='next'),
             '</p>',
             '</div>', 
            ]
@@ -597,7 +671,7 @@ class CGIGalleryServer (object):
         for image in images:
             page = self._image_page(image)
             img = self._image(image, max_width=300, max_height=300)
-            link = self._link(page, img)
+            link = self._link(path=page, text=img)
             if column == 0:
                 content.append('  <tr>')
             content.extend([
@@ -623,8 +697,9 @@ class CGIGalleryServer (object):
         if page < 0 or page >= pages:
             raise HTTPError(
                 404,
-                'page out of bounds for this gallery 0 <= {:d} < {:d}'.format(
-                    page, pages))
+                content=(
+                    'page out of bounds for this gallery 0 <= {:d} < {:d}'
+                    ).format(page, pages))
         first_image = images_per_page * page
         images = images[first_image:first_image+images_per_page]
         content = []
@@ -644,8 +719,8 @@ class CGIGalleryServer (object):
         images = list(self._images(gallery))
         images_per_page = self._rows * self._columns
         i = images.index(path)
-        page = i / images_per_page
-        gallery_page = '{}/?pp={:d}'.format(gallery, page + 1)
+        page = i // images_per_page
+        gallery_page = '{}/'.format(gallery)
         prev_page = self._image_page(images[i - 1])
         next_page = self._image_page(images[(i + 1) % len(images)])
         content = []
@@ -653,9 +728,10 @@ class CGIGalleryServer (object):
         content.extend([
                 '<div style="text-align: center;">',
                 '<p>',
-                self._link(prev_page, 'previous'),
-                self._link(gallery_page, 'all'),
-                self._link(next_page, 'next'),
+                self._link(path=prev_page, text='previous'),
+                self._link(
+                    path=gallery_page, query={'pp': page+1}, text='all'),
+                self._link(path=next_page, text='next'),
                 '</p>',
                 ])
         content.extend(self._captioned_video(path))
@@ -670,47 +746,37 @@ def serve_cgi(server):
     import sys
 
     url=None
-    page=0
     cgitb.enable()
     #cgitb.enable(display=0, logdir="/tmp/")
     data = cgi.FieldStorage()
+    page = server.page_from_query(  # getlist() lives on FieldStorage itself
+        query={key: data.getlist(key) for key in data.keys()})
     if 'p' in data:
         p = data['p']
         if isinstance(p, list):
             p = p[0]
         url = p.value
-    if 'pp' in data:
-        try:
-            page = int(data['pp'].value) - 1
-        except ValueError:
-            pass
-    server.serve(url=url, page=page, stream=sys.stdout)
+    stream = sys.stdout  # Python 2
+    if hasattr(stream, 'buffer'):  # Python 3
+        stream = sys.stdout.buffer
+    server.serve(url=url, page=page, stream=stream)
 
 def serve_scgi(server, host='localhost', port=4000):
     import scgi
     import scgi.scgi_server
-    import urlparse
 
     class GalleryHandler(scgi.scgi_server.SCGIHandler):
         def produce(self, env, bodysize, input, output):
             #LOG.info(HTTP_USER_AGENT REQUEST_METHOD REMOTE_ADDR REQUEST_URI
             url = env.get('DOCUMENT_URI', None)
-            page = 0
-            data = urlparse.parse_qs(env.get('QUERY_STRING', ''))
-            if 'pp' in data:
-                pp = data['pp']
-                if isinstance(pp, list):
-                    pp = pp[0]
-                try:
-                    page = int(pp) - 1
-                except ValueError:
-                    pass
+            page = server.page_from_query(
+                query_string=env.get('QUERY_STRING', ''))
             try:
                 try:
                     url = server.relative_url(url=url)
                 except HTTPError as e:
-                    LOG.error(e.message)
-                    server._error(e.status, content=e.content, stream=stream)
+                    LOG.error(e.content)
+                    server._error(e.status, content=e.content, stream=output)
             except ProcessingComplete:
                 pass
             else:
@@ -721,6 +787,39 @@ def serve_scgi(server, host='localhost', port=4000):
     LOG.info('serving SCGI on {}:{}'.format(host, port))
     s.serve()
 
+def serve_wsgi(server, host='localhost', port=4000):
+    """Serve the gallery over HTTP with wsgiref's simple server."""
+    import io
+    import wsgiref.simple_server
+
+    server._write_http_headers = False  # start_response() sends them
+
+    def app(environ, start_response):
+        url = environ.get('PATH_INFO', None)
+        page = server.page_from_query(
+            query_string=environ.get('QUERY_STRING', ''))
+        stream = io.BytesIO()  # collects the response body only
+        try:
+            try:
+                url = server.relative_url(url=url)
+            except HTTPError as e:
+                LOG.error(e.content)
+                server._error(e.status, content=e.content, stream=stream)
+        except ProcessingComplete as e:
+            headers = e.headers
+        else:
+            # serve() returns the ProcessingComplete that ended the request
+            e = server.serve(url=url, page=page, stream=stream)
+            headers = e.headers
+        output = stream.getvalue()
+        status = headers.pop('Status')
+        start_response(status, list(headers.items()))
+        return [output]
+
+    wsgi = wsgiref.simple_server.make_server(host=host, port=port, app=app)
+    LOG.info('serving WSGI on {}:{}'.format(host, port))
+    wsgi.serve_forever()
+
 
 if __name__ == '__main__':
     import argparse as _argparse
@@ -729,8 +828,8 @@ if __name__ == '__main__':
         description=__doc__, version=__version__,
         formatter_class=_argparse.RawDescriptionHelpFormatter)
     parser.add_argument(
-        '--scgi', default=False, action='store_const', const=True,
-        help='Run as a SCGI server (vs. serving a single CGI call)')
+        '--mode', default='cgi', choices=['cgi', 'scgi', 'wsgi'],
+        help='Server mode (defaults to CGI)')
     parser.add_argument(
         '--port', default=4000, type=int,
         help='Port to listen to (if runing as a SCGI server)')
@@ -758,7 +857,9 @@ if __name__ == '__main__':
         s.header = [open(_os_path.join(shared, 'header.shtml'), 'r').read()]
         s.footer = [open(_os_path.join(shared, 'footer.shtml'), 'r').read()]
 
-    if args.scgi:
+    if args.mode == 'scgi':
         serve_scgi(server=s, port=args.port)
+    elif args.mode == 'wsgi':  # elif: exactly one server mode runs
+        serve_wsgi(server=s, port=args.port)
     else:
         serve_cgi(server=s)