From: W. Trevor King <wking@drexel.edu>
Date: Sat, 23 Oct 2010 11:42:02 +0000 (-0400)
Subject: Move sawsim running into a new pysawsim.sawsim.SawsimRunner class.
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c2eb6307c6d34dd1ef3ad32301c094fff928424c;p=sawsim.git

Move sawsim running into a new pysawsim.sawsim.SawsimRunner class.
---

diff --git a/bin/vel_dep_scan.py b/bin/sawsim_hist_scan.py
similarity index 59%
rename from bin/vel_dep_scan.py
rename to bin/sawsim_hist_scan.py
index 7864380..e86c788 100755
--- a/bin/vel_dep_scan.py
+++ b/bin/sawsim_hist_scan.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 
-from pysawsim import velocity_dependant_scan as vds
+from pysawsim import parameter_scan as ps
 from pysawsim.manager.mpi import MPI_worker_death
 
 
 MPI_worker_death()
-vds.main()
+ps.main()
diff --git a/pysawsim/histogram.py b/pysawsim/histogram.py
index 32a12cf..ae8378a 100644
--- a/pysawsim/histogram.py
+++ b/pysawsim/histogram.py
@@ -22,6 +22,8 @@
 
 import numpy
 
+from . import log
+
 
 class Histogram (object):
     """A histogram with a flexible comparison method, `residual()`.
@@ -112,7 +114,11 @@ class Histogram (object):
                     line = line[len('#'):]
                     self.headings = [x.strip() for x in line.split('\t')]
                 continue # ignore blank lines and comments
-            bin_edge,count = line.split()
+            try:
+                bin_edge,count = line.split()
+            except ValueError:
+                log().error('Unable to parse histogram line: "%s"' % line)
+                raise
             self.bin_edges.append(float(bin_edge))
             self.counts.append(float(count))
         bin_width = self.bin_edges[1] - self.bin_edges[0]
@@ -140,13 +146,13 @@ class Histogram (object):
         >>> h.counts = [10, 40, 5]
         >>> h.to_stream(sys.stdout)
         ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
-        # Force (N)\tUnfolding events
+        #Force (N)\tUnfolding events
         1.5e-10\t10
         2e-10\t40
         2.5e-10\t5
         """
         if self.headings != None:
-            stream.write('# %s\n' % '\t'.join(self.headings))
+            stream.write('#%s\n' % '\t'.join(self.headings))
         for bin,count in zip(self.bin_edges, self.counts):
             stream.write('%g\t%g\n' % (bin, count))
 
diff --git a/pysawsim/velocity_dependant_scan.py b/pysawsim/parameter_scan.py
similarity index 58%
rename from pysawsim/velocity_dependant_scan.py
rename to pysawsim/parameter_scan.py
index 59c9cf4..60c60f9 100644
--- a/pysawsim/velocity_dependant_scan.py
+++ b/pysawsim/parameter_scan.py
@@ -17,31 +17,84 @@
 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
 # Philadelphia PA 19104, USA.
 
-import os # HACK, for getpid()
+"""Experiment vs. simulation comparison and scanning.
+"""
+
+from os import getpid  # for rss()
 import os.path
 import pickle
-import shutil
-import sys
+from StringIO import StringIO
 
 import matplotlib
 matplotlib.use('Agg')  # select backend that doesn't require X Windows
 import numpy
 import pylab
-from scipy.optimize import leastsq
 
 from . import log
 from .histogram import Histogram
 from .manager import MANAGERS, get_manager
-from . import sawsim_histogram
+from .sawsim_histogram import sawsim_histogram
+from .sawsim import SawsimRunner
 
 
 FIGURE = pylab.figure()  # avoid memory problems.
 """`pylab` keeps internal references to all created figures, so share
 a single instance.
 """
+EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
+# Other general comments...
+
+#HISTOGRAM: -v 6e-7
+#Force (N)\tUnfolding events
+1.8e-10\t1
+1.9e-10\t0
+2e-10\t0
+2.1e-10\t0
+2.2e-10\t0
+2.3e-10\t0
+2.4e-10\t1
+2.5e-10\t0
+2.6e-10\t2
+2.7e-10\t0
+2.8e-10\t1
+2.9e-10\t6
+3e-10\t2
+3.1e-10\t3
+
+#HISTOGRAM: -v 8e-7
+#Force (N)\tUnfolding events
+2.4e-10\t1
+2.5e-10\t0
+2.6e-10\t4
+2.7e-10\t2
+2.8e-10\t2
+2.9e-10\t3
+3e-10\t2
+3.1e-10\t1
+3.2e-10\t1
+
+#HISTOGRAM: -v 1e-6
+#Force (N)\tUnfolding events
+2e-10\t1
+2.1e-10\t0
+2.2e-10\t1
+2.3e-10\t1
+2.4e-10\t1
+2.5e-10\t0
+2.6e-10\t2
+2.7e-10\t1
+2.8e-10\t4
+2.9e-10\t2
+3e-10\t2
+3.1e-10\t0
+3.2e-10\t1
+"""
+
 
 MEM_DEBUG = False
 
+
+
 def rss():
     """
     For debugging memory usage.
@@ -49,57 +102,101 @@ def rss():
     resident set size, the non-swapped physical memory that a task has
     used (in kilo-bytes).
     """
-    call = "ps -o rss= -p %d" % os.getpid()
+    call = "ps -o rss= -p %d" % getpid()
     status,stdout,stderr = invoke(call)
     return int(stdout)
 
 
 class HistogramMatcher (object):
-    """Compare experimental velocity dependent histograms to simulated data.
+    """Compare experimental histograms to simulated data.
 
     The main entry points are `fit()` and `plot()`.
+
+    The input `histogram_stream` should contain a series of
+    experimental histograms with `#HISTOGRAM: <params>` lines starting
+    each histogram.  `<params>` should list the `sawsim` parameters
+    that are unique to that experiment.
+
+    >>> from .manager.thread import ThreadManager
+    >>> velocity_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
+    >>> param_format_string = (
+    ...     '-s cantilever,hooke,0.05 -N1 '
+    ...     '-s folded,null -N8 '
+    ...     '-s "unfolded,wlc,{0.39e-9,28e-9}" '
+    ...     '-k "folded,unfolded,bell,{%g,%g}" -q folded')
+    >>> m = ThreadManager()
+    >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
+    >>> hm = HistogramMatcher(velocity_stream, param_format_string, sr, N=3)
+    >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
+    >>> m.teardown()
     """
-    def __init__(self, velocity_stream, param_format_string, N=400,
-                 manager=None, residual_type='jensen-shannon', plot=True,
-                 use_cache=False, clean_cache=False):
-        self.experiment_histograms = self.read_force_histograms(velocity_stream)
+    def __init__(self, histogram_stream, param_format_string,
+                 sawsim_runner, N=400, residual_type='jensen-shannon',
+                 plot=True):
+        self.experiment_histograms = self._read_force_histograms(
+            histogram_stream)
         self.param_format_string = param_format_string
-        self.residual_type = residual_type
+        self.sawsim_runner = sawsim_runner
         self.N = N
-        self._manager = manager
-        self.plot = plot
-        self.sawsim_histogram = sawsim_histogram.SawsimHistogram(
-            use_cache=use_cache, clean_cach=clean_cache)
+        self.residual_type = residual_type
+        self._plot = plot
 
-    def read_force_histograms(self, stream):
+    def _read_force_histograms(self, stream):
         """
-        v_file format:
+        File format:
         # comment and blank lines ignored
         <velocity in m/s><whitespace><path to histogram file>
         ...
 
-        e.g.
-
-        5e-7    histA
-        1e-6    histB
+        >>> import sys
+        >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
+        >>> hm = HistogramMatcher(StringIO(), None, None, None)
+        >>> histograms = hm._read_force_histograms(stream)
+        >>> sorted(histograms.iterkeys())
+        ['-v 1e-6', '-v 6e-7', '-v 8e-7']
+        >>> histograms['-v 1e-6'].to_stream(sys.stdout)
+        ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
+        #Force (N)\tUnfolding events
+        2e-10\t1
+        2.1e-10\t0
+        2.2e-10\t1
+        2.3e-10\t1
+        2.4e-10\t1
+        2.5e-10\t0
+        2.6e-10\t2
+        2.7e-10\t1
+        2.8e-10\t4
+        2.9e-10\t2
+        3e-10\t2
+        3.1e-10\t0
+        3.2e-10\t1
         """
-        histograms = {}
-        v_file_dir = os.path.dirname(v_file)
-        for line in strem.readlines():
+        token = '#HISTOGRAM:'
+        hist_blocks = {None: []}
+        params = None
+        for line in stream.readlines():
             line = line.strip()
-            if len(line) == 0 or line[0] == "#":
-                continue # ignore blank lines and comments
-            v,h_file = line.split()
+            if line.startswith(token):
+                params = line[len(token):].strip()
+                assert params not in hist_blocks, params
+                hist_blocks[params] = []
+            else:
+                hist_blocks[params].append(line)
+
+        histograms = {}
+        for params,block in hist_blocks.iteritems():
+            if params == None:
+                continue
             h = Histogram()
-            h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
-            histograms[v] = h
+            h.from_stream(StringIO('\n'.join(block)))
+            histograms[params] = h
         return histograms
 
-    def param_string(self, params, velocity):
+    def param_string(self, params, hist_params):
         """Generate a string of options to pass to `sawsim`.
         """
-        return '%s -v %g' % (
-            self.param_format_string % tuple(params), velocity)
+        return '%s %s' % (
+            self.param_format_string % tuple(params), hist_params)
 
     def get_all_unfolding_data(self, dirname, velocity_string):
         datafile = os.path.join(dirname, "data_" + velocity_string)
@@ -117,32 +214,23 @@ class HistogramMatcher (object):
 
     def _residual(self, params):
         residual = 0
-        for velocity,experiment_hist in self.experiment_histograms.iteritems():
-            sawsim_hist = self.sawsim_histogram(
-                param_string=self.param_string(params, velocity), N=self.N,
-                bin_edges=experiment_hist.bin_edges, manager=self._manager)
-            r = experiment_histogram.residual(
-                sawsim_hist, type=self.residual_type)
+        for hist_params,experiment_hist in self.experiment_histograms.iteritems():
+            sawsim_hist = sawsim_histogram(
+                sawsim_runner=self.sawsim_runner,
+                param_string=self.param_string(params, hist_params),
+                N=self.N, bin_edges=experiment_hist.bin_edges)
+            r = experiment_hist.residual(sawsim_hist, type=self.residual_type)
             residual += r
-            if self.plot == True:
-                title = ", ".join(["%g" % p for p in (params+[velocity])])
-                filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
+            if self._plot == True:
+                title = ", ".join(["%g" % p for p in params]+[hist_params])
+                filename = "residual-%s-%g.png" % (
+                    title.replace(', ', '_').replace(' ', '_'), r)
                 self._plot_residual_comparison(
                     experiment_hist, sawsim_hist, residual=r,
                     title=title, filename=filename)
         log().debug('residual: %g' % residual)
         return residual
 
-    def fit(self, initial_params):
-        p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
-                                      full_output=True, maxfev=1000)
-        _log = log()
-        _log.info('Fitted params: %s' % p)
-        _log.info('Covariance mx: %s' % cov)
-        _log.info('Info: %s' % info)
-        _log.info('Mesg: %s' % mesg)
-        return p
-
     def plot(self, param_ranges, logx=False, logy=False, contour=False):
         xranges = param_ranges[0]
         yranges = param_ranges[1]
@@ -159,20 +247,21 @@ class HistogramMatcher (object):
         X, Y = pylab.meshgrid(x,y)
         C = numpy.zeros((len(y)-1, len(x)-1))
         for i,xi in enumerate(x[:-1]):
-            for j,yi in enumerate(y[:-1]):
-                print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
-                params = (xi,yi)
+            for j,yj in enumerate(y[:-1]):
+                log().info('point %d %d (%d of %d)'
+                           % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
+                params = (xi,yj)
                 r = self._residual(params)
                 C[j,i] = numpy.log(r) # better resolution in valleys
                 if MEM_DEBUG == True:
                     log().debug('RSS: %d KB' % rss())
         C = numpy.nan_to_num(C) # NaN -> 0
-        fid = file("fit_force_histograms-XYC.pkl", "wb")
+        fid = file("histogram_matcher-XYC.pkl", "wb")
         pickle.dump([X,Y,C], fid)
         fid.close()
         # read in with
         # import pickle
-        # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
+        # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
         # ...
         FIGURE.clear()
         axes = FIGURE.add_subplot(111)
@@ -189,7 +278,7 @@ class HistogramMatcher (object):
         FIGURE.colorbar(p)
         FIGURE.savefig("figure.png")
 
-    def _plot_residual_comparison(self, expeiment_hist, theory_hist,
+    def _plot_residual_comparison(self, experiment_hist, theory_hist,
                                   residual, title, filename):
         FIGURE.clear()
         p = pylab.plot(experiment_hist.bin_edges[:-1],
@@ -217,15 +306,33 @@ def parse_param_ranges_string(string):
     return ranges
 
 
-def main():
-    import optparse
+def main(argv=None):
+    """
+    >>> import tempfile
+    >>> f = tempfile.NamedTemporaryFile()
+    >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
+    >>> f.flush()
+    >>> main(['-s', 'bin/sawsim',
+    ...       '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
+    ...       '-N', '2',
+    ...       f.name])
+    >>> f.close()
+    """
+    from optparse import OptionParser
+    import sys
+
+    if argv == None:
+        argv = sys.argv[1:]
+
+    sr = SawsimRunner()
+
     usage = "%prog [options] velocity_file"
 
-    parser = optparse.OptionParser(usage)
-    parser.add_option("-s","--sawsim", dest="sawsim",
-                      metavar="PATH",
-                      help="Set sawsim binary (%default).",
-                      default=sawsim_histogram.SAWSIM)
+    parser = OptionParser(usage)
+    for option in sr.optparse_options:
+        if option.dest == 'param_string':
+            continue
+        parser.add_option(option)
     parser.add_option("-f","--param-format", dest="param_format",
                       metavar="FORMAT",
                       help="Convert params to sawsim options (%default).",
@@ -238,25 +345,6 @@ def main():
                       metavar="PARAMS",
                       help="Param range for plotting (%default).",
                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
-    parser.add_option("-N", "--number-of-runs", dest="N",
-                      metavar="INT", type='int',
-                      help="Number of sawsim runs at each point in parameter space (%default).",
-                      default=400)
-    parser.add_option("-m", "--manager", dest="manager",
-                      metavar="STRING",
-                      help="Job manager name (one of %s) (%%default)."
-                      % (', '.join(MANAGERS)),
-                      default=MANAGERS[0])
-    parser.add_option("-C","--use-cache", dest="use_cache",
-                      help="Use cached simulations if they exist (vs. running new simulations) (%default)",
-                      default=False, action="store_true")
-    parser.add_option("--clean-cache", dest="clean_cache",
-                      help="Remove previously cached simulations if they exist (%default)",
-                      default=False, action="store_true")
-    parser.add_option("-d","--cache-dir", dest="cache_dir",
-                      metavar="STRING",
-                      help="Cache directory for sawsim unfolding forces (%default).",
-                      default=sawsim_histogram.CACHE_DIR)
     parser.add_option("-R","--residual", dest="residual",
                       metavar="STRING",
                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
@@ -273,22 +361,21 @@ def main():
     parser.add_option("-c","--contour-plot", dest="contour_plot",
                       help="Select contour plot (vs. the default pseudocolor plot).",
                       default=False, action="store_true")
-    options,args = parser.parse_args()
+
+    options,args = parser.parse_args(argv)
+
     initial_params = [float(p) for p in options.initial_params.split(",")]
     param_ranges = parse_param_ranges_string(options.param_range)
     velocity_file = args[0]
+    sr_call_params = sr.initialize_from_options(options)
 
-    sawsim_histogram.SAWSIM = options.sawsim
-    sawsim_histogram.CACHE_DIR = options.cache_dir
-    manager = get_manager(options.manager)()
     try:
         hm = HistogramMatcher(
             file(velocity_file, 'r'), param_format_string=options.param_format,
-            N=options.N, manager=manager, residual_type=options.residual,
-            plot=options.plot_residuals, use_cache=options.use_cache,
-            clean_cache=options.clean_cache)
+            sawsim_runner=sr, residual_type=options.residual,
+            plot=options.plot_residuals, **sr_call_params)
         #hm.fit(initial_params)
         hm.plot(param_ranges, logx=options.logx, logy=options.logy,
                 contour=options.contour_plot)
     finally:
-        manager.teardown()
+        sr.teardown()
diff --git a/pysawsim/sawsim.py b/pysawsim/sawsim.py
index a58deab..1500a93 100644
--- a/pysawsim/sawsim.py
+++ b/pysawsim/sawsim.py
@@ -18,149 +18,291 @@
 # Philadelphia PA 19104, USA.
 
 
-"""`sawsim` output parsing utilities.
-
-* `Event` instances represent domain state transitions.
-* `parse()` parses the output of a typical `sawsim` run.
+"""`SawsimRunner` for running `sawsim` and parsing the results.
 """
 
 try:
     from collections import namedtuple
 except ImportError:  # work around Python < 2.6
-    from operator import itemgetter as _itemgetter
-    from keyword import iskeyword as _iskeyword
-    import sys as _sys
-    def namedtuple(typename, field_names, verbose=False):
-        """Returns a new subclass of tuple with named fields.
-
-        Copied from Python 2.6's collections.py.
-
-        >>> Point = namedtuple('Point', 'x y')
-        >>> Point.__doc__                   # docstring for the new class
-        'Point(x, y)'
-        >>> p = Point(11, y=22)             # instantiate with positional args or keywords
-        >>> p[0] + p[1]                     # indexable like a plain tuple
-        33
-        >>> x, y = p                        # unpack like a regular tuple
-        >>> x, y
-        (11, 22)
-        >>> p.x + p.y                       # fields also accessable by name
-        33
-        >>> d = p._asdict()                 # convert to a dictionary
-        >>> d['x']
-        11
-        >>> Point(**d)                      # convert from a dictionary
-        Point(x=11, y=22)
-        >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
-        Point(x=100, y=22)
+    from ._collections import namedtuple
+import hashlib
+from optparse import Option
+import os
+import os.path
+import shutil
+from uuid import uuid4
 
-        """
+from .manager import MANAGERS, get_manager, InvokeJob
 
-        # Parse and validate the field names.  Validation serves two purposes,
-        # generating informative error messages and preventing template injection attacks.
-        if isinstance(field_names, basestring):
-            field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
-        field_names = tuple(map(str, field_names))
-        for name in (typename,) + field_names:
-            if not all(c.isalnum() or c=='_' for c in name):
-                raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
-            if _iskeyword(name):
-                raise ValueError('Type names and field names cannot be a keyword: %r' % name)
-            if name[0].isdigit():
-                raise ValueError('Type names and field names cannot start with a number: %r' % name)
-        seen_names = set()
-        for name in field_names:
-            if name.startswith('_'):
-                raise ValueError('Field names cannot start with an underscore: %r' % name)
-            if name in seen_names:
-                raise ValueError('Encountered duplicate field name: %r' % name)
-            seen_names.add(name)
-
-        # Create and fill-in the class template
-        numfields = len(field_names)
-        argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
-        reprtxt = ', '.join('%s=%%r' % name for name in field_names)
-        dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names))
-        template = '''class %(typename)s(tuple):
-        '%(typename)s(%(argtxt)s)' \n
-        __slots__ = () \n
-        _fields = %(field_names)r \n
-        def __new__(_cls, %(argtxt)s):
-            return _tuple.__new__(_cls, (%(argtxt)s)) \n
-        @classmethod
-        def _make(cls, iterable, new=tuple.__new__, len=len):
-            'Make a new %(typename)s object from a sequence or iterable'
-            result = new(cls, iterable)
-            if len(result) != %(numfields)d:
-                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
-            return result \n
-        def __repr__(self):
-            return '%(typename)s(%(reprtxt)s)' %% self \n
-        def _asdict(t):
-            'Return a new dict which maps field names to their values'
-            return {%(dicttxt)s} \n
-        def _replace(_self, **kwds):
-            'Return a new %(typename)s object replacing specified fields with new values'
-            result = _self._make(map(kwds.pop, %(field_names)r, _self))
-            if kwds:
-                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
-            return result \n
-        def __getnewargs__(self):
-            return tuple(self) \n\n''' % locals()
-        for i, name in enumerate(field_names):
-            template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
-        if verbose:
-            print template
-
-        # Execute the template string in a temporary namespace and
-        # support tracing utilities by setting a value for frame.f_globals['__name__']
-        namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
-                         _property=property, _tuple=tuple)
-        try:
-            exec template in namespace
-        except SyntaxError, e:
-            raise SyntaxError(e.message + ':\n' + template)
-        result = namespace[typename]
-
-        # For pickling to work, the __module__ variable needs to be set to the frame
-        # where the named tuple is created.  Bypass this step in enviroments where
-        # sys._getframe is not defined (Jython for example).
-        if hasattr(_sys, '_getframe'):
-            result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
-
-        return result
 
+SAWSIM = 'sawsim'  # os.path.expanduser(os.path.join('~', 'bin', 'sawsim'))
+CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim_cache'))
+DEFAULT_PARAM_STRING = (
+    '-s cantilever,hooke,0.05 -N1 '
+    '-s folded,null -N8 '
+    "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
+    "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
+    '-q folded -v 1e-6')
 
+
+# `Event` instances represent domain state transitions.
 Event = namedtuple(
     typename='Event',
     field_names=['force', 'initial_state', 'final_state'])
 
 
-def parse(text):
-    """Parse the output of a `sawsim` run.
-
-    >>> text = '''#Force (N)\\tinitial state\\tFinal state
-    ... 2.90301e-10\\tfolded\\tunfolded
-    ... 2.83948e-10\\tfolded\\tunfolded
-    ... 2.83674e-10\\tfolded\\tunfolded
-    ... 2.48384e-10\\tfolded\\tunfolded
-    ... 2.43033e-10\\tfolded\\tunfolded
-    ... 2.77589e-10\\tfolded\\tunfolded
-    ... 2.85343e-10\\tfolded\\tunfolded
-    ... 2.67796e-10\\tfolded\\tunfolded
-    ... '''
-    >>> events = list(parse(text))
-    >>> len(events)
-    8
-    >>> events[0]  # doctest: +ELLIPSIS
-    Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
+class SawsimRunner (object):
+    """
+    >>> from .manager.thread import ThreadManager
+    >>> m = ThreadManager()
+    >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
+    >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2):
+    ...     print 'New run'
+    ...     for i,event in enumerate(run):
+    ...         print i, event  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    New run
+    0 Event(force=..., initial_state='folded', final_state='unfolded')
+    1 Event(force=..., initial_state='folded', final_state='unfolded')
+    2 Event(force=..., initial_state='folded', final_state='unfolded')
+    3 Event(force=..., initial_state='folded', final_state='unfolded')
+    4 Event(force=..., initial_state='folded', final_state='unfolded')
+    5 Event(force=..., initial_state='folded', final_state='unfolded')
+    6 Event(force=..., initial_state='folded', final_state='unfolded')
+    7 Event(force=..., initial_state='folded', final_state='unfolded')
+    New run
+    0 Event(force=..., initial_state='folded', final_state='unfolded')
+    1 Event(force=..., initial_state='folded', final_state='unfolded')
+    2 Event(force=..., initial_state='folded', final_state='unfolded')
+    3 Event(force=..., initial_state='folded', final_state='unfolded')
+    4 Event(force=..., initial_state='folded', final_state='unfolded')
+    5 Event(force=..., initial_state='folded', final_state='unfolded')
+    6 Event(force=..., initial_state='folded', final_state='unfolded')
+    7 Event(force=..., initial_state='folded', final_state='unfolded')
+    >>> m.teardown()
     """
-    for line in text.splitlines():
-        line = line.strip()
-        if len(line) == 0 or line.startswith('#'):
-            continue
-        fields = line.split('\t')
-        if len(fields) != 3:
-            raise ValueError(fields)
-        force,initial_state,final_state = fields
-        yield Event(float(force), initial_state, final_state)
+
+    optparse_options = [
+        Option("-s","--sawsim", dest="sawsim",
+               metavar="PATH",
+               help="Set sawsim binary (%default).",
+               default=SAWSIM),
+        Option("-p","--params", dest="param_string",
+               metavar="PARAMS",
+               help="Initial params for fitting (%default).",
+               default=DEFAULT_PARAM_STRING),
+        Option("-N", "--number-of-runs", dest="N",
+               metavar="INT", type='int',
+               help="Number of sawsim runs at each point in parameter space (%default).",
+               default=400),
+        Option("-m", "--manager", dest="manager",
+               metavar="STRING",
+               help="Job manager name (one of %s) (%%default)."
+               % (', '.join(MANAGERS)),
+               default=MANAGERS[0]),
+        Option("-C","--use-cache", dest="use_cache",
+               help="Use cached simulations if they exist (vs. running new simulations) (%default)",
+               default=False, action="store_true"),
+        Option("--clean-cache", dest="clean_cache",
+               help="Remove previously cached simulations if they exist (%default)",
+               default=False, action="store_true"),
+        Option("-d","--cache-dir", dest="cache_dir",
+               metavar="STRING",
+               help="Cache directory for sawsim unfolding forces (%default).",
+               default=CACHE_DIR),
+    ]
+
+    def __init__(self, sawsim=None, cache_dir=None,
+                 use_cache=False, clean_cache=False,
+                 manager=None):
+        if sawsim == None:
+            sawsim = SAWSIM
+        self._sawsim = sawsim
+        if cache_dir == None:
+            cache_dir = CACHE_DIR
+        self._cache_dir = cache_dir
+        self._use_cache = use_cache
+        self._clean_cache = clean_cache
+        self._manager = manager
+        self._local_manager = False
+        self._headline = None
+
+    def initialize_from_options(self, options):
+        self._sawsim = options.sawsim
+        self._cache_dir = options.cache_dir
+        self._use_cache = options.use_cache
+        self._clean_cache = options.clean_cache
+        self._manager = get_manager(options.manager)()
+        self._local_manager = True
+        call_params = {}
+        for param in ['param_string', 'N']:
+            try:
+                call_params[param] = getattr(options, param)
+            except AttributeError:
+                pass
+        return call_params
+
+    def teardown(self):
+        if self._local_manager == True:
+            self._manager.teardown()
+
+    def __call__(self, param_string, N):
+        """Run `N` simulations and yield `Event` generators for each run.
+
+        Use the `JobManager` instance `manager` for asynchronous job
+        execution.
+
+        If `_use_cache` is `True`, store an array of unfolding forces
+        in `cache_dir` for each simulated pull.  If the cached forces
+        are already present for `param_string`, do not redo the
+        simulation unless `_clean_cache` is `True`.
+        """
+        count = N
+        if self._use_cache == True:
+            d = self._param_cache_dir(param_string)
+            if os.path.exists(d):
+                if self._clean_cache == True:
+                    shutil.rmtree(d)
+                    self._make_cache(param_string)
+                else:
+                    for data in self._load_cached_data(param_string):
+                        yield data
+                        count -= 1
+                        if count == 0:
+                            return
+            else:
+                self._make_cache(param_string)
+
+        jobs = {}
+        for i in range(count):
+            jobs[i] = self._manager.async_invoke(InvokeJob(
+                    target='%s %s' % (self._sawsim, param_string)))
+        complete_jobs = self._manager.wait(
+            [job.id for job in jobs.itervalues()])
+        for i,job in jobs.iteritems():
+            j = complete_jobs[job.id]
+            assert j.status == 0, j.data
+            if self._use_cache == True:
+                self._cache_run(d, j.data['stdout'])
+            yield self.parse(j.data['stdout'])
+        del(jobs)
+        del(complete_jobs)
+
+    def _param_cache_dir(self, param_string):
+        """
+        >>> s = SawsimRunner()
+        >>> s._param_cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
+        '/.../.sawsim_cache/...'
+        """
+        return os.path.join(
+            self._cache_dir, hashlib.sha256(param_string).hexdigest())
+
+    def _make_cache(self, param_string):
+        cache_dir = self._param_cache_dir(param_string)
+        os.makedirs(cache_dir)
+        with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
+            f.write('# version: %s\n%s\n' % (__version__, param_string))
+
+    def _load_cached_data(self, param_string):
+        pcd = self._param_cache_dir(param_string)
+        for filename in os.listdir(pcd):
+            if not filename.endswith('.dat'):
+                continue
+            with open(os.path.join(pcd, filename), 'r') as f:
+                yield self.parse(f.read())
+
+    def _cache_run(self, cache_dir, stdout):
+        simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4())
+        with open(simulation_path, 'w') as f:
+            f.write(stdout)
+
+    def parse(self, text):
+        """Parse the output of a `sawsim` run.
+    
+        >>> text = '''#Force (N)\\tinitial state\\tFinal state
+        ... 2.90301e-10\\tfolded\\tunfolded
+        ... 2.83948e-10\\tfolded\\tunfolded
+        ... 2.83674e-10\\tfolded\\tunfolded
+        ... 2.48384e-10\\tfolded\\tunfolded
+        ... 2.43033e-10\\tfolded\\tunfolded
+        ... 2.77589e-10\\tfolded\\tunfolded
+        ... 2.85343e-10\\tfolded\\tunfolded
+        ... 2.67796e-10\\tfolded\\tunfolded
+        ... '''
+        >>> sr = SawsimRunner()
+        >>> events = list(sr.parse(text))
+        >>> len(events)
+        8
+        >>> events[0]  # doctest: +ELLIPSIS
+        Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
+        >>> sr._headline
+        ['Force (N)', 'initial state', 'Final state']
+        """
+        for line in text.splitlines():
+            line = line.strip()
+            if len(line) == 0:
+                continue
+            elif line.startswith('#'):
+                if self._headline == None:
+                    self._headline = line[len('#'):].split('\t')
+                continue
+            fields = line.split('\t')
+            if len(fields) != 3:
+                raise ValueError(fields)
+            force,initial_state,final_state = fields
+            yield Event(float(force), initial_state, final_state)
+
+
+def main(argv=None):
+    """
+    >>> try:
+    ...     main(['--help'])
+    ... except SystemExit, e:
+    ...     pass  # doctest: +ELLIPSIS, +REPORT_UDIFF
+    Usage: ... [options]
+    <BLANKLINE>
+    Options:
+      -h, --help            show this help message and exit
+      -s PATH, --sawsim=PATH
+                            Set sawsim binary (sawsim).
+      ...
+    >>> print e
+    0
+    >>> main(['--sawsim', 'bin/sawsim', '-N', '2'])
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    #Force (N)  Initial state  Final state
+    ...         folded         unfolded
+    ...         folded         unfolded
+    ...         folded         unfolded
+    ...         folded         unfolded
+    ...
+    ...         folded         unfolded
+    """
+    from optparse import OptionParser
+    import sys
+
+    if argv == None:
+        argv = sys.argv[1:]
+
+    sr = SawsimRunner()
+
+    usage = "%prog [options]"
+    parser = OptionParser(usage)
+    for option in sr.optparse_options:
+        parser.add_option(option)
+    
+    options,args = parser.parse_args(argv)
+
+    try:
+        sr_call_params = sr.initialize_from_options(options)
+    
+        first_run = True
+        for run in sr(**sr_call_params):
+            if first_run == True:
+                first_run = False
+                run = list(run)  # force iterator evaluation
+                if sr._headline != None:
+                    print '#%s' % '\t'.join(sr._headline)
+            for event in run:
+                print '\t'.join([str(x) for x in event])
+    finally:
+        sr.teardown()
diff --git a/pysawsim/sawsim_histogram.py b/pysawsim/sawsim_histogram.py
index d5eb306..6ca2cb2 100644
--- a/pysawsim/sawsim_histogram.py
+++ b/pysawsim/sawsim_histogram.py
@@ -19,113 +19,25 @@
 
 from __future__ import with_statement
 
-import hashlib
-import os.path
-import shutil
-
 import numpy
 
 from . import __version__, log
 from .histogram import Histogram
-from .manager import InvokeJob, MANAGERS, get_manager
-from .sawsim import parse
-
-
-SAWSIM = 'sawsim'  # os.path.expand(os.path.join('~', 'bin', 'sawsim'))
-CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim_histogram'))
-DEFAULT_PARAM_STRING = (
-    '-s cantilever,hooke,0.05 -N1 '
-    '-s folded,null -N8 '
-    "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
-    "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
-    '-q folded -v 1e-6')
-
-
-class SawsimHistogram (object):
-    def __init__(self, use_cache=False, clean_cache=False):
-        self._use_cache = use_cache
-        self._clean_cache = clean_cache
-
-    def _cache_dir(self, param_string):
-        """
-        >>> s = SawsimHistogram()
-        >>> s._cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
-        '/.../.sawsim_histogram/...'
-        """
-        return os.path.join(
-            CACHE_DIR, hashlib.sha256(param_string).hexdigest())
-
-    def _make_cache(self, param_string):
-        cache_dir = self._cache_dir(param_string)
-        os.makedirs(cache_dir)
-        with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
-            f.write('# version: %s\n%s\n' % (__version__, param_string))
+from .manager import MANAGERS, get_manager
+from .sawsim import SawsimRunner
 
-    def _load_cached_data(self, cache_dir, N):
-        data = {}
-        for i in range(N):
-            simulation_path = os.path.join(cache_dir, '%d.dat' % i)
-            if os.path.exists(simulation_path):
-                with open(simulation_path, 'r') as f:
-                    data[i] = parse(f.read())
-            else:
-                break
-        return data
 
-    def _cache_run(self, cache_dir, i, stdout):
-        simulation_path = os.path.join(cache_dir, '%d.dat' % i)
-        with open(simulation_path, 'w') as f:
-            f.write(stdout)
-
-    def __call__(self, param_string=None, N=400, bin_edges=None, manager=None):
+def sawsim_histogram(sawsim_runner, param_string, N=400, bin_edges=None):
         """Run `N` simulations and return a histogram with `bin_edges`.
 
         If `bin_edges == None`, return a numpy array of all unfolding
         forces.
-
-        Use the `JobManager` instance `manager` for asynchronous job
-        execution.
-
-        If `_use_cache` is `True`, store an array of unfolding forces
-        in `CACHE_DIR` for each simulated pull.  If the cached forces
-        are already present for `param_string`, do not redo the
-        simulation unless `_clean_cache` is `True`.
         """
-        data = {}
-        if self._use_cache == True:
-            d = self._cache_dir(param_string)
-            if os.path.exists(d):
-                if self._clean_cache == True:
-                    shutil.rmtree(d)
-                    self._make_cache(param_string)
-                else:
-                    data = self._load_cached_data(d, N)
-                    log().debug('using %d cached runs for %s'
-                                % (len(data), param_string))
-            else:
-                self._make_cache(param_string)
-
-        jobs = {}
-        for i in range(N):
-            if i in data:
-                continue
-            jobs[i] = manager.async_invoke(InvokeJob(
-                    target='%s %s' % (SAWSIM, param_string)))
-        complete_jobs = manager.wait([job.id for job in jobs.itervalues()])
-        for i,job in jobs.iteritems():
-            j = complete_jobs[job.id]
-            assert j.status == 0, j.data
-            if self._use_cache == True:
-                self._cache_run(d, i, j.data['stdout'])
-            data[i] = parse(j.data['stdout'])
-        del(jobs)
-        del(complete_jobs)
-
-        # generate histogram
         events = []
-        for d_i in data.values():
-            events.extend([e.force for e in d_i
-                           if e.initial_state == 'folded'])
+        for run in sawsim_runner(param_string=param_string, N=N):
+            events.extend([event.force for event in run
+                           if (event.initial_state == 'folded'
+                               and event.final_state == 'unfolded')])
         events = numpy.array(events)
         if bin_edges == None:
             return events
@@ -134,59 +46,38 @@ class SawsimHistogram (object):
         return h
 
 
-def main():
-    import optparse
+def main(argv=None):
+    """
+    >>> main(['--sawsim', 'bin/sawsim', '-N', '2'])
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    #Force (N)  Unfolding events
+    ...
+    """
+    from optparse import OptionParser
     import sys
 
-    global SAWSIM, CACHE_DIR
+    if argv == None:
+        argv = sys.argv[1:]
 
-    usage = "%prog [options] velocity_file"
+    sr = SawsimRunner()
 
-    parser = optparse.OptionParser(usage)
-    parser.add_option("-s","--sawsim", dest="sawsim",
-                      metavar="PATH",
-                      help="Set sawsim binary (%default).",
-                      default=SAWSIM)
-    parser.add_option("-p","--params", dest="params",
-                      metavar="PARAMS",
-                      help="Initial params for fitting (%default).",
-                      default=DEFAULT_PARAM_STRING)
-    parser.add_option("-N", "--number-of-runs", dest="N",
-                      metavar="INT", type='int',
-                      help="Number of sawsim runs at each point in parameter space (%default).",
-                      default=400)
+    usage = "%prog [options] velocity_file"
+    parser = OptionParser(usage)
+    for option in sr.optparse_options:
+        parser.add_option(option)
     parser.add_option("-w", "--bin-width", dest="bin_width",
                       metavar="FLOAT", type='float',
                       help="Histogram bin width in newtons (%default).",
-                      default=None)
-    parser.add_option("-m", "--manager", dest="manager",
-                      metavar="STRING",
-                      help="Job manager name (one of %s) (%%default)."
-                      % (', '.join(MANAGERS)),
-                      default=MANAGERS[0])
-    parser.add_option("-C","--use-cache", dest="use_cache",
-                      help="Use cached simulations if they exist (vs. running new simulations) (%default)",
-                      default=False, action="store_true")
-    parser.add_option("--clean-cache", dest="clean_cache",
-                      help="Remove previously cached simulations if they exist (%default)",
-                      default=False, action="store_true")
-    parser.add_option("-d","--cache-dir", dest="cache_dir",
-                      metavar="STRING",
-                      help="Cache directory for sawsim unfolding forces (%default).",
-                      default=CACHE_DIR)
-    options,args = parser.parse_args()
-
-    SAWSIM = options.sawsim
-    CACHE_DIR = options.cache_dir
+                      default=10e-12)
 
-    sh = SawsimHistogram(use_cache=options.use_cache,
-                         clean_cache=options.clean_cache)
+    options,args = parser.parse_args(argv)
 
-    manager = get_manager(options.manager)()
+    sr_call_params = sr.initialize_from_options(options)
     try:
-        events = sh(param_string=options.params, N=options.N, manager=manager)
+        events = sawsim_histogram(
+            sawsim_runner=sr, bin_edges=None, **sr_call_params)
     finally:
-        manager.teardown()
+        sr.teardown()
 
     if options.bin_width == None:
         sys.stdout.write('# Unfolding force (N)\n')