Compare theory vs. sim scaled by bin width (not theory, which may be zero).
[sawsim.git] / pysawsim / sawsim_histogram.py
index 848ca9091c3afc464c97846816b431c3a342c201..14b94ede18227195f61fa294a835d1d116f1b288 100644 (file)
 # write to Trevor King, Drudge's University, Physics Dept., 3141 Chestnut St.,
 # Philadelphia PA 19104, USA.
 
-from __future__ import with_statement
-
-import hashlib
-import os.path
-import shutil
-
 import numpy
 
-from . import __version__, log
 from .histogram import Histogram
-from .manager import InvokeJob
-from .sawsim import parse
-
-
-SAWSIM = 'sawsim'  # os.path.expand(os.path.join('~', 'bin', 'sawsim'))
-CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim_histogram'))
-DEFAULT_PARAM_STRING = (
-    '-s cantilever,hooke,0.05 -N1 '
-    '-s folded,null -N8 '
-    "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
-    "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
-    '-q folded -v 1e-6')
-
-
-class SawsimHistogram (object):
-    def __init__(self, use_cache=False, clean_cache=False):
-        self._use_cache = use_cache
-        self._clean_cache = clean_cache
-
-    def _cache_dir(self, param_string):
-        """
-        >>> s = SawsimHistogram()
-        >>> s._cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
-        '/.../.sawsim_histogram/...'
-        """
-        return os.path.join(
-            CACHE_DIR, hashlib.sha256(param_string).hexdigest())
+from .manager import MANAGERS, get_manager
+from .sawsim import SawsimRunner
 
-    def _make_cache(self, param_string):
-        cache_dir = self._cache_dir(params_string)
-        os.makedirs(cache_dir)
-        with open(os.path.join(d, 'param_string'), 'w') as f:
-            f.write('# version: %s\n%s\n' % (__version__, param_string))
 
-    def _load_cached_data(self, cache_dir, N):
-        data = {}
-        for i in range(N):
-            simulation_path = os.path.join(d, '%d.dat' % i)
-            if os.path.exists(simulation_path):
-                with open(simulation_path, 'r') as f:
-                    data[i] = parse(f.read())
-            else:
-                break
-        return data
+_multiprocess_can_split_ = True
+"""Allow nosetests to split tests between processes.
+"""
 
-    def _cache_run(self, cache_dir, i, stdout):
-        simulation_path = os.path.join(d, '%d.dat' % i)
-        with open(simulation_path, 'w') as f:
-            f.write(stdout)
 
-    def __call__(self, param_string=None, N=400, bin_edges=None, manager=None):
+def sawsim_histogram(sawsim_runner, param_string, N=400, bin_edges=None):
         """Run `N` simulations and return a histogram with `bin_edges`.
 
         If `bin_edges == None`, return a numpy array of all unfolding
         forces.
-
-        Use the `JobManager` instance `manager` for asynchronous job
-        execution.
-
-        If `_use_cache` is `True`, store an array of unfolding forces
-        in `CACHE_DIR` for each simulated pull.  If the cached forces
-        are already present for `param_string`, do not redo the
-        simulation unless `_clean_cache` is `True`.
         """
-        data = {}
-        if self._use_cache == True:
-            d = self._cache_dir(param_string)
-            if os.path.exists(d):
-                if self._clean_cache == True:
-                    shutil.rmtree(d)
-                    self._make_cache(param_string)
-                else:
-                    data = self._load_cached_data(d, N)
-                    log().debug('using %d cached runs for %s'
-                                % (len(data), param_string))
-            else:
-                self._make_cache(param_string)
-
-        jobs = {}
-        for i in range(N):
-            if i in data:
-                continue
-            jobs[i] = manager.async_invoke(InvokeJob(
-                    '%s %s' % (SAWSIM, param_string)))
-        complete_jobs = manager.wait([job.id for job in jobs])
-        for i,job in jobs.iteritems:
-            j = complete_jobs[job.id]
-            assert j.status == 0, j.data
-            if self._use_cache == True:
-                self._cache_run(d, i, j.data['stdout'])
-            data[i] = parse(j.data['stdout'])
-        del(jobs)
-        del(complete_jobs)
-
-        # generate histogram
         events = []
-        for d_i in data.values():
-            events.extend([e.force for e in d_i
-                           if e.initial_state == 'folded'])
-        events = numpy.array(d_i)
+        for run in sawsim_runner(param_string=param_string, N=N):
+            events.extend([event.force for event in run
+                           if (event.initial_state == 'folded'
+                               and event.final_state == 'unfolded')])
+        events = numpy.array(events)
         if bin_edges == None:
             return events
         h = Histogram()
         h.from_data(events, bin_edges)
         return h
+
+
+def main(argv=None):
+    """
+    >>> main(['-N', '2'])
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    #Force (N)  Unfolding events
+    ...
+    """
+    from optparse import OptionParser
+    import sys
+
+    if argv == None:
+        argv = sys.argv[1:]
+
+    sr = SawsimRunner()
+
+    usage = '%prog [options]'
+    epilog = '\n'.join([
+            'Generate an unfolding force histogram from a series of `sawsim`',
+           'runs.',
+            ])
+    parser = OptionParser(usage, epilog=epilog)
+    for option in sr.optparse_options:
+        parser.add_option(option)
+    parser.add_option('-w', '--bin-width', dest='bin_width',
+                      metavar='FLOAT', type='float',
+                      help='Histogram bin width in newtons (%default).',
+                      default=10e-12)
+
+    options,args = parser.parse_args(argv)
+
+    sr_call_params = sr.initialize_from_options(options)
+    try:
+        events = sawsim_histogram(
+            sawsim_runner=sr, bin_edges=None, **sr_call_params)
+    finally:
+        sr.teardown()
+
+    if options.bin_width == None:
+        sys.stdout.write('# Unfolding force (N)\n')
+        events.tofile(sys.stdout, sep='\n')
+        sys.stdout.write('\n')
+    else:
+        h = Histogram()
+        h.from_data(events, bin_edges=h.calculate_bin_edges(
+                events, options.bin_width))
+        h.headings = ['Force (N)', 'Unfolding events']
+        h.to_stream(sys.stdout)