Correct mpi4py URL.

[sawsim.git] / pysawsim / sawsim.py
diff --git a/pysawsim/sawsim.py b/pysawsim/sawsim.py

index a58deabac3f447857574a9e1803515273f69ca44..0f85869dd634111a0ce0f64d65a9700895f6d8f1 100644 (file)
--- a/pysawsim/sawsim.py
+++ b/pysawsim/sawsim.py
@@ -18,149 +18,309 @@
  # Philadelphia PA 19104, USA.
  
  
-"""`sawsim` output parsing utilities.
-
-* `Event` instances represent domain state transitions.
-* `parse()` parses the output of a typical `sawsim` run.
+"""`Seminar` for running `sawsim` and parsing the results.
  """
  
+from __future__ import with_statement
+
  try:
      from collections import namedtuple
  except ImportError:  # work around Python < 2.6
-    from operator import itemgetter as _itemgetter
-    from keyword import iskeyword as _iskeyword
-    import sys as _sys
-    def namedtuple(typename, field_names, verbose=False):
-        """Returns a new subclass of tuple with named fields.
-
-        Copied from Python 2.6's collections.py.
-
-        >>> Point = namedtuple('Point', 'x y')
-        >>> Point.__doc__                   # docstring for the new class
-        'Point(x, y)'
-        >>> p = Point(11, y=22)             # instantiate with positional args or keywords
-        >>> p[0] + p[1]                     # indexable like a plain tuple
-        33
-        >>> x, y = p                        # unpack like a regular tuple
-        >>> x, y
-        (11, 22)
-        >>> p.x + p.y                       # fields also accessable by name
-        33
-        >>> d = p._asdict()                 # convert to a dictionary
-        >>> d['x']
-        11
-        >>> Point(**d)                      # convert from a dictionary
-        Point(x=11, y=22)
-        >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
-        Point(x=100, y=22)
+    from ._collections import namedtuple
+from distutils.spawn import find_executable
+import hashlib
+from optparse import Option
+import os
+import os.path
+from random import shuffle
+import shutil
+from uuid import uuid4
  
-        """
+from . import __version__
+from .manager import MANAGERS, get_manager, InvokeJob
  
-        # Parse and validate the field names.  Validation serves two purposes,
-        # generating informative error messages and preventing template injection attacks.
-        if isinstance(field_names, basestring):
-            field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
-        field_names = tuple(map(str, field_names))
-        for name in (typename,) + field_names:
-            if not all(c.isalnum() or c=='_' for c in name):
-                raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
-            if _iskeyword(name):
-                raise ValueError('Type names and field names cannot be a keyword: %r' % name)
-            if name[0].isdigit():
-                raise ValueError('Type names and field names cannot start with a number: %r' % name)
-        seen_names = set()
-        for name in field_names:
-            if name.startswith('_'):
-                raise ValueError('Field names cannot start with an underscore: %r' % name)
-            if name in seen_names:
-                raise ValueError('Encountered duplicate field name: %r' % name)
-            seen_names.add(name)
-
-        # Create and fill-in the class template
-        numfields = len(field_names)
-        argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
-        reprtxt = ', '.join('%s=%%r' % name for name in field_names)
-        dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names))
-        template = '''class %(typename)s(tuple):
-        '%(typename)s(%(argtxt)s)' \n
-        __slots__ = () \n
-        _fields = %(field_names)r \n
-        def __new__(_cls, %(argtxt)s):
-            return _tuple.__new__(_cls, (%(argtxt)s)) \n
-        @classmethod
-        def _make(cls, iterable, new=tuple.__new__, len=len):
-            'Make a new %(typename)s object from a sequence or iterable'
-            result = new(cls, iterable)
-            if len(result) != %(numfields)d:
-                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
-            return result \n
-        def __repr__(self):
-            return '%(typename)s(%(reprtxt)s)' %% self \n
-        def _asdict(t):
-            'Return a new dict which maps field names to their values'
-            return {%(dicttxt)s} \n
-        def _replace(_self, **kwds):
-            'Return a new %(typename)s object replacing specified fields with new values'
-            result = _self._make(map(kwds.pop, %(field_names)r, _self))
-            if kwds:
-                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
-            return result \n
-        def __getnewargs__(self):
-            return tuple(self) \n\n''' % locals()
-        for i, name in enumerate(field_names):
-            template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
-        if verbose:
-            print template
-
-        # Execute the template string in a temporary namespace and
-        # support tracing utilities by setting a value for frame.f_globals['__name__']
-        namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
-                         _property=property, _tuple=tuple)
-        try:
-            exec template in namespace
-        except SyntaxError, e:
-            raise SyntaxError(e.message + ':\n' + template)
-        result = namespace[typename]
-
-        # For pickling to work, the __module__ variable needs to be set to the frame
-        # where the named tuple is created.  Bypass this step in enviroments where
-        # sys._getframe is not defined (Jython for example).
-        if hasattr(_sys, '_getframe'):
-            result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
-
-        return result
  
+_multiprocess_can_split_ = True
+"""Allow nosetests to split tests between processes.
+"""
+
+SAWSIM = find_executable('sawsim')
+if SAWSIM == None:
+    SAWSIM = os.path.join('bin', 'sawsim')
  
+CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim-cache'))
+DEFAULT_PARAM_STRING = (
+    '-s cantilever,hooke,0.05 -N1 '
+    '-s folded,null -N8 '
+    "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
+    "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
+    '-q folded -v 1e-6')
+
+
+# `Event` instances represent domain state transitions.
  Event = namedtuple(
      typename='Event',
      field_names=['force', 'initial_state', 'final_state'])
  
  
-def parse(text):
-    """Parse the output of a `sawsim` run.
-
-    >>> text = '''#Force (N)\\tinitial state\\tFinal state
-    ... 2.90301e-10\\tfolded\\tunfolded
-    ... 2.83948e-10\\tfolded\\tunfolded
-    ... 2.83674e-10\\tfolded\\tunfolded
-    ... 2.48384e-10\\tfolded\\tunfolded
-    ... 2.43033e-10\\tfolded\\tunfolded
-    ... 2.77589e-10\\tfolded\\tunfolded
-    ... 2.85343e-10\\tfolded\\tunfolded
-    ... 2.67796e-10\\tfolded\\tunfolded
-    ... '''
-    >>> events = list(parse(text))
-    >>> len(events)
-    8
-    >>> events[0]  # doctest: +ELLIPSIS
-    Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
+class SawsimRunner (object):
      """
-    for line in text.splitlines():
-        line = line.strip()
-        if len(line) == 0 or line.startswith('#'):
-            continue
-        fields = line.split('\t')
-        if len(fields) != 3:
-            raise ValueError(fields)
-        force,initial_state,final_state = fields
-        yield Event(float(force), initial_state, final_state)
+    >>> m = get_manager()()
+    >>> sr = SawsimRunner(manager=m)
+    >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2):
+    ...     print 'New run'
+    ...     for i,event in enumerate(run):
+    ...         print i, event  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    New run
+    0 Event(force=..., initial_state='folded', final_state='unfolded')
+    1 Event(force=..., initial_state='folded', final_state='unfolded')
+    2 Event(force=..., initial_state='folded', final_state='unfolded')
+    3 Event(force=..., initial_state='folded', final_state='unfolded')
+    4 Event(force=..., initial_state='folded', final_state='unfolded')
+    5 Event(force=..., initial_state='folded', final_state='unfolded')
+    6 Event(force=..., initial_state='folded', final_state='unfolded')
+    7 Event(force=..., initial_state='folded', final_state='unfolded')
+    New run
+    0 Event(force=..., initial_state='folded', final_state='unfolded')
+    1 Event(force=..., initial_state='folded', final_state='unfolded')
+    2 Event(force=..., initial_state='folded', final_state='unfolded')
+    3 Event(force=..., initial_state='folded', final_state='unfolded')
+    4 Event(force=..., initial_state='folded', final_state='unfolded')
+    5 Event(force=..., initial_state='folded', final_state='unfolded')
+    6 Event(force=..., initial_state='folded', final_state='unfolded')
+    7 Event(force=..., initial_state='folded', final_state='unfolded')
+    >>> m.teardown()
+    """
+
+    optparse_options = [
+        Option('-s','--sawsim', dest='sawsim',
+               metavar='PATH',
+               help='Set sawsim binary (%default).',
+               default=SAWSIM),
+        Option('-p','--params', dest='param_string',
+               metavar='PARAMS',
+               help='Initial params for fitting (%default).',
+               default=DEFAULT_PARAM_STRING),
+        Option('-N', '--number-of-runs', dest='N',
+               metavar='INT', type='int',
+               help='Number of sawsim runs at each point in parameter space (%default).',
+               default=400),
+        Option('-m', '--manager', dest='manager',
+               metavar='STRING',
+               help='Job manager name (one of %s) (default: auto-select).'
+               % (', '.join(MANAGERS)),
+               default=None),
+        Option('-C','--use-cache', dest='use_cache',
+               help='Use cached simulations if they exist (vs. running new simulations) (%default)',
+               default=False, action='store_true'),
+        Option('--clean-cache', dest='clean_cache',
+               help='Remove previously cached simulations if they exist (%default)',
+               default=False, action='store_true'),
+        Option('-d','--cache-dir', dest='cache_dir',
+               metavar='STRING',
+               help='Cache directory for sawsim unfolding forces (%default).',
+               default=CACHE_DIR),
+    ]
+
+    def __init__(self, sawsim=None, cache_dir=None,
+                 use_cache=False, clean_cache=False,
+                 manager=None):
+        if sawsim == None:
+            sawsim = SAWSIM
+        self._sawsim = sawsim
+        if cache_dir == None:
+            cache_dir = CACHE_DIR
+        self._cache_dir = cache_dir
+        self._use_cache = use_cache
+        self._clean_cache = clean_cache
+        self._manager = manager
+        self._local_manager = False
+        self._headline = None
+
+    def initialize_from_options(self, options):
+        self._sawsim = options.sawsim
+        self._cache_dir = options.cache_dir
+        self._use_cache = options.use_cache
+        self._clean_cache = options.clean_cache
+        self._manager = get_manager(options.manager)()
+        self._local_manager = True
+        call_params = {}
+        for param in ['param_string', 'N']:
+            try:
+                call_params[param] = getattr(options, param)
+            except AttributeError:
+                pass
+        return call_params
+
+    def teardown(self):
+        if self._local_manager == True:
+            self._manager.teardown()
+
+    def __call__(self, param_string, N):
+        """Run `N` simulations and yield `Event` generators for each run.
+
+        Use the `JobManager` instance `manager` for asynchronous job
+        execution.
+
+        If `_use_cache` is `True`, store an array of unfolding forces
+        in `cache_dir` for each simulated pull.  If the cached forces
+        are already present for `param_string`, do not redo the
+        simulation unless `_clean_cache` is `True`.
+        """
+        count = N
+        if self._use_cache == True:
+            d = self._param_cache_dir(param_string)
+            if os.path.exists(d):
+                if self._clean_cache == True:
+                    shutil.rmtree(d)
+                    self._make_cache(param_string)
+                else:
+                    for data in self._load_cached_data(param_string):
+                        yield data
+                        count -= 1
+                        if count == 0:
+                            return
+            else:
+                self._make_cache(param_string)
+
+        jobs = {}
+        for i in range(count):
+            jobs[i] = self._manager.async_invoke(InvokeJob(
+                    target='%s %s' % (self._sawsim, param_string)))
+        complete_jobs = self._manager.wait(
+            [job.id for job in jobs.itervalues()])
+        for i,job in jobs.iteritems():
+            j = complete_jobs[job.id]
+            assert j.status == 0, j.data['error']
+            if self._use_cache == True:
+                self._cache_run(d, j.data['stdout'])
+            yield self.parse(j.data['stdout'])
+        del(jobs)
+        del(complete_jobs)
+
+    def _param_cache_dir(self, param_string):
+        """
+        >>> s = SawsimRunner()
+        >>> s._param_cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
+        '/.../.sawsim-cache/...'
+        """
+        return os.path.join(
+            self._cache_dir, hashlib.sha256(param_string).hexdigest())
+
+    def _make_cache(self, param_string):
+        cache_dir = self._param_cache_dir(param_string)
+        os.makedirs(cache_dir)
+        with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
+            f.write('# version: %s\n%s\n' % (__version__, param_string))
+
+    def _load_cached_data(self, param_string):
+        pcd = self._param_cache_dir(param_string)
+        filenames = os.listdir(pcd)
+        shuffle(filenames)
+        for filename in filenames:
+            if not filename.endswith('.dat'):
+                continue
+            with open(os.path.join(pcd, filename), 'r') as f:
+                yield self.parse(f.read())
+
+    def _cache_run(self, cache_dir, stdout):
+        simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4())
+        with open(simulation_path, 'w') as f:
+            f.write(stdout)
+
+    def parse(self, text):
+        """Parse the output of a `sawsim` run.
+    
+        >>> text = '''#Force (N)\\tinitial state\\tFinal state
+        ... 2.90301e-10\\tfolded\\tunfolded
+        ... 2.83948e-10\\tfolded\\tunfolded
+        ... 2.83674e-10\\tfolded\\tunfolded
+        ... 2.48384e-10\\tfolded\\tunfolded
+        ... 2.43033e-10\\tfolded\\tunfolded
+        ... 2.77589e-10\\tfolded\\tunfolded
+        ... 2.85343e-10\\tfolded\\tunfolded
+        ... 2.67796e-10\\tfolded\\tunfolded
+        ... '''
+        >>> sr = SawsimRunner()
+        >>> events = list(sr.parse(text))
+        >>> len(events)
+        8
+        >>> events[0]  # doctest: +ELLIPSIS
+        Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
+        >>> sr._headline
+        ['Force (N)', 'initial state', 'Final state']
+        """
+        for line in text.splitlines():
+            line = line.strip()
+            if len(line) == 0:
+                continue
+            elif line.startswith('#'):
+                if self._headline == None:
+                    self._headline = line[len('#'):].split('\t')
+                continue
+            fields = line.split('\t')
+            if len(fields) != 3:
+                raise ValueError(fields)
+            force,initial_state,final_state = fields
+            yield Event(float(force), initial_state, final_state)
+
+
+def main(argv=None):
+    """
+    >>> try:
+    ...     main(['--help'])
+    ... except SystemExit, e:
+    ...     pass  # doctest: +ELLIPSIS, +REPORT_UDIFF
+    Usage: ... [options]
+    <BLANKLINE>
+    Options:
+      -h, --help            show this help message and exit
+      -s PATH, --sawsim=PATH
+                            Set sawsim binary (...sawsim).
+      ...
+    >>> print e
+    0
+    >>> main(['-N', '2'])
+    ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    #Force (N)  Initial state  Final state
+    ...         folded         unfolded
+    ...         folded         unfolded
+    ...         folded         unfolded
+    ...         folded         unfolded
+    ...
+    ...         folded         unfolded
+    """
+    from optparse import OptionParser
+    import sys
+
+    if argv == None:
+        argv = sys.argv[1:]
+
+    sr = SawsimRunner()
+
+    usage = '%prog [options]'
+    epilog = '\n'.join([
+            'Python wrapper around `sawsim`.  Distribute `N` runs using',
+            'one of the possible job "managers".  Also supports caching',
+            'results to speed future runs.'
+            ])
+    parser = OptionParser(usage, epilog=epilog)
+    for option in sr.optparse_options:
+        parser.add_option(option)
+    
+    options,args = parser.parse_args(argv)
+
+    try:
+        sr_call_params = sr.initialize_from_options(options)
+    
+        first_run = True
+        for run in sr(**sr_call_params):
+            if first_run == True:
+                first_run = False
+                run = list(run)  # force iterator evaluation
+                if sr._headline != None:
+                    print '#%s' % '\t'.join(sr._headline)
+            for event in run:
+                print '\t'.join([str(x) for x in event])
+    finally:
+        sr.teardown()