# Philadelphia PA 19104, USA.
-"""`sawsim` output parsing utilities.
-
-* `Event` instances represent domain state transitions.
-* `parse()` parses the output of a typical `sawsim` run.
+"""`Seminar` for running `sawsim` and parsing the results.
"""
+from __future__ import with_statement
+
try:
from collections import namedtuple
except ImportError: # work around Python < 2.6
- from operator import itemgetter as _itemgetter
- from keyword import iskeyword as _iskeyword
- import sys as _sys
- def namedtuple(typename, field_names, verbose=False):
- """Returns a new subclass of tuple with named fields.
-
- Copied from Python 2.6's collections.py.
-
- >>> Point = namedtuple('Point', 'x y')
- >>> Point.__doc__ # docstring for the new class
- 'Point(x, y)'
- >>> p = Point(11, y=22) # instantiate with positional args or keywords
- >>> p[0] + p[1] # indexable like a plain tuple
- 33
- >>> x, y = p # unpack like a regular tuple
- >>> x, y
- (11, 22)
- >>> p.x + p.y # fields also accessable by name
- 33
- >>> d = p._asdict() # convert to a dictionary
- >>> d['x']
- 11
- >>> Point(**d) # convert from a dictionary
- Point(x=11, y=22)
- >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
- Point(x=100, y=22)
+ from ._collections import namedtuple
+from distutils.spawn import find_executable
+import hashlib
+from optparse import Option
+import os
+import os.path
+from random import shuffle
+import shutil
+from uuid import uuid4
- """
+from . import __version__
+from .manager import MANAGERS, get_manager, InvokeJob
- # Parse and validate the field names. Validation serves two purposes,
- # generating informative error messages and preventing template injection attacks.
- if isinstance(field_names, basestring):
- field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
- field_names = tuple(map(str, field_names))
- for name in (typename,) + field_names:
- if not all(c.isalnum() or c=='_' for c in name):
- raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
- if _iskeyword(name):
- raise ValueError('Type names and field names cannot be a keyword: %r' % name)
- if name[0].isdigit():
- raise ValueError('Type names and field names cannot start with a number: %r' % name)
- seen_names = set()
- for name in field_names:
- if name.startswith('_'):
- raise ValueError('Field names cannot start with an underscore: %r' % name)
- if name in seen_names:
- raise ValueError('Encountered duplicate field name: %r' % name)
- seen_names.add(name)
-
- # Create and fill-in the class template
- numfields = len(field_names)
- argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes
- reprtxt = ', '.join('%s=%%r' % name for name in field_names)
- dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names))
- template = '''class %(typename)s(tuple):
- '%(typename)s(%(argtxt)s)' \n
- __slots__ = () \n
- _fields = %(field_names)r \n
- def __new__(_cls, %(argtxt)s):
- return _tuple.__new__(_cls, (%(argtxt)s)) \n
- @classmethod
- def _make(cls, iterable, new=tuple.__new__, len=len):
- 'Make a new %(typename)s object from a sequence or iterable'
- result = new(cls, iterable)
- if len(result) != %(numfields)d:
- raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
- return result \n
- def __repr__(self):
- return '%(typename)s(%(reprtxt)s)' %% self \n
- def _asdict(t):
- 'Return a new dict which maps field names to their values'
- return {%(dicttxt)s} \n
- def _replace(_self, **kwds):
- 'Return a new %(typename)s object replacing specified fields with new values'
- result = _self._make(map(kwds.pop, %(field_names)r, _self))
- if kwds:
- raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
- return result \n
- def __getnewargs__(self):
- return tuple(self) \n\n''' % locals()
- for i, name in enumerate(field_names):
- template += ' %s = _property(_itemgetter(%d))\n' % (name, i)
- if verbose:
- print template
-
- # Execute the template string in a temporary namespace and
- # support tracing utilities by setting a value for frame.f_globals['__name__']
- namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
- _property=property, _tuple=tuple)
- try:
- exec template in namespace
- except SyntaxError, e:
- raise SyntaxError(e.message + ':\n' + template)
- result = namespace[typename]
-
- # For pickling to work, the __module__ variable needs to be set to the frame
- # where the named tuple is created. Bypass this step in enviroments where
- # sys._getframe is not defined (Jython for example).
- if hasattr(_sys, '_getframe'):
- result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
-
- return result
+_multiprocess_can_split_ = True
+"""Allow nosetests to split tests between processes.
+"""
+
+SAWSIM = find_executable('sawsim')
+if SAWSIM == None:
+ SAWSIM = os.path.join('bin', 'sawsim')
+CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim-cache'))
+DEFAULT_PARAM_STRING = (
+ '-s cantilever,hooke,0.05 -N1 '
+ '-s folded,null -N8 '
+ "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
+ "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
+ '-q folded -v 1e-6')
+
+
+# `Event` instances represent domain state transitions.
Event = namedtuple(
typename='Event',
field_names=['force', 'initial_state', 'final_state'])
-def parse(text):
- """Parse the output of a `sawsim` run.
-
- >>> text = '''#Force (N)\\tinitial state\\tFinal state
- ... 2.90301e-10\\tfolded\\tunfolded
- ... 2.83948e-10\\tfolded\\tunfolded
- ... 2.83674e-10\\tfolded\\tunfolded
- ... 2.48384e-10\\tfolded\\tunfolded
- ... 2.43033e-10\\tfolded\\tunfolded
- ... 2.77589e-10\\tfolded\\tunfolded
- ... 2.85343e-10\\tfolded\\tunfolded
- ... 2.67796e-10\\tfolded\\tunfolded
- ... '''
- >>> events = list(parse(text))
- >>> len(events)
- 8
- >>> events[0] # doctest: +ELLIPSIS
- Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
+class SawsimRunner (object):
"""
- for line in text.splitlines():
- line = line.strip()
- if len(line) == 0 or line.startswith('#'):
- continue
- fields = line.split('\t')
- if len(fields) != 3:
- raise ValueError(fields)
- force,initial_state,final_state = fields
- yield Event(float(force), initial_state, final_state)
+ >>> m = get_manager()()
+ >>> sr = SawsimRunner(manager=m)
+ >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2):
+ ... print 'New run'
+ ... for i,event in enumerate(run):
+ ... print i, event # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ New run
+ 0 Event(force=..., initial_state='folded', final_state='unfolded')
+ 1 Event(force=..., initial_state='folded', final_state='unfolded')
+ 2 Event(force=..., initial_state='folded', final_state='unfolded')
+ 3 Event(force=..., initial_state='folded', final_state='unfolded')
+ 4 Event(force=..., initial_state='folded', final_state='unfolded')
+ 5 Event(force=..., initial_state='folded', final_state='unfolded')
+ 6 Event(force=..., initial_state='folded', final_state='unfolded')
+ 7 Event(force=..., initial_state='folded', final_state='unfolded')
+ New run
+ 0 Event(force=..., initial_state='folded', final_state='unfolded')
+ 1 Event(force=..., initial_state='folded', final_state='unfolded')
+ 2 Event(force=..., initial_state='folded', final_state='unfolded')
+ 3 Event(force=..., initial_state='folded', final_state='unfolded')
+ 4 Event(force=..., initial_state='folded', final_state='unfolded')
+ 5 Event(force=..., initial_state='folded', final_state='unfolded')
+ 6 Event(force=..., initial_state='folded', final_state='unfolded')
+ 7 Event(force=..., initial_state='folded', final_state='unfolded')
+ >>> m.teardown()
+ """
+
+ optparse_options = [
+ Option('-s','--sawsim', dest='sawsim',
+ metavar='PATH',
+ help='Set sawsim binary (%default).',
+ default=SAWSIM),
+ Option('-p','--params', dest='param_string',
+ metavar='PARAMS',
+ help='Initial params for fitting (%default).',
+ default=DEFAULT_PARAM_STRING),
+ Option('-N', '--number-of-runs', dest='N',
+ metavar='INT', type='int',
+ help='Number of sawsim runs at each point in parameter space (%default).',
+ default=400),
+ Option('-m', '--manager', dest='manager',
+ metavar='STRING',
+ help='Job manager name (one of %s) (default: auto-select).'
+ % (', '.join(MANAGERS)),
+ default=None),
+ Option('-C','--use-cache', dest='use_cache',
+ help='Use cached simulations if they exist (vs. running new simulations) (%default)',
+ default=False, action='store_true'),
+ Option('--clean-cache', dest='clean_cache',
+ help='Remove previously cached simulations if they exist (%default)',
+ default=False, action='store_true'),
+ Option('-d','--cache-dir', dest='cache_dir',
+ metavar='STRING',
+ help='Cache directory for sawsim unfolding forces (%default).',
+ default=CACHE_DIR),
+ ]
+
+ def __init__(self, sawsim=None, cache_dir=None,
+ use_cache=False, clean_cache=False,
+ manager=None):
+ if sawsim == None:
+ sawsim = SAWSIM
+ self._sawsim = sawsim
+ if cache_dir == None:
+ cache_dir = CACHE_DIR
+ self._cache_dir = cache_dir
+ self._use_cache = use_cache
+ self._clean_cache = clean_cache
+ self._manager = manager
+ self._local_manager = False
+ self._headline = None
+
+ def initialize_from_options(self, options):
+ self._sawsim = options.sawsim
+ self._cache_dir = options.cache_dir
+ self._use_cache = options.use_cache
+ self._clean_cache = options.clean_cache
+ self._manager = get_manager(options.manager)()
+ self._local_manager = True
+ call_params = {}
+ for param in ['param_string', 'N']:
+ try:
+ call_params[param] = getattr(options, param)
+ except AttributeError:
+ pass
+ return call_params
+
+ def teardown(self):
+ if self._local_manager == True:
+ self._manager.teardown()
+
+ def __call__(self, param_string, N):
+ """Run `N` simulations and yield `Event` generators for each run.
+
+ Use the `JobManager` instance `manager` for asynchronous job
+ execution.
+
+ If `_use_cache` is `True`, store an array of unfolding forces
+ in `cache_dir` for each simulated pull. If the cached forces
+ are already present for `param_string`, do not redo the
+ simulation unless `_clean_cache` is `True`.
+ """
+ count = N
+ if self._use_cache == True:
+ d = self._param_cache_dir(param_string)
+ if os.path.exists(d):
+ if self._clean_cache == True:
+ shutil.rmtree(d)
+ self._make_cache(param_string)
+ else:
+ for data in self._load_cached_data(param_string):
+ yield data
+ count -= 1
+ if count == 0:
+ return
+ else:
+ self._make_cache(param_string)
+
+ jobs = {}
+ for i in range(count):
+ jobs[i] = self._manager.async_invoke(InvokeJob(
+ target='%s %s' % (self._sawsim, param_string)))
+ complete_jobs = self._manager.wait(
+ [job.id for job in jobs.itervalues()])
+ for i,job in jobs.iteritems():
+ j = complete_jobs[job.id]
+ assert j.status == 0, j.data['error']
+ if self._use_cache == True:
+ self._cache_run(d, j.data['stdout'])
+ yield self.parse(j.data['stdout'])
+ del(jobs)
+ del(complete_jobs)
+
+ def _param_cache_dir(self, param_string):
+ """
+ >>> s = SawsimRunner()
+ >>> s._param_cache_dir(DEFAULT_PARAM_STRING) # doctest: +ELLIPSIS
+ '/.../.sawsim-cache/...'
+ """
+ return os.path.join(
+ self._cache_dir, hashlib.sha256(param_string).hexdigest())
+
+ def _make_cache(self, param_string):
+ cache_dir = self._param_cache_dir(param_string)
+ os.makedirs(cache_dir)
+ with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
+ f.write('# version: %s\n%s\n' % (__version__, param_string))
+
+ def _load_cached_data(self, param_string):
+ pcd = self._param_cache_dir(param_string)
+ filenames = os.listdir(pcd)
+ shuffle(filenames)
+ for filename in filenames:
+ if not filename.endswith('.dat'):
+ continue
+ with open(os.path.join(pcd, filename), 'r') as f:
+ yield self.parse(f.read())
+
+ def _cache_run(self, cache_dir, stdout):
+ simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4())
+ with open(simulation_path, 'w') as f:
+ f.write(stdout)
+
+ def parse(self, text):
+ """Parse the output of a `sawsim` run.
+
+ >>> text = '''#Force (N)\\tinitial state\\tFinal state
+ ... 2.90301e-10\\tfolded\\tunfolded
+ ... 2.83948e-10\\tfolded\\tunfolded
+ ... 2.83674e-10\\tfolded\\tunfolded
+ ... 2.48384e-10\\tfolded\\tunfolded
+ ... 2.43033e-10\\tfolded\\tunfolded
+ ... 2.77589e-10\\tfolded\\tunfolded
+ ... 2.85343e-10\\tfolded\\tunfolded
+ ... 2.67796e-10\\tfolded\\tunfolded
+ ... '''
+ >>> sr = SawsimRunner()
+ >>> events = list(sr.parse(text))
+ >>> len(events)
+ 8
+ >>> events[0] # doctest: +ELLIPSIS
+ Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
+ >>> sr._headline
+ ['Force (N)', 'initial state', 'Final state']
+ """
+ for line in text.splitlines():
+ line = line.strip()
+ if len(line) == 0:
+ continue
+ elif line.startswith('#'):
+ if self._headline == None:
+ self._headline = line[len('#'):].split('\t')
+ continue
+ fields = line.split('\t')
+ if len(fields) != 3:
+ raise ValueError(fields)
+ force,initial_state,final_state = fields
+ yield Event(float(force), initial_state, final_state)
+
+
+def main(argv=None):
+ """
+ >>> try:
+ ... main(['--help'])
+ ... except SystemExit, e:
+ ... pass # doctest: +ELLIPSIS, +REPORT_UDIFF
+ Usage: ... [options]
+ <BLANKLINE>
+ Options:
+ -h, --help show this help message and exit
+ -s PATH, --sawsim=PATH
+ Set sawsim binary (...sawsim).
+ ...
+ >>> print e
+ 0
+ >>> main(['-N', '2'])
+ ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+ #Force (N) Initial state Final state
+ ... folded unfolded
+ ... folded unfolded
+ ... folded unfolded
+ ... folded unfolded
+ ...
+ ... folded unfolded
+ """
+ from optparse import OptionParser
+ import sys
+
+ if argv == None:
+ argv = sys.argv[1:]
+
+ sr = SawsimRunner()
+
+ usage = '%prog [options]'
+ epilog = '\n'.join([
+ 'Python wrapper around `sawsim`. Distribute `N` runs using',
+ 'one of the possible job "managers". Also supports caching',
+ 'results to speed future runs.'
+ ])
+ parser = OptionParser(usage, epilog=epilog)
+ for option in sr.optparse_options:
+ parser.add_option(option)
+
+ options,args = parser.parse_args(argv)
+
+ try:
+ sr_call_params = sr.initialize_from_options(options)
+
+ first_run = True
+ for run in sr(**sr_call_params):
+ if first_run == True:
+ first_run = False
+ run = list(run) # force iterator evaluation
+ if sr._headline != None:
+ print '#%s' % '\t'.join(sr._headline)
+ for event in run:
+ print '\t'.join([str(x) for x in event])
+ finally:
+ sr.teardown()