0f85869dd634111a0ce0f64d65a9700895f6d8f1
[sawsim.git] / pysawsim / sawsim.py
1 # Copyright (C) 2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drudge's University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20
21 """`Seminar` for running `sawsim` and parsing the results.
22 """
23
24 from __future__ import with_statement
25
26 try:
27     from collections import namedtuple
28 except ImportError:  # work around Python < 2.6
29     from ._collections import namedtuple
30 from distutils.spawn import find_executable
31 import hashlib
32 from optparse import Option
33 import os
34 import os.path
35 from random import shuffle
36 import shutil
37 from uuid import uuid4
38
39 from . import __version__
40 from .manager import MANAGERS, get_manager, InvokeJob
41
42
43 _multiprocess_can_split_ = True
44 """Allow nosetests to split tests between processes.
45 """
46
47 SAWSIM = find_executable('sawsim')
48 if SAWSIM == None:
49     SAWSIM = os.path.join('bin', 'sawsim')
50
51 CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim-cache'))
52 DEFAULT_PARAM_STRING = (
53     '-s cantilever,hooke,0.05 -N1 '
54     '-s folded,null -N8 '
55     "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
56     "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
57     '-q folded -v 1e-6')
58
59
60 # `Event` instances represent domain state transitions.
61 Event = namedtuple(
62     typename='Event',
63     field_names=['force', 'initial_state', 'final_state'])
64
65
66 class SawsimRunner (object):
67     """
68     >>> m = get_manager()()
69     >>> sr = SawsimRunner(manager=m)
70     >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2):
71     ...     print 'New run'
72     ...     for i,event in enumerate(run):
73     ...         print i, event  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
74     New run
75     0 Event(force=..., initial_state='folded', final_state='unfolded')
76     1 Event(force=..., initial_state='folded', final_state='unfolded')
77     2 Event(force=..., initial_state='folded', final_state='unfolded')
78     3 Event(force=..., initial_state='folded', final_state='unfolded')
79     4 Event(force=..., initial_state='folded', final_state='unfolded')
80     5 Event(force=..., initial_state='folded', final_state='unfolded')
81     6 Event(force=..., initial_state='folded', final_state='unfolded')
82     7 Event(force=..., initial_state='folded', final_state='unfolded')
83     New run
84     0 Event(force=..., initial_state='folded', final_state='unfolded')
85     1 Event(force=..., initial_state='folded', final_state='unfolded')
86     2 Event(force=..., initial_state='folded', final_state='unfolded')
87     3 Event(force=..., initial_state='folded', final_state='unfolded')
88     4 Event(force=..., initial_state='folded', final_state='unfolded')
89     5 Event(force=..., initial_state='folded', final_state='unfolded')
90     6 Event(force=..., initial_state='folded', final_state='unfolded')
91     7 Event(force=..., initial_state='folded', final_state='unfolded')
92     >>> m.teardown()
93     """
94
95     optparse_options = [
96         Option('-s','--sawsim', dest='sawsim',
97                metavar='PATH',
98                help='Set sawsim binary (%default).',
99                default=SAWSIM),
100         Option('-p','--params', dest='param_string',
101                metavar='PARAMS',
102                help='Initial params for fitting (%default).',
103                default=DEFAULT_PARAM_STRING),
104         Option('-N', '--number-of-runs', dest='N',
105                metavar='INT', type='int',
106                help='Number of sawsim runs at each point in parameter space (%default).',
107                default=400),
108         Option('-m', '--manager', dest='manager',
109                metavar='STRING',
110                help='Job manager name (one of %s) (default: auto-select).'
111                % (', '.join(MANAGERS)),
112                default=None),
113         Option('-C','--use-cache', dest='use_cache',
114                help='Use cached simulations if they exist (vs. running new simulations) (%default)',
115                default=False, action='store_true'),
116         Option('--clean-cache', dest='clean_cache',
117                help='Remove previously cached simulations if they exist (%default)',
118                default=False, action='store_true'),
119         Option('-d','--cache-dir', dest='cache_dir',
120                metavar='STRING',
121                help='Cache directory for sawsim unfolding forces (%default).',
122                default=CACHE_DIR),
123     ]
124
125     def __init__(self, sawsim=None, cache_dir=None,
126                  use_cache=False, clean_cache=False,
127                  manager=None):
128         if sawsim == None:
129             sawsim = SAWSIM
130         self._sawsim = sawsim
131         if cache_dir == None:
132             cache_dir = CACHE_DIR
133         self._cache_dir = cache_dir
134         self._use_cache = use_cache
135         self._clean_cache = clean_cache
136         self._manager = manager
137         self._local_manager = False
138         self._headline = None
139
140     def initialize_from_options(self, options):
141         self._sawsim = options.sawsim
142         self._cache_dir = options.cache_dir
143         self._use_cache = options.use_cache
144         self._clean_cache = options.clean_cache
145         self._manager = get_manager(options.manager)()
146         self._local_manager = True
147         call_params = {}
148         for param in ['param_string', 'N']:
149             try:
150                 call_params[param] = getattr(options, param)
151             except AttributeError:
152                 pass
153         return call_params
154
155     def teardown(self):
156         if self._local_manager == True:
157             self._manager.teardown()
158
159     def __call__(self, param_string, N):
160         """Run `N` simulations and yield `Event` generators for each run.
161
162         Use the `JobManager` instance `manager` for asynchronous job
163         execution.
164
165         If `_use_cache` is `True`, store an array of unfolding forces
166         in `cache_dir` for each simulated pull.  If the cached forces
167         are already present for `param_string`, do not redo the
168         simulation unless `_clean_cache` is `True`.
169         """
170         count = N
171         if self._use_cache == True:
172             d = self._param_cache_dir(param_string)
173             if os.path.exists(d):
174                 if self._clean_cache == True:
175                     shutil.rmtree(d)
176                     self._make_cache(param_string)
177                 else:
178                     for data in self._load_cached_data(param_string):
179                         yield data
180                         count -= 1
181                         if count == 0:
182                             return
183             else:
184                 self._make_cache(param_string)
185
186         jobs = {}
187         for i in range(count):
188             jobs[i] = self._manager.async_invoke(InvokeJob(
189                     target='%s %s' % (self._sawsim, param_string)))
190         complete_jobs = self._manager.wait(
191             [job.id for job in jobs.itervalues()])
192         for i,job in jobs.iteritems():
193             j = complete_jobs[job.id]
194             assert j.status == 0, j.data['error']
195             if self._use_cache == True:
196                 self._cache_run(d, j.data['stdout'])
197             yield self.parse(j.data['stdout'])
198         del(jobs)
199         del(complete_jobs)
200
201     def _param_cache_dir(self, param_string):
202         """
203         >>> s = SawsimRunner()
204         >>> s._param_cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
205         '/.../.sawsim-cache/...'
206         """
207         return os.path.join(
208             self._cache_dir, hashlib.sha256(param_string).hexdigest())
209
210     def _make_cache(self, param_string):
211         cache_dir = self._param_cache_dir(param_string)
212         os.makedirs(cache_dir)
213         with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
214             f.write('# version: %s\n%s\n' % (__version__, param_string))
215
216     def _load_cached_data(self, param_string):
217         pcd = self._param_cache_dir(param_string)
218         filenames = os.listdir(pcd)
219         shuffle(filenames)
220         for filename in filenames:
221             if not filename.endswith('.dat'):
222                 continue
223             with open(os.path.join(pcd, filename), 'r') as f:
224                 yield self.parse(f.read())
225
226     def _cache_run(self, cache_dir, stdout):
227         simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4())
228         with open(simulation_path, 'w') as f:
229             f.write(stdout)
230
231     def parse(self, text):
232         """Parse the output of a `sawsim` run.
233     
234         >>> text = '''#Force (N)\\tinitial state\\tFinal state
235         ... 2.90301e-10\\tfolded\\tunfolded
236         ... 2.83948e-10\\tfolded\\tunfolded
237         ... 2.83674e-10\\tfolded\\tunfolded
238         ... 2.48384e-10\\tfolded\\tunfolded
239         ... 2.43033e-10\\tfolded\\tunfolded
240         ... 2.77589e-10\\tfolded\\tunfolded
241         ... 2.85343e-10\\tfolded\\tunfolded
242         ... 2.67796e-10\\tfolded\\tunfolded
243         ... '''
244         >>> sr = SawsimRunner()
245         >>> events = list(sr.parse(text))
246         >>> len(events)
247         8
248         >>> events[0]  # doctest: +ELLIPSIS
249         Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
250         >>> sr._headline
251         ['Force (N)', 'initial state', 'Final state']
252         """
253         for line in text.splitlines():
254             line = line.strip()
255             if len(line) == 0:
256                 continue
257             elif line.startswith('#'):
258                 if self._headline == None:
259                     self._headline = line[len('#'):].split('\t')
260                 continue
261             fields = line.split('\t')
262             if len(fields) != 3:
263                 raise ValueError(fields)
264             force,initial_state,final_state = fields
265             yield Event(float(force), initial_state, final_state)
266
267
268 def main(argv=None):
269     """
270     >>> try:
271     ...     main(['--help'])
272     ... except SystemExit, e:
273     ...     pass  # doctest: +ELLIPSIS, +REPORT_UDIFF
274     Usage: ... [options]
275     <BLANKLINE>
276     Options:
277       -h, --help            show this help message and exit
278       -s PATH, --sawsim=PATH
279                             Set sawsim binary (...sawsim).
280       ...
281     >>> print e
282     0
283     >>> main(['-N', '2'])
284     ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
285     #Force (N)  Initial state  Final state
286     ...         folded         unfolded
287     ...         folded         unfolded
288     ...         folded         unfolded
289     ...         folded         unfolded
290     ...
291     ...         folded         unfolded
292     """
293     from optparse import OptionParser
294     import sys
295
296     if argv == None:
297         argv = sys.argv[1:]
298
299     sr = SawsimRunner()
300
301     usage = '%prog [options]'
302     epilog = '\n'.join([
303             'Python wrapper around `sawsim`.  Distribute `N` runs using',
304             'one of the possible job "managers".  Also supports caching',
305             'results to speed future runs.'
306             ])
307     parser = OptionParser(usage, epilog=epilog)
308     for option in sr.optparse_options:
309         parser.add_option(option)
310     
311     options,args = parser.parse_args(argv)
312
313     try:
314         sr_call_params = sr.initialize_from_options(options)
315     
316         first_run = True
317         for run in sr(**sr_call_params):
318             if first_run == True:
319                 first_run = False
320                 run = list(run)  # force iterator evaluation
321                 if sr._headline != None:
322                     print '#%s' % '\t'.join(sr._headline)
323             for event in run:
324                 print '\t'.join([str(x) for x in event])
325     finally:
326         sr.teardown()