4f34da89c540ee4b44d824ce7cb8a8603d0c64a3
[sawsim.git] / pysawsim / sawsim.py
1 # Copyright (C) 2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drudge's University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20
21 """`Seminar` for running `sawsim` and parsing the results.
22 """
23
24 from __future__ import with_statement
25
26 try:
27     from collections import namedtuple
28 except ImportError:  # work around Python < 2.6
29     from ._collections import namedtuple
30 import hashlib
31 from optparse import Option
32 import os
33 import os.path
34 from random import shuffle
35 import shutil
36 from uuid import uuid4
37
38 from . import __version__
39 from .manager import MANAGERS, get_manager, InvokeJob
40
41
42 _multiprocess_can_split_ = True
43 """Allow nosetests to split tests between processes.
44 """
45
46 SAWSIM = 'sawsim'  # os.path.expand(os.path.join('~', 'bin', 'sawsim'))
47 CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim-cache'))
48 DEFAULT_PARAM_STRING = (
49     '-s cantilever,hooke,0.05 -N1 '
50     '-s folded,null -N8 '
51     "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
52     "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
53     '-q folded -v 1e-6')
54
55
56 # `Event` instances represent domain state transitions.
57 Event = namedtuple(
58     typename='Event',
59     field_names=['force', 'initial_state', 'final_state'])
60
61
62 class SawsimRunner (object):
63     """
64     >>> m = get_manager()()
65     >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
66     >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2):
67     ...     print 'New run'
68     ...     for i,event in enumerate(run):
69     ...         print i, event  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
70     New run
71     0 Event(force=..., initial_state='folded', final_state='unfolded')
72     1 Event(force=..., initial_state='folded', final_state='unfolded')
73     2 Event(force=..., initial_state='folded', final_state='unfolded')
74     3 Event(force=..., initial_state='folded', final_state='unfolded')
75     4 Event(force=..., initial_state='folded', final_state='unfolded')
76     5 Event(force=..., initial_state='folded', final_state='unfolded')
77     6 Event(force=..., initial_state='folded', final_state='unfolded')
78     7 Event(force=..., initial_state='folded', final_state='unfolded')
79     New run
80     0 Event(force=..., initial_state='folded', final_state='unfolded')
81     1 Event(force=..., initial_state='folded', final_state='unfolded')
82     2 Event(force=..., initial_state='folded', final_state='unfolded')
83     3 Event(force=..., initial_state='folded', final_state='unfolded')
84     4 Event(force=..., initial_state='folded', final_state='unfolded')
85     5 Event(force=..., initial_state='folded', final_state='unfolded')
86     6 Event(force=..., initial_state='folded', final_state='unfolded')
87     7 Event(force=..., initial_state='folded', final_state='unfolded')
88     >>> m.teardown()
89     """
90
91     optparse_options = [
92         Option('-s','--sawsim', dest='sawsim',
93                metavar='PATH',
94                help='Set sawsim binary (%default).',
95                default=SAWSIM),
96         Option('-p','--params', dest='param_string',
97                metavar='PARAMS',
98                help='Initial params for fitting (%default).',
99                default=DEFAULT_PARAM_STRING),
100         Option('-N', '--number-of-runs', dest='N',
101                metavar='INT', type='int',
102                help='Number of sawsim runs at each point in parameter space (%default).',
103                default=400),
104         Option('-m', '--manager', dest='manager',
105                metavar='STRING',
106                help='Job manager name (one of %s) (default: auto-select).'
107                % (', '.join(MANAGERS)),
108                default=None),
109         Option('-C','--use-cache', dest='use_cache',
110                help='Use cached simulations if they exist (vs. running new simulations) (%default)',
111                default=False, action='store_true'),
112         Option('--clean-cache', dest='clean_cache',
113                help='Remove previously cached simulations if they exist (%default)',
114                default=False, action='store_true'),
115         Option('-d','--cache-dir', dest='cache_dir',
116                metavar='STRING',
117                help='Cache directory for sawsim unfolding forces (%default).',
118                default=CACHE_DIR),
119     ]
120
121     def __init__(self, sawsim=None, cache_dir=None,
122                  use_cache=False, clean_cache=False,
123                  manager=None):
124         if sawsim == None:
125             sawsim = SAWSIM
126         self._sawsim = sawsim
127         if cache_dir == None:
128             cache_dir = CACHE_DIR
129         self._cache_dir = cache_dir
130         self._use_cache = use_cache
131         self._clean_cache = clean_cache
132         self._manager = manager
133         self._local_manager = False
134         self._headline = None
135
136     def initialize_from_options(self, options):
137         self._sawsim = options.sawsim
138         self._cache_dir = options.cache_dir
139         self._use_cache = options.use_cache
140         self._clean_cache = options.clean_cache
141         self._manager = get_manager(options.manager)()
142         self._local_manager = True
143         call_params = {}
144         for param in ['param_string', 'N']:
145             try:
146                 call_params[param] = getattr(options, param)
147             except AttributeError:
148                 pass
149         return call_params
150
151     def teardown(self):
152         if self._local_manager == True:
153             self._manager.teardown()
154
155     def __call__(self, param_string, N):
156         """Run `N` simulations and yield `Event` generators for each run.
157
158         Use the `JobManager` instance `manager` for asynchronous job
159         execution.
160
161         If `_use_cache` is `True`, store an array of unfolding forces
162         in `cache_dir` for each simulated pull.  If the cached forces
163         are already present for `param_string`, do not redo the
164         simulation unless `_clean_cache` is `True`.
165         """
166         count = N
167         if self._use_cache == True:
168             d = self._param_cache_dir(param_string)
169             if os.path.exists(d):
170                 if self._clean_cache == True:
171                     shutil.rmtree(d)
172                     self._make_cache(param_string)
173                 else:
174                     for data in self._load_cached_data(param_string):
175                         yield data
176                         count -= 1
177                         if count == 0:
178                             return
179             else:
180                 self._make_cache(param_string)
181
182         jobs = {}
183         for i in range(count):
184             jobs[i] = self._manager.async_invoke(InvokeJob(
185                     target='%s %s' % (self._sawsim, param_string)))
186         complete_jobs = self._manager.wait(
187             [job.id for job in jobs.itervalues()])
188         for i,job in jobs.iteritems():
189             j = complete_jobs[job.id]
190             assert j.status == 0, j.data['error']
191             if self._use_cache == True:
192                 self._cache_run(d, j.data['stdout'])
193             yield self.parse(j.data['stdout'])
194         del(jobs)
195         del(complete_jobs)
196
197     def _param_cache_dir(self, param_string):
198         """
199         >>> s = SawsimRunner()
200         >>> s._param_cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
201         '/.../.sawsim-cache/...'
202         """
203         return os.path.join(
204             self._cache_dir, hashlib.sha256(param_string).hexdigest())
205
206     def _make_cache(self, param_string):
207         cache_dir = self._param_cache_dir(param_string)
208         os.makedirs(cache_dir)
209         with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
210             f.write('# version: %s\n%s\n' % (__version__, param_string))
211
212     def _load_cached_data(self, param_string):
213         pcd = self._param_cache_dir(param_string)
214         filenames = os.listdir(pcd)
215         shuffle(filenames)
216         for filename in filenames:
217             if not filename.endswith('.dat'):
218                 continue
219             with open(os.path.join(pcd, filename), 'r') as f:
220                 yield self.parse(f.read())
221
222     def _cache_run(self, cache_dir, stdout):
223         simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4())
224         with open(simulation_path, 'w') as f:
225             f.write(stdout)
226
227     def parse(self, text):
228         """Parse the output of a `sawsim` run.
229     
230         >>> text = '''#Force (N)\\tinitial state\\tFinal state
231         ... 2.90301e-10\\tfolded\\tunfolded
232         ... 2.83948e-10\\tfolded\\tunfolded
233         ... 2.83674e-10\\tfolded\\tunfolded
234         ... 2.48384e-10\\tfolded\\tunfolded
235         ... 2.43033e-10\\tfolded\\tunfolded
236         ... 2.77589e-10\\tfolded\\tunfolded
237         ... 2.85343e-10\\tfolded\\tunfolded
238         ... 2.67796e-10\\tfolded\\tunfolded
239         ... '''
240         >>> sr = SawsimRunner()
241         >>> events = list(sr.parse(text))
242         >>> len(events)
243         8
244         >>> events[0]  # doctest: +ELLIPSIS
245         Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
246         >>> sr._headline
247         ['Force (N)', 'initial state', 'Final state']
248         """
249         for line in text.splitlines():
250             line = line.strip()
251             if len(line) == 0:
252                 continue
253             elif line.startswith('#'):
254                 if self._headline == None:
255                     self._headline = line[len('#'):].split('\t')
256                 continue
257             fields = line.split('\t')
258             if len(fields) != 3:
259                 raise ValueError(fields)
260             force,initial_state,final_state = fields
261             yield Event(float(force), initial_state, final_state)
262
263
264 def main(argv=None):
265     """
266     >>> try:
267     ...     main(['--help'])
268     ... except SystemExit, e:
269     ...     pass  # doctest: +ELLIPSIS, +REPORT_UDIFF
270     Usage: ... [options]
271     <BLANKLINE>
272     Options:
273       -h, --help            show this help message and exit
274       -s PATH, --sawsim=PATH
275                             Set sawsim binary (sawsim).
276       ...
277     >>> print e
278     0
279     >>> main(['--sawsim', 'bin/sawsim', '-N', '2'])
280     ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
281     #Force (N)  Initial state  Final state
282     ...         folded         unfolded
283     ...         folded         unfolded
284     ...         folded         unfolded
285     ...         folded         unfolded
286     ...
287     ...         folded         unfolded
288     """
289     from optparse import OptionParser
290     import sys
291
292     if argv == None:
293         argv = sys.argv[1:]
294
295     sr = SawsimRunner()
296
297     usage = '%prog [options]'
298     epilog = '\n'.join([
299             'Python wrapper around `sawsim`.  Distribute `N` runs using',
300             'one of the possible job "managers".  Also supports caching',
301             'results to speed future runs.'
302             ])
303     parser = OptionParser(usage, epilog=epilog)
304     for option in sr.optparse_options:
305         parser.add_option(option)
306     
307     options,args = parser.parse_args(argv)
308
309     try:
310         sr_call_params = sr.initialize_from_options(options)
311     
312         first_run = True
313         for run in sr(**sr_call_params):
314             if first_run == True:
315                 first_run = False
316                 run = list(run)  # force iterator evaluation
317                 if sr._headline != None:
318                     print '#%s' % '\t'.join(sr._headline)
319             for event in run:
320                 print '\t'.join([str(x) for x in event])
321     finally:
322         sr.teardown()