c0589ef0b1e33f2080bee6f5b10abc83bacbbc2f
[sawsim.git] / pysawsim / sawsim.py
1 # Copyright (C) 2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drudge's University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20
21 """`Seminar` for running `sawsim` and parsing the results.
22 """
23
24 from __future__ import with_statement
25
26 try:
27     from collections import namedtuple
28 except ImportError:  # work around Python < 2.6
29     from ._collections import namedtuple
30 import hashlib
31 from optparse import Option
32 import os
33 import os.path
34 from random import shuffle
35 import shutil
36 from uuid import uuid4
37
38 from . import __version__
39 from .manager import MANAGERS, get_manager, InvokeJob
40
41
42 SAWSIM = 'sawsim'  # os.path.expand(os.path.join('~', 'bin', 'sawsim'))
43 CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim-cache'))
44 DEFAULT_PARAM_STRING = (
45     '-s cantilever,hooke,0.05 -N1 '
46     '-s folded,null -N8 '
47     "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
48     "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
49     '-q folded -v 1e-6')
50
51
52 # `Event` instances represent domain state transitions.
53 Event = namedtuple(
54     typename='Event',
55     field_names=['force', 'initial_state', 'final_state'])
56
57
58 class SawsimRunner (object):
59     """
60     >>> from .manager.thread import ThreadManager
61     >>> m = ThreadManager()
62     >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
63     >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2):
64     ...     print 'New run'
65     ...     for i,event in enumerate(run):
66     ...         print i, event  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
67     New run
68     0 Event(force=..., initial_state='folded', final_state='unfolded')
69     1 Event(force=..., initial_state='folded', final_state='unfolded')
70     2 Event(force=..., initial_state='folded', final_state='unfolded')
71     3 Event(force=..., initial_state='folded', final_state='unfolded')
72     4 Event(force=..., initial_state='folded', final_state='unfolded')
73     5 Event(force=..., initial_state='folded', final_state='unfolded')
74     6 Event(force=..., initial_state='folded', final_state='unfolded')
75     7 Event(force=..., initial_state='folded', final_state='unfolded')
76     New run
77     0 Event(force=..., initial_state='folded', final_state='unfolded')
78     1 Event(force=..., initial_state='folded', final_state='unfolded')
79     2 Event(force=..., initial_state='folded', final_state='unfolded')
80     3 Event(force=..., initial_state='folded', final_state='unfolded')
81     4 Event(force=..., initial_state='folded', final_state='unfolded')
82     5 Event(force=..., initial_state='folded', final_state='unfolded')
83     6 Event(force=..., initial_state='folded', final_state='unfolded')
84     7 Event(force=..., initial_state='folded', final_state='unfolded')
85     >>> m.teardown()
86     """
87
88     optparse_options = [
89         Option("-s","--sawsim", dest="sawsim",
90                metavar="PATH",
91                help="Set sawsim binary (%default).",
92                default=SAWSIM),
93         Option("-p","--params", dest="param_string",
94                metavar="PARAMS",
95                help="Initial params for fitting (%default).",
96                default=DEFAULT_PARAM_STRING),
97         Option("-N", "--number-of-runs", dest="N",
98                metavar="INT", type='int',
99                help="Number of sawsim runs at each point in parameter space (%default).",
100                default=400),
101         Option("-m", "--manager", dest="manager",
102                metavar="STRING",
103                help="Job manager name (one of %s) (%%default)."
104                % (', '.join(MANAGERS)),
105                default=MANAGERS[0]),
106         Option("-C","--use-cache", dest="use_cache",
107                help="Use cached simulations if they exist (vs. running new simulations) (%default)",
108                default=False, action="store_true"),
109         Option("--clean-cache", dest="clean_cache",
110                help="Remove previously cached simulations if they exist (%default)",
111                default=False, action="store_true"),
112         Option("-d","--cache-dir", dest="cache_dir",
113                metavar="STRING",
114                help="Cache directory for sawsim unfolding forces (%default).",
115                default=CACHE_DIR),
116     ]
117
118     def __init__(self, sawsim=None, cache_dir=None,
119                  use_cache=False, clean_cache=False,
120                  manager=None):
121         if sawsim == None:
122             sawsim = SAWSIM
123         self._sawsim = sawsim
124         if cache_dir == None:
125             cache_dir = CACHE_DIR
126         self._cache_dir = cache_dir
127         self._use_cache = use_cache
128         self._clean_cache = clean_cache
129         self._manager = manager
130         self._local_manager = False
131         self._headline = None
132
133     def initialize_from_options(self, options):
134         self._sawsim = options.sawsim
135         self._cache_dir = options.cache_dir
136         self._use_cache = options.use_cache
137         self._clean_cache = options.clean_cache
138         self._manager = get_manager(options.manager)()
139         self._local_manager = True
140         call_params = {}
141         for param in ['param_string', 'N']:
142             try:
143                 call_params[param] = getattr(options, param)
144             except AttributeError:
145                 pass
146         return call_params
147
148     def teardown(self):
149         if self._local_manager == True:
150             self._manager.teardown()
151
152     def __call__(self, param_string, N):
153         """Run `N` simulations and yield `Event` generators for each run.
154
155         Use the `JobManager` instance `manager` for asynchronous job
156         execution.
157
158         If `_use_cache` is `True`, store an array of unfolding forces
159         in `cache_dir` for each simulated pull.  If the cached forces
160         are already present for `param_string`, do not redo the
161         simulation unless `_clean_cache` is `True`.
162         """
163         count = N
164         if self._use_cache == True:
165             d = self._param_cache_dir(param_string)
166             if os.path.exists(d):
167                 if self._clean_cache == True:
168                     shutil.rmtree(d)
169                     self._make_cache(param_string)
170                 else:
171                     for data in self._load_cached_data(param_string):
172                         yield data
173                         count -= 1
174                         if count == 0:
175                             return
176             else:
177                 self._make_cache(param_string)
178
179         jobs = {}
180         for i in range(count):
181             jobs[i] = self._manager.async_invoke(InvokeJob(
182                     target='%s %s' % (self._sawsim, param_string)))
183         complete_jobs = self._manager.wait(
184             [job.id for job in jobs.itervalues()])
185         for i,job in jobs.iteritems():
186             j = complete_jobs[job.id]
187             assert j.status == 0, j.data
188             if self._use_cache == True:
189                 self._cache_run(d, j.data['stdout'])
190             yield self.parse(j.data['stdout'])
191         del(jobs)
192         del(complete_jobs)
193
194     def _param_cache_dir(self, param_string):
195         """
196         >>> s = SawsimRunner()
197         >>> s._param_cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
198         '/.../.sawsim-cache/...'
199         """
200         return os.path.join(
201             self._cache_dir, hashlib.sha256(param_string).hexdigest())
202
203     def _make_cache(self, param_string):
204         cache_dir = self._param_cache_dir(param_string)
205         os.makedirs(cache_dir)
206         with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
207             f.write('# version: %s\n%s\n' % (__version__, param_string))
208
209     def _load_cached_data(self, param_string):
210         pcd = self._param_cache_dir(param_string)
211         for filename in shuffle(os.listdir(pcd)):
212             if not filename.endswith('.dat'):
213                 continue
214             with open(os.path.join(pcd, filename), 'r') as f:
215                 yield self.parse(f.read())
216
217     def _cache_run(self, cache_dir, stdout):
218         simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4())
219         with open(simulation_path, 'w') as f:
220             f.write(stdout)
221
222     def parse(self, text):
223         """Parse the output of a `sawsim` run.
224     
225         >>> text = '''#Force (N)\\tinitial state\\tFinal state
226         ... 2.90301e-10\\tfolded\\tunfolded
227         ... 2.83948e-10\\tfolded\\tunfolded
228         ... 2.83674e-10\\tfolded\\tunfolded
229         ... 2.48384e-10\\tfolded\\tunfolded
230         ... 2.43033e-10\\tfolded\\tunfolded
231         ... 2.77589e-10\\tfolded\\tunfolded
232         ... 2.85343e-10\\tfolded\\tunfolded
233         ... 2.67796e-10\\tfolded\\tunfolded
234         ... '''
235         >>> sr = SawsimRunner()
236         >>> events = list(sr.parse(text))
237         >>> len(events)
238         8
239         >>> events[0]  # doctest: +ELLIPSIS
240         Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
241         >>> sr._headline
242         ['Force (N)', 'initial state', 'Final state']
243         """
244         for line in text.splitlines():
245             line = line.strip()
246             if len(line) == 0:
247                 continue
248             elif line.startswith('#'):
249                 if self._headline == None:
250                     self._headline = line[len('#'):].split('\t')
251                 continue
252             fields = line.split('\t')
253             if len(fields) != 3:
254                 raise ValueError(fields)
255             force,initial_state,final_state = fields
256             yield Event(float(force), initial_state, final_state)
257
258
259 def main(argv=None):
260     """
261     >>> try:
262     ...     main(['--help'])
263     ... except SystemExit, e:
264     ...     pass  # doctest: +ELLIPSIS, +REPORT_UDIFF
265     Usage: ... [options]
266     <BLANKLINE>
267     Options:
268       -h, --help            show this help message and exit
269       -s PATH, --sawsim=PATH
270                             Set sawsim binary (sawsim).
271       ...
272     >>> print e
273     0
274     >>> main(['--sawsim', 'bin/sawsim', '-N', '2'])
275     ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
276     #Force (N)  Initial state  Final state
277     ...         folded         unfolded
278     ...         folded         unfolded
279     ...         folded         unfolded
280     ...         folded         unfolded
281     ...
282     ...         folded         unfolded
283     """
284     from optparse import OptionParser
285     import sys
286
287     if argv == None:
288         argv = sys.argv[1:]
289
290     sr = SawsimRunner()
291
292     usage = '%prog [options]'
293     epilog = '\n'.join([
294             'Python wrapper around `sawsim`.  Distribute `N` runs using',
295             'one of the possible job "managers".  Also supports caching',
296             'results to speed future runs.'
297             ])
298     parser = OptionParser(usage, epilog=epilog)
299     for option in sr.optparse_options:
300         parser.add_option(option)
301     
302     options,args = parser.parse_args(argv)
303
304     try:
305         sr_call_params = sr.initialize_from_options(options)
306     
307         first_run = True
308         for run in sr(**sr_call_params):
309             if first_run == True:
310                 first_run = False
311                 run = list(run)  # force iterator evaluation
312                 if sr._headline != None:
313                     print '#%s' % '\t'.join(sr._headline)
314             for event in run:
315                 print '\t'.join([str(x) for x in event])
316     finally:
317         sr.teardown()