Move `import with_statement` from sawsim_histogram to sawsim.
[sawsim.git] / pysawsim / sawsim.py
1 # Copyright (C) 2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drudge's University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20
21 """`Seminar` for running `sawsim` and parsing the results.
22 """
23
24 from __future__ import with_statement
25
26 try:
27     from collections import namedtuple
28 except ImportError:  # work around Python < 2.6
29     from ._collections import namedtuple
30 import hashlib
31 from optparse import Option
32 import os
33 import os.path
34 from random import shuffle
35 import shutil
36 from uuid import uuid4
37
38 from .manager import MANAGERS, get_manager, InvokeJob
39
40
41 SAWSIM = 'sawsim'  # os.path.expand(os.path.join('~', 'bin', 'sawsim'))
42 CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim-cache'))
43 DEFAULT_PARAM_STRING = (
44     '-s cantilever,hooke,0.05 -N1 '
45     '-s folded,null -N8 '
46     "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
47     "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
48     '-q folded -v 1e-6')
49
50
51 # `Event` instances represent domain state transitions.
52 Event = namedtuple(
53     typename='Event',
54     field_names=['force', 'initial_state', 'final_state'])
55
56
57 class SawsimRunner (object):
58     """
59     >>> from .manager.thread import ThreadManager
60     >>> m = ThreadManager()
61     >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
62     >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2):
63     ...     print 'New run'
64     ...     for i,event in enumerate(run):
65     ...         print i, event  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
66     New run
67     0 Event(force=..., initial_state='folded', final_state='unfolded')
68     1 Event(force=..., initial_state='folded', final_state='unfolded')
69     2 Event(force=..., initial_state='folded', final_state='unfolded')
70     3 Event(force=..., initial_state='folded', final_state='unfolded')
71     4 Event(force=..., initial_state='folded', final_state='unfolded')
72     5 Event(force=..., initial_state='folded', final_state='unfolded')
73     6 Event(force=..., initial_state='folded', final_state='unfolded')
74     7 Event(force=..., initial_state='folded', final_state='unfolded')
75     New run
76     0 Event(force=..., initial_state='folded', final_state='unfolded')
77     1 Event(force=..., initial_state='folded', final_state='unfolded')
78     2 Event(force=..., initial_state='folded', final_state='unfolded')
79     3 Event(force=..., initial_state='folded', final_state='unfolded')
80     4 Event(force=..., initial_state='folded', final_state='unfolded')
81     5 Event(force=..., initial_state='folded', final_state='unfolded')
82     6 Event(force=..., initial_state='folded', final_state='unfolded')
83     7 Event(force=..., initial_state='folded', final_state='unfolded')
84     >>> m.teardown()
85     """
86
87     optparse_options = [
88         Option("-s","--sawsim", dest="sawsim",
89                metavar="PATH",
90                help="Set sawsim binary (%default).",
91                default=SAWSIM),
92         Option("-p","--params", dest="param_string",
93                metavar="PARAMS",
94                help="Initial params for fitting (%default).",
95                default=DEFAULT_PARAM_STRING),
96         Option("-N", "--number-of-runs", dest="N",
97                metavar="INT", type='int',
98                help="Number of sawsim runs at each point in parameter space (%default).",
99                default=400),
100         Option("-m", "--manager", dest="manager",
101                metavar="STRING",
102                help="Job manager name (one of %s) (%%default)."
103                % (', '.join(MANAGERS)),
104                default=MANAGERS[0]),
105         Option("-C","--use-cache", dest="use_cache",
106                help="Use cached simulations if they exist (vs. running new simulations) (%default)",
107                default=False, action="store_true"),
108         Option("--clean-cache", dest="clean_cache",
109                help="Remove previously cached simulations if they exist (%default)",
110                default=False, action="store_true"),
111         Option("-d","--cache-dir", dest="cache_dir",
112                metavar="STRING",
113                help="Cache directory for sawsim unfolding forces (%default).",
114                default=CACHE_DIR),
115     ]
116
117     def __init__(self, sawsim=None, cache_dir=None,
118                  use_cache=False, clean_cache=False,
119                  manager=None):
120         if sawsim == None:
121             sawsim = SAWSIM
122         self._sawsim = sawsim
123         if cache_dir == None:
124             cache_dir = CACHE_DIR
125         self._cache_dir = cache_dir
126         self._use_cache = use_cache
127         self._clean_cache = clean_cache
128         self._manager = manager
129         self._local_manager = False
130         self._headline = None
131
132     def initialize_from_options(self, options):
133         self._sawsim = options.sawsim
134         self._cache_dir = options.cache_dir
135         self._use_cache = options.use_cache
136         self._clean_cache = options.clean_cache
137         self._manager = get_manager(options.manager)()
138         self._local_manager = True
139         call_params = {}
140         for param in ['param_string', 'N']:
141             try:
142                 call_params[param] = getattr(options, param)
143             except AttributeError:
144                 pass
145         return call_params
146
147     def teardown(self):
148         if self._local_manager == True:
149             self._manager.teardown()
150
151     def __call__(self, param_string, N):
152         """Run `N` simulations and yield `Event` generators for each run.
153
154         Use the `JobManager` instance `manager` for asynchronous job
155         execution.
156
157         If `_use_cache` is `True`, store an array of unfolding forces
158         in `cache_dir` for each simulated pull.  If the cached forces
159         are already present for `param_string`, do not redo the
160         simulation unless `_clean_cache` is `True`.
161         """
162         count = N
163         if self._use_cache == True:
164             d = self._param_cache_dir(param_string)
165             if os.path.exists(d):
166                 if self._clean_cache == True:
167                     shutil.rmtree(d)
168                     self._make_cache(param_string)
169                 else:
170                     for data in self._load_cached_data(param_string):
171                         yield data
172                         count -= 1
173                         if count == 0:
174                             return
175             else:
176                 self._make_cache(param_string)
177
178         jobs = {}
179         for i in range(count):
180             jobs[i] = self._manager.async_invoke(InvokeJob(
181                     target='%s %s' % (self._sawsim, param_string)))
182         complete_jobs = self._manager.wait(
183             [job.id for job in jobs.itervalues()])
184         for i,job in jobs.iteritems():
185             j = complete_jobs[job.id]
186             assert j.status == 0, j.data
187             if self._use_cache == True:
188                 self._cache_run(d, j.data['stdout'])
189             yield self.parse(j.data['stdout'])
190         del(jobs)
191         del(complete_jobs)
192
193     def _param_cache_dir(self, param_string):
194         """
195         >>> s = SawsimRunner()
196         >>> s._param_cache_dir(DEFAULT_PARAM_STRING)  # doctest: +ELLIPSIS
197         '/.../.sawsim-cache/...'
198         """
199         return os.path.join(
200             self._cache_dir, hashlib.sha256(param_string).hexdigest())
201
202     def _make_cache(self, param_string):
203         cache_dir = self._param_cache_dir(param_string)
204         os.makedirs(cache_dir)
205         with open(os.path.join(cache_dir, 'param_string'), 'w') as f:
206             f.write('# version: %s\n%s\n' % (__version__, param_string))
207
208     def _load_cached_data(self, param_string):
209         pcd = self._param_cache_dir(param_string)
210         for filename in shuffle(os.listdir(pcd)):
211             if not filename.endswith('.dat'):
212                 continue
213             with open(os.path.join(pcd, filename), 'r') as f:
214                 yield self.parse(f.read())
215
216     def _cache_run(self, cache_dir, stdout):
217         simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4())
218         with open(simulation_path, 'w') as f:
219             f.write(stdout)
220
221     def parse(self, text):
222         """Parse the output of a `sawsim` run.
223     
224         >>> text = '''#Force (N)\\tinitial state\\tFinal state
225         ... 2.90301e-10\\tfolded\\tunfolded
226         ... 2.83948e-10\\tfolded\\tunfolded
227         ... 2.83674e-10\\tfolded\\tunfolded
228         ... 2.48384e-10\\tfolded\\tunfolded
229         ... 2.43033e-10\\tfolded\\tunfolded
230         ... 2.77589e-10\\tfolded\\tunfolded
231         ... 2.85343e-10\\tfolded\\tunfolded
232         ... 2.67796e-10\\tfolded\\tunfolded
233         ... '''
234         >>> sr = SawsimRunner()
235         >>> events = list(sr.parse(text))
236         >>> len(events)
237         8
238         >>> events[0]  # doctest: +ELLIPSIS
239         Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
240         >>> sr._headline
241         ['Force (N)', 'initial state', 'Final state']
242         """
243         for line in text.splitlines():
244             line = line.strip()
245             if len(line) == 0:
246                 continue
247             elif line.startswith('#'):
248                 if self._headline == None:
249                     self._headline = line[len('#'):].split('\t')
250                 continue
251             fields = line.split('\t')
252             if len(fields) != 3:
253                 raise ValueError(fields)
254             force,initial_state,final_state = fields
255             yield Event(float(force), initial_state, final_state)
256
257
258 def main(argv=None):
259     """
260     >>> try:
261     ...     main(['--help'])
262     ... except SystemExit, e:
263     ...     pass  # doctest: +ELLIPSIS, +REPORT_UDIFF
264     Usage: ... [options]
265     <BLANKLINE>
266     Options:
267       -h, --help            show this help message and exit
268       -s PATH, --sawsim=PATH
269                             Set sawsim binary (sawsim).
270       ...
271     >>> print e
272     0
273     >>> main(['--sawsim', 'bin/sawsim', '-N', '2'])
274     ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
275     #Force (N)  Initial state  Final state
276     ...         folded         unfolded
277     ...         folded         unfolded
278     ...         folded         unfolded
279     ...         folded         unfolded
280     ...
281     ...         folded         unfolded
282     """
283     from optparse import OptionParser
284     import sys
285
286     if argv == None:
287         argv = sys.argv[1:]
288
289     sr = SawsimRunner()
290
291     usage = '%prog [options]'
292     epilog = '\n'.join([
293             'Python wrapper around `sawsim`.  Distribute `N` runs using',
294             'one of the possible job "managers".  Also supports caching',
295             'results to speed future runs.'
296             ])
297     parser = OptionParser(usage, epilog=epilog)
298     for option in sr.optparse_options:
299         parser.add_option(option)
300     
301     options,args = parser.parse_args(argv)
302
303     try:
304         sr_call_params = sr.initialize_from_options(options)
305     
306         first_run = True
307         for run in sr(**sr_call_params):
308             if first_run == True:
309                 first_run = False
310                 run = list(run)  # force iterator evaluation
311                 if sr._headline != None:
312                     print '#%s' % '\t'.join(sr._headline)
313             for event in run:
314                 print '\t'.join([str(x) for x in event])
315     finally:
316         sr.teardown()