Use single-quote delimited strings when possssible.
[sawsim.git] / pysawsim / parameter_scan.py
1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20 """Experiment vs. simulation comparison and scanning.
21 """
22
23 from os import getpid  # for rss()
24 import os.path
25 import pickle
26 from StringIO import StringIO
27
28 import matplotlib
29 matplotlib.use('Agg')  # select backend that doesn't require X Windows
30 import numpy
31 import pylab
32
33 from . import log
34 from .histogram import Histogram
35 from .sawsim_histogram import sawsim_histogram
36 from .sawsim import SawsimRunner
37
38
39 FIGURE = pylab.figure()  # avoid memory problems.
40 """`pylab` keeps internal references to all created figures, so share
41 a single instance.
42 """
43 EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
44 # Other general comments...
45
46 #HISTOGRAM: -v 6e-7
47 #Force (N)\tUnfolding events
48 1.4e-10\t1
49 1.5e-10\t0
50 1.6e-10\t4
51 1.7e-10\t6
52 1.8e-10\t8
53 1.9e-10\t20
54 2e-10\t28
55 2.1e-10\t38
56 2.2e-10\t72
57 2.3e-10\t110
58 2.4e-10\t155
59 2.5e-10\t247
60 2.6e-10\t395
61 2.7e-10\t451
62 2.8e-10\t430
63 2.9e-10\t300
64 3e-10\t116
65 3.1e-10\t18
66 3.2e-10\t1
67
68 #HISTOGRAM: -v 8e-7
69 #Force (N)\tUnfolding events
70 8e-11\t1
71 9e-11\t0
72 1e-10\t0
73 1.1e-10\t1
74 1.2e-10\t0
75 1.3e-10\t0
76 1.4e-10\t0
77 1.5e-10\t3
78 1.6e-10\t3
79 1.7e-10\t4
80 1.8e-10\t4
81 1.9e-10\t13
82 2e-10\t29
83 2.1e-10\t39
84 2.2e-10\t60
85 2.3e-10\t102
86 2.4e-10\t154
87 2.5e-10\t262
88 2.6e-10\t402
89 2.7e-10\t497
90 2.8e-10\t541
91 2.9e-10\t555
92 3e-10\t325
93 3.1e-10\t142
94 3.2e-10\t50
95 3.3e-10\t13
96
97 #HISTOGRAM: -v 1e-6
98 #Force (N)\tUnfolding events
99 1.5e-10\t2
100 1.6e-10\t3
101 1.7e-10\t7
102 1.8e-10\t8
103 1.9e-10\t7
104 2e-10\t25
105 2.1e-10\t30
106 2.2e-10\t58
107 2.3e-10\t76
108 2.4e-10\t159
109 2.5e-10\t216
110 2.6e-10\t313
111 2.7e-10\t451
112 2.8e-10\t568
113 2.9e-10\t533
114 3e-10\t416
115 3.1e-10\t222
116 3.2e-10\t80
117 3.3e-10\t24
118 3.4e-10\t2
119 """
120
121
122 MEM_DEBUG = False
123
124
125
126 def rss():
127     """
128     For debugging memory usage.
129
130     resident set size, the non-swapped physical memory that a task has
131     used (in kilo-bytes).
132     """
133     call = "ps -o rss= -p %d" % getpid()
134     status,stdout,stderr = invoke(call)
135     return int(stdout)
136
137
138 class HistogramMatcher (object):
139     """Compare experimental histograms to simulated data.
140
141     The main entry points are `fit()` and `plot()`.
142
143     The input `histogram_stream` should contain a series of
144     experimental histograms with '#HISTOGRAM: <params>` lines starting
145     each histogram.  `<params>` lists the `sawsim` parameters that are
146     unique to that experiment.
147
148     >>> from .manager.thread import ThreadManager
149     >>> histogram_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
150     >>> param_format_string = (
151     ...     '-s cantilever,hooke,0.05 -N1 '
152     ...     '-s folded,null -N8 '
153     ...     '-s "unfolded,wlc,{0.39e-9,28e-9}" '
154     ...     '-k "folded,unfolded,bell,{%g,%g}" -q folded')
155     >>> m = ThreadManager()
156     >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
157     >>> hm = HistogramMatcher(histogram_stream, param_format_string, sr, N=3)
158     >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
159     >>> m.teardown()
160     """
161     def __init__(self, histogram_stream, param_format_string,
162                  sawsim_runner, N=400, residual_type='jensen-shannon',
163                  plot=False):
164         self.experiment_histograms = self._read_force_histograms(
165             histogram_stream)
166         self.param_format_string = param_format_string
167         self.sawsim_runner = sawsim_runner
168         self.N = N
169         self.residual_type = residual_type
170         self._plot = plot
171
172     def _read_force_histograms(self, stream):
173         """
174         File format:
175
176           # comment and blank lines ignored
177           #HISTOGRAM: <histogram-specific params>
178           <pysawsim.histogram.Histogram-compatible histogram>
179           #HISTOGRAM: <other histogram-specific params>
180           <another pysawsim.histogram.Histogram-compatible histogram>
181           ...
182
183         >>> import sys
184         >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
185         >>> hm = HistogramMatcher(StringIO(), None, None, None)
186         >>> histograms = hm._read_force_histograms(stream)
187         >>> sorted(histograms.iterkeys())
188         ['-v 1e-6', '-v 6e-7', '-v 8e-7']
189         >>> histograms['-v 1e-6'].to_stream(sys.stdout)
190         ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
191         #Force (N)\tUnfolding events
192         1.5e-10\t2
193         1.6e-10\t3
194         1.7e-10\t7
195         1.8e-10\t8
196         1.9e-10\t7
197         2e-10\t25
198         2.1e-10\t30
199         2.2e-10\t58
200         2.3e-10\t76
201         2.4e-10\t159
202         2.5e-10\t216
203         2.6e-10\t313
204         2.7e-10\t451
205         2.8e-10\t568
206         2.9e-10\t533
207         3e-10\t416
208         3.1e-10\t222
209         3.2e-10\t80
210         3.3e-10\t24
211         3.4e-10\t2
212         """
213         token = '#HISTOGRAM:'
214         hist_blocks = {None: []}
215         params = None
216         for line in stream.readlines():
217             line = line.strip()
218             if line.startswith(token):
219                 params = line[len(token):].strip()
220                 assert params not in hist_blocks, params
221                 hist_blocks[params] = []
222             else:
223                 hist_blocks[params].append(line)
224
225         histograms = {}
226         for params,block in hist_blocks.iteritems():
227             if params == None:
228                 continue
229             h = Histogram()
230             h.from_stream(StringIO('\n'.join(block)))
231             histograms[params] = h
232         return histograms
233
234     def param_string(self, params, hist_params):
235         """Generate a string of options to pass to `sawsim`.
236         """
237         return '%s %s' % (
238             self.param_format_string % tuple(params), hist_params)
239
240     def residual(self, params):
241         residual = 0
242         for hist_params,experiment_hist in self.experiment_histograms.iteritems():
243             sawsim_hist = sawsim_histogram(
244                 sawsim_runner=self.sawsim_runner,
245                 param_string=self.param_string(params, hist_params),
246                 N=self.N, bin_edges=experiment_hist.bin_edges)
247             r = experiment_hist.residual(sawsim_hist, type=self.residual_type)
248             residual += r
249             if self._plot == True:
250                 title = ", ".join(["%g" % p for p in params]+[hist_params])
251                 filename = "residual-%s-%g.png" % (
252                     title.replace(', ', '_').replace(' ', '_'), r)
253                 self._plot_residual_comparison(
254                     experiment_hist, sawsim_hist, residual=r,
255                     title=title, filename=filename)
256         log().debug('residual %s: %g' % (params, residual))
257         return residual
258
259     def plot(self, param_ranges, logx=False, logy=False, contour=False):
260         xranges = param_ranges[0]
261         yranges = param_ranges[1]
262         if logx == False:
263             x = numpy.linspace(*xranges)
264         else:
265             m,M,n = xranges
266             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
267         if logy == False:
268             y = numpy.linspace(*yranges)
269         else:
270             m,M,n = yranges
271             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
272         X, Y = pylab.meshgrid(x,y)
273         C = numpy.zeros((len(y)-1, len(x)-1))
274         for i,xi in enumerate(x[:-1]):
275             for j,yj in enumerate(y[:-1]):
276                 log().info('point %d %d (%d of %d)'
277                            % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
278                 params = (xi,yj)
279                 r = self.residual(params)
280                 C[j,i] = numpy.log(r) # better resolution in valleys
281                 if MEM_DEBUG == True:
282                     log().debug('RSS: %d KB' % rss())
283         C = numpy.nan_to_num(C) # NaN -> 0
284         fid = file("histogram_matcher-XYC.pkl", "wb")
285         pickle.dump([X,Y,C], fid)
286         fid.close()
287         # read in with
288         # import pickle
289         # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
290         # ...
291         FIGURE.clear()
292         axes = FIGURE.add_subplot(111)
293         if logx == True:
294             axes.set_xscale('log')
295         if logy == True:
296             axes.set_yscale('log')
297         if contour == True:
298             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
299             # [:-1,:-1] to strip dummy last row & column from X&Y.
300         else: # pseudocolor plot
301             p = axes.pcolor(X, Y, C)
302             axes.autoscale_view(tight=True)
303         FIGURE.colorbar(p)
304         FIGURE.savefig("figure.png")
305
306     def _plot_residual_comparison(self, experiment_hist, theory_hist,
307                                   residual, title, filename):
308         FIGURE.clear()
309         p = pylab.plot(experiment_hist.bin_edges[:-1],
310                        experiment_hist.probabilities, 'r-',
311                        theory_hist.bin_edges[:-1],
312                        theory_hist.probabilities, 'b-')
313         pylab.title(title)
314         FIGURE.savefig(filename)
315
316
317 def parse_param_ranges_string(string):
318     """Parse parameter range stings.
319
320     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
321       ->
322     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
323
324     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
325     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
326     >>> parse_param_ranges_string('[1,2,3]')
327     [[1.0, 2.0, 3.0]]
328     """
329     ranges = []
330     for range_string in string.split("],["):
331         range_number_strings = range_string.strip("[]").split(",")
332         ranges.append([float(x) for x in range_number_strings])
333     return ranges
334
335
336 def main(argv=None):
337     """
338     >>> import tempfile
339     >>> f = tempfile.NamedTemporaryFile()
340     >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
341     >>> f.flush()
342     >>> main(['-s', 'bin/sawsim',
343     ...       '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
344     ...       '-N', '2',
345     ...       f.name])
346     >>> f.close()
347     """
348     from optparse import OptionParser
349     import sys
350
351     if argv == None:
352         argv = sys.argv[1:]
353
354     sr = SawsimRunner()
355
356     usage = '%prog [options] histogram_file'
357     epilog = '\n'.join([
358             'Compare simulated results against experimental values over a',
359             'range of parameters.  Generates a plot of fit quality over',
360             'the parameter space.  The histogram file should look something',
361             'like:',
362             '',
363             EXAMPLE_HISTOGRAM_FILE_CONTENTS,
364             ''
365             '`#HISTOGRAM: <params>` lines start each histogram.  `params`',
366             'lists the `sawsim` parameters that are unique to that',
367             'experiment.',
368             '',
369             'Each histogram line is of the format:',
370             '',
371             '<bin_edge><whitespace><count>',
372             '',
373             '`<bin_edge>` should mark the left-hand side of the bin, and',
374             'all bins should be of equal width (so we know where the last',
375             'one ends).',
376             ])
377     parser = OptionParser(usage, epilog=epilog)
378     parser.format_epilog = lambda formatter: epilog+'\n'
379     for option in sr.optparse_options:
380         if option.dest == 'param_string':
381             continue
382         parser.add_option(option)
383     parser.add_option('-f','--param-format', dest='param_format',
384                       metavar='FORMAT',
385                       help='Convert params to sawsim options (%default).',
386                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
387     parser.add_option('-p','--initial-params', dest='initial_params',
388                       metavar='PARAMS',
389                       help='Initial params for fitting (%default).',
390                       default='3.3e-4,0.25e-9')
391     parser.add_option('-r','--param-range', dest='param_range',
392                       metavar='PARAMS',
393                       help='Param range for plotting (%default).',
394                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
395     parser.add_option('--logx', dest='logx',
396                       help='Use a log scale for the x range.',
397                       default=False, action='store_true')
398     parser.add_option('--logy', dest='logy',
399                       help='Use a log scale for the y range.',
400                       default=False, action='store_true')
401     parser.add_option('-R','--residual', dest='residual',
402                       metavar='STRING',
403                       help='Residual type (from %s; default: %%default).'
404                       % ', '.join(Histogram().types()),
405                       default='jensen-shannon')
406     parser.add_option('-P','--plot-residuals', dest='plot_residuals',
407                       help='Generate residual difference plots for each point in the plot range.',
408                       default=False, action='store_true')
409     parser.add_option('-c','--contour-plot', dest='contour_plot',
410                       help='Select contour plot (vs. the default pseudocolor plot).',
411                       default=False, action='store_true')
412
413     options,args = parser.parse_args(argv)
414
415     initial_params = [float(p) for p in options.initial_params.split(",")]
416     param_ranges = parse_param_ranges_string(options.param_range)
417     histogram_file = args[0]
418     sr_call_params = sr.initialize_from_options(options)
419
420     try:
421         hm = HistogramMatcher(
422             file(histogram_file, 'r'),
423             param_format_string=options.param_format,
424             sawsim_runner=sr, residual_type=options.residual,
425             plot=options.plot_residuals, **sr_call_params)
426         #hm.fit(initial_params)
427         hm.plot(param_ranges, logx=options.logx, logy=options.logy,
428                 contour=options.contour_plot)
429     finally:
430         sr.teardown()