efbfbf: upgrade to Bugs Everywhere Directory v1.5
[sawsim.git] / pysawsim / parameter_scan.py
1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20 """Experiment vs. simulation comparison and scanning.
21 """
22
23 from os import getpid  # for rss()
24 import os.path
25 import pickle
26 from StringIO import StringIO
27
28 import numpy
29
30 from . import log
31 from . import PYSAWSIM_LOG_LEVEL_MSG as _PYSAWSIM_LOG_LEVEL_MSG
32 from .histogram import Histogram as _Histogram
33 from .histogram import FIGURE as _FIGURE
34 from .sawsim_histogram import sawsim_histogram
35 from .sawsim import SawsimRunner
36
37 # import after .histogram, which selects an appropriate backend
38 import pylab
39
40 _multiprocess_can_split_ = True
41 """Allow nosetests to split tests between processes.
42 """
43
44 EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
45 # Other general comments...
46
47 #HISTOGRAM: -v 6e-7
48 #Force (N)\tUnfolding events
49 1.4e-10\t1
50 1.5e-10\t0
51 1.6e-10\t4
52 1.7e-10\t6
53 1.8e-10\t8
54 1.9e-10\t20
55 2e-10\t28
56 2.1e-10\t38
57 2.2e-10\t72
58 2.3e-10\t110
59 2.4e-10\t155
60 2.5e-10\t247
61 2.6e-10\t395
62 2.7e-10\t451
63 2.8e-10\t430
64 2.9e-10\t300
65 3e-10\t116
66 3.1e-10\t18
67 3.2e-10\t1
68
69 #HISTOGRAM: -v 8e-7
70 #Force (N)\tUnfolding events
71 8e-11\t1
72 9e-11\t0
73 1e-10\t0
74 1.1e-10\t1
75 1.2e-10\t0
76 1.3e-10\t0
77 1.4e-10\t0
78 1.5e-10\t3
79 1.6e-10\t3
80 1.7e-10\t4
81 1.8e-10\t4
82 1.9e-10\t13
83 2e-10\t29
84 2.1e-10\t39
85 2.2e-10\t60
86 2.3e-10\t102
87 2.4e-10\t154
88 2.5e-10\t262
89 2.6e-10\t402
90 2.7e-10\t497
91 2.8e-10\t541
92 2.9e-10\t555
93 3e-10\t325
94 3.1e-10\t142
95 3.2e-10\t50
96 3.3e-10\t13
97
98 #HISTOGRAM: -v 1e-6
99 #Force (N)\tUnfolding events
100 1.5e-10\t2
101 1.6e-10\t3
102 1.7e-10\t7
103 1.8e-10\t8
104 1.9e-10\t7
105 2e-10\t25
106 2.1e-10\t30
107 2.2e-10\t58
108 2.3e-10\t76
109 2.4e-10\t159
110 2.5e-10\t216
111 2.6e-10\t313
112 2.7e-10\t451
113 2.8e-10\t568
114 2.9e-10\t533
115 3e-10\t416
116 3.1e-10\t222
117 3.2e-10\t80
118 3.3e-10\t24
119 3.4e-10\t2
120 """
121
122
123 MEM_DEBUG = False
124
125
126
127 def rss():
128     """
129     For debugging memory usage.
130
131     resident set size, the non-swapped physical memory that a task has
132     used (in kilo-bytes).
133     """
134     call = "ps -o rss= -p %d" % getpid()
135     status,stdout,stderr = invoke(call)
136     return int(stdout)
137
138
139 class HistogramMatcher (object):
140     """Compare experimental histograms to simulated data.
141
142     The main entry points are `fit()` and `plot()`.
143
144     The input `histogram_stream` should contain a series of
145     experimental histograms with '#HISTOGRAM: <params>` lines starting
146     each histogram.  `<params>` lists the `sawsim` parameters that are
147     unique to that experiment.
148
149     >>> from .manager.thread import ThreadManager
150     >>> histogram_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
151     >>> param_format_string = (
152     ...     '-s cantilever,hooke,0.05 -N1 '
153     ...     '-s folded,null -N8 '
154     ...     '-s "unfolded,wlc,{0.39e-9,28e-9}" '
155     ...     '-k "folded,unfolded,bell,{%g,%g}" -q folded')
156     >>> m = ThreadManager()
157     >>> sr = SawsimRunner(manager=m)
158     >>> hm = HistogramMatcher(histogram_stream, param_format_string, sr, N=3)
159     >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
160     >>> m.teardown()
161     """
162     def __init__(self, histogram_stream, param_format_string,
163                  sawsim_runner, N=400, residual_type='jensen-shannon',
164                  plot=False):
165         self.experiment_histograms = self._read_force_histograms(
166             histogram_stream)
167         self.param_format_string = param_format_string
168         self.sawsim_runner = sawsim_runner
169         self.N = N
170         self.residual_type = residual_type
171         self._plot = plot
172
173     @staticmethod
174     def _read_force_histograms(stream):
175         """
176         File format:
177
178           # comment and blank lines ignored
179           #HISTOGRAM: <histogram-specific params>
180           <pysawsim.histogram.Histogram-compatible histogram>
181           #HISTOGRAM: <other histogram-specific params>
182           <another pysawsim.histogram.Histogram-compatible histogram>
183           ...
184
185         >>> import sys
186         >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
187         >>> hm = HistogramMatcher(StringIO(), None, None, None)
188         >>> histograms = hm._read_force_histograms(stream)
189         >>> sorted(histograms.iterkeys())
190         ['-v 1e-6', '-v 6e-7', '-v 8e-7']
191         >>> histograms['-v 1e-6'].to_stream(sys.stdout)
192         ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
193         #Force (N)\tUnfolding events
194         1.5e-10\t2
195         1.6e-10\t3
196         1.7e-10\t7
197         1.8e-10\t8
198         1.9e-10\t7
199         2e-10\t25
200         2.1e-10\t30
201         2.2e-10\t58
202         2.3e-10\t76
203         2.4e-10\t159
204         2.5e-10\t216
205         2.6e-10\t313
206         2.7e-10\t451
207         2.8e-10\t568
208         2.9e-10\t533
209         3e-10\t416
210         3.1e-10\t222
211         3.2e-10\t80
212         3.3e-10\t24
213         3.4e-10\t2
214         """
215         token = '#HISTOGRAM:'
216         hist_blocks = {None: []}
217         params = None
218         for line in stream.readlines():
219             line = line.strip()
220             if line.startswith(token):
221                 params = line[len(token):].strip()
222                 assert params not in hist_blocks, params
223                 hist_blocks[params] = []
224             else:
225                 hist_blocks[params].append(line)
226
227         histograms = {}
228         for params,block in hist_blocks.iteritems():
229             if params == None:
230                 continue
231             h = _Histogram()
232             h.from_stream(StringIO('\n'.join(block)))
233             histograms[params] = h
234         return histograms
235
236     def param_string(self, params, hist_params):
237         """Generate a string of options to pass to `sawsim`.
238         """
239         return '%s %s' % (
240             self.param_format_string % tuple(params), hist_params)
241
242     def residual(self, params):
243         residual = 0
244         for hist_params,experiment_hist in self.experiment_histograms.iteritems():
245             sawsim_hist = sawsim_histogram(
246                 sawsim_runner=self.sawsim_runner,
247                 param_string=self.param_string(params, hist_params),
248                 N=self.N, bin_edges=experiment_hist.bin_edges)
249             r = experiment_hist.residual(sawsim_hist, type=self.residual_type)
250             residual += r
251             if self._plot == True:
252                 title = ", ".join(["%g" % p for p in params]+[hist_params])
253                 filename = "residual-%s-%g.png" % (
254                     title.replace(', ', '_').replace(' ', '_'), r)
255                 self._plot_residual_comparison(
256                     experiment_hist, sawsim_hist, residual=r,
257                     title=title, filename=filename)
258         log().debug('residual %s: %g' % (params, residual))
259         return residual
260
261     def plot(self, param_ranges, logx=False, logy=False, contour=False,
262              csv=None):
263         if csv:
264             csv.write(','.join(('param 1', 'param 2', 'fit quality')) + '\n')
265         xranges = param_ranges[0]
266         yranges = param_ranges[1]
267         if logx == False:
268             x = numpy.linspace(*xranges)
269         else:
270             m,M,n = xranges
271             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
272         if logy == False:
273             y = numpy.linspace(*yranges)
274         else:
275             m,M,n = yranges
276             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
277         X, Y = pylab.meshgrid(x,y)
278         C = numpy.zeros((len(y)-1, len(x)-1))
279         for i,xi in enumerate(x[:-1]):
280             for j,yj in enumerate(y[:-1]):
281                 log().info('point %d %d (%d of %d)'
282                            % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
283                 params = (xi,yj)
284                 r = self.residual(params)
285                 if csv:
286                     csv.write(','.join([str(v) for v in (xi,yj,r)]) + '\n')
287                 C[j,i] = numpy.log(r) # better resolution in valleys
288                 if MEM_DEBUG == True:
289                     log().debug('RSS: %d KB' % rss())
290         C = numpy.nan_to_num(C) # NaN -> 0
291         fid = file("histogram_matcher-XYC.pkl", "wb")
292         pickle.dump([X,Y,C], fid)
293         fid.close()
294         # read in with
295         # import pickle
296         # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
297         # ...
298         _FIGURE.clear()
299         axes = _FIGURE.add_subplot(1, 1, 1)
300         if logx == True:
301             axes.set_xscale('log')
302         if logy == True:
303             axes.set_yscale('log')
304         if contour == True:
305             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
306             # [:-1,:-1] to strip dummy last row & column from X&Y.
307         else: # pseudocolor plot
308             p = axes.pcolor(X, Y, C)
309             axes.autoscale_view(tight=True)
310         _FIGURE.colorbar(p)
311         _FIGURE.savefig("figure.png")
312
313     def _plot_residual_comparison(self, experiment_hist, theory_hist,
314                                   residual, title, filename):
315         _FIGURE.clear()
316         axes = _FIGURE.add_subplot(1, 1, 1)
317         axes.hold(True)
318         axes.hist(x=experiment_hist.bin_edges[:-1],  # one fake entry for each bin
319                   weights=experiment_hist.counts,    # weigh the fake entries by count
320                   bins=experiment_hist.bin_edges,
321                   align='mid', histtype='stepfilled',
322                   color='r')
323         axes.hist(x=theory_hist.bin_edges[:-1],  # one fake entry for each bin
324                   weights=theory_hist.counts,    # weigh the fake entries by count
325                   bins=theory_hist.bin_edges,
326                   align='mid', histtype='stepfilled',
327                   color='b', alpha=0.5)
328         axes.set_title(title)
329         _FIGURE.savefig(filename)
330
331
332 def parse_param_ranges_string(string):
333     """Parse parameter range stings.
334
335     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
336       ->
337     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
338
339     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
340     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
341     >>> parse_param_ranges_string('[1,2,3]')
342     [[1.0, 2.0, 3.0]]
343     """
344     ranges = []
345     for range_string in string.split("],["):
346         range_number_strings = range_string.strip("[]").split(",")
347         ranges.append([float(x) for x in range_number_strings])
348     return ranges
349
350
351 def main(argv=None):
352     """
353     >>> import tempfile
354     >>> f = tempfile.NamedTemporaryFile()
355     >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
356     >>> f.flush()
357     >>> main(['-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
358     ...       '-N', '2',
359     ...       f.name])
360     >>> f.close()
361     """
362     from optparse import OptionParser
363     import sys
364
365     if argv == None:
366         argv = sys.argv[1:]
367
368     sr = SawsimRunner()
369
370     usage = '%prog [options] histogram_file'
371     epilog = '\n'.join([
372             'Compare simulated results against experimental values over a',
373             'range of parameters.  Generates a plot of fit quality over',
374             'the parameter space.  The histogram file should look something',
375             'like:',
376             '',
377             EXAMPLE_HISTOGRAM_FILE_CONTENTS,
378             ''
379             '`#HISTOGRAM: <params>` lines start each histogram.  `params`',
380             'lists the `sawsim` parameters that are unique to that',
381             'experiment.',
382             '',
383             'Each histogram line is of the format:',
384             '',
385             '<bin_edge><whitespace><count>',
386             '',
387             '`<bin_edge>` should mark the left-hand side of the bin, and',
388             'all bins should be of equal width (so we know where the last',
389             'one ends).',
390             _PYSAWSIM_LOG_LEVEL_MSG,
391             ])
392     parser = OptionParser(usage, epilog=epilog)
393     parser.format_epilog = lambda formatter: epilog+'\n'
394     for option in sr.optparse_options:
395         if option.dest == 'param_string':
396             continue
397         parser.add_option(option)
398     parser.add_option('-f','--param-format', dest='param_format',
399                       metavar='FORMAT',
400                       help='Convert params to sawsim options (%default).',
401                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
402     parser.add_option('-p','--initial-params', dest='initial_params',
403                       metavar='PARAMS',
404                       help='Initial params for fitting (%default).',
405                       default='3.3e-4,0.25e-9')
406     parser.add_option('-r','--param-range', dest='param_range',
407                       metavar='PARAMS',
408                       help='Param range for plotting (%default).',
409                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
410     parser.add_option('--logx', dest='logx',
411                       help='Use a log scale for the x range.',
412                       default=False, action='store_true')
413     parser.add_option('--logy', dest='logy',
414                       help='Use a log scale for the y range.',
415                       default=False, action='store_true')
416     parser.add_option('-R','--residual', dest='residual',
417                       metavar='STRING',
418                       help='Residual type (from %s; default: %%default).'
419                       % ', '.join(_Histogram().types()),
420                       default='jensen-shannon')
421     parser.add_option('-P','--plot-residuals', dest='plot_residuals',
422                       help='Generate residual difference plots for each point in the plot range.',
423                       default=False, action='store_true')
424     parser.add_option('-c','--contour-plot', dest='contour_plot',
425                       help='Select contour plot (vs. the default pseudocolor plot).',
426                       default=False, action='store_true')
427     parser.add_option('--csv', dest='csv', metavar='FILE',
428                       help='Save fit qualities to a comma-separated value file FILE.'),
429
430     options,args = parser.parse_args(argv)
431
432     initial_params = [float(p) for p in options.initial_params.split(",")]
433     param_ranges = parse_param_ranges_string(options.param_range)
434     histogram_file = args[0]
435     csv = None
436     sr_call_params = sr.initialize_from_options(options)
437
438     try:
439         hm = HistogramMatcher(
440             file(histogram_file, 'r'),
441             param_format_string=options.param_format,
442             sawsim_runner=sr, residual_type=options.residual,
443             plot=options.plot_residuals, **sr_call_params)
444         #hm.fit(initial_params)
445         if options.csv:
446             csv = open(options.csv, 'w')
447         hm.plot(param_ranges, logx=options.logx, logy=options.logy,
448                 contour=options.contour_plot, csv=csv)
449     finally:
450         sr.teardown()
451         if csv:
452             csv.close()