Add epilogs to each OptionParser in pysawsim.
[sawsim.git] / pysawsim / parameter_scan.py
1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20 """Experiment vs. simulation comparison and scanning.
21 """
22
23 from os import getpid  # for rss()
24 import os.path
25 import pickle
26 from StringIO import StringIO
27
28 import matplotlib
29 matplotlib.use('Agg')  # select backend that doesn't require X Windows
30 import numpy
31 import pylab
32
33 from . import log
34 from .histogram import Histogram
35 from .manager import MANAGERS, get_manager
36 from .sawsim_histogram import sawsim_histogram
37 from .sawsim import SawsimRunner
38
39
40 FIGURE = pylab.figure()  # avoid memory problems.
41 """`pylab` keeps internal references to all created figures, so share
42 a single instance.
43 """
44 EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
45 # Other general comments...
46
47 #HISTOGRAM: -v 6e-7
48 #Force (N)\tUnfolding events
49 1.8e-10\t1
50 1.9e-10\t0
51 2e-10\t0
52 2.1e-10\t0
53 2.2e-10\t0
54 2.3e-10\t0
55 2.4e-10\t1
56 2.5e-10\t0
57 2.6e-10\t2
58 2.7e-10\t0
59 2.8e-10\t1
60 2.9e-10\t6
61 3e-10\t2
62 3.1e-10\t3
63
64 #HISTOGRAM: -v 8e-7
65 #Force (N)\tUnfolding events
66 2.4e-10\t1
67 2.5e-10\t0
68 2.6e-10\t4
69 2.7e-10\t2
70 2.8e-10\t2
71 2.9e-10\t3
72 3e-10\t2
73 3.1e-10\t1
74 3.2e-10\t1
75
76 #HISTOGRAM: -v 1e-6
77 #Force (N)\tUnfolding events
78 2e-10\t1
79 2.1e-10\t0
80 2.2e-10\t1
81 2.3e-10\t1
82 2.4e-10\t1
83 2.5e-10\t0
84 2.6e-10\t2
85 2.7e-10\t1
86 2.8e-10\t4
87 2.9e-10\t2
88 3e-10\t2
89 3.1e-10\t0
90 3.2e-10\t1
91 """
92
93
94 MEM_DEBUG = False
95
96
97
98 def rss():
99     """
100     For debugging memory usage.
101
102     resident set size, the non-swapped physical memory that a task has
103     used (in kilo-bytes).
104     """
105     call = "ps -o rss= -p %d" % getpid()
106     status,stdout,stderr = invoke(call)
107     return int(stdout)
108
109
110 class HistogramMatcher (object):
111     """Compare experimental histograms to simulated data.
112
113     The main entry points are `fit()` and `plot()`.
114
115     The input `histogram_stream` should contain a series of
116     experimental histograms with '#HISTOGRAM: <params>` lines starting
117     each histogram.  `<params>` lists the `sawsim` parameters that are
118     unique to that experiment.
119
120     >>> from .manager.thread import ThreadManager
121     >>> velocity_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
122     >>> param_format_string = (
123     ...     '-s cantilever,hooke,0.05 -N1 '
124     ...     '-s folded,null -N8 '
125     ...     '-s "unfolded,wlc,{0.39e-9,28e-9}" '
126     ...     '-k "folded,unfolded,bell,{%g,%g}" -q folded')
127     >>> m = ThreadManager()
128     >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
129     >>> hm = HistogramMatcher(velocity_stream, param_format_string, sr, N=3)
130     >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
131     >>> m.teardown()
132     """
133     def __init__(self, histogram_stream, param_format_string,
134                  sawsim_runner, N=400, residual_type='jensen-shannon',
135                  plot=True):
136         self.experiment_histograms = self._read_force_histograms(
137             histogram_stream)
138         self.param_format_string = param_format_string
139         self.sawsim_runner = sawsim_runner
140         self.N = N
141         self.residual_type = residual_type
142         self._plot = plot
143
144     def _read_force_histograms(self, stream):
145         """
146         File format:
147         # comment and blank lines ignored
148         <velocity in m/s><whitespace><path to histogram file>
149         ...
150
151         >>> import sys
152         >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
153         >>> hm = HistogramMatcher(StringIO(), None, None, None)
154         >>> histograms = hm._read_force_histograms(stream)
155         >>> sorted(histograms.iterkeys())
156         ['-v 1e-6', '-v 6e-7', '-v 8e-7']
157         >>> histograms['-v 1e-6'].to_stream(sys.stdout)
158         ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
159         #Force (N)\tUnfolding events
160         2e-10\t1
161         2.1e-10\t0
162         2.2e-10\t1
163         2.3e-10\t1
164         2.4e-10\t1
165         2.5e-10\t0
166         2.6e-10\t2
167         2.7e-10\t1
168         2.8e-10\t4
169         2.9e-10\t2
170         3e-10\t2
171         3.1e-10\t0
172         3.2e-10\t1
173         """
174         token = '#HISTOGRAM:'
175         hist_blocks = {None: []}
176         params = None
177         for line in stream.readlines():
178             line = line.strip()
179             if line.startswith(token):
180                 params = line[len(token):].strip()
181                 assert params not in hist_blocks, params
182                 hist_blocks[params] = []
183             else:
184                 hist_blocks[params].append(line)
185
186         histograms = {}
187         for params,block in hist_blocks.iteritems():
188             if params == None:
189                 continue
190             h = Histogram()
191             h.from_stream(StringIO('\n'.join(block)))
192             histograms[params] = h
193         return histograms
194
195     def param_string(self, params, hist_params):
196         """Generate a string of options to pass to `sawsim`.
197         """
198         return '%s %s' % (
199             self.param_format_string % tuple(params), hist_params)
200
201     def get_all_unfolding_data(self, dirname, velocity_string):
202         datafile = os.path.join(dirname, "data_" + velocity_string)
203         return numpy.fromfile(datafile, sep=" ")
204
205         sawsim_histograms = {}
206         for velocity in velocities:
207             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
208             bin_edges = histograms[velocity].bin_edges
209             h = Histogram()
210             h.from_data(unfolding_forces, bin_edges)
211             sawsim_histograms[velocity] = h
212             sawsim_histograms[velocity].normalize()
213         return sawsim_histograms
214
215     def _residual(self, params):
216         residual = 0
217         for hist_params,experiment_hist in self.experiment_histograms.iteritems():
218             sawsim_hist = sawsim_histogram(
219                 sawsim_runner=self.sawsim_runner,
220                 param_string=self.param_string(params, hist_params),
221                 N=self.N, bin_edges=experiment_hist.bin_edges)
222             r = experiment_hist.residual(sawsim_hist, type=self.residual_type)
223             residual += r
224             if self._plot == True:
225                 title = ", ".join(["%g" % p for p in params]+[hist_params])
226                 filename = "residual-%s-%g.png" % (
227                     title.replace(', ', '_').replace(' ', '_'), r)
228                 self._plot_residual_comparison(
229                     experiment_hist, sawsim_hist, residual=r,
230                     title=title, filename=filename)
231         log().debug('residual: %g' % residual)
232         return residual
233
234     def plot(self, param_ranges, logx=False, logy=False, contour=False):
235         xranges = param_ranges[0]
236         yranges = param_ranges[1]
237         if logx == False:
238             x = numpy.linspace(*xranges)
239         else:
240             m,M,n = xranges
241             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
242         if logy == False:
243             y = numpy.linspace(*yranges)
244         else:
245             m,M,n = yranges
246             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
247         X, Y = pylab.meshgrid(x,y)
248         C = numpy.zeros((len(y)-1, len(x)-1))
249         for i,xi in enumerate(x[:-1]):
250             for j,yj in enumerate(y[:-1]):
251                 log().info('point %d %d (%d of %d)'
252                            % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
253                 params = (xi,yj)
254                 r = self._residual(params)
255                 C[j,i] = numpy.log(r) # better resolution in valleys
256                 if MEM_DEBUG == True:
257                     log().debug('RSS: %d KB' % rss())
258         C = numpy.nan_to_num(C) # NaN -> 0
259         fid = file("histogram_matcher-XYC.pkl", "wb")
260         pickle.dump([X,Y,C], fid)
261         fid.close()
262         # read in with
263         # import pickle
264         # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
265         # ...
266         FIGURE.clear()
267         axes = FIGURE.add_subplot(111)
268         if logx == True:
269             axes.set_xscale('log')
270         if logy == True:
271             axes.set_yscale('log')
272         if contour == True:
273             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
274             # [:-1,:-1] to strip dummy last row & column from X&Y.
275         else: # pseudocolor plot
276             p = axes.pcolor(X, Y, C)
277             axes.autoscale_view(tight=True)
278         FIGURE.colorbar(p)
279         FIGURE.savefig("figure.png")
280
281     def _plot_residual_comparison(self, experiment_hist, theory_hist,
282                                   residual, title, filename):
283         FIGURE.clear()
284         p = pylab.plot(experiment_hist.bin_edges[:-1],
285                        experiment_hist.probabilities, 'r-',
286                        theory_hist.bin_edges[:-1],
287                        theory_hist.probabilities, 'b-')
288         pylab.title(title)
289         FIGURE.savefig(filename)
290
291
292 def parse_param_ranges_string(string):
293     """Parse parameter range stings.
294
295     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
296       ->
297     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
298
299     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
300     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
301     """
302     ranges = []
303     for range_string in string.split("],["):
304         range_number_strings = range_string.strip("[]").split(",")
305         ranges.append([float(x) for x in range_number_strings])
306     return ranges
307
308
309 def main(argv=None):
310     """
311     >>> import tempfile
312     >>> f = tempfile.NamedTemporaryFile()
313     >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
314     >>> f.flush()
315     >>> main(['-s', 'bin/sawsim',
316     ...       '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
317     ...       '-N', '2',
318     ...       f.name])
319     >>> f.close()
320     """
321     from optparse import OptionParser
322     import sys
323
324     if argv == None:
325         argv = sys.argv[1:]
326
327     sr = SawsimRunner()
328
329     usage = '%prog [options] histogram_file'
330     epilog = '\n'.join([
331             'Compare simulated results against experimental values over a',
332             'range of parameters.  Generates a plot of fit quality over',
333             'the parameter space.  The histogram file should look something',
334             'like:',
335             '',
336             EXAMPLE_HISTOGRAM_FILE_CONTENTS,
337             ''
338             '`#HISTOGRAM: <params>` lines start each histogram.  `params`',
339             'lists the `sawsim` parameters that are unique to that',
340             'experiment.',
341             '',
342             'Each histogram line is of the format:',
343             '',
344             '<bin_edge><whitespace><count>',
345             '',
346             '`<bin_edge>` should mark the left-hand side of the bin, and',
347             'all bins should be of equal width (so we know where the last',
348             'one ends).',
349             ])
350     parser = OptionParser(usage, epilog=epilog)
351     parser.format_epilog = lambda formatter: epilog+'\n'
352     for option in sr.optparse_options:
353         if option.dest == 'param_string':
354             continue
355         parser.add_option(option)
356     parser.add_option("-f","--param-format", dest="param_format",
357                       metavar="FORMAT",
358                       help="Convert params to sawsim options (%default).",
359                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
360     parser.add_option("-p","--initial-params", dest="initial_params",
361                       metavar="PARAMS",
362                       help="Initial params for fitting (%default).",
363                       default='3.3e-4,0.25e-9')
364     parser.add_option("-r","--param-range", dest="param_range",
365                       metavar="PARAMS",
366                       help="Param range for plotting (%default).",
367                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
368     parser.add_option("-R","--residual", dest="residual",
369                       metavar="STRING",
370                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
371                       default='jensen-shannon')
372     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
373                       help="Generate residual difference plots for each point in the plot range.",
374                       default=False, action="store_true")
375     parser.add_option("--logx", dest="logx",
376                       help="Use a log scale for the x range.",
377                       default=False, action="store_true")
378     parser.add_option("--logy", dest="logy",
379                       help="Use a log scale for the y range.",
380                       default=False, action="store_true")
381     parser.add_option("-c","--contour-plot", dest="contour_plot",
382                       help="Select contour plot (vs. the default pseudocolor plot).",
383                       default=False, action="store_true")
384
385     options,args = parser.parse_args(argv)
386
387     initial_params = [float(p) for p in options.initial_params.split(",")]
388     param_ranges = parse_param_ranges_string(options.param_range)
389     velocity_file = args[0]
390     sr_call_params = sr.initialize_from_options(options)
391
392     try:
393         hm = HistogramMatcher(
394             file(velocity_file, 'r'), param_format_string=options.param_format,
395             sawsim_runner=sr, residual_type=options.residual,
396             plot=options.plot_residuals, **sr_call_params)
397         #hm.fit(initial_params)
398         hm.plot(param_ranges, logx=options.logx, logy=options.logy,
399                 contour=options.contour_plot)
400     finally:
401         sr.teardown()