1 # Copyright (C) 2009-2010 W. Trevor King <wking@drexel.edu>
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
20 """Experiment vs. simulation comparison and scanning.
23 from os import getpid # for rss()
26 from StringIO import StringIO
29 matplotlib.use('Agg') # select backend that doesn't require X Windows
34 from .histogram import Histogram
35 from .manager import MANAGERS, get_manager
36 from .sawsim_histogram import sawsim_histogram
37 from .sawsim import SawsimRunner
40 FIGURE = pylab.figure() # avoid memory problems.
41 """`pylab` keeps internal references to all created figures, so share
44 EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
45 # Other general comments...
48 #Force (N)\tUnfolding events
70 #Force (N)\tUnfolding events
99 #Force (N)\tUnfolding events
129 For debugging memory usage.
131 resident set size, the non-swapped physical memory that a task has
132 used (in kilo-bytes).
134 call = "ps -o rss= -p %d" % getpid()
135 status,stdout,stderr = invoke(call)
139 class HistogramMatcher (object):
140 """Compare experimental histograms to simulated data.
142 The main entry points are `fit()` and `plot()`.
144 The input `histogram_stream` should contain a series of
145 experimental histograms with '#HISTOGRAM: <params>` lines starting
146 each histogram. `<params>` lists the `sawsim` parameters that are
147 unique to that experiment.
149 >>> from .manager.thread import ThreadManager
150 >>> velocity_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
151 >>> param_format_string = (
152 ... '-s cantilever,hooke,0.05 -N1 '
153 ... '-s folded,null -N8 '
154 ... '-s "unfolded,wlc,{0.39e-9,28e-9}" '
155 ... '-k "folded,unfolded,bell,{%g,%g}" -q folded')
156 >>> m = ThreadManager()
157 >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
158 >>> hm = HistogramMatcher(velocity_stream, param_format_string, sr, N=3)
159 >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
162 def __init__(self, histogram_stream, param_format_string,
163 sawsim_runner, N=400, residual_type='jensen-shannon',
165 self.experiment_histograms = self._read_force_histograms(
167 self.param_format_string = param_format_string
168 self.sawsim_runner = sawsim_runner
170 self.residual_type = residual_type
173 def _read_force_histograms(self, stream):
176 # comment and blank lines ignored
177 <velocity in m/s><whitespace><path to histogram file>
181 >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
182 >>> hm = HistogramMatcher(StringIO(), None, None, None)
183 >>> histograms = hm._read_force_histograms(stream)
184 >>> sorted(histograms.iterkeys())
185 ['-v 1e-6', '-v 6e-7', '-v 8e-7']
186 >>> histograms['-v 1e-6'].to_stream(sys.stdout)
187 ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
188 #Force (N)\tUnfolding events
210 token = '#HISTOGRAM:'
211 hist_blocks = {None: []}
213 for line in stream.readlines():
215 if line.startswith(token):
216 params = line[len(token):].strip()
217 assert params not in hist_blocks, params
218 hist_blocks[params] = []
220 hist_blocks[params].append(line)
223 for params,block in hist_blocks.iteritems():
227 h.from_stream(StringIO('\n'.join(block)))
228 histograms[params] = h
231 def param_string(self, params, hist_params):
232 """Generate a string of options to pass to `sawsim`.
235 self.param_format_string % tuple(params), hist_params)
237 def get_all_unfolding_data(self, dirname, velocity_string):
238 datafile = os.path.join(dirname, "data_" + velocity_string)
239 return numpy.fromfile(datafile, sep=" ")
241 sawsim_histograms = {}
242 for velocity in velocities:
243 unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
244 bin_edges = histograms[velocity].bin_edges
246 h.from_data(unfolding_forces, bin_edges)
247 sawsim_histograms[velocity] = h
248 sawsim_histograms[velocity].normalize()
249 return sawsim_histograms
251 def _residual(self, params):
253 for hist_params,experiment_hist in self.experiment_histograms.iteritems():
254 sawsim_hist = sawsim_histogram(
255 sawsim_runner=self.sawsim_runner,
256 param_string=self.param_string(params, hist_params),
257 N=self.N, bin_edges=experiment_hist.bin_edges)
258 r = experiment_hist.residual(sawsim_hist, type=self.residual_type)
260 if self._plot == True:
261 title = ", ".join(["%g" % p for p in params]+[hist_params])
262 filename = "residual-%s-%g.png" % (
263 title.replace(', ', '_').replace(' ', '_'), r)
264 self._plot_residual_comparison(
265 experiment_hist, sawsim_hist, residual=r,
266 title=title, filename=filename)
267 log().debug('residual: %g' % residual)
270 def plot(self, param_ranges, logx=False, logy=False, contour=False):
271 xranges = param_ranges[0]
272 yranges = param_ranges[1]
274 x = numpy.linspace(*xranges)
277 x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
279 y = numpy.linspace(*yranges)
282 y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
283 X, Y = pylab.meshgrid(x,y)
284 C = numpy.zeros((len(y)-1, len(x)-1))
285 for i,xi in enumerate(x[:-1]):
286 for j,yj in enumerate(y[:-1]):
287 log().info('point %d %d (%d of %d)'
288 % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
290 r = self._residual(params)
291 C[j,i] = numpy.log(r) # better resolution in valleys
292 if MEM_DEBUG == True:
293 log().debug('RSS: %d KB' % rss())
294 C = numpy.nan_to_num(C) # NaN -> 0
295 fid = file("histogram_matcher-XYC.pkl", "wb")
296 pickle.dump([X,Y,C], fid)
300 # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
303 axes = FIGURE.add_subplot(111)
305 axes.set_xscale('log')
307 axes.set_yscale('log')
309 p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
310 # [:-1,:-1] to strip dummy last row & column from X&Y.
311 else: # pseudocolor plot
312 p = axes.pcolor(X, Y, C)
313 axes.autoscale_view(tight=True)
315 FIGURE.savefig("figure.png")
317 def _plot_residual_comparison(self, experiment_hist, theory_hist,
318 residual, title, filename):
320 p = pylab.plot(experiment_hist.bin_edges[:-1],
321 experiment_hist.probabilities, 'r-',
322 theory_hist.bin_edges[:-1],
323 theory_hist.probabilities, 'b-')
325 FIGURE.savefig(filename)
328 def parse_param_ranges_string(string):
329 """Parse parameter range stings.
331 '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
333 [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
335 >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
336 [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
339 for range_string in string.split("],["):
340 range_number_strings = range_string.strip("[]").split(",")
341 ranges.append([float(x) for x in range_number_strings])
348 >>> f = tempfile.NamedTemporaryFile()
349 >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
351 >>> main(['-s', 'bin/sawsim',
352 ... '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
357 from optparse import OptionParser
365 usage = '%prog [options] histogram_file'
367 'Compare simulated results against experimental values over a',
368 'range of parameters. Generates a plot of fit quality over',
369 'the parameter space. The histogram file should look something',
372 EXAMPLE_HISTOGRAM_FILE_CONTENTS,
374 '`#HISTOGRAM: <params>` lines start each histogram. `params`',
375 'lists the `sawsim` parameters that are unique to that',
378 'Each histogram line is of the format:',
380 '<bin_edge><whitespace><count>',
382 '`<bin_edge>` should mark the left-hand side of the bin, and',
383 'all bins should be of equal width (so we know where the last',
386 parser = OptionParser(usage, epilog=epilog)
387 parser.format_epilog = lambda formatter: epilog+'\n'
388 for option in sr.optparse_options:
389 if option.dest == 'param_string':
391 parser.add_option(option)
392 parser.add_option("-f","--param-format", dest="param_format",
394 help="Convert params to sawsim options (%default).",
395 default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
396 parser.add_option("-p","--initial-params", dest="initial_params",
398 help="Initial params for fitting (%default).",
399 default='3.3e-4,0.25e-9')
400 parser.add_option("-r","--param-range", dest="param_range",
402 help="Param range for plotting (%default).",
403 default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
404 parser.add_option("-R","--residual", dest="residual",
406 help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
407 default='jensen-shannon')
408 parser.add_option("-P","--plot-residuals", dest="plot_residuals",
409 help="Generate residual difference plots for each point in the plot range.",
410 default=False, action="store_true")
411 parser.add_option("--logx", dest="logx",
412 help="Use a log scale for the x range.",
413 default=False, action="store_true")
414 parser.add_option("--logy", dest="logy",
415 help="Use a log scale for the y range.",
416 default=False, action="store_true")
417 parser.add_option("-c","--contour-plot", dest="contour_plot",
418 help="Select contour plot (vs. the default pseudocolor plot).",
419 default=False, action="store_true")
421 options,args = parser.parse_args(argv)
423 initial_params = [float(p) for p in options.initial_params.split(",")]
424 param_ranges = parse_param_ranges_string(options.param_range)
425 velocity_file = args[0]
426 sr_call_params = sr.initialize_from_options(options)
429 hm = HistogramMatcher(
430 file(velocity_file, 'r'), param_format_string=options.param_format,
431 sawsim_runner=sr, residual_type=options.residual,
432 plot=options.plot_residuals, **sr_call_params)
433 #hm.fit(initial_params)
434 hm.plot(param_ranges, logx=options.logx, logy=options.logy,
435 contour=options.contour_plot)