1 # Copyright (C) 2009-2010 W. Trevor King <wking@drexel.edu>
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
20 """Experiment vs. simulation comparison and scanning.
23 from os import getpid # for rss()
26 from StringIO import StringIO
29 matplotlib.use('Agg') # select backend that doesn't require X Windows
34 from .histogram import Histogram
35 from .manager import MANAGERS, get_manager
36 from .sawsim_histogram import sawsim_histogram
37 from .sawsim import SawsimRunner
40 FIGURE = pylab.figure() # avoid memory problems.
41 """`pylab` keeps internal references to all created figures, so share
44 EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
45 # Other general comments...
48 #Force (N)\tUnfolding events
65 #Force (N)\tUnfolding events
77 #Force (N)\tUnfolding events
100 For debugging memory usage.
102 resident set size, the non-swapped physical memory that a task has
103 used (in kilo-bytes).
105 call = "ps -o rss= -p %d" % getpid()
106 status,stdout,stderr = invoke(call)
110 class HistogramMatcher (object):
111 """Compare experimental histograms to simulated data.
113 The main entry points are `fit()` and `plot()`.
115 The input `histogram_stream` should contain a series of
116 experimental histograms with '#HISTOGRAM: <params>` lines starting
117 each histogram. `<params>` should list the `sawsim` parameters
118 that are unique to that experiment.
120 >>> from .manager.thread import ThreadManager
121 >>> velocity_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
122 >>> param_format_string = (
123 ... '-s cantilever,hooke,0.05 -N1 '
124 ... '-s folded,null -N8 '
125 ... '-s "unfolded,wlc,{0.39e-9,28e-9}" '
126 ... '-k "folded,unfolded,bell,{%g,%g}" -q folded')
127 >>> m = ThreadManager()
128 >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
129 >>> hm = HistogramMatcher(velocity_stream, param_format_string, sr, N=3)
130 >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
133 def __init__(self, histogram_stream, param_format_string,
134 sawsim_runner, N=400, residual_type='jensen-shannon',
136 self.experiment_histograms = self._read_force_histograms(
138 self.param_format_string = param_format_string
139 self.sawsim_runner = sawsim_runner
141 self.residual_type = residual_type
144 def _read_force_histograms(self, stream):
147 # comment and blank lines ignored
148 <velocity in m/s><whitespace><path to histogram file>
152 >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
153 >>> hm = HistogramMatcher(StringIO(), None, None, None)
154 >>> histograms = hm._read_force_histograms(stream)
155 >>> sorted(histograms.iterkeys())
156 ['-v 1e-6', '-v 6e-7', '-v 8e-7']
157 >>> histograms['-v 1e-6'].to_stream(sys.stdout)
158 ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
159 #Force (N)\tUnfolding events
174 token = '#HISTOGRAM:'
175 hist_blocks = {None: []}
177 for line in stream.readlines():
179 if line.startswith(token):
180 params = line[len(token):].strip()
181 assert params not in hist_blocks, params
182 hist_blocks[params] = []
184 hist_blocks[params].append(line)
187 for params,block in hist_blocks.iteritems():
191 h.from_stream(StringIO('\n'.join(block)))
192 histograms[params] = h
195 def param_string(self, params, hist_params):
196 """Generate a string of options to pass to `sawsim`.
199 self.param_format_string % tuple(params), hist_params)
201 def get_all_unfolding_data(self, dirname, velocity_string):
202 datafile = os.path.join(dirname, "data_" + velocity_string)
203 return numpy.fromfile(datafile, sep=" ")
205 sawsim_histograms = {}
206 for velocity in velocities:
207 unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
208 bin_edges = histograms[velocity].bin_edges
210 h.from_data(unfolding_forces, bin_edges)
211 sawsim_histograms[velocity] = h
212 sawsim_histograms[velocity].normalize()
213 return sawsim_histograms
215 def _residual(self, params):
217 for hist_params,experiment_hist in self.experiment_histograms.iteritems():
218 sawsim_hist = sawsim_histogram(
219 sawsim_runner=self.sawsim_runner,
220 param_string=self.param_string(params, hist_params),
221 N=self.N, bin_edges=experiment_hist.bin_edges)
222 r = experiment_hist.residual(sawsim_hist, type=self.residual_type)
224 if self._plot == True:
225 title = ", ".join(["%g" % p for p in params]+[hist_params])
226 filename = "residual-%s-%g.png" % (
227 title.replace(', ', '_').replace(' ', '_'), r)
228 self._plot_residual_comparison(
229 experiment_hist, sawsim_hist, residual=r,
230 title=title, filename=filename)
231 log().debug('residual: %g' % residual)
234 def plot(self, param_ranges, logx=False, logy=False, contour=False):
235 xranges = param_ranges[0]
236 yranges = param_ranges[1]
238 x = numpy.linspace(*xranges)
241 x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
243 y = numpy.linspace(*yranges)
246 y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
247 X, Y = pylab.meshgrid(x,y)
248 C = numpy.zeros((len(y)-1, len(x)-1))
249 for i,xi in enumerate(x[:-1]):
250 for j,yj in enumerate(y[:-1]):
251 log().info('point %d %d (%d of %d)'
252 % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
254 r = self._residual(params)
255 C[j,i] = numpy.log(r) # better resolution in valleys
256 if MEM_DEBUG == True:
257 log().debug('RSS: %d KB' % rss())
258 C = numpy.nan_to_num(C) # NaN -> 0
259 fid = file("histogram_matcher-XYC.pkl", "wb")
260 pickle.dump([X,Y,C], fid)
264 # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
267 axes = FIGURE.add_subplot(111)
269 axes.set_xscale('log')
271 axes.set_yscale('log')
273 p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
274 # [:-1,:-1] to strip dummy last row & column from X&Y.
275 else: # pseudocolor plot
276 p = axes.pcolor(X, Y, C)
277 axes.autoscale_view(tight=True)
279 FIGURE.savefig("figure.png")
281 def _plot_residual_comparison(self, experiment_hist, theory_hist,
282 residual, title, filename):
284 p = pylab.plot(experiment_hist.bin_edges[:-1],
285 experiment_hist.probabilities, 'r-',
286 theory_hist.bin_edges[:-1],
287 theory_hist.probabilities, 'b-')
289 FIGURE.savefig(filename)
292 def parse_param_ranges_string(string):
293 """Parse parameter range stings.
295 '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
297 [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
299 >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
300 [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
303 for range_string in string.split("],["):
304 range_number_strings = range_string.strip("[]").split(",")
305 ranges.append([float(x) for x in range_number_strings])
312 >>> f = tempfile.NamedTemporaryFile()
313 >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
315 >>> main(['-s', 'bin/sawsim',
316 ... '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
321 from optparse import OptionParser
329 usage = "%prog [options] velocity_file"
331 parser = OptionParser(usage)
332 for option in sr.optparse_options:
333 if option.dest == 'param_string':
335 parser.add_option(option)
336 parser.add_option("-f","--param-format", dest="param_format",
338 help="Convert params to sawsim options (%default).",
339 default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
340 parser.add_option("-p","--initial-params", dest="initial_params",
342 help="Initial params for fitting (%default).",
343 default='3.3e-4,0.25e-9')
344 parser.add_option("-r","--param-range", dest="param_range",
346 help="Param range for plotting (%default).",
347 default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
348 parser.add_option("-R","--residual", dest="residual",
350 help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
351 default='jensen-shannon')
352 parser.add_option("-P","--plot-residuals", dest="plot_residuals",
353 help="Generate residual difference plots for each point in the plot range.",
354 default=False, action="store_true")
355 parser.add_option("--logx", dest="logx",
356 help="Use a log scale for the x range.",
357 default=False, action="store_true")
358 parser.add_option("--logy", dest="logy",
359 help="Use a log scale for the y range.",
360 default=False, action="store_true")
361 parser.add_option("-c","--contour-plot", dest="contour_plot",
362 help="Select contour plot (vs. the default pseudocolor plot).",
363 default=False, action="store_true")
365 options,args = parser.parse_args(argv)
367 initial_params = [float(p) for p in options.initial_params.split(",")]
368 param_ranges = parse_param_ranges_string(options.param_range)
369 velocity_file = args[0]
370 sr_call_params = sr.initialize_from_options(options)
373 hm = HistogramMatcher(
374 file(velocity_file, 'r'), param_format_string=options.param_format,
375 sawsim_runner=sr, residual_type=options.residual,
376 plot=options.plot_residuals, **sr_call_params)
377 #hm.fit(initial_params)
378 hm.plot(param_ranges, logx=options.logx, logy=options.logy,
379 contour=options.contour_plot)