1 # Copyright (C) 2009-2010 W. Trevor King <wking@drexel.edu>
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
20 import os # HACK, for getpid()
27 matplotlib.use('Agg') # select backend that doesn't require X Windows
30 from scipy.optimize import leastsq
33 from .histogram import Histogram
34 from .manager import MANAGERS, get_manager
35 from . import sawsim_histogram
38 FIGURE = pylab.figure() # avoid memory problems.
39 """`pylab` keeps internal references to all created figures, so share
47 For debugging memory usage.
49 resident set size, the non-swapped physical memory that a task has
52 call = "ps -o rss= -p %d" % os.getpid()
53 status,stdout,stderr = invoke(call)
57 class HistogramMatcher (object):
58 """Compare experimental velocity dependent histograms to simulated data.
60 The main entry points are `fit()` and `plot()`.
62 def __init__(self, velocity_stream, param_format_string, N=400,
63 manager=None, residual_type='jensen-shannon', plot=True,
64 use_cache=False, clean_cache=False):
65 self.experiment_histograms = self.read_force_histograms(velocity_stream)
66 self.param_format_string = param_format_string
67 self.residual_type = residual_type
69 self._manager = manager
71 self.sawsim_histogram = sawsim_histogram.SawsimHistogram(
72 use_cache=use_cache, clean_cach=clean_cache)
74 def read_force_histograms(self, stream):
77 # comment and blank lines ignored
78 <velocity in m/s><whitespace><path to histogram file>
87 v_file_dir = os.path.dirname(v_file)
88 for line in strem.readlines():
90 if len(line) == 0 or line[0] == "#":
91 continue # ignore blank lines and comments
92 v,h_file = line.split()
94 h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
98 def param_string(self, params, velocity):
99 """Generate a string of options to pass to `sawsim`.
101 return '%s -v %g' % (
102 self.param_format_string % tuple(params), velocity)
104 def get_all_unfolding_data(self, dirname, velocity_string):
105 datafile = os.path.join(dirname, "data_" + velocity_string)
106 return numpy.fromfile(datafile, sep=" ")
108 sawsim_histograms = {}
109 for velocity in velocities:
110 unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
111 bin_edges = histograms[velocity].bin_edges
113 h.from_data(unfolding_forces, bin_edges)
114 sawsim_histograms[velocity] = h
115 sawsim_histograms[velocity].normalize()
116 return sawsim_histograms
118 def _residual(self, params):
120 for velocity,experiment_hist in self.experiment_histograms.iteritems():
121 sawsim_hist = self.sawsim_histogram(
122 param_string=self.param_string(params, velocity), N=self.N,
123 bin_edges=experiment_hist.bin_edges, manager=self._manager)
124 r = experiment_histogram.residual(
125 sawsim_hist, type=self.residual_type)
127 if self.plot == True:
128 title = ", ".join(["%g" % p for p in (params+[velocity])])
129 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
130 self._plot_residual_comparison(
131 experiment_hist, sawsim_hist, residual=r,
132 title=title, filename=filename)
133 log().debug('residual: %g' % residual)
136 def fit(self, initial_params):
137 p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
138 full_output=True, maxfev=1000)
140 _log.info('Fitted params: %s' % p)
141 _log.info('Covariance mx: %s' % cov)
142 _log.info('Info: %s' % info)
143 _log.info('Mesg: %s' % mesg)
146 def plot(self, param_ranges, logx=False, logy=False, contour=False):
147 xranges = param_ranges[0]
148 yranges = param_ranges[1]
150 x = numpy.linspace(*xranges)
153 x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
155 y = numpy.linspace(*yranges)
158 y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
159 X, Y = pylab.meshgrid(x,y)
160 C = numpy.zeros((len(y)-1, len(x)-1))
161 for i,xi in enumerate(x[:-1]):
162 for j,yi in enumerate(y[:-1]):
163 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
165 r = self._residual(params)
166 C[j,i] = numpy.log(r) # better resolution in valleys
167 if MEM_DEBUG == True:
168 log().debug('RSS: %d KB' % rss())
169 C = numpy.nan_to_num(C) # NaN -> 0
170 fid = file("fit_force_histograms-XYC.pkl", "wb")
171 pickle.dump([X,Y,C], fid)
175 # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
178 axes = FIGURE.add_subplot(111)
180 axes.set_xscale('log')
182 axes.set_yscale('log')
184 p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
185 # [:-1,:-1] to strip dummy last row & column from X&Y.
186 else: # pseudocolor plot
187 p = axes.pcolor(X, Y, C)
188 axes.autoscale_view(tight=True)
190 FIGURE.savefig("figure.png")
192 def _plot_residual_comparison(self, expeiment_hist, theory_hist,
193 residual, title, filename):
195 p = pylab.plot(experiment_hist.bin_edges[:-1],
196 experiment_hist.probabilities, 'r-',
197 theory_hist.bin_edges[:-1],
198 theory_hist.probabilities, 'b-')
200 FIGURE.savefig(filename)
203 def parse_param_ranges_string(string):
204 """Parse parameter range stings.
206 '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
208 [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
210 >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
211 [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
214 for range_string in string.split("],["):
215 range_number_strings = range_string.strip("[]").split(",")
216 ranges.append([float(x) for x in range_number_strings])
222 usage = "%prog [options] velocity_file"
224 parser = optparse.OptionParser(usage)
225 parser.add_option("-f","--param-format", dest="param_format",
227 help="Convert params to sawsim options (%default).",
228 default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
229 parser.add_option("-p","--initial-params", dest="initial_params",
231 help="Initial params for fitting (%default).",
232 default='3.3e-4,0.25e-9')
233 parser.add_option("-r","--param-range", dest="param_range",
235 help="Param range for plotting (%default).",
236 default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
237 parser.add_option("-N", "--number-of-runs", dest="N",
238 metavar="INT", type='int',
239 help="Number of sawsim runs at each point in parameter space (%default).",
241 parser.add_option("-m", "--manager", dest="manager",
243 help="Job manager name (one of %s) (%%default)."
244 % (', '.join(MANAGERS)),
246 parser.add_option("-C","--use-cache", dest="use_cache",
247 help="Use cached simulations if they exist (vs. running new simulations) (%default)",
248 default=False, action="store_true")
249 parser.add_option("--clean-cache", dest="clean_cache",
250 help="Remove previously cached simulations if they exist (%default)",
251 default=False, action="store_true")
252 parser.add_option("-d","--cache-dir", dest="cache_dir",
254 help="Cache directory for sawsim unfolding forces (%default).",
255 default=sawsim_histogram.CACHE_DIR)
256 parser.add_option("-R","--residual", dest="residual",
258 help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
259 default='jensen-shannon')
260 parser.add_option("-P","--plot-residuals", dest="plot_residuals",
261 help="Generate residual difference plots for each point in the plot range.",
262 default=False, action="store_true")
263 parser.add_option("--logx", dest="logx",
264 help="Use a log scale for the x range.",
265 default=False, action="store_true")
266 parser.add_option("--logy", dest="logy",
267 help="Use a log scale for the y range.",
268 default=False, action="store_true")
269 parser.add_option("-c","--contour-plot", dest="contour_plot",
270 help="Select contour plot (vs. the default pseudocolor plot).",
271 default=False, action="store_true")
272 options,args = parser.parse_args()
273 initial_params = [float(p) for p in options.initial_params.split(",")]
274 param_ranges = parse_param_ranges_string(options.param_range)
275 velocity_file = args[0]
277 sawsim_histogram.CACHE_DIR = options.cache_dir
278 manager = get_manager(options.manager)()
280 hm = HistogramMatcher(
281 file(velocity_file, 'r'), param_format_string=options.param_format,
282 N=options.N, manager=manager, residual_type=options.residual,
283 plot=options.plot_residuals, use_cache=options.use_cache,
284 clean_cache=options.clean_cache)
285 #hm.fit(initial_params)
286 hm.plot(param_ranges, logx=options.logx, logy=options.logy,
287 contour=options.contour_plot)