Fix a few deadlock errors in pysawsim.manager.thread.
[sawsim.git] / pysawsim / fit_force_histograms.py
1 #!/usr/bin/python
2 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 #
17 # The author may be contacted at <wking@drexel.edu> on the Internet, or
18 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
19 # Philadelphia PA 19104, USA.
20
21 import os # HACK, for getpid()
22 import os.path
23 import pickle
24 import shutil
25 import sys
26
27
28 import matplotlib
29 import numpy
30 import pylab
31 from scipy.optimize import leastsq
32
33
34 matplotlib.use('Agg')  # select backend that doesn't require X Windows
35 FIGURE = pylab.figure()  # avoid memory problems.
36 """`pylab` keeps internal references to all created figures, so share
37 a single instance.
38 """
39
40 MEM_DEBUG = False
41
42 def rss():
43     """
44     For debugging memory usage.
45
46     resident set size, the non-swapped physical memory that a task has
47     used (in kiloBytes).
48     """
49     call = "ps -o rss= -p %d" % os.getpid()
50     status,stdout,stderr = invoke(call)
51     return int(stdout)
52
53
54 class HistogramMatcher (object):
55     def __init__(self, velocity_file, param_format_string, dir_prefix="v_dep"):
56         self.experiment_histograms = self.read_force_histograms(velocity_file)
57         self.param_format_string = param_format_string
58         self.dir_prefix = dir_prefix
59
60     def read_force_histograms(self, v_file):
61         """
62         v_file format:
63         # comment and blank lines ignored
64         <velocity in m/s><TAB><path to histogram file>
65         ...
66
67         e.g.
68
69         5e-7    histA
70         1e-6    histB
71         """
72         histograms = {}
73         v_file_dir = os.path.dirname(v_file)
74         for line in file(v_file, "r").readlines():
75             line = line.strip()
76             if len(line) == 0 or line[0] == "#":
77                 continue # ignore blank lines and comments
78             v,h_file = line.split("\t")
79             h = Histogram()
80             h.from_file(os.path.join(v_file_dir, h_file))
81             histograms[v] = h
82             histograms[v].normalize()
83         return histograms
84
85     def dirname(self, params):
86         if not hasattr(self, "_tag_i"):
87             self._tag_i = 0
88         self._tag_i += 1
89         return "%s-%d" % (self.dir_prefix, self._tag_i)
90
91     def generate_sawsim_histograms(self, params, histograms,
92                                    run_repeat_simulation=True):
93         velocities = histograms.keys()
94         dirname = self.dirname(params)
95         cmd = "run_vel_dep %s %s %s" \
96               % (dirname,
97                  ",".join([str(v) for v in velocities]),
98                  self.param_string(params))
99         if os.path.exists(dirname) and run_repeat_simulation == True:
100             print 'repeating simulation'
101             shutil.rmtree(dirname)
102         if not os.path.exists(dirname):
103             invoke(cmd)
104         sawsim_histograms = {}
105         for velocity in velocities:
106             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
107             bin_edges = histograms[velocity].bin_edges
108             h = Histogram()
109             h.from_data(unfolding_forces, bin_edges)
110             sawsim_histograms[velocity] = h
111             sawsim_histograms[velocity].normalize()
112         return sawsim_histograms
113
114     def param_string(self, params):
115         """
116         Generate a string of non-velocity options to pass to sawsim
117         for a given parameter list.
118         """
119         return self.param_format_string % (params[0], params[1])#tuple(params)
120
121     def get_all_unfolding_data(self, dirname, velocity_string):
122         datafile = os.path.join(dirname, "data_" + velocity_string)
123         return numpy.fromfile(datafile, sep=" ")
124
125     def residual(self, params, type='bin-height', plot=False,
126                  run_repeat_simulation=True):
127         sawsim_histograms = self.generate_sawsim_histograms( \
128             params, self.experiment_histograms,
129             run_repeat_simulation=run_repeat_simulation)
130         assert sorted(sawsim_histograms.keys()) == sorted(self.experiment_histograms.keys())
131         residual = 0
132         for velocity,experiment_histogram in self.experiment_histograms.items():
133             sawsim_histogram = sawsim_histograms[velocity]
134             title = None
135             if plot == True:
136                 title = ", ".join(["%g" % p for p in params])
137             r = experiment_histogram.residual(sawsim_histogram, type=type)
138             residual += r
139             if plot == True:
140                 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
141                 self.plot_residual_comparison(
142                     experiment_histogram, sawsim_histogram, residual=r,
143                     title=title, filename=filename)
144         print "residual", residual
145         del(sawsim_histograms)
146         return residual
147
148     def fit(self, initial_params, verbose=False):
149         p,cov,info,mesg,ier = leastsq(self.residual, initial_params,
150                                       full_output=True, maxfev=1000)
151         if verbose == True:
152             print "Fitted params:",p
153             print "Covariance mx:",cov
154             print "Info:", info
155             print "mesg:", mesg
156             if ier == 1:
157                 print "Solution converged"
158             else:
159                 print "Solution did not converge"
160         return p
161
162     def plot(self, param_ranges, logx=False, logy=False,
163              residual_type='bin-height',
164              plot_residuals=False, contour=False,
165              run_repeat_simulations=True):
166         xranges = param_ranges[0]
167         yranges = param_ranges[1]
168         if logx == False:
169             x = numpy.linspace(*xranges)
170         else:
171             m,M,n = xranges
172             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
173         if logy == False:
174             y = numpy.linspace(*yranges)
175         else:
176             m,M,n = yranges
177             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
178         X, Y = pylab.meshgrid(x,y)
179         C = numpy.zeros((len(y)-1, len(x)-1))
180         for i,xi in enumerate(x[:-1]):
181             for j,yi in enumerate(y[:-1]):
182                 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
183                 params = (xi,yi)
184                 r = self.residual(params, type=residual_type,
185                                   plot=plot_residuals,
186                                   run_repeat_simulation=run_repeat_simulations)
187                 C[j,i] = numpy.log(r) # better resolution in valleys
188                 if MEM_DEBUG == True:
189                     print >> sys.stderr, "RSS: %d KB" % rss()
190         C = numpy.nan_to_num(C) # NaN -> 0
191         fid = file("fit_force_histograms-XYC.pkl", "wb")
192         pickle.dump([X,Y,C], fid)
193         fid.close()
194         # read in with
195         # import pickle
196         # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
197         # ...
198         FIGURE.clear()
199         axes = FIGURE.add_subplot(111)
200         if logx == True:
201             axes.set_xscale('log')
202         if logy == True:
203             axes.set_yscale('log')
204         if contour == True:
205             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
206             # [:-1,:-1] to strip dummy last row & column from X&Y.
207         else: # pseudocolor plot
208             p = axes.pcolor(X, Y, C)
209             axes.autoscale_view(tight=True)
210         FIGURE.colorbar(p)
211         FIGURE.savefig("figure.png")
212
213     def plot_residual_comparison(self, expeiment_hist, theory_hist,
214                                  residual, title, filename):
215         FIGURE.clear()
216         p = pylab.plot(experiment_hist.bin_edges[:-1],
217                        experiment_hist.probabilities, 'r-',
218                        theory_hist.bin_edges[:-1],
219                        theory_hist.probabilities, 'b-')
220         pylab.title(title)
221         FIGURE.savefig(filename)
222
223
224 def parse_param_ranges_string(string):
225     """
226     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
227       ->
228     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
229     """
230     ranges = []
231     for range_string in string.split("],["):
232         range_number_strings = range_string.strip("[]").split(",")
233         ranges.append([float(x) for x in range_number_strings])
234     return ranges
235
236 def main():
237     import optparse
238     usage = "%prog [options] velocity_file"
239
240     parser = optparse.OptionParser(usage)
241     parser.add_option("-f","--param-format", dest="param_format",
242                       metavar="FORMAT",
243                       help="Convert params to sawsim options (%default).",
244                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
245     parser.add_option("-p","--initial-params", dest="initial_params",
246                       metavar="PARAMS",
247                       help="Initial params for fitting (%default).",
248                       default='3.3e-4,0.25e-9')
249     parser.add_option("-r","--param-range", dest="param_range",
250                       metavar="PARAMS",
251                       help="Param range for plotting (%default).",
252                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
253     parser.add_option("--logx", dest="logx",
254                       help="Use a log scale for the x range.",
255                       default=False, action="store_true")
256     parser.add_option("--logy", dest="logy",
257                       help="Use a log scale for the y range.",
258                       default=False, action="store_true")
259     parser.add_option("-d","--dir-prefix", dest="dir_prefix",
260                       metavar="STRING",
261                       help="Prefix to sawsim data directories (%default).",
262                       default='v_dep')
263     parser.add_option("-R","--residual", dest="residual",
264                       metavar="STRING",
265                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
266                       default='jensen-shannon')
267     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
268                       help="Generate residual difference plots for each point in the plot range.",
269                       default=False, action="store_true")
270     parser.add_option("-c","--contour-plot", dest="contour_plot",
271                       help="Select contour plot (vs. the default pseudocolor plot).",
272                       default=False, action="store_true")
273     parser.add_option("-n","--no-repeat-simulations", dest="simulations",
274                       help="Do not run simulations if they already exist (re-analyze a previous run's output).",
275                       default=True, action="store_false")
276     options,args = parser.parse_args()
277     initial_params = [float(p) for p in options.initial_params.split(",")]
278     param_ranges = parse_param_ranges_string(options.param_range)
279     velocity_file = args[0]
280
281     hm = HistogramMatcher(velocity_file, options.param_format, options.dir_prefix)
282     #hm.fit(initial_params, verbose=True)
283     hm.plot(param_ranges, logx=options.logx, logy=options.logy,
284             residual_type=options.residual,
285             plot_residuals=options.plot_residuals,
286             contour=options.contour_plot,
287             run_repeat_simulations=options.simulations)
288
289 if __name__ == "__main__":
290     main()