f877b4491ad538947000a0b5e5dd816ab96cb837
[sawsim.git] / pysawsim / velocity_dependant_scan.py
1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20 import os # HACK, for getpid()
21 import os.path
22 import pickle
23 import shutil
24 import sys
25
26 import matplotlib
27 import numpy
28 import pylab
29 from scipy.optimize import leastsq
30
31 from . import log
32 from .histogram import Histogram
33 from .sawsim_histogram import SawsimHistogram
34
35
36 matplotlib.use('Agg')  # select backend that doesn't require X Windows
37 FIGURE = pylab.figure()  # avoid memory problems.
38 """`pylab` keeps internal references to all created figures, so share
39 a single instance.
40 """
41
42 MEM_DEBUG = False
43
44 def rss():
45     """
46     For debugging memory usage.
47
48     resident set size, the non-swapped physical memory that a task has
49     used (in kilo-bytes).
50     """
51     call = "ps -o rss= -p %d" % os.getpid()
52     status,stdout,stderr = invoke(call)
53     return int(stdout)
54
55
56 class HistogramMatcher (object):
57     """Compare experimental velocity dependent histograms to simulated data.
58
59     The main entry points are `fit()` and `plot()`.
60     """
61     def __init__(self, velocity_stream, param_format_string, N=400,
62                  residual_type='jensen-shannon', plot=True, use_cache=False):
63         self.experiment_histograms = self.read_force_histograms(velocity_stream)
64         self.param_format_string = param_format_string
65         self.residual_type = residual_type
66         self.N = N
67         self.plot = plot
68         self.sawsim_histogram = SawsimHistogram(use_cache=self.use_cache)
69
70     def read_force_histograms(self, stream):
71         """
72         v_file format:
73         # comment and blank lines ignored
74         <velocity in m/s><whitespace><path to histogram file>
75         ...
76
77         e.g.
78
79         5e-7    histA
80         1e-6    histB
81         """
82         histograms = {}
83         v_file_dir = os.path.dirname(v_file)
84         for line in strem.readlines():
85             line = line.strip()
86             if len(line) == 0 or line[0] == "#":
87                 continue # ignore blank lines and comments
88             v,h_file = line.split()
89             h = Histogram()
90             h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
91             histograms[v] = h
92         return histograms
93
94     def param_string(self, params, velocity):
95         """Generate a string of options to pass to `sawsim`.
96         """
97         return '%s -v %g' % (
98             self.param_format_string % tuple(params), velocity)
99
100     def get_all_unfolding_data(self, dirname, velocity_string):
101         datafile = os.path.join(dirname, "data_" + velocity_string)
102         return numpy.fromfile(datafile, sep=" ")
103
104         sawsim_histograms = {}
105         for velocity in velocities:
106             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
107             bin_edges = histograms[velocity].bin_edges
108             h = Histogram()
109             h.from_data(unfolding_forces, bin_edges)
110             sawsim_histograms[velocity] = h
111             sawsim_histograms[velocity].normalize()
112         return sawsim_histograms
113
114     def _residual(self, params):
115         residual = 0
116         for velocity,experiment_hist in self.experiment_histograms.iteritems():
117             sawsim_hist = self.sawsim_histogram(
118                 param_string=self.param_string(params, velocity), N=self.N,
119                 bin_edges=experiment_hist.bin_edges)
120             r = experiment_histogram.residual(
121                 sawsim_hist, type=self.residual_type)
122             residual += r
123             if self.plot == True:
124                 title = ", ".join(["%g" % p for p in (params+[velocity])])
125                 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
126                 self._plot_residual_comparison(
127                     experiment_hist, sawsim_hist, residual=r,
128                     title=title, filename=filename)
129         log().debug('residual: %g' % residual)
130         return residual
131
132     def fit(self, initial_params):
133         p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
134                                       full_output=True, maxfev=1000)
135         _log = log()
136         _log.info('Fitted params: %s' % p)
137         _log.info('Covariance mx: %s' % cov)
138         _log.info('Info: %s' % info)
139         _log.info('Mesg: %s' % mesg)
140         return p
141
142     def plot(self, param_ranges, logx=False, logy=False, contour=False):
143         xranges = param_ranges[0]
144         yranges = param_ranges[1]
145         if logx == False:
146             x = numpy.linspace(*xranges)
147         else:
148             m,M,n = xranges
149             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
150         if logy == False:
151             y = numpy.linspace(*yranges)
152         else:
153             m,M,n = yranges
154             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
155         X, Y = pylab.meshgrid(x,y)
156         C = numpy.zeros((len(y)-1, len(x)-1))
157         for i,xi in enumerate(x[:-1]):
158             for j,yi in enumerate(y[:-1]):
159                 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
160                 params = (xi,yi)
161                 r = self._residual(params)
162                 C[j,i] = numpy.log(r) # better resolution in valleys
163                 if MEM_DEBUG == True:
164                     log().debug('RSS: %d KB' % rss())
165         C = numpy.nan_to_num(C) # NaN -> 0
166         fid = file("fit_force_histograms-XYC.pkl", "wb")
167         pickle.dump([X,Y,C], fid)
168         fid.close()
169         # read in with
170         # import pickle
171         # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
172         # ...
173         FIGURE.clear()
174         axes = FIGURE.add_subplot(111)
175         if logx == True:
176             axes.set_xscale('log')
177         if logy == True:
178             axes.set_yscale('log')
179         if contour == True:
180             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
181             # [:-1,:-1] to strip dummy last row & column from X&Y.
182         else: # pseudocolor plot
183             p = axes.pcolor(X, Y, C)
184             axes.autoscale_view(tight=True)
185         FIGURE.colorbar(p)
186         FIGURE.savefig("figure.png")
187
188     def _plot_residual_comparison(self, expeiment_hist, theory_hist,
189                                   residual, title, filename):
190         FIGURE.clear()
191         p = pylab.plot(experiment_hist.bin_edges[:-1],
192                        experiment_hist.probabilities, 'r-',
193                        theory_hist.bin_edges[:-1],
194                        theory_hist.probabilities, 'b-')
195         pylab.title(title)
196         FIGURE.savefig(filename)
197
198
199 def parse_param_ranges_string(string):
200     """Parse parameter range stings.
201
202     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
203       ->
204     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
205
206     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
207     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
208     """
209     ranges = []
210     for range_string in string.split("],["):
211         range_number_strings = range_string.strip("[]").split(",")
212         ranges.append([float(x) for x in range_number_strings])
213     return ranges
214
215
216 def main():
217     import optparse
218     usage = "%prog [options] velocity_file"
219
220     parser = optparse.OptionParser(usage)
221     parser.add_option("-f","--param-format", dest="param_format",
222                       metavar="FORMAT",
223                       help="Convert params to sawsim options (%default).",
224                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
225     parser.add_option("-p","--initial-params", dest="initial_params",
226                       metavar="PARAMS",
227                       help="Initial params for fitting (%default).",
228                       default='3.3e-4,0.25e-9')
229     parser.add_option("-r","--param-range", dest="param_range",
230                       metavar="PARAMS",
231                       help="Param range for plotting (%default).",
232                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
233     parser.add_option("--N", "--number-of-runs", dest="N",
234                       metavar="INT", type='int',
235                       help="Number of sawsim runs at each point in parameter space (%default).",
236                       default=400)
237     parser.add_option("--logx", dest="logx",
238                       help="Use a log scale for the x range.",
239                       default=False, action="store_true")
240     parser.add_option("--logy", dest="logy",
241                       help="Use a log scale for the y range.",
242                       default=False, action="store_true")
243     parser.add_option("-d","--cache-dir", dest="cache_dir",
244                       metavar="STRING",
245                       help="Cache directory for sawsim unfolding forces (%default).",
246                       default=sawsim_histogram.CACHE_DIR)
247     parser.add_option("-R","--residual", dest="residual",
248                       metavar="STRING",
249                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
250                       default='jensen-shannon')
251     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
252                       help="Generate residual difference plots for each point in the plot range.",
253                       default=False, action="store_true")
254     parser.add_option("-c","--contour-plot", dest="contour_plot",
255                       help="Select contour plot (vs. the default pseudocolor plot).",
256                       default=False, action="store_true")
257     parser.add_option("-C","--use_cache", dest="use_cache",
258                       help="Use cached simulations if they exist (vs. running new simulations) (%default)",
259                       default=False, action="store_true")
260     options,args = parser.parse_args()
261     initial_params = [float(p) for p in options.initial_params.split(",")]
262     param_ranges = parse_param_ranges_string(options.param_range)
263     velocity_file = args[0]
264
265     sawsim_histogram.CACHE_DIR = options.cache_dir
266
267     hm = HistogramMatcher(
268         file(velocity_file, 'r'), param_format_string=options.param_format,
269         N=options.N, residual_type=options.residual,
270         plot=options.plot_residuals, use_cache=options.use_cache)
271     #hm.fit(initial_params)
272     hm.plot(param_ranges, logx=options.logx, logy=options.logy,
273             contour=options.contour_plot)
274
275
276 if __name__ == "__main__":
277     main()