Reposition matplotlib.use() call in velocity_dependant_scan.py.
[sawsim.git] / pysawsim / velocity_dependant_scan.py
1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20 import os # HACK, for getpid()
21 import os.path
22 import pickle
23 import shutil
24 import sys
25
26 import matplotlib
27 matplotlib.use('Agg')  # select backend that doesn't require X Windows
28 import numpy
29 import pylab
30 from scipy.optimize import leastsq
31
32 from . import log
33 from .histogram import Histogram
34 from .manager import MANAGERS, get_manager
35 from . import sawsim_histogram
36
37
38 FIGURE = pylab.figure()  # avoid memory problems.
39 """`pylab` keeps internal references to all created figures, so share
40 a single instance.
41 """
42
43 MEM_DEBUG = False
44
45 def rss():
46     """
47     For debugging memory usage.
48
49     resident set size, the non-swapped physical memory that a task has
50     used (in kilo-bytes).
51     """
52     call = "ps -o rss= -p %d" % os.getpid()
53     status,stdout,stderr = invoke(call)
54     return int(stdout)
55
56
57 class HistogramMatcher (object):
58     """Compare experimental velocity dependent histograms to simulated data.
59
60     The main entry points are `fit()` and `plot()`.
61     """
62     def __init__(self, velocity_stream, param_format_string, N=400,
63                  residual_type='jensen-shannon', plot=True, use_cache=False):
64         self.experiment_histograms = self.read_force_histograms(velocity_stream)
65         self.param_format_string = param_format_string
66         self.residual_type = residual_type
67         self.N = N
68         self.plot = plot
69         self.sawsim_histogram = sawsim_histogram.SawsimHistogram(
70             use_cache=self.use_cache)
71
72     def read_force_histograms(self, stream):
73         """
74         v_file format:
75         # comment and blank lines ignored
76         <velocity in m/s><whitespace><path to histogram file>
77         ...
78
79         e.g.
80
81         5e-7    histA
82         1e-6    histB
83         """
84         histograms = {}
85         v_file_dir = os.path.dirname(v_file)
86         for line in strem.readlines():
87             line = line.strip()
88             if len(line) == 0 or line[0] == "#":
89                 continue # ignore blank lines and comments
90             v,h_file = line.split()
91             h = Histogram()
92             h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
93             histograms[v] = h
94         return histograms
95
96     def param_string(self, params, velocity):
97         """Generate a string of options to pass to `sawsim`.
98         """
99         return '%s -v %g' % (
100             self.param_format_string % tuple(params), velocity)
101
102     def get_all_unfolding_data(self, dirname, velocity_string):
103         datafile = os.path.join(dirname, "data_" + velocity_string)
104         return numpy.fromfile(datafile, sep=" ")
105
106         sawsim_histograms = {}
107         for velocity in velocities:
108             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
109             bin_edges = histograms[velocity].bin_edges
110             h = Histogram()
111             h.from_data(unfolding_forces, bin_edges)
112             sawsim_histograms[velocity] = h
113             sawsim_histograms[velocity].normalize()
114         return sawsim_histograms
115
116     def _residual(self, params):
117         residual = 0
118         for velocity,experiment_hist in self.experiment_histograms.iteritems():
119             sawsim_hist = self.sawsim_histogram(
120                 param_string=self.param_string(params, velocity), N=self.N,
121                 bin_edges=experiment_hist.bin_edges)
122             r = experiment_histogram.residual(
123                 sawsim_hist, type=self.residual_type)
124             residual += r
125             if self.plot == True:
126                 title = ", ".join(["%g" % p for p in (params+[velocity])])
127                 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
128                 self._plot_residual_comparison(
129                     experiment_hist, sawsim_hist, residual=r,
130                     title=title, filename=filename)
131         log().debug('residual: %g' % residual)
132         return residual
133
134     def fit(self, initial_params):
135         p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
136                                       full_output=True, maxfev=1000)
137         _log = log()
138         _log.info('Fitted params: %s' % p)
139         _log.info('Covariance mx: %s' % cov)
140         _log.info('Info: %s' % info)
141         _log.info('Mesg: %s' % mesg)
142         return p
143
144     def plot(self, param_ranges, logx=False, logy=False, contour=False):
145         xranges = param_ranges[0]
146         yranges = param_ranges[1]
147         if logx == False:
148             x = numpy.linspace(*xranges)
149         else:
150             m,M,n = xranges
151             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
152         if logy == False:
153             y = numpy.linspace(*yranges)
154         else:
155             m,M,n = yranges
156             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
157         X, Y = pylab.meshgrid(x,y)
158         C = numpy.zeros((len(y)-1, len(x)-1))
159         for i,xi in enumerate(x[:-1]):
160             for j,yi in enumerate(y[:-1]):
161                 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
162                 params = (xi,yi)
163                 r = self._residual(params)
164                 C[j,i] = numpy.log(r) # better resolution in valleys
165                 if MEM_DEBUG == True:
166                     log().debug('RSS: %d KB' % rss())
167         C = numpy.nan_to_num(C) # NaN -> 0
168         fid = file("fit_force_histograms-XYC.pkl", "wb")
169         pickle.dump([X,Y,C], fid)
170         fid.close()
171         # read in with
172         # import pickle
173         # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
174         # ...
175         FIGURE.clear()
176         axes = FIGURE.add_subplot(111)
177         if logx == True:
178             axes.set_xscale('log')
179         if logy == True:
180             axes.set_yscale('log')
181         if contour == True:
182             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
183             # [:-1,:-1] to strip dummy last row & column from X&Y.
184         else: # pseudocolor plot
185             p = axes.pcolor(X, Y, C)
186             axes.autoscale_view(tight=True)
187         FIGURE.colorbar(p)
188         FIGURE.savefig("figure.png")
189
190     def _plot_residual_comparison(self, expeiment_hist, theory_hist,
191                                   residual, title, filename):
192         FIGURE.clear()
193         p = pylab.plot(experiment_hist.bin_edges[:-1],
194                        experiment_hist.probabilities, 'r-',
195                        theory_hist.bin_edges[:-1],
196                        theory_hist.probabilities, 'b-')
197         pylab.title(title)
198         FIGURE.savefig(filename)
199
200
201 def parse_param_ranges_string(string):
202     """Parse parameter range stings.
203
204     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
205       ->
206     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
207
208     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
209     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
210     """
211     ranges = []
212     for range_string in string.split("],["):
213         range_number_strings = range_string.strip("[]").split(",")
214         ranges.append([float(x) for x in range_number_strings])
215     return ranges
216
217
218 def main():
219     import optparse
220     usage = "%prog [options] velocity_file"
221
222     parser = optparse.OptionParser(usage)
223     parser.add_option("-f","--param-format", dest="param_format",
224                       metavar="FORMAT",
225                       help="Convert params to sawsim options (%default).",
226                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
227     parser.add_option("-p","--initial-params", dest="initial_params",
228                       metavar="PARAMS",
229                       help="Initial params for fitting (%default).",
230                       default='3.3e-4,0.25e-9')
231     parser.add_option("-r","--param-range", dest="param_range",
232                       metavar="PARAMS",
233                       help="Param range for plotting (%default).",
234                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
235     parser.add_option("-N", "--number-of-runs", dest="N",
236                       metavar="INT", type='int',
237                       help="Number of sawsim runs at each point in parameter space (%default).",
238                       default=400)
239     parser.add_option("-m", "--manager", dest="manager",
240                       metavar="STRING",
241                       help="Job manager name (one of %s) (%%default)."
242                       % (', '.join(MANAGERS)),
243                       default=MANAGERS[0])
244     parser.add_option("--logx", dest="logx",
245                       help="Use a log scale for the x range.",
246                       default=False, action="store_true")
247     parser.add_option("--logy", dest="logy",
248                       help="Use a log scale for the y range.",
249                       default=False, action="store_true")
250     parser.add_option("-d","--cache-dir", dest="cache_dir",
251                       metavar="STRING",
252                       help="Cache directory for sawsim unfolding forces (%default).",
253                       default=sawsim_histogram.CACHE_DIR)
254     parser.add_option("-R","--residual", dest="residual",
255                       metavar="STRING",
256                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
257                       default='jensen-shannon')
258     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
259                       help="Generate residual difference plots for each point in the plot range.",
260                       default=False, action="store_true")
261     parser.add_option("-c","--contour-plot", dest="contour_plot",
262                       help="Select contour plot (vs. the default pseudocolor plot).",
263                       default=False, action="store_true")
264     parser.add_option("-C","--use_cache", dest="use_cache",
265                       help="Use cached simulations if they exist (vs. running new simulations) (%default)",
266                       default=False, action="store_true")
267     options,args = parser.parse_args()
268     initial_params = [float(p) for p in options.initial_params.split(",")]
269     param_ranges = parse_param_ranges_string(options.param_range)
270     velocity_file = args[0]
271
272     sawsim_histogram.CACHE_DIR = options.cache_dir
273
274     hm = HistogramMatcher(
275         file(velocity_file, 'r'), param_format_string=options.param_format,
276         N=options.N, residual_type=options.residual,
277         plot=options.plot_residuals, use_cache=options.use_cache)
278     #hm.fit(initial_params)
279     hm.plot(param_ranges, logx=options.logx, logy=options.logy,
280             contour=options.contour_plot)
281
282
283 if __name__ == "__main__":
284     main()