Add --manager option to velocity_dependant_scan.py.
[sawsim.git] / pysawsim / velocity_dependant_scan.py
1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20 import os # HACK, for getpid()
21 import os.path
22 import pickle
23 import shutil
24 import sys
25
26 import matplotlib
27 import numpy
28 import pylab
29 from scipy.optimize import leastsq
30
31 from . import log
32 from .histogram import Histogram
33 from .manager import MANAGERS, get_manager
34 from .sawsim_histogram import SawsimHistogram
35
36
37 matplotlib.use('Agg')  # select backend that doesn't require X Windows
38 FIGURE = pylab.figure()  # avoid memory problems.
39 """`pylab` keeps internal references to all created figures, so share
40 a single instance.
41 """
42
43 MEM_DEBUG = False
44
45 def rss():
46     """
47     For debugging memory usage.
48
49     resident set size, the non-swapped physical memory that a task has
50     used (in kilo-bytes).
51     """
52     call = "ps -o rss= -p %d" % os.getpid()
53     status,stdout,stderr = invoke(call)
54     return int(stdout)
55
56
57 class HistogramMatcher (object):
58     """Compare experimental velocity dependent histograms to simulated data.
59
60     The main entry points are `fit()` and `plot()`.
61     """
62     def __init__(self, velocity_stream, param_format_string, N=400,
63                  residual_type='jensen-shannon', plot=True, use_cache=False):
64         self.experiment_histograms = self.read_force_histograms(velocity_stream)
65         self.param_format_string = param_format_string
66         self.residual_type = residual_type
67         self.N = N
68         self.plot = plot
69         self.sawsim_histogram = SawsimHistogram(use_cache=self.use_cache)
70
71     def read_force_histograms(self, stream):
72         """
73         v_file format:
74         # comment and blank lines ignored
75         <velocity in m/s><whitespace><path to histogram file>
76         ...
77
78         e.g.
79
80         5e-7    histA
81         1e-6    histB
82         """
83         histograms = {}
84         v_file_dir = os.path.dirname(v_file)
85         for line in strem.readlines():
86             line = line.strip()
87             if len(line) == 0 or line[0] == "#":
88                 continue # ignore blank lines and comments
89             v,h_file = line.split()
90             h = Histogram()
91             h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
92             histograms[v] = h
93         return histograms
94
95     def param_string(self, params, velocity):
96         """Generate a string of options to pass to `sawsim`.
97         """
98         return '%s -v %g' % (
99             self.param_format_string % tuple(params), velocity)
100
101     def get_all_unfolding_data(self, dirname, velocity_string):
102         datafile = os.path.join(dirname, "data_" + velocity_string)
103         return numpy.fromfile(datafile, sep=" ")
104
105         sawsim_histograms = {}
106         for velocity in velocities:
107             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
108             bin_edges = histograms[velocity].bin_edges
109             h = Histogram()
110             h.from_data(unfolding_forces, bin_edges)
111             sawsim_histograms[velocity] = h
112             sawsim_histograms[velocity].normalize()
113         return sawsim_histograms
114
115     def _residual(self, params):
116         residual = 0
117         for velocity,experiment_hist in self.experiment_histograms.iteritems():
118             sawsim_hist = self.sawsim_histogram(
119                 param_string=self.param_string(params, velocity), N=self.N,
120                 bin_edges=experiment_hist.bin_edges)
121             r = experiment_histogram.residual(
122                 sawsim_hist, type=self.residual_type)
123             residual += r
124             if self.plot == True:
125                 title = ", ".join(["%g" % p for p in (params+[velocity])])
126                 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
127                 self._plot_residual_comparison(
128                     experiment_hist, sawsim_hist, residual=r,
129                     title=title, filename=filename)
130         log().debug('residual: %g' % residual)
131         return residual
132
133     def fit(self, initial_params):
134         p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
135                                       full_output=True, maxfev=1000)
136         _log = log()
137         _log.info('Fitted params: %s' % p)
138         _log.info('Covariance mx: %s' % cov)
139         _log.info('Info: %s' % info)
140         _log.info('Mesg: %s' % mesg)
141         return p
142
143     def plot(self, param_ranges, logx=False, logy=False, contour=False):
144         xranges = param_ranges[0]
145         yranges = param_ranges[1]
146         if logx == False:
147             x = numpy.linspace(*xranges)
148         else:
149             m,M,n = xranges
150             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
151         if logy == False:
152             y = numpy.linspace(*yranges)
153         else:
154             m,M,n = yranges
155             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
156         X, Y = pylab.meshgrid(x,y)
157         C = numpy.zeros((len(y)-1, len(x)-1))
158         for i,xi in enumerate(x[:-1]):
159             for j,yi in enumerate(y[:-1]):
160                 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
161                 params = (xi,yi)
162                 r = self._residual(params)
163                 C[j,i] = numpy.log(r) # better resolution in valleys
164                 if MEM_DEBUG == True:
165                     log().debug('RSS: %d KB' % rss())
166         C = numpy.nan_to_num(C) # NaN -> 0
167         fid = file("fit_force_histograms-XYC.pkl", "wb")
168         pickle.dump([X,Y,C], fid)
169         fid.close()
170         # read in with
171         # import pickle
172         # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
173         # ...
174         FIGURE.clear()
175         axes = FIGURE.add_subplot(111)
176         if logx == True:
177             axes.set_xscale('log')
178         if logy == True:
179             axes.set_yscale('log')
180         if contour == True:
181             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
182             # [:-1,:-1] to strip dummy last row & column from X&Y.
183         else: # pseudocolor plot
184             p = axes.pcolor(X, Y, C)
185             axes.autoscale_view(tight=True)
186         FIGURE.colorbar(p)
187         FIGURE.savefig("figure.png")
188
189     def _plot_residual_comparison(self, expeiment_hist, theory_hist,
190                                   residual, title, filename):
191         FIGURE.clear()
192         p = pylab.plot(experiment_hist.bin_edges[:-1],
193                        experiment_hist.probabilities, 'r-',
194                        theory_hist.bin_edges[:-1],
195                        theory_hist.probabilities, 'b-')
196         pylab.title(title)
197         FIGURE.savefig(filename)
198
199
200 def parse_param_ranges_string(string):
201     """Parse parameter range stings.
202
203     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
204       ->
205     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
206
207     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
208     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
209     """
210     ranges = []
211     for range_string in string.split("],["):
212         range_number_strings = range_string.strip("[]").split(",")
213         ranges.append([float(x) for x in range_number_strings])
214     return ranges
215
216
217 def main():
218     import optparse
219     usage = "%prog [options] velocity_file"
220
221     parser = optparse.OptionParser(usage)
222     parser.add_option("-f","--param-format", dest="param_format",
223                       metavar="FORMAT",
224                       help="Convert params to sawsim options (%default).",
225                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
226     parser.add_option("-p","--initial-params", dest="initial_params",
227                       metavar="PARAMS",
228                       help="Initial params for fitting (%default).",
229                       default='3.3e-4,0.25e-9')
230     parser.add_option("-r","--param-range", dest="param_range",
231                       metavar="PARAMS",
232                       help="Param range for plotting (%default).",
233                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
234     parser.add_option("-N", "--number-of-runs", dest="N",
235                       metavar="INT", type='int',
236                       help="Number of sawsim runs at each point in parameter space (%default).",
237                       default=400)
238     parser.add_option("-m", "--manager", dest="manager",
239                       metavar="STRING",
240                       help="Job manager name (one of %s) (%%default)."
241                       % (', '.join(MANAGERS)),
242                       default=MANAGERS[0])
243     parser.add_option("--logx", dest="logx",
244                       help="Use a log scale for the x range.",
245                       default=False, action="store_true")
246     parser.add_option("--logy", dest="logy",
247                       help="Use a log scale for the y range.",
248                       default=False, action="store_true")
249     parser.add_option("-d","--cache-dir", dest="cache_dir",
250                       metavar="STRING",
251                       help="Cache directory for sawsim unfolding forces (%default).",
252                       default=sawsim_histogram.CACHE_DIR)
253     parser.add_option("-R","--residual", dest="residual",
254                       metavar="STRING",
255                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
256                       default='jensen-shannon')
257     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
258                       help="Generate residual difference plots for each point in the plot range.",
259                       default=False, action="store_true")
260     parser.add_option("-c","--contour-plot", dest="contour_plot",
261                       help="Select contour plot (vs. the default pseudocolor plot).",
262                       default=False, action="store_true")
263     parser.add_option("-C","--use_cache", dest="use_cache",
264                       help="Use cached simulations if they exist (vs. running new simulations) (%default)",
265                       default=False, action="store_true")
266     options,args = parser.parse_args()
267     initial_params = [float(p) for p in options.initial_params.split(",")]
268     param_ranges = parse_param_ranges_string(options.param_range)
269     velocity_file = args[0]
270
271     sawsim_histogram.CACHE_DIR = options.cache_dir
272
273     hm = HistogramMatcher(
274         file(velocity_file, 'r'), param_format_string=options.param_format,
275         N=options.N, residual_type=options.residual,
276         plot=options.plot_residuals, use_cache=options.use_cache)
277     #hm.fit(initial_params)
278     hm.plot(param_ranges, logx=options.logx, logy=options.logy,
279             contour=options.contour_plot)
280
281
282 if __name__ == "__main__":
283     main()