Add link to Python wiki's parallel processing page.
[sawsim.git] / pysawsim / velocity_dependant_scan.py
1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License
14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 #
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
19
20 import os # HACK, for getpid()
21 import os.path
22 import pickle
23 import shutil
24 import sys
25
26 import matplotlib
27 matplotlib.use('Agg')  # select backend that doesn't require X Windows
28 import numpy
29 import pylab
30 from scipy.optimize import leastsq
31
32 from . import log
33 from .histogram import Histogram
34 from .manager import MANAGERS, get_manager
35 from . import sawsim_histogram
36
37
38 FIGURE = pylab.figure()  # avoid memory problems.
39 """`pylab` keeps internal references to all created figures, so share
40 a single instance.
41 """
42
43 MEM_DEBUG = False
44
45 def rss():
46     """
47     For debugging memory usage.
48
49     resident set size, the non-swapped physical memory that a task has
50     used (in kilo-bytes).
51     """
52     call = "ps -o rss= -p %d" % os.getpid()
53     status,stdout,stderr = invoke(call)
54     return int(stdout)
55
56
57 class HistogramMatcher (object):
58     """Compare experimental velocity dependent histograms to simulated data.
59
60     The main entry points are `fit()` and `plot()`.
61     """
62     def __init__(self, velocity_stream, param_format_string, N=400,
63                  manager=None, residual_type='jensen-shannon', plot=True,
64                  use_cache=False, clean_cache=False):
65         self.experiment_histograms = self.read_force_histograms(velocity_stream)
66         self.param_format_string = param_format_string
67         self.residual_type = residual_type
68         self.N = N
69         self._manager = manager
70         self.plot = plot
71         self.sawsim_histogram = sawsim_histogram.SawsimHistogram(
72             use_cache=use_cache, clean_cach=clean_cache)
73
74     def read_force_histograms(self, stream):
75         """
76         v_file format:
77         # comment and blank lines ignored
78         <velocity in m/s><whitespace><path to histogram file>
79         ...
80
81         e.g.
82
83         5e-7    histA
84         1e-6    histB
85         """
86         histograms = {}
87         v_file_dir = os.path.dirname(v_file)
88         for line in strem.readlines():
89             line = line.strip()
90             if len(line) == 0 or line[0] == "#":
91                 continue # ignore blank lines and comments
92             v,h_file = line.split()
93             h = Histogram()
94             h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
95             histograms[v] = h
96         return histograms
97
98     def param_string(self, params, velocity):
99         """Generate a string of options to pass to `sawsim`.
100         """
101         return '%s -v %g' % (
102             self.param_format_string % tuple(params), velocity)
103
104     def get_all_unfolding_data(self, dirname, velocity_string):
105         datafile = os.path.join(dirname, "data_" + velocity_string)
106         return numpy.fromfile(datafile, sep=" ")
107
108         sawsim_histograms = {}
109         for velocity in velocities:
110             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
111             bin_edges = histograms[velocity].bin_edges
112             h = Histogram()
113             h.from_data(unfolding_forces, bin_edges)
114             sawsim_histograms[velocity] = h
115             sawsim_histograms[velocity].normalize()
116         return sawsim_histograms
117
118     def _residual(self, params):
119         residual = 0
120         for velocity,experiment_hist in self.experiment_histograms.iteritems():
121             sawsim_hist = self.sawsim_histogram(
122                 param_string=self.param_string(params, velocity), N=self.N,
123                 bin_edges=experiment_hist.bin_edges, manager=self._manager)
124             r = experiment_histogram.residual(
125                 sawsim_hist, type=self.residual_type)
126             residual += r
127             if self.plot == True:
128                 title = ", ".join(["%g" % p for p in (params+[velocity])])
129                 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
130                 self._plot_residual_comparison(
131                     experiment_hist, sawsim_hist, residual=r,
132                     title=title, filename=filename)
133         log().debug('residual: %g' % residual)
134         return residual
135
136     def fit(self, initial_params):
137         p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
138                                       full_output=True, maxfev=1000)
139         _log = log()
140         _log.info('Fitted params: %s' % p)
141         _log.info('Covariance mx: %s' % cov)
142         _log.info('Info: %s' % info)
143         _log.info('Mesg: %s' % mesg)
144         return p
145
146     def plot(self, param_ranges, logx=False, logy=False, contour=False):
147         xranges = param_ranges[0]
148         yranges = param_ranges[1]
149         if logx == False:
150             x = numpy.linspace(*xranges)
151         else:
152             m,M,n = xranges
153             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
154         if logy == False:
155             y = numpy.linspace(*yranges)
156         else:
157             m,M,n = yranges
158             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
159         X, Y = pylab.meshgrid(x,y)
160         C = numpy.zeros((len(y)-1, len(x)-1))
161         for i,xi in enumerate(x[:-1]):
162             for j,yi in enumerate(y[:-1]):
163                 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
164                 params = (xi,yi)
165                 r = self._residual(params)
166                 C[j,i] = numpy.log(r) # better resolution in valleys
167                 if MEM_DEBUG == True:
168                     log().debug('RSS: %d KB' % rss())
169         C = numpy.nan_to_num(C) # NaN -> 0
170         fid = file("fit_force_histograms-XYC.pkl", "wb")
171         pickle.dump([X,Y,C], fid)
172         fid.close()
173         # read in with
174         # import pickle
175         # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
176         # ...
177         FIGURE.clear()
178         axes = FIGURE.add_subplot(111)
179         if logx == True:
180             axes.set_xscale('log')
181         if logy == True:
182             axes.set_yscale('log')
183         if contour == True:
184             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
185             # [:-1,:-1] to strip dummy last row & column from X&Y.
186         else: # pseudocolor plot
187             p = axes.pcolor(X, Y, C)
188             axes.autoscale_view(tight=True)
189         FIGURE.colorbar(p)
190         FIGURE.savefig("figure.png")
191
192     def _plot_residual_comparison(self, expeiment_hist, theory_hist,
193                                   residual, title, filename):
194         FIGURE.clear()
195         p = pylab.plot(experiment_hist.bin_edges[:-1],
196                        experiment_hist.probabilities, 'r-',
197                        theory_hist.bin_edges[:-1],
198                        theory_hist.probabilities, 'b-')
199         pylab.title(title)
200         FIGURE.savefig(filename)
201
202
203 def parse_param_ranges_string(string):
204     """Parse parameter range stings.
205
206     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
207       ->
208     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
209
210     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
211     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
212     """
213     ranges = []
214     for range_string in string.split("],["):
215         range_number_strings = range_string.strip("[]").split(",")
216         ranges.append([float(x) for x in range_number_strings])
217     return ranges
218
219
220 def main():
221     import optparse
222     usage = "%prog [options] velocity_file"
223
224     parser = optparse.OptionParser(usage)
225     parser.add_option("-s","--sawsim", dest="sawsim",
226                       metavar="PATH",
227                       help="Set sawsim binary (%default).",
228                       default=sawsim_histogram.SAWSIM)
229     parser.add_option("-f","--param-format", dest="param_format",
230                       metavar="FORMAT",
231                       help="Convert params to sawsim options (%default).",
232                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
233     parser.add_option("-p","--initial-params", dest="initial_params",
234                       metavar="PARAMS",
235                       help="Initial params for fitting (%default).",
236                       default='3.3e-4,0.25e-9')
237     parser.add_option("-r","--param-range", dest="param_range",
238                       metavar="PARAMS",
239                       help="Param range for plotting (%default).",
240                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
241     parser.add_option("-N", "--number-of-runs", dest="N",
242                       metavar="INT", type='int',
243                       help="Number of sawsim runs at each point in parameter space (%default).",
244                       default=400)
245     parser.add_option("-m", "--manager", dest="manager",
246                       metavar="STRING",
247                       help="Job manager name (one of %s) (%%default)."
248                       % (', '.join(MANAGERS)),
249                       default=MANAGERS[0])
250     parser.add_option("-C","--use-cache", dest="use_cache",
251                       help="Use cached simulations if they exist (vs. running new simulations) (%default)",
252                       default=False, action="store_true")
253     parser.add_option("--clean-cache", dest="clean_cache",
254                       help="Remove previously cached simulations if they exist (%default)",
255                       default=False, action="store_true")
256     parser.add_option("-d","--cache-dir", dest="cache_dir",
257                       metavar="STRING",
258                       help="Cache directory for sawsim unfolding forces (%default).",
259                       default=sawsim_histogram.CACHE_DIR)
260     parser.add_option("-R","--residual", dest="residual",
261                       metavar="STRING",
262                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
263                       default='jensen-shannon')
264     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
265                       help="Generate residual difference plots for each point in the plot range.",
266                       default=False, action="store_true")
267     parser.add_option("--logx", dest="logx",
268                       help="Use a log scale for the x range.",
269                       default=False, action="store_true")
270     parser.add_option("--logy", dest="logy",
271                       help="Use a log scale for the y range.",
272                       default=False, action="store_true")
273     parser.add_option("-c","--contour-plot", dest="contour_plot",
274                       help="Select contour plot (vs. the default pseudocolor plot).",
275                       default=False, action="store_true")
276     options,args = parser.parse_args()
277     initial_params = [float(p) for p in options.initial_params.split(",")]
278     param_ranges = parse_param_ranges_string(options.param_range)
279     velocity_file = args[0]
280
281     sawsim_histogram.SAWSIM = options.sawsim
282     sawsim_histogram.CACHE_DIR = options.cache_dir
283     manager = get_manager(options.manager)()
284     try:
285         hm = HistogramMatcher(
286             file(velocity_file, 'r'), param_format_string=options.param_format,
287             N=options.N, manager=manager, residual_type=options.residual,
288             plot=options.plot_residuals, use_cache=options.use_cache,
289             clean_cache=options.clean_cache)
290         #hm.fit(initial_params)
291         hm.plot(param_ranges, logx=options.logx, logy=options.logy,
292                 contour=options.contour_plot)
293     finally:
294         manager.teardown()