pysawsim/fit_force_histograms.py

   1 #!/usr/bin/python
   2 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16 #
  17 # The author may be contacted at <wking@drexel.edu> on the Internet, or
  18 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
  19 # Philadelphia PA 19104, USA.
  20
  21 import os # HACK, for getpid()
  22 import os.path
  23 import pickle
  24 import shutil
  25 import sys
  26
  27
  28 import matplotlib
  29 import numpy
  30 import pylab
  31 from scipy.optimize import leastsq
  32
  33
  34 matplotlib.use('Agg')  # select backend that doesn't require X Windows
  35 FIGURE = pylab.figure()  # avoid memory problems.
  36 """`pylab` keeps internal references to all created figures, so share
  37 a single instance.
  38 """
  39
  40 MEM_DEBUG = False
  41
  42 def rss():
  43     """
  44     For debugging memory usage.
  45
  46     resident set size, the non-swapped physical memory that a task has
  47     used (in kiloBytes).
  48     """
  49     call = "ps -o rss= -p %d" % os.getpid()
  50     status,stdout,stderr = invoke(call)
  51     return int(stdout)
  52
  53
  54 class HistogramMatcher (object):
  55     def __init__(self, velocity_file, param_format_string, dir_prefix="v_dep"):
  56         self.experiment_histograms = self.read_force_histograms(velocity_file)
  57         self.param_format_string = param_format_string
  58         self.dir_prefix = dir_prefix
  59
  60     def read_force_histograms(self, v_file):
  61         """
  62         v_file format:
  63         # comment and blank lines ignored
  64         <velocity in m/s><TAB><path to histogram file>
  65         ...
  66
  67         e.g.
  68
  69         5e-7    histA
  70         1e-6    histB
  71         """
  72         histograms = {}
  73         v_file_dir = os.path.dirname(v_file)
  74         for line in file(v_file, "r").readlines():
  75             line = line.strip()
  76             if len(line) == 0 or line[0] == "#":
  77                 continue # ignore blank lines and comments
  78             v,h_file = line.split("\t")
  79             h = Histogram()
  80             h.from_file(os.path.join(v_file_dir, h_file))
  81             histograms[v] = h
  82             histograms[v].normalize()
  83         return histograms
  84
  85     def dirname(self, params):
  86         if not hasattr(self, "_tag_i"):
  87             self._tag_i = 0
  88         self._tag_i += 1
  89         return "%s-%d" % (self.dir_prefix, self._tag_i)
  90
  91     def generate_sawsim_histograms(self, params, histograms,
  92                                    run_repeat_simulation=True):
  93         velocities = histograms.keys()
  94         dirname = self.dirname(params)
  95         cmd = "run_vel_dep %s %s %s" \
  96               % (dirname,
  97                  ",".join([str(v) for v in velocities]),
  98                  self.param_string(params))
  99         if os.path.exists(dirname) and run_repeat_simulation == True:
 100             print 'repeating simulation'
 101             shutil.rmtree(dirname)
 102         if not os.path.exists(dirname):
 103             invoke(cmd)
 104         sawsim_histograms = {}
 105         for velocity in velocities:
 106             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
 107             bin_edges = histograms[velocity].bin_edges
 108             h = Histogram()
 109             h.from_data(unfolding_forces, bin_edges)
 110             sawsim_histograms[velocity] = h
 111             sawsim_histograms[velocity].normalize()
 112         return sawsim_histograms
 113
 114     def param_string(self, params):
 115         """
 116         Generate a string of non-velocity options to pass to sawsim
 117         for a given parameter list.
 118         """
 119         return self.param_format_string % (params[0], params[1])#tuple(params)
 120
 121     def get_all_unfolding_data(self, dirname, velocity_string):
 122         datafile = os.path.join(dirname, "data_" + velocity_string)
 123         return numpy.fromfile(datafile, sep=" ")
 124
 125     def residual(self, params, type='bin-height', plot=False,
 126                  run_repeat_simulation=True):
 127         sawsim_histograms = self.generate_sawsim_histograms( \
 128             params, self.experiment_histograms,
 129             run_repeat_simulation=run_repeat_simulation)
 130         assert sorted(sawsim_histograms.keys()) == sorted(self.experiment_histograms.keys())
 131         residual = 0
 132         for velocity,experiment_histogram in self.experiment_histograms.items():
 133             sawsim_histogram = sawsim_histograms[velocity]
 134             title = None
 135             if plot == True:
 136                 title = ", ".join(["%g" % p for p in params])
 137             r = experiment_histogram.residual(sawsim_histogram, type=type)
 138             residual += r
 139             if plot == True:
 140                 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
 141                 self.plot_residual_comparison(
 142                     experiment_histogram, sawsim_histogram, residual=r,
 143                     title=title, filename=filename)
 144         print "residual", residual
 145         del(sawsim_histograms)
 146         return residual
 147
 148     def fit(self, initial_params, verbose=False):
 149         p,cov,info,mesg,ier = leastsq(self.residual, initial_params,
 150                                       full_output=True, maxfev=1000)
 151         if verbose == True:
 152             print "Fitted params:",p
 153             print "Covariance mx:",cov
 154             print "Info:", info
 155             print "mesg:", mesg
 156             if ier == 1:
 157                 print "Solution converged"
 158             else:
 159                 print "Solution did not converge"
 160         return p
 161
 162     def plot(self, param_ranges, logx=False, logy=False,
 163              residual_type='bin-height',
 164              plot_residuals=False, contour=False,
 165              run_repeat_simulations=True):
 166         xranges = param_ranges[0]
 167         yranges = param_ranges[1]
 168         if logx == False:
 169             x = numpy.linspace(*xranges)
 170         else:
 171             m,M,n = xranges
 172             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
 173         if logy == False:
 174             y = numpy.linspace(*yranges)
 175         else:
 176             m,M,n = yranges
 177             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
 178         X, Y = pylab.meshgrid(x,y)
 179         C = numpy.zeros((len(y)-1, len(x)-1))
 180         for i,xi in enumerate(x[:-1]):
 181             for j,yi in enumerate(y[:-1]):
 182                 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
 183                 params = (xi,yi)
 184                 r = self.residual(params, type=residual_type,
 185                                   plot=plot_residuals,
 186                                   run_repeat_simulation=run_repeat_simulations)
 187                 C[j,i] = numpy.log(r) # better resolution in valleys
 188                 if MEM_DEBUG == True:
 189                     print >> sys.stderr, "RSS: %d KB" % rss()
 190         C = numpy.nan_to_num(C) # NaN -> 0
 191         fid = file("fit_force_histograms-XYC.pkl", "wb")
 192         pickle.dump([X,Y,C], fid)
 193         fid.close()
 194         # read in with
 195         # import pickle
 196         # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
 197         # ...
 198         FIGURE.clear()
 199         axes = FIGURE.add_subplot(111)
 200         if logx == True:
 201             axes.set_xscale('log')
 202         if logy == True:
 203             axes.set_yscale('log')
 204         if contour == True:
 205             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
 206             # [:-1,:-1] to strip dummy last row & column from X&Y.
 207         else: # pseudocolor plot
 208             p = axes.pcolor(X, Y, C)
 209             axes.autoscale_view(tight=True)
 210         FIGURE.colorbar(p)
 211         FIGURE.savefig("figure.png")
 212
 213     def plot_residual_comparison(self, expeiment_hist, theory_hist,
 214                                  residual, title, filename):
 215         FIGURE.clear()
 216         p = pylab.plot(experiment_hist.bin_edges[:-1],
 217                        experiment_hist.probabilities, 'r-',
 218                        theory_hist.bin_edges[:-1],
 219                        theory_hist.probabilities, 'b-')
 220         pylab.title(title)
 221         FIGURE.savefig(filename)
 222
 223
 224 def parse_param_ranges_string(string):
 225     """
 226     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
 227       ->
 228     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
 229     """
 230     ranges = []
 231     for range_string in string.split("],["):
 232         range_number_strings = range_string.strip("[]").split(",")
 233         ranges.append([float(x) for x in range_number_strings])
 234     return ranges
 235
 236 def main():
 237     import optparse
 238     usage = "%prog [options] velocity_file"
 239
 240     parser = optparse.OptionParser(usage)
 241     parser.add_option("-f","--param-format", dest="param_format",
 242                       metavar="FORMAT",
 243                       help="Convert params to sawsim options (%default).",
 244                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
 245     parser.add_option("-p","--initial-params", dest="initial_params",
 246                       metavar="PARAMS",
 247                       help="Initial params for fitting (%default).",
 248                       default='3.3e-4,0.25e-9')
 249     parser.add_option("-r","--param-range", dest="param_range",
 250                       metavar="PARAMS",
 251                       help="Param range for plotting (%default).",
 252                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
 253     parser.add_option("--logx", dest="logx",
 254                       help="Use a log scale for the x range.",
 255                       default=False, action="store_true")
 256     parser.add_option("--logy", dest="logy",
 257                       help="Use a log scale for the y range.",
 258                       default=False, action="store_true")
 259     parser.add_option("-d","--dir-prefix", dest="dir_prefix",
 260                       metavar="STRING",
 261                       help="Prefix to sawsim data directories (%default).",
 262                       default='v_dep')
 263     parser.add_option("-R","--residual", dest="residual",
 264                       metavar="STRING",
 265                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
 266                       default='jensen-shannon')
 267     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
 268                       help="Generate residual difference plots for each point in the plot range.",
 269                       default=False, action="store_true")
 270     parser.add_option("-c","--contour-plot", dest="contour_plot",
 271                       help="Select contour plot (vs. the default pseudocolor plot).",
 272                       default=False, action="store_true")
 273     parser.add_option("-n","--no-repeat-simulations", dest="simulations",
 274                       help="Do not run simulations if they already exist (re-analyze a previous run's output).",
 275                       default=True, action="store_false")
 276     options,args = parser.parse_args()
 277     initial_params = [float(p) for p in options.initial_params.split(",")]
 278     param_ranges = parse_param_ranges_string(options.param_range)
 279     velocity_file = args[0]
 280
 281     hm = HistogramMatcher(velocity_file, options.param_format, options.dir_prefix)
 282     #hm.fit(initial_params, verbose=True)
 283     hm.plot(param_ranges, logx=options.logx, logy=options.logy,
 284             residual_type=options.residual,
 285             plot_residuals=options.plot_residuals,
 286             contour=options.contour_plot,
 287             run_repeat_simulations=options.simulations)
 288
 289 if __name__ == "__main__":
 290     main()