pysawsim/velocity_dependant_scan.py

   1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
   2 #
   3 # This program is free software: you can redistribute it and/or modify
   4 # it under the terms of the GNU General Public License as published by
   5 # the Free Software Foundation, either version 3 of the License, or
   6 # (at your option) any later version.
   7 #
   8 # This program is distributed in the hope that it will be useful,
   9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 # GNU General Public License for more details.
  12 #
  13 # You should have received a copy of the GNU General Public License
  14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15 #
  16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
  17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
  18 # Philadelphia PA 19104, USA.
  19
  20 import os # HACK, for getpid()
  21 import os.path
  22 import pickle
  23 import shutil
  24 import sys
  25
  26 import matplotlib
  27 import numpy
  28 import pylab
  29 from scipy.optimize import leastsq
  30
  31 from . import log
  32 from .histogram import Histogram
  33 from .sawsim_histogram import SawsimHistogram
  34
  35
  36 matplotlib.use('Agg')  # select backend that doesn't require X Windows
  37 FIGURE = pylab.figure()  # avoid memory problems.
  38 """`pylab` keeps internal references to all created figures, so share
  39 a single instance.
  40 """
  41
  42 MEM_DEBUG = False
  43
  44 def rss():
  45     """
  46     For debugging memory usage.
  47
  48     resident set size, the non-swapped physical memory that a task has
  49     used (in kilo-bytes).
  50     """
  51     call = "ps -o rss= -p %d" % os.getpid()
  52     status,stdout,stderr = invoke(call)
  53     return int(stdout)
  54
  55
  56 class HistogramMatcher (object):
  57     """Compare experimental velocity dependent histograms to simulated data.
  58
  59     The main entry points are `fit()` and `plot()`.
  60     """
  61     def __init__(self, velocity_stream, param_format_string, N=400,
  62                  residual_type='jensen-shannon', plot=True, use_cache=False):
  63         self.experiment_histograms = self.read_force_histograms(velocity_stream)
  64         self.param_format_string = param_format_string
  65         self.residual_type = residual_type
  66         self.N = N
  67         self.plot = plot
  68         self.sawsim_histogram = SawsimHistogram(use_cache=self.use_cache)
  69
  70     def read_force_histograms(self, stream):
  71         """
  72         v_file format:
  73         # comment and blank lines ignored
  74         <velocity in m/s><whitespace><path to histogram file>
  75         ...
  76
  77         e.g.
  78
  79         5e-7    histA
  80         1e-6    histB
  81         """
  82         histograms = {}
  83         v_file_dir = os.path.dirname(v_file)
  84         for line in strem.readlines():
  85             line = line.strip()
  86             if len(line) == 0 or line[0] == "#":
  87                 continue # ignore blank lines and comments
  88             v,h_file = line.split()
  89             h = Histogram()
  90             h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
  91             histograms[v] = h
  92         return histograms
  93
  94     def param_string(self, params, velocity):
  95         """Generate a string of options to pass to `sawsim`.
  96         """
  97         return '%s -v %g' % (
  98             self.param_format_string % tuple(params), velocity)
  99
 100     def get_all_unfolding_data(self, dirname, velocity_string):
 101         datafile = os.path.join(dirname, "data_" + velocity_string)
 102         return numpy.fromfile(datafile, sep=" ")
 103
 104         sawsim_histograms = {}
 105         for velocity in velocities:
 106             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
 107             bin_edges = histograms[velocity].bin_edges
 108             h = Histogram()
 109             h.from_data(unfolding_forces, bin_edges)
 110             sawsim_histograms[velocity] = h
 111             sawsim_histograms[velocity].normalize()
 112         return sawsim_histograms
 113
 114     def _residual(self, params):
 115         residual = 0
 116         for velocity,experiment_hist in self.experiment_histograms.iteritems():
 117             sawsim_hist = self.sawsim_histogram(
 118                 param_string=self.param_string(params, velocity), N=self.N,
 119                 bin_edges=experiment_hist.bin_edges)
 120             r = experiment_histogram.residual(
 121                 sawsim_hist, type=self.residual_type)
 122             residual += r
 123             if self.plot == True:
 124                 title = ", ".join(["%g" % p for p in (params+[velocity])])
 125                 filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
 126                 self._plot_residual_comparison(
 127                     experiment_hist, sawsim_hist, residual=r,
 128                     title=title, filename=filename)
 129         log().debug('residual: %g' % residual)
 130         return residual
 131
 132     def fit(self, initial_params):
 133         p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
 134                                       full_output=True, maxfev=1000)
 135         _log = log()
 136         _log.info('Fitted params: %s' % p)
 137         _log.info('Covariance mx: %s' % cov)
 138         _log.info('Info: %s' % info)
 139         _log.info('Mesg: %s' % mesg)
 140         return p
 141
 142     def plot(self, param_ranges, logx=False, logy=False, contour=False):
 143         xranges = param_ranges[0]
 144         yranges = param_ranges[1]
 145         if logx == False:
 146             x = numpy.linspace(*xranges)
 147         else:
 148             m,M,n = xranges
 149             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
 150         if logy == False:
 151             y = numpy.linspace(*yranges)
 152         else:
 153             m,M,n = yranges
 154             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
 155         X, Y = pylab.meshgrid(x,y)
 156         C = numpy.zeros((len(y)-1, len(x)-1))
 157         for i,xi in enumerate(x[:-1]):
 158             for j,yi in enumerate(y[:-1]):
 159                 print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
 160                 params = (xi,yi)
 161                 r = self._residual(params)
 162                 C[j,i] = numpy.log(r) # better resolution in valleys
 163                 if MEM_DEBUG == True:
 164                     log().debug('RSS: %d KB' % rss())
 165         C = numpy.nan_to_num(C) # NaN -> 0
 166         fid = file("fit_force_histograms-XYC.pkl", "wb")
 167         pickle.dump([X,Y,C], fid)
 168         fid.close()
 169         # read in with
 170         # import pickle
 171         # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb"))
 172         # ...
 173         FIGURE.clear()
 174         axes = FIGURE.add_subplot(111)
 175         if logx == True:
 176             axes.set_xscale('log')
 177         if logy == True:
 178             axes.set_yscale('log')
 179         if contour == True:
 180             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
 181             # [:-1,:-1] to strip dummy last row & column from X&Y.
 182         else: # pseudocolor plot
 183             p = axes.pcolor(X, Y, C)
 184             axes.autoscale_view(tight=True)
 185         FIGURE.colorbar(p)
 186         FIGURE.savefig("figure.png")
 187
 188     def _plot_residual_comparison(self, expeiment_hist, theory_hist,
 189                                   residual, title, filename):
 190         FIGURE.clear()
 191         p = pylab.plot(experiment_hist.bin_edges[:-1],
 192                        experiment_hist.probabilities, 'r-',
 193                        theory_hist.bin_edges[:-1],
 194                        theory_hist.probabilities, 'b-')
 195         pylab.title(title)
 196         FIGURE.savefig(filename)
 197
 198
 199 def parse_param_ranges_string(string):
 200     """Parse parameter range stings.
 201
 202     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
 203       ->
 204     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
 205
 206     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
 207     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
 208     """
 209     ranges = []
 210     for range_string in string.split("],["):
 211         range_number_strings = range_string.strip("[]").split(",")
 212         ranges.append([float(x) for x in range_number_strings])
 213     return ranges
 214
 215
 216 def main():
 217     import optparse
 218     usage = "%prog [options] velocity_file"
 219
 220     parser = optparse.OptionParser(usage)
 221     parser.add_option("-f","--param-format", dest="param_format",
 222                       metavar="FORMAT",
 223                       help="Convert params to sawsim options (%default).",
 224                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
 225     parser.add_option("-p","--initial-params", dest="initial_params",
 226                       metavar="PARAMS",
 227                       help="Initial params for fitting (%default).",
 228                       default='3.3e-4,0.25e-9')
 229     parser.add_option("-r","--param-range", dest="param_range",
 230                       metavar="PARAMS",
 231                       help="Param range for plotting (%default).",
 232                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
 233     parser.add_option("--N", "--number-of-runs", dest="N",
 234                       metavar="INT", type='int',
 235                       help="Number of sawsim runs at each point in parameter space (%default).",
 236                       default=400)
 237     parser.add_option("--logx", dest="logx",
 238                       help="Use a log scale for the x range.",
 239                       default=False, action="store_true")
 240     parser.add_option("--logy", dest="logy",
 241                       help="Use a log scale for the y range.",
 242                       default=False, action="store_true")
 243     parser.add_option("-d","--cache-dir", dest="cache_dir",
 244                       metavar="STRING",
 245                       help="Cache directory for sawsim unfolding forces (%default).",
 246                       default=sawsim_histogram.CACHE_DIR)
 247     parser.add_option("-R","--residual", dest="residual",
 248                       metavar="STRING",
 249                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
 250                       default='jensen-shannon')
 251     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
 252                       help="Generate residual difference plots for each point in the plot range.",
 253                       default=False, action="store_true")
 254     parser.add_option("-c","--contour-plot", dest="contour_plot",
 255                       help="Select contour plot (vs. the default pseudocolor plot).",
 256                       default=False, action="store_true")
 257     parser.add_option("-C","--use_cache", dest="use_cache",
 258                       help="Use cached simulations if they exist (vs. running new simulations) (%default)",
 259                       default=False, action="store_true")
 260     options,args = parser.parse_args()
 261     initial_params = [float(p) for p in options.initial_params.split(",")]
 262     param_ranges = parse_param_ranges_string(options.param_range)
 263     velocity_file = args[0]
 264
 265     sawsim_histogram.CACHE_DIR = options.cache_dir
 266
 267     hm = HistogramMatcher(
 268         file(velocity_file, 'r'), param_format_string=options.param_format,
 269         N=options.N, residual_type=options.residual,
 270         plot=options.plot_residuals, use_cache=options.use_cache)
 271     #hm.fit(initial_params)
 272     hm.plot(param_ranges, logx=options.logx, logy=options.logy,
 273             contour=options.contour_plot)
 274
 275
 276 if __name__ == "__main__":
 277     main()