pysawsim/parameter_scan.py

   1 # Copyright (C) 2009-2010  W. Trevor King <wking@drexel.edu>
   2 #
   3 # This program is free software: you can redistribute it and/or modify
   4 # it under the terms of the GNU General Public License as published by
   5 # the Free Software Foundation, either version 3 of the License, or
   6 # (at your option) any later version.
   7 #
   8 # This program is distributed in the hope that it will be useful,
   9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 # GNU General Public License for more details.
  12 #
  13 # You should have received a copy of the GNU General Public License
  14 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15 #
  16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
  17 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
  18 # Philadelphia PA 19104, USA.
  19
  20 """Experiment vs. simulation comparison and scanning.
  21 """
  22
  23 from os import getpid  # for rss()
  24 import os.path
  25 import pickle
  26 from StringIO import StringIO
  27
  28 import matplotlib
  29 matplotlib.use('Agg')  # select backend that doesn't require X Windows
  30 import numpy
  31 import pylab
  32
  33 from . import log
  34 from .histogram import Histogram
  35 from .manager import MANAGERS, get_manager
  36 from .sawsim_histogram import sawsim_histogram
  37 from .sawsim import SawsimRunner
  38
  39
# Single module-wide figure.  The plotting routines in this module call
# `FIGURE.clear()` and redraw on it rather than creating new figures.
FIGURE = pylab.figure()  # avoid memory problems.
"""`pylab` keeps internal references to all created figures, so share
a single instance.
"""
# Sample histogram file used by the doctests in this module and embedded
# in the command-line help epilog built in `main()`.  Each
# `#HISTOGRAM: <params>` line starts a new histogram; the following
# tab-separated lines are `<bin_edge>\t<count>` pairs.
EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
# Other general comments...

#HISTOGRAM: -v 6e-7
#Force (N)\tUnfolding events
1.4e-10\t1
1.5e-10\t0
1.6e-10\t4
1.7e-10\t6
1.8e-10\t8
1.9e-10\t20
2e-10\t28
2.1e-10\t38
2.2e-10\t72
2.3e-10\t110
2.4e-10\t155
2.5e-10\t247
2.6e-10\t395
2.7e-10\t451
2.8e-10\t430
2.9e-10\t300
3e-10\t116
3.1e-10\t18
3.2e-10\t1

#HISTOGRAM: -v 8e-7
#Force (N)\tUnfolding events
8e-11\t1
9e-11\t0
1e-10\t0
1.1e-10\t1
1.2e-10\t0
1.3e-10\t0
1.4e-10\t0
1.5e-10\t3
1.6e-10\t3
1.7e-10\t4
1.8e-10\t4
1.9e-10\t13
2e-10\t29
2.1e-10\t39
2.2e-10\t60
2.3e-10\t102
2.4e-10\t154
2.5e-10\t262
2.6e-10\t402
2.7e-10\t497
2.8e-10\t541
2.9e-10\t555
3e-10\t325
3.1e-10\t142
3.2e-10\t50
3.3e-10\t13

#HISTOGRAM: -v 1e-6
#Force (N)\tUnfolding events
1.5e-10\t2
1.6e-10\t3
1.7e-10\t7
1.8e-10\t8
1.9e-10\t7
2e-10\t25
2.1e-10\t30
2.2e-10\t58
2.3e-10\t76
2.4e-10\t159
2.5e-10\t216
2.6e-10\t313
2.7e-10\t451
2.8e-10\t568
2.9e-10\t533
3e-10\t416
3.1e-10\t222
3.2e-10\t80
3.3e-10\t24
3.4e-10\t2
"""
 121
 122
# Set to True to have `HistogramMatcher.plot()` log the process's resident
# set size (see `rss()`) after every grid point.
MEM_DEBUG = False
 124
 125
 126
 127 def rss():
 128     """
 129     For debugging memory usage.
 130
 131     resident set size, the non-swapped physical memory that a task has
 132     used (in kilo-bytes).
 133     """
 134     call = "ps -o rss= -p %d" % getpid()
 135     status,stdout,stderr = invoke(call)
 136     return int(stdout)
 137
 138
 139 class HistogramMatcher (object):
 140     """Compare experimental histograms to simulated data.
 141
 142     The main entry points are `fit()` and `plot()`.
 143
 144     The input `histogram_stream` should contain a series of
 145     experimental histograms with '#HISTOGRAM: <params>` lines starting
 146     each histogram.  `<params>` lists the `sawsim` parameters that are
 147     unique to that experiment.
 148
 149     >>> from .manager.thread import ThreadManager
 150     >>> velocity_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
 151     >>> param_format_string = (
 152     ...     '-s cantilever,hooke,0.05 -N1 '
 153     ...     '-s folded,null -N8 '
 154     ...     '-s "unfolded,wlc,{0.39e-9,28e-9}" '
 155     ...     '-k "folded,unfolded,bell,{%g,%g}" -q folded')
 156     >>> m = ThreadManager()
 157     >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
 158     >>> hm = HistogramMatcher(velocity_stream, param_format_string, sr, N=3)
 159     >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
 160     >>> m.teardown()
 161     """
 162     def __init__(self, histogram_stream, param_format_string,
 163                  sawsim_runner, N=400, residual_type='jensen-shannon',
 164                  plot=True):
 165         self.experiment_histograms = self._read_force_histograms(
 166             histogram_stream)
 167         self.param_format_string = param_format_string
 168         self.sawsim_runner = sawsim_runner
 169         self.N = N
 170         self.residual_type = residual_type
 171         self._plot = plot
 172
 173     def _read_force_histograms(self, stream):
 174         """
 175         File format:
 176         # comment and blank lines ignored
 177         <velocity in m/s><whitespace><path to histogram file>
 178         ...
 179
 180         >>> import sys
 181         >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
 182         >>> hm = HistogramMatcher(StringIO(), None, None, None)
 183         >>> histograms = hm._read_force_histograms(stream)
 184         >>> sorted(histograms.iterkeys())
 185         ['-v 1e-6', '-v 6e-7', '-v 8e-7']
 186         >>> histograms['-v 1e-6'].to_stream(sys.stdout)
 187         ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
 188         #Force (N)\tUnfolding events
 189         1.5e-10\t2
 190         1.6e-10\t3
 191         1.7e-10\t7
 192         1.8e-10\t8
 193         1.9e-10\t7
 194         2e-10\t25
 195         2.1e-10\t30
 196         2.2e-10\t58
 197         2.3e-10\t76
 198         2.4e-10\t159
 199         2.5e-10\t216
 200         2.6e-10\t313
 201         2.7e-10\t451
 202         2.8e-10\t568
 203         2.9e-10\t533
 204         3e-10\t416
 205         3.1e-10\t222
 206         3.2e-10\t80
 207         3.3e-10\t24
 208         3.4e-10\t2
 209         """
 210         token = '#HISTOGRAM:'
 211         hist_blocks = {None: []}
 212         params = None
 213         for line in stream.readlines():
 214             line = line.strip()
 215             if line.startswith(token):
 216                 params = line[len(token):].strip()
 217                 assert params not in hist_blocks, params
 218                 hist_blocks[params] = []
 219             else:
 220                 hist_blocks[params].append(line)
 221
 222         histograms = {}
 223         for params,block in hist_blocks.iteritems():
 224             if params == None:
 225                 continue
 226             h = Histogram()
 227             h.from_stream(StringIO('\n'.join(block)))
 228             histograms[params] = h
 229         return histograms
 230
 231     def param_string(self, params, hist_params):
 232         """Generate a string of options to pass to `sawsim`.
 233         """
 234         return '%s %s' % (
 235             self.param_format_string % tuple(params), hist_params)
 236
 237     def get_all_unfolding_data(self, dirname, velocity_string):
 238         datafile = os.path.join(dirname, "data_" + velocity_string)
 239         return numpy.fromfile(datafile, sep=" ")
 240
 241         sawsim_histograms = {}
 242         for velocity in velocities:
 243             unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
 244             bin_edges = histograms[velocity].bin_edges
 245             h = Histogram()
 246             h.from_data(unfolding_forces, bin_edges)
 247             sawsim_histograms[velocity] = h
 248             sawsim_histograms[velocity].normalize()
 249         return sawsim_histograms
 250
 251     def _residual(self, params):
 252         residual = 0
 253         for hist_params,experiment_hist in self.experiment_histograms.iteritems():
 254             sawsim_hist = sawsim_histogram(
 255                 sawsim_runner=self.sawsim_runner,
 256                 param_string=self.param_string(params, hist_params),
 257                 N=self.N, bin_edges=experiment_hist.bin_edges)
 258             r = experiment_hist.residual(sawsim_hist, type=self.residual_type)
 259             residual += r
 260             if self._plot == True:
 261                 title = ", ".join(["%g" % p for p in params]+[hist_params])
 262                 filename = "residual-%s-%g.png" % (
 263                     title.replace(', ', '_').replace(' ', '_'), r)
 264                 self._plot_residual_comparison(
 265                     experiment_hist, sawsim_hist, residual=r,
 266                     title=title, filename=filename)
 267         log().debug('residual: %g' % residual)
 268         return residual
 269
 270     def plot(self, param_ranges, logx=False, logy=False, contour=False):
 271         xranges = param_ranges[0]
 272         yranges = param_ranges[1]
 273         if logx == False:
 274             x = numpy.linspace(*xranges)
 275         else:
 276             m,M,n = xranges
 277             x = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
 278         if logy == False:
 279             y = numpy.linspace(*yranges)
 280         else:
 281             m,M,n = yranges
 282             y = numpy.exp(numpy.linspace(numpy.log(m), numpy.log(M), n))
 283         X, Y = pylab.meshgrid(x,y)
 284         C = numpy.zeros((len(y)-1, len(x)-1))
 285         for i,xi in enumerate(x[:-1]):
 286             for j,yj in enumerate(y[:-1]):
 287                 log().info('point %d %d (%d of %d)'
 288                            % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
 289                 params = (xi,yj)
 290                 r = self._residual(params)
 291                 C[j,i] = numpy.log(r) # better resolution in valleys
 292                 if MEM_DEBUG == True:
 293                     log().debug('RSS: %d KB' % rss())
 294         C = numpy.nan_to_num(C) # NaN -> 0
 295         fid = file("histogram_matcher-XYC.pkl", "wb")
 296         pickle.dump([X,Y,C], fid)
 297         fid.close()
 298         # read in with
 299         # import pickle
 300         # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
 301         # ...
 302         FIGURE.clear()
 303         axes = FIGURE.add_subplot(111)
 304         if logx == True:
 305             axes.set_xscale('log')
 306         if logy == True:
 307             axes.set_yscale('log')
 308         if contour == True:
 309             p = axes.contour(X[:-1,:-1], Y[:-1,:-1], C)
 310             # [:-1,:-1] to strip dummy last row & column from X&Y.
 311         else: # pseudocolor plot
 312             p = axes.pcolor(X, Y, C)
 313             axes.autoscale_view(tight=True)
 314         FIGURE.colorbar(p)
 315         FIGURE.savefig("figure.png")
 316
 317     def _plot_residual_comparison(self, experiment_hist, theory_hist,
 318                                   residual, title, filename):
 319         FIGURE.clear()
 320         p = pylab.plot(experiment_hist.bin_edges[:-1],
 321                        experiment_hist.probabilities, 'r-',
 322                        theory_hist.bin_edges[:-1],
 323                        theory_hist.probabilities, 'b-')
 324         pylab.title(title)
 325         FIGURE.savefig(filename)
 326
 327
 328 def parse_param_ranges_string(string):
 329     """Parse parameter range stings.
 330
 331     '[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
 332       ->
 333     [[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
 334
 335     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
 336     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
 337     """
 338     ranges = []
 339     for range_string in string.split("],["):
 340         range_number_strings = range_string.strip("[]").split(",")
 341         ranges.append([float(x) for x in range_number_strings])
 342     return ranges
 343
 344
 345 def main(argv=None):
 346     """
 347     >>> import tempfile
 348     >>> f = tempfile.NamedTemporaryFile()
 349     >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
 350     >>> f.flush()
 351     >>> main(['-s', 'bin/sawsim',
 352     ...       '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
 353     ...       '-N', '2',
 354     ...       f.name])
 355     >>> f.close()
 356     """
 357     from optparse import OptionParser
 358     import sys
 359
 360     if argv == None:
 361         argv = sys.argv[1:]
 362
 363     sr = SawsimRunner()
 364
 365     usage = '%prog [options] histogram_file'
 366     epilog = '\n'.join([
 367             'Compare simulated results against experimental values over a',
 368             'range of parameters.  Generates a plot of fit quality over',
 369             'the parameter space.  The histogram file should look something',
 370             'like:',
 371             '',
 372             EXAMPLE_HISTOGRAM_FILE_CONTENTS,
 373             ''
 374             '`#HISTOGRAM: <params>` lines start each histogram.  `params`',
 375             'lists the `sawsim` parameters that are unique to that',
 376             'experiment.',
 377             '',
 378             'Each histogram line is of the format:',
 379             '',
 380             '<bin_edge><whitespace><count>',
 381             '',
 382             '`<bin_edge>` should mark the left-hand side of the bin, and',
 383             'all bins should be of equal width (so we know where the last',
 384             'one ends).',
 385             ])
 386     parser = OptionParser(usage, epilog=epilog)
 387     parser.format_epilog = lambda formatter: epilog+'\n'
 388     for option in sr.optparse_options:
 389         if option.dest == 'param_string':
 390             continue
 391         parser.add_option(option)
 392     parser.add_option("-f","--param-format", dest="param_format",
 393                       metavar="FORMAT",
 394                       help="Convert params to sawsim options (%default).",
 395                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
 396     parser.add_option("-p","--initial-params", dest="initial_params",
 397                       metavar="PARAMS",
 398                       help="Initial params for fitting (%default).",
 399                       default='3.3e-4,0.25e-9')
 400     parser.add_option("-r","--param-range", dest="param_range",
 401                       metavar="PARAMS",
 402                       help="Param range for plotting (%default).",
 403                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
 404     parser.add_option("-R","--residual", dest="residual",
 405                       metavar="STRING",
 406                       help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
 407                       default='jensen-shannon')
 408     parser.add_option("-P","--plot-residuals", dest="plot_residuals",
 409                       help="Generate residual difference plots for each point in the plot range.",
 410                       default=False, action="store_true")
 411     parser.add_option("--logx", dest="logx",
 412                       help="Use a log scale for the x range.",
 413                       default=False, action="store_true")
 414     parser.add_option("--logy", dest="logy",
 415                       help="Use a log scale for the y range.",
 416                       default=False, action="store_true")
 417     parser.add_option("-c","--contour-plot", dest="contour_plot",
 418                       help="Select contour plot (vs. the default pseudocolor plot).",
 419                       default=False, action="store_true")
 420
 421     options,args = parser.parse_args(argv)
 422
 423     initial_params = [float(p) for p in options.initial_params.split(",")]
 424     param_ranges = parse_param_ranges_string(options.param_range)
 425     velocity_file = args[0]
 426     sr_call_params = sr.initialize_from_options(options)
 427
 428     try:
 429         hm = HistogramMatcher(
 430             file(velocity_file, 'r'), param_format_string=options.param_format,
 431             sawsim_runner=sr, residual_type=options.residual,
 432             plot=options.plot_residuals, **sr_call_params)
 433         #hm.fit(initial_params)
 434         hm.plot(param_ranges, logx=options.logx, logy=options.logy,
 435                 contour=options.contour_plot)
 436     finally:
 437         sr.teardown()