Add bin/sawsim_plot_histogram_file.py and Histogram.plot().
[sawsim.git] / pysawsim / parameter_scan.py
index 03cd99074980f94ac06b9332eff60e7eb00999ce..f986ce81aaed693989bb3aabc7deb0f7ab3fefc1 100644 (file)
@@ -25,69 +25,98 @@ import os.path
 import pickle
 from StringIO import StringIO
 
-import matplotlib
-matplotlib.use('Agg')  # select backend that doesn't require X Windows
 import numpy
-import pylab
 
 from . import log
-from .histogram import Histogram
-from .manager import MANAGERS, get_manager
+from . import PYSAWSIM_LOG_LEVEL_MSG as _PYSAWSIM_LOG_LEVEL_MSG
+from .histogram import Histogram as _Histogram
+from .histogram import FIGURE as _FIGURE
 from .sawsim_histogram import sawsim_histogram
 from .sawsim import SawsimRunner
 
+# import after .histogram, which selects an appropriate backend
+import pylab
 
-FIGURE = pylab.figure()  # avoid memory problems.
-"""`pylab` keeps internal references to all created figures, so share
-a single instance.
+_multiprocess_can_split_ = True
+"""Allow nosetests to split tests between processes.
 """
+
 EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms
 # Other general comments...
 
 #HISTOGRAM: -v 6e-7
 #Force (N)\tUnfolding events
-1.8e-10\t1
-1.9e-10\t0
-2e-10\t0
-2.1e-10\t0
-2.2e-10\t0
-2.3e-10\t0
-2.4e-10\t1
-2.5e-10\t0
-2.6e-10\t2
-2.7e-10\t0
-2.8e-10\t1
-2.9e-10\t6
-3e-10\t2
-3.1e-10\t3
+1.4e-10\t1
+1.5e-10\t0
+1.6e-10\t4
+1.7e-10\t6
+1.8e-10\t8
+1.9e-10\t20
+2e-10\t28
+2.1e-10\t38
+2.2e-10\t72
+2.3e-10\t110
+2.4e-10\t155
+2.5e-10\t247
+2.6e-10\t395
+2.7e-10\t451
+2.8e-10\t430
+2.9e-10\t300
+3e-10\t116
+3.1e-10\t18
+3.2e-10\t1
 
 #HISTOGRAM: -v 8e-7
 #Force (N)\tUnfolding events
-2.4e-10\t1
-2.5e-10\t0
-2.6e-10\t4
-2.7e-10\t2
-2.8e-10\t2
-2.9e-10\t3
-3e-10\t2
-3.1e-10\t1
-3.2e-10\t1
+8e-11\t1
+9e-11\t0
+1e-10\t0
+1.1e-10\t1
+1.2e-10\t0
+1.3e-10\t0
+1.4e-10\t0
+1.5e-10\t3
+1.6e-10\t3
+1.7e-10\t4
+1.8e-10\t4
+1.9e-10\t13
+2e-10\t29
+2.1e-10\t39
+2.2e-10\t60
+2.3e-10\t102
+2.4e-10\t154
+2.5e-10\t262
+2.6e-10\t402
+2.7e-10\t497
+2.8e-10\t541
+2.9e-10\t555
+3e-10\t325
+3.1e-10\t142
+3.2e-10\t50
+3.3e-10\t13
 
 #HISTOGRAM: -v 1e-6
 #Force (N)\tUnfolding events
-2e-10\t1
-2.1e-10\t0
-2.2e-10\t1
-2.3e-10\t1
-2.4e-10\t1
-2.5e-10\t0
-2.6e-10\t2
-2.7e-10\t1
-2.8e-10\t4
-2.9e-10\t2
-3e-10\t2
-3.1e-10\t0
-3.2e-10\t1
+1.5e-10\t2
+1.6e-10\t3
+1.7e-10\t7
+1.8e-10\t8
+1.9e-10\t7
+2e-10\t25
+2.1e-10\t30
+2.2e-10\t58
+2.3e-10\t76
+2.4e-10\t159
+2.5e-10\t216
+2.6e-10\t313
+2.7e-10\t451
+2.8e-10\t568
+2.9e-10\t533
+3e-10\t416
+3.1e-10\t222
+3.2e-10\t80
+3.3e-10\t24
+3.4e-10\t2
 """
 
 
@@ -118,21 +147,21 @@ class HistogramMatcher (object):
     unique to that experiment.
 
     >>> from .manager.thread import ThreadManager
-    >>> velocity_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
+    >>> histogram_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
     >>> param_format_string = (
     ...     '-s cantilever,hooke,0.05 -N1 '
     ...     '-s folded,null -N8 '
     ...     '-s "unfolded,wlc,{0.39e-9,28e-9}" '
     ...     '-k "folded,unfolded,bell,{%g,%g}" -q folded')
     >>> m = ThreadManager()
-    >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m)
-    >>> hm = HistogramMatcher(velocity_stream, param_format_string, sr, N=3)
+    >>> sr = SawsimRunner(manager=m)
+    >>> hm = HistogramMatcher(histogram_stream, param_format_string, sr, N=3)
     >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False)
     >>> m.teardown()
     """
     def __init__(self, histogram_stream, param_format_string,
                  sawsim_runner, N=400, residual_type='jensen-shannon',
-                 plot=True):
+                 plot=False):
         self.experiment_histograms = self._read_force_histograms(
             histogram_stream)
         self.param_format_string = param_format_string
@@ -141,12 +170,17 @@ class HistogramMatcher (object):
         self.residual_type = residual_type
         self._plot = plot
 
-    def _read_force_histograms(self, stream):
+    @staticmethod
+    def _read_force_histograms(stream):
         """
         File format:
-        # comment and blank lines ignored
-        <velocity in m/s><whitespace><path to histogram file>
-        ...
+
+          # comment and blank lines ignored
+          #HISTOGRAM: <histogram-specific params>
+          <pysawsim.histogram.Histogram-compatible histogram>
+          #HISTOGRAM: <other histogram-specific params>
+          <another pysawsim.histogram.Histogram-compatible histogram>
+          ...
 
         >>> import sys
         >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
@@ -157,19 +191,26 @@ class HistogramMatcher (object):
         >>> histograms['-v 1e-6'].to_stream(sys.stdout)
         ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF
         #Force (N)\tUnfolding events
-        2e-10\t1
-        2.1e-10\t0
-        2.2e-10\t1
-        2.3e-10\t1
-        2.4e-10\t1
-        2.5e-10\t0
-        2.6e-10\t2
-        2.7e-10\t1
-        2.8e-10\t4
-        2.9e-10\t2
-        3e-10\t2
-        3.1e-10\t0
-        3.2e-10\t1
+        1.5e-10\t2
+        1.6e-10\t3
+        1.7e-10\t7
+        1.8e-10\t8
+        1.9e-10\t7
+        2e-10\t25
+        2.1e-10\t30
+        2.2e-10\t58
+        2.3e-10\t76
+        2.4e-10\t159
+        2.5e-10\t216
+        2.6e-10\t313
+        2.7e-10\t451
+        2.8e-10\t568
+        2.9e-10\t533
+        3e-10\t416
+        3.1e-10\t222
+        3.2e-10\t80
+        3.3e-10\t24
+        3.4e-10\t2
         """
         token = '#HISTOGRAM:'
         hist_blocks = {None: []}
@@ -187,7 +228,7 @@ class HistogramMatcher (object):
         for params,block in hist_blocks.iteritems():
             if params == None:
                 continue
-            h = Histogram()
+            h = _Histogram()
             h.from_stream(StringIO('\n'.join(block)))
             histograms[params] = h
         return histograms
@@ -198,21 +239,7 @@ class HistogramMatcher (object):
         return '%s %s' % (
             self.param_format_string % tuple(params), hist_params)
 
-    def get_all_unfolding_data(self, dirname, velocity_string):
-        datafile = os.path.join(dirname, "data_" + velocity_string)
-        return numpy.fromfile(datafile, sep=" ")
-
-        sawsim_histograms = {}
-        for velocity in velocities:
-            unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
-            bin_edges = histograms[velocity].bin_edges
-            h = Histogram()
-            h.from_data(unfolding_forces, bin_edges)
-            sawsim_histograms[velocity] = h
-            sawsim_histograms[velocity].normalize()
-        return sawsim_histograms
-
-    def _residual(self, params):
+    def residual(self, params):
         residual = 0
         for hist_params,experiment_hist in self.experiment_histograms.iteritems():
             sawsim_hist = sawsim_histogram(
@@ -228,10 +255,13 @@ class HistogramMatcher (object):
                 self._plot_residual_comparison(
                     experiment_hist, sawsim_hist, residual=r,
                     title=title, filename=filename)
-        log().debug('residual: %g' % residual)
+        log().debug('residual %s: %g' % (params, residual))
         return residual
 
-    def plot(self, param_ranges, logx=False, logy=False, contour=False):
+    def plot(self, param_ranges, logx=False, logy=False, contour=False,
+             csv=None):
+        if csv:
+            csv.write(','.join(('param 1', 'param 2', 'fit quality')) + '\n')
         xranges = param_ranges[0]
         yranges = param_ranges[1]
         if logx == False:
@@ -251,7 +281,9 @@ class HistogramMatcher (object):
                 log().info('point %d %d (%d of %d)'
                            % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)))
                 params = (xi,yj)
-                r = self._residual(params)
+                r = self.residual(params)
+                if csv:
+                    csv.write(','.join([str(v) for v in (xi,yj,r)]) + '\n')
                 C[j,i] = numpy.log(r) # better resolution in valleys
                 if MEM_DEBUG == True:
                     log().debug('RSS: %d KB' % rss())
@@ -263,8 +295,8 @@ class HistogramMatcher (object):
         # import pickle
         # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb"))
         # ...
-        FIGURE.clear()
-        axes = FIGURE.add_subplot(111)
+        _FIGURE.clear()
+        axes = _FIGURE.add_subplot(1, 1, 1)
         if logx == True:
             axes.set_xscale('log')
         if logy == True:
@@ -275,18 +307,26 @@ class HistogramMatcher (object):
         else: # pseudocolor plot
             p = axes.pcolor(X, Y, C)
             axes.autoscale_view(tight=True)
-        FIGURE.colorbar(p)
-        FIGURE.savefig("figure.png")
+        _FIGURE.colorbar(p)
+        _FIGURE.savefig("figure.png")
 
     def _plot_residual_comparison(self, experiment_hist, theory_hist,
                                   residual, title, filename):
-        FIGURE.clear()
-        p = pylab.plot(experiment_hist.bin_edges[:-1],
-                       experiment_hist.probabilities, 'r-',
-                       theory_hist.bin_edges[:-1],
-                       theory_hist.probabilities, 'b-')
-        pylab.title(title)
-        FIGURE.savefig(filename)
+        _FIGURE.clear()
+        axes = _FIGURE.add_subplot(1, 1, 1)
+        axes.hold(True)
+        axes.hist(x=experiment_hist.bin_edges[:-1],  # one fake entry for each bin
+                  weights=experiment_hist.counts,    # weigh the fake entries by count
+                  bins=experiment_hist.bin_edges,
+                  align='mid', histtype='stepfilled',
+                  color='r')
+        axes.hist(x=theory_hist.bin_edges[:-1],  # one fake entry for each bin
+                  weights=theory_hist.counts,    # weigh the fake entries by count
+                  bins=theory_hist.bin_edges,
+                  align='mid', histtype='stepfilled',
+                  color='b', alpha=0.5)
+        axes.set_title(title)
+        _FIGURE.savefig(filename)
 
 
 def parse_param_ranges_string(string):
@@ -298,6 +338,8 @@ def parse_param_ranges_string(string):
 
     >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
     [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+    >>> parse_param_ranges_string('[1,2,3]')
+    [[1.0, 2.0, 3.0]]
     """
     ranges = []
     for range_string in string.split("],["):
@@ -312,8 +354,7 @@ def main(argv=None):
     >>> f = tempfile.NamedTemporaryFile()
     >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS)
     >>> f.flush()
-    >>> main(['-s', 'bin/sawsim',
-    ...       '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
+    >>> main(['-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]',
     ...       '-N', '2',
     ...       f.name])
     >>> f.close()
@@ -346,6 +387,7 @@ def main(argv=None):
             '`<bin_edge>` should mark the left-hand side of the bin, and',
             'all bins should be of equal width (so we know where the last',
             'one ends).',
+            _PYSAWSIM_LOG_LEVEL_MSG,
             ])
     parser = OptionParser(usage, epilog=epilog)
     parser.format_epilog = lambda formatter: epilog+'\n'
@@ -353,49 +395,58 @@ def main(argv=None):
         if option.dest == 'param_string':
             continue
         parser.add_option(option)
-    parser.add_option("-f","--param-format", dest="param_format",
-                      metavar="FORMAT",
-                      help="Convert params to sawsim options (%default).",
+    parser.add_option('-f','--param-format', dest='param_format',
+                      metavar='FORMAT',
+                      help='Convert params to sawsim options (%default).',
                       default=('-s cantilever,hooke,0.05 -N1 -s folded,null -N8 -s "unfolded,wlc,{0.39e-9,28e-9}" -k "folded,unfolded,bell,{%g,%g}" -q folded'))
-    parser.add_option("-p","--initial-params", dest="initial_params",
-                      metavar="PARAMS",
-                      help="Initial params for fitting (%default).",
+    parser.add_option('-p','--initial-params', dest='initial_params',
+                      metavar='PARAMS',
+                      help='Initial params for fitting (%default).',
                       default='3.3e-4,0.25e-9')
-    parser.add_option("-r","--param-range", dest="param_range",
-                      metavar="PARAMS",
-                      help="Param range for plotting (%default).",
+    parser.add_option('-r','--param-range', dest='param_range',
+                      metavar='PARAMS',
+                      help='Param range for plotting (%default).',
                       default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
-    parser.add_option("-R","--residual", dest="residual",
-                      metavar="STRING",
-                      help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
+    parser.add_option('--logx', dest='logx',
+                      help='Use a log scale for the x range.',
+                      default=False, action='store_true')
+    parser.add_option('--logy', dest='logy',
+                      help='Use a log scale for the y range.',
+                      default=False, action='store_true')
+    parser.add_option('-R','--residual', dest='residual',
+                      metavar='STRING',
+                      help='Residual type (from %s; default: %%default).'
+                      % ', '.join(_Histogram().types()),
                       default='jensen-shannon')
-    parser.add_option("-P","--plot-residuals", dest="plot_residuals",
-                      help="Generate residual difference plots for each point in the plot range.",
-                      default=False, action="store_true")
-    parser.add_option("--logx", dest="logx",
-                      help="Use a log scale for the x range.",
-                      default=False, action="store_true")
-    parser.add_option("--logy", dest="logy",
-                      help="Use a log scale for the y range.",
-                      default=False, action="store_true")
-    parser.add_option("-c","--contour-plot", dest="contour_plot",
-                      help="Select contour plot (vs. the default pseudocolor plot).",
-                      default=False, action="store_true")
+    parser.add_option('-P','--plot-residuals', dest='plot_residuals',
+                      help='Generate residual difference plots for each point in the plot range.',
+                      default=False, action='store_true')
+    parser.add_option('-c','--contour-plot', dest='contour_plot',
+                      help='Select contour plot (vs. the default pseudocolor plot).',
+                      default=False, action='store_true')
+    parser.add_option('--csv', dest='csv', metavar='FILE',
+                      help='Save fit qualities to a comma-separated value file FILE.'),
 
     options,args = parser.parse_args(argv)
 
     initial_params = [float(p) for p in options.initial_params.split(",")]
     param_ranges = parse_param_ranges_string(options.param_range)
-    velocity_file = args[0]
+    histogram_file = args[0]
+    csv = None
     sr_call_params = sr.initialize_from_options(options)
 
     try:
         hm = HistogramMatcher(
-            file(velocity_file, 'r'), param_format_string=options.param_format,
+            file(histogram_file, 'r'),
+            param_format_string=options.param_format,
             sawsim_runner=sr, residual_type=options.residual,
             plot=options.plot_residuals, **sr_call_params)
         #hm.fit(initial_params)
+        if options.csv:
+            csv = open(options.csv, 'w')
         hm.plot(param_ranges, logx=options.logx, logy=options.logy,
-                contour=options.contour_plot)
+                contour=options.contour_plot, csv=csv)
     finally:
         sr.teardown()
+        if csv:
+            csv.close()