+++ /dev/null
-#!/bin/bash
-#
-# run_vel_dep
-#
-# run BN*N simulations each at a series of pulling speeds V
-# (non-velocity parameters are copied from the run_vel_dep command
-# line) for each speed, save all unfolding data data_$V, and generate
-# a histogram of unfolding forces hist_$V also generate a file v_dep
-# with the mean unfolding force vs velocity.
-#
-# usage: run_vel_dep OUTPUT_DIR VS SAWSIM PARAMS...
-# e.g. run_vel_dep v_dep-123xs \
-# 1e-9,1e-8,1e-7,1e-6,1e-5 \
-# -s cantilever,hooke,0.05 -N1 \
-# -s folded,null -N8 \
-# -s 'unfolded,wlc,{0.39e-9,28e-9}' \
-# -k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' \
-# -q folded
-
-if [ $# -lt 3 ]; then
- STAMP=`date +"%Y.%m.%d.%H.%M.%S"`
- echo 'usage: run_vel_dep OUTPUT_DIR VS SAWSIM PARAMS...'
- echo "e.g. run_vel_dep v_dep-$STAMP \\"
- echo ' 1e-9,1e-8,1e-7,1e-6,1e-5 \'
- echo ' -s cantilever,hooke,0.05 -N1 \'
- echo ' -s folded,null -N8 \'
- echo ' -s "unfolded,wlc,{0.39e-9,28e-9}" \'
- echo ' -k "folded,unfolded,bell,{3.3e-4,0.25e-9}" \'
- echo ' -q folded'
- exit 1
-fi
-
-DIR="$1"
-shift
-Vs=`echo $1 | sed 's/,/ /g'`
-shift
-# Protect special chars from later shell expansion by quoting and
-# escaping and " characters that may have been in $arg already.
-Q='"'
-EQ='\"'
-SAWSIM_PARAMS=`for arg in $@; do echo -n "$Q${arg//$Q/$EQ}$Q "; done`
-
-
-# We can't depend on the entire array in one shot, so
-# batch executions into a single array instance
-BN=20 # sawsim executions per array job instance
-#N=8 # number of arrays
-N=20 # number of arrays
-SAFTEY_SLEEP=2 # seconds to sleep between spawning array job and depending on it
- # otherwise dependencies don't catch
-
-mkdir "$DIR" || exit 1
-pushd "$DIR" > /dev/null
-echo "Date "`date` > v_dep_notes
-echo "Run$ sawsim $SAWSIM_PARAMS -v \$V" >> v_dep_notes
-echo "for V in $Vs" >> v_dep_notes
-
-Is=`seq 1 $N` # a list of allowed 'i's, since 'for' more compact than 'while'
-
-vdone_condition="afterany"
-VJOBIDS=""
-for V in $Vs
-do
- # run N sawtooth simulations
- idone_condition="afterany"
- cmd="cd \$PBS_O_WORKDIR
- for i in \`seq 1 $BN\`; do
- ~/sawsim/sawsim $SAWSIM_PARAMS -v $V > run_$V-\$PBS_ARRAYID-\$i
- done"
- JOBID=`echo "$cmd" | qsub -h -t 1-$N -N "sawsim" -o run_$V.o -e run_$V.e` || exit 1
- # "3643.abax.physics.drexel.edu" --> "3643"
- ARRAYID=`echo "$JOBID" | sed "s/\\([0-9]*\\)\\([.].*\\)/\\1/g"`
- for i in $Is; do idone_condition="$idone_condition:$ARRAYID-$i"; done
-
- sleep $SAFTEY_SLEEP
- # once they're done, compile a list of all unfolding forces for each speed
- JOBID=`echo "cd \\\$PBS_O_WORKDIR && cat run_$V-* | grep -v '#' | cut -f1 > data_$V" |\
- qsub -h -W depend="$idone_condition" -N sawComp -o data_$V.o -e data_$V.e` \
- || exit 1
- vdone_condition="$vdone_condition:$JOBID"
- VJOBIDS="$VJOBIDS $JOBID"
- for i in $Is; do qrls "$ARRAYID-$i"; done
-done
-
-# once we have force lists for each velocity, make histograms and F(V) chart
-JOBID=`echo "cd \\\$PBS_O_WORKDIR && $HOME/script/vel_dep_graph $Vs" | \
- qsub -W depend="$vdone_condition" -N sawsimGraph` || exit 1
-# "3643.abax.physics.drexel.edu" --> "3643"
-ARRAYID=`echo "$JOBID" | sed "s/\\([0-9]*\\)\\([.].*\\)/\\1/g"`
-for job in $VJOBIDS; do qrls $job; done
-
-popd > /dev/null
-
-# wait until the final job exits
-qwait $ARRAYID
-
-exit 0
--- /dev/null
+# Copyright (C) 2009-2010 W. Trevor King <wking@drexel.edu>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# The author may be contacted at <wking@drexel.edu> on the Internet, or
+# write to Trevor King, Drudge's University, Physics Dept., 3141 Chestnut St.,
+# Philadelphia PA 19104, USA.
+
+from __future__ import with_statement
+
+import hashlib
+import os.path
+import shutil
+
+import numpy
+
+from . import __version__, log
+from .histogram import Histogram
+from .manager import InvokeJob
+from .sawsim import parse
+
+
+SAWSIM = 'sawsim' # os.path.expand(os.path.join('~', 'bin', 'sawsim'))
+CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim_histogram'))
+DEFAULT_PARAM_STRING = (
+ '-s cantilever,hooke,0.05 -N1 '
+ '-s folded,null -N8 '
+ "-s 'unfolded,wlc,{0.39e-9,28e-9}' "
+ "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' "
+ '-q folded -v 1e-6')
+
+
+class SawsimHistogram (object):
+ def __init__(self, use_cache=False, clean_cache=False):
+ self._use_cache = use_cache
+ self._clean_cache = clean_cache
+
+ def _cache_dir(self, param_string):
+ """
+ >>> s = SawsimHistogram()
+ >>> s._cache_dir(DEFAULT_PARAM_STRING) # doctest: +ELLIPSIS
+ '/.../.sawsim_histogram/...'
+ """
+ return os.path.join(
+ CACHE_DIR, hashlib.sha256(param_string).hexdigest())
+
+ def _make_cache(self, param_string):
+ cache_dir = self._cache_dir(params_string)
+ os.makedirs(cache_dir)
+ with open(os.path.join(d, 'param_string'), 'w') as f:
+ f.write('# version: %s\n%s\n' % (__version__, param_string))
+
+ def _load_cached_data(self, cache_dir, N):
+ data = {}
+ for i in range(N):
+ simulation_path = os.path.join(d, '%d.dat' % i)
+ if os.path.exists(simulation_path):
+ with open(simulation_path, 'r') as f:
+ data[i] = parse(f.read())
+ else:
+ break
+ return data
+
+ def _cache_run(self, cache_dir, i, stdout):
+ simulation_path = os.path.join(d, '%d.dat' % i)
+ with open(simulation_path, 'w') as f:
+ f.write(stdout)
+
+ def __call__(self, param_string=None, N=400, bin_edges=None, manager=None):
+ """Run `N` simulations and return a histogram with `bin_edges`.
+
+ If `bin_edges == None`, return a numpy array of all unfolding
+ forces.
+
+ Use the `JobManager` instance `manager` for asynchronous job
+ execution.
+
+ If `_use_cache` is `True`, store an array of unfolding forces
+ in `CACHE_DIR` for each simulated pull. If the cached forces
+ are already present for `param_string`, do not redo the
+ simulation unless `_clean_cache` is `True`.
+ """
+ data = {}
+ if self._use_cache == True:
+ d = self._cache_dir(param_string)
+ if os.path.exists(d):
+ if self._clean_cache == True:
+ shutil.rmtree(d)
+ self._make_cache(param_string)
+ else:
+ data = self._load_cached_data(d, N)
+ log().debug('using %d cached runs for %s'
+ % (len(data), param_string))
+ else:
+ self._make_cache(param_string)
+
+ jobs = {}
+ for i in range(N):
+ if i in data:
+ continue
+ jobs[i] = manager.async_invoke(InvokeJob(
+ '%s %s' % (SAWSIM, param_string)))
+ complete_jobs = manager.wait([job.id for job in jobs])
+ for i,job in jobs.iteritems:
+ j = complete_jobs[job.id]
+ assert j.status == 0, j.data
+ if self._use_cache == True:
+ self._cache_run(d, i, j.data['stdout'])
+ data[i] = parse(j.data['stdout'])
+ del(jobs)
+ del(complete_jobs)
+
+ # generate histogram
+ events = []
+ for d_i in data.values():
+ events.extend([e.force for e in d_i
+ if e.initial_state == 'folded'])
+ events = numpy.array(d_i)
+ if bin_edges == None:
+ return events
+ h = Histogram()
+ h.from_data(events, bin_edges)
+ return h
-#!/usr/bin/python
# Copyright (C) 2009-2010 W. Trevor King <wking@drexel.edu>
#
# This program is free software: you can redistribute it and/or modify
import shutil
import sys
-
import matplotlib
import numpy
import pylab
from scipy.optimize import leastsq
+from . import log
+from .histogram import Histogram
+from .sawsim_histogram import SawsimHistogram
+
matplotlib.use('Agg') # select backend that doesn't require X Windows
FIGURE = pylab.figure() # avoid memory problems.
For debugging memory usage.
resident set size, the non-swapped physical memory that a task has
- used (in kiloBytes).
+ used (in kilo-bytes).
"""
call = "ps -o rss= -p %d" % os.getpid()
status,stdout,stderr = invoke(call)
class HistogramMatcher (object):
- def __init__(self, velocity_file, param_format_string, dir_prefix="v_dep"):
- self.experiment_histograms = self.read_force_histograms(velocity_file)
+ """Compare experimental velocity dependent histograms to simulated data.
+
+ The main entry points are `fit()` and `plot()`.
+ """
+ def __init__(self, velocity_stream, param_format_string, N=400,
+ residual_type='jensen-shannon', plot=True, use_cache=False):
+ self.experiment_histograms = self.read_force_histograms(velocity_stream)
self.param_format_string = param_format_string
- self.dir_prefix = dir_prefix
+ self.residual_type = residual_type
+ self.N = N
+ self.plot = plot
+ self.sawsim_histogram = SawsimHistogram(use_cache=self.use_cache)
- def read_force_histograms(self, v_file):
+ def read_force_histograms(self, stream):
"""
v_file format:
# comment and blank lines ignored
- <velocity in m/s><TAB><path to histogram file>
+ <velocity in m/s><whitespace><path to histogram file>
...
e.g.
"""
histograms = {}
v_file_dir = os.path.dirname(v_file)
- for line in file(v_file, "r").readlines():
+ for line in strem.readlines():
line = line.strip()
if len(line) == 0 or line[0] == "#":
continue # ignore blank lines and comments
- v,h_file = line.split("\t")
+ v,h_file = line.split()
h = Histogram()
- h.from_file(os.path.join(v_file_dir, h_file))
+ h.from_stream(file(os.path.join(v_file_dir, h_file), 'r'))
histograms[v] = h
- histograms[v].normalize()
return histograms
- def dirname(self, params):
- if not hasattr(self, "_tag_i"):
- self._tag_i = 0
- self._tag_i += 1
- return "%s-%d" % (self.dir_prefix, self._tag_i)
-
- def generate_sawsim_histograms(self, params, histograms,
- run_repeat_simulation=True):
- velocities = histograms.keys()
- dirname = self.dirname(params)
- cmd = "run_vel_dep %s %s %s" \
- % (dirname,
- ",".join([str(v) for v in velocities]),
- self.param_string(params))
- if os.path.exists(dirname) and run_repeat_simulation == True:
- print 'repeating simulation'
- shutil.rmtree(dirname)
- if not os.path.exists(dirname):
- invoke(cmd)
+ def param_string(self, params, velocity):
+ """Generate a string of options to pass to `sawsim`.
+ """
+ return '%s -v %g' % (
+ self.param_format_string % tuple(params), velocity)
+
+ def get_all_unfolding_data(self, dirname, velocity_string):
+ datafile = os.path.join(dirname, "data_" + velocity_string)
+ return numpy.fromfile(datafile, sep=" ")
+
sawsim_histograms = {}
for velocity in velocities:
unfolding_forces = self.get_all_unfolding_data(dirname, str(velocity))
sawsim_histograms[velocity].normalize()
return sawsim_histograms
- def param_string(self, params):
- """
- Generate a string of non-velocity options to pass to sawsim
- for a given parameter list.
- """
- return self.param_format_string % (params[0], params[1])#tuple(params)
-
- def get_all_unfolding_data(self, dirname, velocity_string):
- datafile = os.path.join(dirname, "data_" + velocity_string)
- return numpy.fromfile(datafile, sep=" ")
-
- def residual(self, params, type='bin-height', plot=False,
- run_repeat_simulation=True):
- sawsim_histograms = self.generate_sawsim_histograms( \
- params, self.experiment_histograms,
- run_repeat_simulation=run_repeat_simulation)
- assert sorted(sawsim_histograms.keys()) == sorted(self.experiment_histograms.keys())
+ def _residual(self, params):
residual = 0
- for velocity,experiment_histogram in self.experiment_histograms.items():
- sawsim_histogram = sawsim_histograms[velocity]
- title = None
- if plot == True:
- title = ", ".join(["%g" % p for p in params])
- r = experiment_histogram.residual(sawsim_histogram, type=type)
+ for velocity,experiment_hist in self.experiment_histograms.iteritems():
+ sawsim_hist = self.sawsim_histogram(
+ param_string=self.param_string(params, velocity), N=self.N,
+ bin_edges=experiment_hist.bin_edges)
+ r = experiment_histogram.residual(
+ sawsim_hist, type=self.residual_type)
residual += r
- if plot == True:
+ if self.plot == True:
+ title = ", ".join(["%g" % p for p in (params+[velocity])])
filename = "residual-%s-%g.png" % (title.replace(', ','-'), r)
- self.plot_residual_comparison(
- experiment_histogram, sawsim_histogram, residual=r,
+ self._plot_residual_comparison(
+ experiment_hist, sawsim_hist, residual=r,
title=title, filename=filename)
- print "residual", residual
- del(sawsim_histograms)
+ log().debug('residual: %g' % residual)
return residual
- def fit(self, initial_params, verbose=False):
- p,cov,info,mesg,ier = leastsq(self.residual, initial_params,
+ def fit(self, initial_params):
+ p,cov,info,mesg,ier = leastsq(self._residual, initial_params,
full_output=True, maxfev=1000)
- if verbose == True:
- print "Fitted params:",p
- print "Covariance mx:",cov
- print "Info:", info
- print "mesg:", mesg
- if ier == 1:
- print "Solution converged"
- else:
- print "Solution did not converge"
+ _log = log()
+ _log.info('Fitted params: %s' % p)
+ _log.info('Covariance mx: %s' % cov)
+ _log.info('Info: %s' % info)
+ _log.info('Mesg: %s' % mesg)
return p
- def plot(self, param_ranges, logx=False, logy=False,
- residual_type='bin-height',
- plot_residuals=False, contour=False,
- run_repeat_simulations=True):
+ def plot(self, param_ranges, logx=False, logy=False, contour=False):
xranges = param_ranges[0]
yranges = param_ranges[1]
if logx == False:
for j,yi in enumerate(y[:-1]):
print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1)
params = (xi,yi)
- r = self.residual(params, type=residual_type,
- plot=plot_residuals,
- run_repeat_simulation=run_repeat_simulations)
+ r = self._residual(params)
C[j,i] = numpy.log(r) # better resolution in valleys
if MEM_DEBUG == True:
- print >> sys.stderr, "RSS: %d KB" % rss()
+ log().debug('RSS: %d KB' % rss())
C = numpy.nan_to_num(C) # NaN -> 0
fid = file("fit_force_histograms-XYC.pkl", "wb")
pickle.dump([X,Y,C], fid)
FIGURE.colorbar(p)
FIGURE.savefig("figure.png")
- def plot_residual_comparison(self, expeiment_hist, theory_hist,
- residual, title, filename):
+ def _plot_residual_comparison(self, expeiment_hist, theory_hist,
+ residual, title, filename):
FIGURE.clear()
p = pylab.plot(experiment_hist.bin_edges[:-1],
experiment_hist.probabilities, 'r-',
def parse_param_ranges_string(string):
- """
+ """Parse parameter range stings.
+
'[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...'
->
[[Amin,Amax,Asteps],[Bmin,Bmax,Bsteps],...]
+
+ >>> parse_param_ranges_string('[1,2,3],[4,5,6]')
+ [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
"""
ranges = []
for range_string in string.split("],["):
ranges.append([float(x) for x in range_number_strings])
return ranges
+
def main():
import optparse
usage = "%prog [options] velocity_file"
metavar="PARAMS",
help="Param range for plotting (%default).",
default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]')
+ parser.add_option("--N", "--number-of-runs", dest="N",
+ metavar="INT", type='int',
+ help="Number of sawsim runs at each point in parameter space (%default).",
+ default=400)
parser.add_option("--logx", dest="logx",
help="Use a log scale for the x range.",
default=False, action="store_true")
parser.add_option("--logy", dest="logy",
help="Use a log scale for the y range.",
default=False, action="store_true")
- parser.add_option("-d","--dir-prefix", dest="dir_prefix",
+ parser.add_option("-d","--cache-dir", dest="cache_dir",
metavar="STRING",
- help="Prefix to sawsim data directories (%default).",
- default='v_dep')
+ help="Cache directory for sawsim unfolding forces (%default).",
+ default=sawsim_histogram.CACHE_DIR)
parser.add_option("-R","--residual", dest="residual",
metavar="STRING",
help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).",
parser.add_option("-c","--contour-plot", dest="contour_plot",
help="Select contour plot (vs. the default pseudocolor plot).",
default=False, action="store_true")
- parser.add_option("-n","--no-repeat-simulations", dest="simulations",
- help="Do not run simulations if they already exist (re-analyze a previous run's output).",
- default=True, action="store_false")
+ parser.add_option("-C","--use_cache", dest="use_cache",
+ help="Use cached simulations if they exist (vs. running new simulations) (%default)",
+ default=False, action="store_true")
options,args = parser.parse_args()
initial_params = [float(p) for p in options.initial_params.split(",")]
param_ranges = parse_param_ranges_string(options.param_range)
velocity_file = args[0]
- hm = HistogramMatcher(velocity_file, options.param_format, options.dir_prefix)
- #hm.fit(initial_params, verbose=True)
+ sawsim_histogram.CACHE_DIR = options.cache_dir
+
+ hm = HistogramMatcher(
+ file(velocity_file, 'r'), param_format_string=options.param_format,
+ N=options.N, residual_type=options.residual,
+ plot=options.plot_residuals, use_cache=options.use_cache)
+ #hm.fit(initial_params)
hm.plot(param_ranges, logx=options.logx, logy=options.logy,
- residual_type=options.residual,
- plot_residuals=options.plot_residuals,
- contour=options.contour_plot,
- run_repeat_simulations=options.simulations)
+ contour=options.contour_plot)
+
if __name__ == "__main__":
main()