From 837c425eaeccae280cc7f7784d03dfcfcb03678c Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 5 Nov 2010 16:58:45 -0400 Subject: [PATCH] Move Bell model unfolding rate test into pysawsim/test/bell_rate.py. Cleaned up model and parameter estimation. See W. Trevor King, Meihong Su, and Guoliang Yang. Monte Carlo simulation of mechanical unfolding of proteins based on a simple two-state model. International Journal of Biological Macromolecules 46 (2010) 159-166. doi: 10.1016/j.ijbiomac.2009.12.001 (king10 in src/sawsim.bib) for details. --- pysawsim/constants.py | 6 +- pysawsim/test/bell_rate.py | 184 +++++++++++++++++++++++++++++++++ pysawsim/test/constant_rate.py | 4 +- testing/bell_rate/Makefile | 10 -- testing/bell_rate/NOTES | 75 -------------- testing/bell_rate/bell_rate.sh | 136 ------------------------ testing/bell_rate/fit_bell | 67 ------------ testing/bell_rate/params | 11 -- testing/bell_rate/run_test.sh | 13 --- 9 files changed, 190 insertions(+), 316 deletions(-) create mode 100644 pysawsim/test/bell_rate.py delete mode 100644 testing/bell_rate/Makefile delete mode 100644 testing/bell_rate/NOTES delete mode 100755 testing/bell_rate/bell_rate.sh delete mode 100755 testing/bell_rate/fit_bell delete mode 100644 testing/bell_rate/params delete mode 100755 testing/bell_rate/run_test.sh diff --git a/pysawsim/constants.py b/pysawsim/constants.py index fc00ce3..507d78e 100644 --- a/pysawsim/constants.py +++ b/pysawsim/constants.py @@ -23,6 +23,8 @@ TODO: bundle physcon? """ +gamma_e = 0.5772156649015328606065 +"Euler-Mascheroni constant" -kB = 1.3806503e-23 # m²·kg/s²·K - +kB = 1.3806503e-23 +"Boltzmann constant in m²·kg/s²·K" diff --git a/pysawsim/test/bell_rate.py b/pysawsim/test/bell_rate.py new file mode 100644 index 0000000..b7eb3f6 --- /dev/null +++ b/pysawsim/test/bell_rate.py @@ -0,0 +1,184 @@ +# Copyright (C) 2008-2010 W. Trevor King +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# The author may be contacted at on the Internet, or +# write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St., +# Philadelphia PA 19104, USA. + +"""Test a Hookian chain with Bell model unfolding rate. + +With the constant velocity experiment and a Hookian domain, the +unfolding force is proportional to time, so we expect a peaked +histogram. + +Analytically, with a spring constant + +.. math:: k = df/dx + +and a pulling velocity + +.. math:: v = dx/dt + +we have a loading rate + +.. math:: df/dt = df/dx dx/dt = kv \\;, + +so + +.. math:: f = kvt + f_0 \\;. + +Assuming :math:`f_0 = 0` and an unfolding rate constant :math:`K`, the +population follows + +.. math:: + dp/dt = -K_0 p = -p K_0 exp(f \\Delta x/k_B T) + dp/df = dp/dt dt/df = -p K_0/kv exp(f \\Delta x/k_B T) + = -p/\\rho exp(-\\alpha/\\rho) exp(f/\\rho) + = -p/\\rho exp((f-\\alpha)/\\rho) + +Where :math:`\\rho \\equiv k_B T/\\Delta x` and +:math:`\\alpha \\equiv \\rho log(kv/K_0\\rho)`. + +Events with such an exponentially increasing "hazard function" follow +the Gumbel (minimum) probability distribution + + P(f) = K_0 p(f) = 1/\\rho exp((f-\\alpha)/\\rho - exp((f-\\alpha)/\\rho)) + +which has a mean :math;`\\langle f \\rangle = \\alpha - \\gamma_e \\rho` +and a variance :math:`\\sigma^2 = \pi^2 \\rho^2 / 6`, where +:math:`\\gamma_e = 0.577\\ldots` is the Euler-Mascheroni constant. + +>>> test() +>>> test(num_domains=5) +>>> test(unfolding_rate=5) +>>> test(unfolding_distance=5) +>>> test(spring_constant=5) +>>> test(velocity=5) + +Now use reasonable protein parameters. + +>>> test(num_domains=1, unfolding_rate=1e-3, unfolding_distance=1e-9, +... temperature=300, spring_constant=0.05, velocity=1e-6) +>>> test(num_domains=50, unfolding_rate=1e-3, unfolding_distance=1e-9, +... temperature=300, spring_constant=0.05, velocity=1e-6) + +Problems with 50_1e-6_0.05_1e-3_1e-9_300's + z = K0/kv: 17106.1 (expected 20000.0) +Strange banded structure too. Banding most pronounced for smaller +forces. The banding is due to the double-unfolding problem. Reducing +P from 1e-3 to 1e-5 (which takes a lot longer to run), gave +50_1e-6_0.05_1e-3_1e-9_299, which looks much nicer. +""" + +from numpy import arange, exp, log, pi, sqrt + +from ..constants import gamma_e, kB +from ..histogram import Histogram +from ..fit import HistogramModelFitter +from ..manager import get_manager +from ..sawsim import SawsimRunner +from ..sawsim_histogram import sawsim_histogram + + +def probability_distribution(x, params): + """Gumbel (minimum) probability distribution. + + .. math:: 1/\\rho exp((x-\\alpha)/\\rho - exp((x-\\alpha)/\\rho)) + """ + p = params # convenient alias + p[1] = abs(p[1]) # cannot normalize negative rho. + xs = (x - p[0]) / p[1] + return (1.0/p[1]) * exp(xs - exp(xs)) + + +class GumbelModelFitter (HistogramModelFitter): + """Gumbel (minimum) model fitter. + """ + def model(self, params): + """A Gumbel (minimum) decay model. + + Notes + ----- + .. math:: y \\propto exp((x-\\alpha)/\\rho - exp((x-\\alpha)/\\rho)) + """ + self._model_data.counts = ( + self.info['binwidth']*self.info['N']*probability_distribution( + self._model_data.bin_centers, params)) + return self._model_data + + def guess_initial_params(self): + rho = sqrt(6) * self._data.std_dev / pi + alpha = self._data.mean + gamma_e * rho + return (alpha, rho) + + def guess_scale(self, params): + return params + + def model_string(self): + return 'p(x) ~ exp((x-alpha)/rho - exp((x-alpha)/rho))' + + def param_string(self, params): + pstrings = [] + for name,param in zip(['alpha', 'rho'], params): + pstrings.append('%s=%g' % (name, param)) + return ', '.join(pstrings) + + + +def bell_rate(sawsim_runner, num_domains=1, unfolding_rate=1, + unfolding_distance=1, temperature=1/kB, spring_constant=1, + velocity=1, N=100): + loading_rate = float(spring_constant * velocity) + rho = kB * temperature / unfolding_distance + alpha = rho * log(loading_rate / (unfolding_rate * rho)) + w = 0.1 * rho # calculate bin width (in force) + force_mean = alpha - gamma_e * rho + theory = Histogram() + theory.bin_edges = arange(start=0, stop=3*force_mean, step=w) + theory.bin_centers = theory.bin_edges[:-1] + w/2 + theory.counts = w*num_domains*N*probability_distribution( + theory.bin_centers, [alpha, rho]) + theory.analyze() + + max_force_step = w/10.0 + max_time_step = max_force_step / loading_rate + param_string = ( + '-d %(max_time_step)g -F %(max_force_step)g -v %(velocity)g ' + '-s cantilever,hooke,%(spring_constant)g -N1 ' + '-s folded,null -N%(num_domains)d -s unfolded,null ' + '-k "folded,unfolded,bell,{%(unfolding_rate)g,%(unfolding_distance)g}" ' + '-T %(temperature)g -q folded ' + ) % locals() + + sim = sawsim_histogram(sawsim_runner, param_string, N=N, + bin_edges=theory.bin_edges) + + e = GumbelModelFitter(sim) + params = e.fit() + sim_alpha = params[0] + sim_rho = abs(params[1]) + for s,t,n in [(sim_alpha, alpha, 'alpha'), (sim_rho, rho, 'rho')]: + assert (s - t)/t < 0.1, 'simulation %s = %g != %g = %s' % (n,s,t,n) + return sim.residual(theory) + + +def test(threshold=0.2, **kwargs): + m = get_manager()() + sr = SawsimRunner(manager=m) + try: + residual = bell_rate(sawsim_runner=sr, **kwargs) + assert residual < threshold, residual + finally: + m.teardown() diff --git a/pysawsim/test/constant_rate.py b/pysawsim/test/constant_rate.py index 7435b96..63fd4f7 100644 --- a/pysawsim/test/constant_rate.py +++ b/pysawsim/test/constant_rate.py @@ -138,8 +138,8 @@ def constant_rate(sawsim_runner, num_domains=1, unfolding_rate=1, e = ExponentialModelFitter(sim) params = e.fit() sim_tau = abs(params[0]) - assert (sim_tau - tau)/tau, 'simulation tau = %g != %g = tau' % ( - sim_tau, tau) + for s,t,n in [(sim_tau, tau, 'tau')]: + assert (s - t)/t < 0.1, 'simulation %s = %g != %g = %s' % (n,s,t,n) return sim.residual(theory) diff --git a/testing/bell_rate/Makefile b/testing/bell_rate/Makefile deleted file mode 100644 index 557ae35..0000000 --- a/testing/bell_rate/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -all : bell_rate.png - -view : all - qiv bell_rate.png - -clean : - rm -f bell_rate_*.gp bell_rate_*.hist bell_rate_*.d bell_rate_*.png bell_rate_*.compare STDIN* - -bell_rate.png : run_test.sh - ./$< diff --git a/testing/bell_rate/NOTES b/testing/bell_rate/NOTES deleted file mode 100644 index 87375aa..0000000 --- a/testing/bell_rate/NOTES +++ /dev/null @@ -1,75 +0,0 @@ -* Version 0.8 MOSTLY PASSED - -Problems with 50_1e-6_0.05_1e-3_1e-9_300's - z = K0/kv: 17106.1 (expected 20000.0) -Strange banded structure too. Banding most pronounced for smaller -forces. The banding is due to the double-unfolding problem. Reducing -P from 1e-3 to 1e-5 (which takes a lot longer to run), gave -50_1e-6_0.05_1e-3_1e-9_299, which looks much nicer. - -Otherwise things look ok. - -==> bell_rate_1_1_1_1_1_7.24296e22.compare <== -z = K0/kv: 1.00068 (expected 1.0) -b = dx/kBT: 0.999718 (expected 1.00000051031) - -==> bell_rate_1_1_1_1_5_7.24296e22.compare <== -z = K0/kv: 0.985894 (expected 1.0) -b = dx/kBT: 4.99089 (expected 5.00000255156) - -==> bell_rate_1_1_1_5_1_7.24296e22.compare <== -z = K0/kv: 4.98609 (expected 5.0) -b = dx/kBT: 0.981462 (expected 1.00000051031) - -==> bell_rate_1_1_5_1_1_7.24296e22.compare <== -z = K0/kv: 0.197084 (expected 0.2) -b = dx/kBT: 0.999987 (expected 1.00000051031) - -==> bell_rate_1_1e-6_0.05_1e-3_1e-9_300.compare <== -z = K0/kv: 20512.9 (expected 20000.0) -b = dx/kBT: 2.41336e+11 (expected 241432123206.0) - -==> bell_rate_1_5_1_1_1_7.24296e22.compare <== -z = K0/kv: 0.204091 (expected 0.2) -b = dx/kBT: 0.982143 (expected 1.00000051031) - -==> bell_rate_50_1e-6_0.05_1e-3_1e-9_300.compare <== -z = K0/kv: 17106.1 (expected 20000.0) -b = dx/kBT: 2.4366e+11 (expected 241432123206.0) - -==> bell_rate_5_1_1_1_1_7.24296e22.compare <== -z = K0/kv: 1.00114 (expected 1.0) -b = dx/kBT: 1.00475 (expected 1.00000051031) - -* Version 0.9 PASSED -==> bell_rate_1_1_1_1_1_7.24296e22.compare <== -z = K0/kv: 1.01076 (expected 1.0) -b = dx/kBT: 0.984221 (expected 1.00000051031) - -==> bell_rate_1_1_1_1_5_7.24296e22.compare <== -z = K0/kv: 1.0312 (expected 1.0) -b = dx/kBT: 4.93899 (expected 5.00000255156) - -==> bell_rate_1_1_1_5_1_7.24296e22.compare <== -z = K0/kv: 5.18165 (expected 5.0) -b = dx/kBT: 0.776549 (expected 1.00000051031) - -==> bell_rate_1_1_5_1_1_7.24296e22.compare <== -z = K0/kv: 0.198328 (expected 0.2) -b = dx/kBT: 1.00328 (expected 1.00000051031) - -==> bell_rate_1_1e-6_0.05_1e-3_1e-9_300.compare <== -z = K0/kv: 17096.4 (expected 20000.0) -b = dx/kBT: 2.4364e+11 (expected 241432123206.0) - -==> bell_rate_1_5_1_1_1_7.24296e22.compare <== -z = K0/kv: 0.201196 (expected 0.2) -b = dx/kBT: 0.984595 (expected 1.00000051031) - -==> bell_rate_50_1e-6_0.05_1e-3_1e-9_300.compare <== -z = K0/kv: 18893 (expected 20000.0) -b = dx/kBT: 2.42285e+11 (expected 241432123206.0) - -==> bell_rate_5_1_1_1_1_7.24296e22.compare <== -z = K0/kv: 1.01665 (expected 1.0) -b = dx/kBT: 0.979307 (expected 1.00000051031) diff --git a/testing/bell_rate/bell_rate.sh b/testing/bell_rate/bell_rate.sh deleted file mode 100755 index 6aee491..0000000 --- a/testing/bell_rate/bell_rate.sh +++ /dev/null @@ -1,136 +0,0 @@ -#!/bin/bash -# -# Use a single hooke domain with a constant unfolding rate, so -# unfolding force is proportional to time, and connect multiple -# identical Bell unfolding domains. We expect an peaked death -# histogram. -# -# e.g. -# spring constant k=df/dx \__ -# velocity v=dx/dt / `--> df/dt = kv --> f = kvt (+ f(t=0)) -# unfolding rate K= a e^(bf) (frac/s) -# let f(t=0) = 0 -# let P(f) be the population existing at force f (normed so P(t=f=0)=1). -# the probability of dying (normed # deaths) between f and f+df is given by -# dD = K P(f) * dt = K P(f) df/kv since dt = df/kv -# this is what we plot in the normalized histogram with dD in a bin of width -# df. The continuous analog is the function dD/df = K/kv P(f). -# -# To find P(f), we need to solve -# dP/dt = -KP -# dP/P = -K dt = -a e^{bf} df/kv = -z e^{bf} df where z ≡ a/kv -# ln(P) = -z/b e^{bf) + c -# P = C e^{-z/b e^{bf}} -# Applying our f(t=0)=0 boundary condition we see that C = P(t=f=0) e^{z/b}. -# Plugging P(f) into our formula for dD/df yields -# dD/df = K/kv P(f) = z e^{bf} P(t=f=0) e^{z/b} e^{-z/b e^{bf}} -# dD/df = P(t=f=0) z e^{bf + z/b(1-e^{bf})} -# -# The histogram scaling isn't dD/df, though, it's dD/d(bin) -# dD/d(bin) = dD/df df/d(bin) -# = binwidth P(t=f=0) z e^{bf + z/b(1-e^{bf})} -# -# Reminders: -# spring constant k=df/dx (N/m) -# velocity v=dx/dt (m/s) -# unfolding rate K= a e^(bf) = K0 e^(fdx/kBT) (frac/s) -# usage: bell_rate.sh - -if [ $# -ne 6 ] -then - echo "usage: $0 " - echo "" - echo "where" - echo "spring constant k=df/dx (N/m)" - echo "velocity v=dx/dt (m/s)" - echo "unfolding rate K= K0 e^(fdx/kBT) (frac/s)" - exit 1 -fi - -NDOM=$1 -V=$2 -SPRING=$3 -K0=$4 -DX=$5 -T=$6 -DEBUG=0 - -# since we're on the cluster, we'll go crazy and get really good statistics ;) -N=10000 - -SAWSIM=../../bin/sawsim -DATA=bell_rate_${NDOM}_${V}_${SPRING}_${K0}_${DX}_${T}.d -HIST=bell_rate_${NDOM}_${V}_${SPRING}_${K0}_${DX}_${T}.hist -COMPFILE=bell_rate_${NDOM}_${V}_${SPRING}_${K0}_${DX}_${T}.compare -GPFILE=bell_rate_${NDOM}_${V}_${SPRING}_${K0}_${DX}_${T}.gp -PNGFILE=bell_rate_${NDOM}_${V}_${SPRING}_${K0}_${DX}_${T}.png - -# set up the theory -# our predicted histogram (see notes above) is -# dD/d(bin) = binwidth P0 z e^{bf + z/b(1-e^{bf})} -# where z ≡ a/kv, and b = Δx/kBT -KB=1.3806503e-23 # J/K -B=`python -c "print ($DX/($KB*$T))"` -Z=`python -c "print (float($K0)/($SPRING*$V))"` -# find a good df (histogram bin width). -# currently, scale based only on e^(bf) term in exponent. -df=`python -c "print (0.01/$B)"` - -THEORY_FN="$df * $N * $NDOM * $Z * exp($B*x + $Z/$B *(1 - exp($B*x)))" -if [ $DEBUG -eq 1 ]; then - echo "b = dx/kBT = $B" - echo "z = K0/kv = $Z" - echo "Theory --> P(x=f) = $THEORY_FN" -fi - -# run the experiments -> $DATA -if [ "$ISACLUSTER" -eq 1 ] -then - # Sawsim >= 0.6 - qcmd -w14:00:00 xtimes $N $SAWSIM -T$T -v$V -s cantilever,hooke,$SPRING -N1 \ - -s folded,null -N$NDOM -s unfolded,null \ - -k "'folded,unfolded,bell,{$K0,$DX}'" -q folded \ - | grep -v '^#' | cut -f1 >> $DATA -else - # Sawsim >= 0.6 - xtimes $N $SAWSIM -T$T -v$V -s cantilever,hooke,$SPRING -N1 \ - -s folded,null -N$NDOM -s unfolded,null \ - -k "folded,unfolded,bell,{$K0,$DX}" -q folded \ - | grep -v '^#' | cut -f1 >> $DATA -fi - -# histogram the data -if [ $DEBUG -eq 1 ]; then - echo "cat $DATA | stem_leaf -v-1 -b$df -c | sed 's/://' > $HIST" -fi -cat $DATA | stem_leaf -v-1 -b$df -c | sed 's/://' > $HIST -wc -l "$HIST" -# fit the data to an exponential decay -if [ $DEBUG -eq 1 ]; then - echo "cat $HIST | ./fit_bell" -fi -FIT=`cat $HIST | ./fit_bell` || exit 1 -FITZ=`echo "$FIT" | sed -n 's/a:[[:space:]]*//p'` -FITB=`echo "$FIT" | sed -n 's/b:[[:space:]]*//p'` -echo -e "z = K0/kv:\t$FITZ\t(expected $Z)" > $COMPFILE -echo -e "b = dx/kBT:\t$FITB\t(expected $B)" >> $COMPFILE -cat $COMPFILE - -# generate gnuplot script -if [ $DEBUG -eq 1 ]; then - echo "generate Gnuplot script" -fi -echo "set terminal png" > $GPFILE -echo "set output '$PNGFILE'" >> $GPFILE -echo "set logscale y" >> $GPFILE -echo "set xlabel 'Force'" >> $GPFILE -echo "set ylabel 'Deaths'" >> $GPFILE -echo "theory(x) = $THEORY_FN" >> $GPFILE -echo "fit(x) = $df * $N * $NDOM * $FITZ * exp($FITB*x + $FITZ/$FITB*(1-exp($FITB*x)))" >> $GPFILE -echo "plot theory(x), '$HIST' with points, fit(x)" >> $GPFILE - -if [ $DEBUG -eq 1 ]; then - echo "Generate PNG" -fi -gnuplot $GPFILE diff --git a/testing/bell_rate/fit_bell b/testing/bell_rate/fit_bell deleted file mode 100755 index c6433c3..0000000 --- a/testing/bell_rate/fit_bell +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/python -# -# fit data coming in on stdin to bell-theory histogram -# output: -# -# a:\t -# b:\t -# N:\t -# bin:\t -# -# usage: fit_bell -# for example: -# $ cat datafile | fit_bell - -import sys -sys.path.append('../common/') -import fit -from scipy import exp - -class Model (fit.Model) : - def printModel(self) : - print "bin*N*a*exp(b*x + a/b(1-exp(b*x)))" - def printParams(self, xopt) : - print "a:\t%g" % xopt[0] - print "b:\t%g" % xopt[1] - def model(self, x, params) : - a = params[0] - b = params[1] - return self.binwidth*self.N*a*exp(b*x + a/b*(1.-exp(b*x))) - def guessInitialParams(self) : - print "guessing initial params" - init = self.data[0] - # find the coords (Xmax,Ymax) of the peak of the histogram - Yarr = list(zip(*self.data)[1]) - Xarr = list(zip(*self.data)[0]) - Ymax = max(Yarr) - Xmax = Xarr[Yarr.index(Ymax)] - # two guessing methods for B: - if Ymax < 1.3*init[1] : - # in the first, we have a high unforced rate, and - # the histogram is more-or-less exponential decay. - print "guessing with method 1" - Aguess = init[1]/self.N/self.binwidth - i = 0 - s = 0 - while s < self.N/2.0 : - s += Yarr[i] - i += 1 - print "half dead by", self.data[i] - Aguess = init[1]/self.N/self.binwidth - Bguess = 1.0/self.data[i][0] - else : - # in the second, we have a low unforced rate, and the histogram has - # a well defined 'bump' at some likely unfolding force - # h(f) = binwidth N a e^{b x + a/b[1-e^(b f)]} - # dh/df = h [b - a e^(b f)] - # so dh/df = 0 at f=fmax when - # b = a e^(b fmax) - # hmm, trancendental equation for b - print "Xmax = ", Xmax - Bguess = 1.0/Xmax - Aguess = init[1]/self.N/self.binwidth - print "guessing ", (Aguess, Bguess) - return (Aguess, Bguess) - -if __name__ == "__main__" : - fit.run(2, Model) diff --git a/testing/bell_rate/params b/testing/bell_rate/params deleted file mode 100644 index 702a353..0000000 --- a/testing/bell_rate/params +++ /dev/null @@ -1,11 +0,0 @@ -# num domains velocity spring constant unfolding rate unfolding distance temperature -# 7.24296e22 = 1/kB (in SI units) -1 1 1 1 1 7.24296e22 -5 1 1 1 1 7.24296e22 -1 5 1 1 1 7.24296e22 -1 1 5 1 1 7.24296e22 -1 1 1 5 1 7.24296e22 -1 1 1 1 5 7.24296e22 -# Now use reasonable protein parameters -1 1e-6 0.05 1e-3 1e-9 300 -50 1e-6 0.05 1e-3 1e-9 300 diff --git a/testing/bell_rate/run_test.sh b/testing/bell_rate/run_test.sh deleted file mode 100755 index f77a230..0000000 --- a/testing/bell_rate/run_test.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -# -# run const_rate.sh with a variety of parameters -# -# usage: run_test.sh - -while read LINE -do - echo ./bell_rate.sh $LINE - ./bell_rate.sh $LINE -done < <(cat params | grep -v '^#') - -exit 0 -- 2.26.2