From 41f927e1e6728f539409d9e2d9c7dd7c1ec6432b Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 6 Aug 2010 07:47:50 -0400 Subject: [PATCH] Moved hooke.plugin.fit -> polymer_fit & updated to Plugin/Command architecture. --- conf/fit.ini | 38 -- hooke/plugin/__init__.py | 2 +- hooke/plugin/fit.py | 780 ------------------------- hooke/plugin/polymer_fit.py | 1066 +++++++++++++++++++++++++++++++++++ hooke/ui/gui/__init__.py | 4 + hooke/util/fit.py | 56 ++ 6 files changed, 1127 insertions(+), 819 deletions(-) delete mode 100644 conf/fit.ini delete mode 100644 hooke/plugin/fit.py create mode 100644 hooke/plugin/polymer_fit.py diff --git a/conf/fit.ini b/conf/fit.ini deleted file mode 100644 index ecc0b99..0000000 --- a/conf/fit.ini +++ /dev/null @@ -1,38 +0,0 @@ -[fjc] - [[temperature]] - default = 293 - minimum = 0 - type = float - value = 293 - -[fjcPEG] - [[delta_G]] - default = 3 - minimum = 0 - type = float - value = 3 - - [[L_helical]] - default = 0.28 - minimum = 0 - type = float - value = 0.28 - - [[L_planar]] - default = 0.358 - minimum = 0 - type = float - value = 0.358 - - [[temperature]] - default = 293 - minimum = 0 - type = float - value = 293 - -[wlc] - [[temperature]] - default = 293 - minimum = 0 - type = float - value = 293 diff --git a/hooke/plugin/__init__.py b/hooke/plugin/__init__.py index adaf4c1..ee9cc8e 100644 --- a/hooke/plugin/__init__.py +++ b/hooke/plugin/__init__.py @@ -34,7 +34,6 @@ PLUGIN_MODULES = [ ('convfilt', True), ('cut', True), # ('fclamp', True), -# ('fit', True), # ('flatfilts-rolf', True), ('flatfilt', True), # ('jumpstat', True), @@ -43,6 +42,7 @@ PLUGIN_MODULES = [ # ('multidistance', True), # ('multifit', True), # ('pcluster', True), + ('polymer_fit', True), # ('procplots', True), # ('review', True), # ('showconvoluted', True), diff --git a/hooke/plugin/fit.py b/hooke/plugin/fit.py deleted file mode 100644 index c566a8b..0000000 --- a/hooke/plugin/fit.py +++ /dev/null @@ -1,780 +0,0 @@ -# Copyright (C) 2008-2010 Alberto Gomez-Casado -# Fabrizio Benedetti -# Massimo Sandal -# W. Trevor King -# -# This file is part of Hooke. -# -# Hooke is free software: you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# Hooke is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General -# Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with Hooke. If not, see -# . - -"""Force spectroscopy curves basic fitting plugin. - -Non-standard Dependencies: -procplots.py (plot processing plugin) -""" - -from ..libhooke import WX_GOOD, ClickedPoint - -import wxversion -wxversion.select(WX_GOOD) -#from wx import PostEvent -#from wx.lib.newevent import NewEvent -import scipy -import scipy.odr -import scipy.stats -import numpy as np -import copy -import Queue - -global measure_wlc -global EVT_MEASURE_WLC - -#measure_wlc, EVT_MEASURE_WLC = NewEvent() - -global events_from_fit -events_from_fit=Queue.Queue() #GUI ---> CLI COMMUNICATION - - -class fitCommands(object): - - def _plug_init(self): - self.wlccurrent=None - self.wlccontact_point=None - self.wlccontact_index=None - - def dist2fit(self): - '''Calculates the average distance from data to fit, scaled by the standard deviation - of the free cantilever area (thermal noise) - ''' - - - - def wlc_fit(self,clicked_points,xvector,yvector, pl_value, T=293, return_errors=False): - ''' - Worm-like chain model fitting. - The function is the simple polynomial worm-like chain as proposed by C.Bustamante, J.F.Marko, E.D.Siggia - and S.Smith (Science. 1994 Sep 9;265(5178):1599-600.) - ''' - - ''' - clicked_points[0] = contact point (calculated or hand-clicked) - clicked_points[1] and [2] are edges of chunk - ''' - - #STEP 1: Prepare the vectors to apply the fit. - if pl_value is not None: - pl_value=pl_value/(10**9) - - #indexes of the selected chunk - first_index=min(clicked_points[1].index, clicked_points[2].index) - last_index=max(clicked_points[1].index, clicked_points[2].index) - - #getting the chunk and reverting it - xchunk,ychunk=xvector[first_index:last_index],yvector[first_index:last_index] - xchunk.reverse() - ychunk.reverse() - #put contact point at zero and flip around the contact point (the fit wants a positive growth for extension and force) - xchunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xchunk] - ychunk_corr_up=[-(y-clicked_points[0].graph_coords[1]) for y in ychunk] - - #make them arrays - xchunk_corr_up=scipy.array(xchunk_corr_up) - ychunk_corr_up=scipy.array(ychunk_corr_up) - - - #STEP 2: actually do the fit - - #Find furthest point of chunk and add it a bit; the fit must converge - #from an excess! - xchunk_high=max(xchunk_corr_up) - xchunk_high+=(xchunk_high/10) - - #Here are the linearized start parameters for the WLC. - #[lambd=1/Lo , pii=1/P] - - p0=[(1/xchunk_high),(1/(3.5e-10))] - p0_plfix=[(1/xchunk_high)] - ''' - ODR STUFF - fixme: remove these comments after testing - ''' - def dist(px,py,linex,liney): - distancesx=scipy.array([(px-x)**2 for x in linex]) - minindex=np.argmin(distancesx) - print px, linex[0], linex[-1] - return (py-liney[minindex])**2 - - def f_wlc(params,x,T=T): - ''' - wlc function for ODR fitting - ''' - lambd,pii=params - Kb=(1.38065e-23) - therm=Kb*T - y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd)) - return y - - def f_wlc_plfix(params,x,pl_value=pl_value,T=T): - ''' - wlc function for ODR fitting - ''' - lambd=params - pii=1/pl_value - Kb=(1.38065e-23) - therm=Kb*T - y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd)) - return y - - #make the ODR fit - realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up) - if pl_value: - model=scipy.odr.Model(f_wlc_plfix) - o = scipy.odr.ODR(realdata, model, p0_plfix) - else: - model=scipy.odr.Model(f_wlc) - o = scipy.odr.ODR(realdata, model, p0) - - o.set_job(fit_type=2) - out=o.run() - fit_out=[(1/i) for i in out.beta] - - #Calculate fit errors from output standard deviations. - #We must propagate the error because we fit the *inverse* parameters! - #The error = (error of the inverse)*(value**2) - fit_errors=[] - for sd,value in zip(out.sd_beta, fit_out): - err_real=sd*(value**2) - fit_errors.append(err_real) - - def wlc_eval(x,params,pl_value,T): - ''' - Evaluates the WLC function - ''' - if not pl_value: - lambd, pii = params - else: - lambd = params - - if pl_value: - pii=1/pl_value - - Kb=(1.38065e-23) #boltzmann constant - therm=Kb*T #so we have thermal energy - - return ( (therm*pii/4.0) * (((1-(x*lambd))**-2.0) - 1 + (4.0*x*lambd)) ) - - - #STEP 3: plotting the fit - - #obtain domain to plot the fit - from contact point to last_index plus 20 points - thule_index=last_index+10 - if thule_index > len(xvector): #for rare cases in which we fit something at the END of whole curve. - thule_index = len(xvector) - #reverse etc. the domain - xfit_chunk=xvector[clicked_points[0].index:thule_index] - xfit_chunk.reverse() - xfit_chunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xfit_chunk] - xfit_chunk_corr_up=scipy.array(xfit_chunk_corr_up) - - #the fitted curve: reflip, re-uncorrect - yfit=wlc_eval(xfit_chunk_corr_up, out.beta, pl_value,T) - yfit_down=[-y for y in yfit] - yfit_corr_down=[y+clicked_points[0].graph_coords[1] for y in yfit_down] - - - #calculate fit quality - qsum=0 - yqeval=wlc_eval(xchunk_corr_up,out.beta,pl_value,T) - #we need to cut the extra from thuleindex - for qindex in np.arange(0,len(yqeval)): - qsum+=(yqeval[qindex]-ychunk_corr_up[qindex])**2 - qstd=np.sqrt(qsum/len(ychunk_corr_up)) - - - if return_errors: - return fit_out, yfit_corr_down, xfit_chunk, fit_errors, qstd - else: - return fit_out, yfit_corr_down, xfit_chunk, None, qstd - - def fjc_fit(self,clicked_points,xvector,yvector, pl_value, T=293, return_errors=False): - ''' - Freely-jointed chain function - ref: C.Ray and B.B. Akhremitchev; http://www.chem.duke.edu/~boris/research/force_spectroscopy/fit_efjc.pdf - ''' - ''' - clicked_points[0] is the contact point (calculated or hand-clicked) - clicked_points[1] and [2] are edges of chunk - ''' - - #STEP 1: Prepare the vectors to apply the fit. - if pl_value is not None: - pl_value=pl_value/(10**9) - - #indexes of the selected chunk - first_index=min(clicked_points[1].index, clicked_points[2].index) - last_index=max(clicked_points[1].index, clicked_points[2].index) - - #getting the chunk and reverting it - xchunk,ychunk=xvector[first_index:last_index],yvector[first_index:last_index] - xchunk.reverse() - ychunk.reverse() - #put contact point at zero and flip around the contact point (the fit wants a positive growth for extension and force) - xchunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xchunk] - ychunk_corr_up=[-(y-clicked_points[0].graph_coords[1]) for y in ychunk] - - - #make them arrays - xchunk_corr_up=scipy.array(xchunk_corr_up) - ychunk_corr_up=scipy.array(ychunk_corr_up) - - - #STEP 2: actually do the fit - - #Find furthest point of chunk and add it a bit; the fit must converge - #from an excess! - xchunk_high=max(xchunk_corr_up) - xchunk_high+=(xchunk_high/10) - - #Here are the linearized start parameters for the WLC. - #[lambd=1/Lo , pii=1/P] - - p0=[(1/xchunk_high),(1/(3.5e-10))] - p0_plfix=[(1/xchunk_high)] - ''' - ODR STUFF - fixme: remove these comments after testing - ''' - def dist(px,py,linex,liney): - distancesx=scipy.array([(px-x)**2 for x in linex]) - minindex=np.argmin(distancesx) - print minindex, px, linex[0], linex[-1] - return (py-liney[minindex])**2 - - def coth(z): - ''' - hyperbolic cotangent - ''' - return (np.exp(2*z)+1)/(np.exp(2*z)-1) - - def x_fjc(params,f,T=T): - ''' - fjc function for ODR fitting - ''' - lambd,pii=params - Kb=(1.38065e-23) - therm=Kb*T - - #x=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd)) - x=(1/lambd)*(coth(f*(1/pii)/therm) - (therm*pii)/f) - return x - - def x_fjc_plfix(params,f,pl_value=pl_value,T=T): - ''' - fjc function for ODR fitting - ''' - lambd=params - pii=1/pl_value - Kb=(1.38065e-23) - therm=Kb*T - #y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd)) - x=(1/lambd)*(coth(f*(1/pii)/therm) - (therm*pii)/f) - return x - - #make the ODR fit - realdata=scipy.odr.RealData(ychunk_corr_up,xchunk_corr_up) - if pl_value: - model=scipy.odr.Model(x_fjc_plfix) - o = scipy.odr.ODR(realdata, model, p0_plfix) - else: - model=scipy.odr.Model(x_fjc) - o = scipy.odr.ODR(realdata, model, p0) - - o.set_job(fit_type=2) - out=o.run() - fit_out=[(1/i) for i in out.beta] - - #Calculate fit errors from output standard deviations. - #We must propagate the error because we fit the *inverse* parameters! - #The error = (error of the inverse)*(value**2) - fit_errors=[] - for sd,value in zip(out.sd_beta, fit_out): - err_real=sd*(value**2) - fit_errors.append(err_real) - - def fjc_eval(y,params,pl_value,T): - ''' - Evaluates the WLC function - ''' - if not pl_value: - lambd, pii = params - else: - lambd = params - - if pl_value: - pii=1/pl_value - - Kb=(1.38065e-23) #boltzmann constant - therm=Kb*T #so we have thermal energy - #return ( (therm*pii/4.0) * (((1-(x*lambd))**-2.0) - 1 + (4.0*x*lambd)) ) - return (1/lambd)*(coth(y*(1/pii)/therm) - (therm*pii)/y) - - - - #STEP 3: plotting the fit - #obtain domain to plot the fit - from contact point to last_index plus 20 points - thule_index=last_index+10 - if thule_index > len(xvector): #for rare cases in which we fit something at the END of whole curve. - thule_index = len(xvector) - #reverse etc. the domain - ychunk=yvector[clicked_points[0].index:thule_index] - - if len(ychunk)>0: - y_evalchunk=np.linspace(min(ychunk),max(ychunk),100) - else: - #Empty y-chunk. It happens whenever we set the contact point after a recognized peak, - #or other buggy situations. Kludge to live with it now... - ychunk=yvector[:thule_index] - y_evalchunk=np.linspace(min(ychunk),max(ychunk),100) - - yfit_down=[-y for y in y_evalchunk] - yfit_corr_down=[y+clicked_points[0].graph_coords[1] for y in yfit_down] - yfit_corr_down=scipy.array(yfit_corr_down) - - #the fitted curve: reflip, re-uncorrect - xfit=fjc_eval(yfit_corr_down, out.beta, pl_value,T) - xfit=list(xfit) - xfit.reverse() - xfit_chunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xfit] - - #xfit_chunk_corr_up=scipy.array(xfit_chunk_corr_up) - #deltay=yfit_down[0]-yvector[clicked_points[0].index] - - #This is a terrible, terrible kludge to find the point where it should normalize (and from where it should plot) - xxxdists=[] - for index in scipy.arange(1,len(xfit_chunk_corr_up),1): - xxxdists.append((clicked_points[0].graph_coords[0]-xfit_chunk_corr_up[index])**2) - normalize_index=xxxdists.index(min(xxxdists)) - #End of kludge - - deltay=yfit_down[normalize_index]-clicked_points[0].graph_coords[1] - yfit_corr_down=[y-deltay for y in yfit_down] - - - #calculate fit quality - #creates dense y vector - yqeval=np.linspace(np.min(ychunk_corr_up)/2,np.max(ychunk_corr_up)*2,10*len(ychunk_corr_up)) - #corresponding fitted x - xqeval=fjc_eval(yqeval,out.beta,pl_value,T) - - qsum=0 - for qindex in np.arange(0,len(ychunk_corr_up)): - qsum+=dist(xchunk_corr_up[qindex],ychunk_corr_up[qindex],xqeval,yqeval) - qstd=np.sqrt(qsum/len(ychunk_corr_up)) - - - if return_errors: - return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], fit_errors, qstd - else: - return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], None, qstd - - def efjc_fit(self,clicked_points,xvector,yvector, pl_value, T=293.0, return_errors=False): - ''' - Extended Freely-jointed chain function - ref: F Oesterhelt, M Rief and H E Gaub, New Journal of Physics 1 (1999) 6.1–6.11 - Please note that 2-parameter fitting (length and kl) usually does not converge, use fixed kl - ''' - ''' - clicked_points[0] is the contact point (calculated or hand-clicked) - clicked_points[1] and [2] are edges of chunk - - ''' - #Fixed parameters from reference - Kb=(1.38065e-2) #in pN.nm - Lp=0.358 #planar, nm - Lh=0.280 #helical, nm - Ks=150e3 #pN/nm - - - #STEP 1: Prepare the vectors to apply the fit. - - #indexes of the selected chunk - first_index=min(clicked_points[1].index, clicked_points[2].index) - last_index=max(clicked_points[1].index, clicked_points[2].index) - - #getting the chunk and reverting it - xchunk,ychunk=xvector[first_index:last_index],yvector[first_index:last_index] - xchunk.reverse() - ychunk.reverse() - #put contact point at zero and flip around the contact point (the fit wants a positive growth for extension and force) - xchunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xchunk] - ychunk_corr_up=[-(y-clicked_points[0].graph_coords[1]) for y in ychunk] - - - #make them arrays - xchunk_corr_up=scipy.array(xchunk_corr_up) - ychunk_corr_up=scipy.array(ychunk_corr_up) - - xchunk_corr_up_nm=xchunk_corr_up*1e9 - ychunk_corr_up_pn=ychunk_corr_up*1e12 - - - #STEP 2: actually do the fit - - #Find furthest point of chunk and add it a bit; the fit must converge - #from an excess! - xchunk_high=max(xchunk_corr_up_nm) - xchunk_high+=(xchunk_high/10.0) - - #Here are the linearized start parameters for the WLC. - #[Ns , pii=1/P] - #max number of monomers (all helical)for a contour length xchunk_high - excessNs=xchunk_high/(Lp) - p0=[excessNs,(1.0/(0.7))] - p0_plfix=[(excessNs)] - - def dist(px,py,linex,liney): - distancesx=scipy.array([(px-x)**2 for x in linex]) - minindex=np.argmin(distancesx) - return (py-liney[minindex])**2 - - def deltaG(f): - dG0=12.4242 #3kt in pN.nm - dL=0.078 #planar-helical - return dG0-f*dL - - def Lfactor(f,T=T): - Lp=0.358 #planar, nm - Lh=0.280 #helical, nm - Kb=(1.38065e-2) - therm=Kb*T - dG=deltaG(f) - - return Lp/(np.exp(dG/therm)+1)+Lh/(np.exp(-dG/therm)+1) - - def coth(z): - ''' - hyperbolic cotangent - ''' - return 1.0/np.tanh(z) - - def x_efjc(params,f,T=T,Ks=Ks): - ''' - efjc function for ODR fitting - ''' - - Ns=params[0] - invkl=params[1] - Kb=(1.38065e-2) - therm=Kb*T - beta=(f/therm)/invkl - - x=Ns*Lfactor(f)*(coth(beta)-1.0/beta)+Ns*f/Ks - return x - - def x_efjc_plfix(params,f,kl_value=pl_value,T=T,Ks=Ks): - ''' - efjc function for ODR fitting - ''' - - Ns=params - invkl=1.0/kl_value - Kb=(1.38065e-2) - therm=Kb*T - beta=(f/therm)/invkl - - x=Ns*Lfactor(f)*(coth(beta)-1.0/beta)+Ns*f/Ks - return x - - #make the ODR fit - realdata=scipy.odr.RealData(ychunk_corr_up_pn,xchunk_corr_up_nm) - if pl_value: - model=scipy.odr.Model(x_efjc_plfix) - o = scipy.odr.ODR(realdata, model, p0_plfix) - else: - print 'WARNING eFJC fit with free pl sometimes does not converge' - model=scipy.odr.Model(x_efjc) - o = scipy.odr.ODR(realdata, model, p0) - - o.set_job(fit_type=2) - out=o.run() - - - Ns=out.beta[0] - Lc=Ns*Lp*1e-9 - if len(out.beta)>1: - kfit=1e-9/out.beta[1] - kfitnm=1/out.beta[1] - else: - kfit=1e-9*pl_value - kfitnm=pl_value - - fit_out=[Lc, kfit] - - #Calculate fit errors from output standard deviations. - fit_errors=[] - fit_errors.append(out.sd_beta[0]*Lp*1e-9) - if len(out.beta)>1: - fit_errors.append(1e9*out.sd_beta[1]*kfit**2) - - - - def efjc_eval(y,params,pl_value,T=T,Lfactor=Lfactor,Ks=Ks): - ''' - Evaluates the eFJC function - ''' - if not pl_value: - Ns, invkl = params - else: - Ns = params - - if pl_value: - invkl=1.0/pl_value - - Kb=(1.38065e-2) #boltzmann constant - therm=Kb*T #so we have thermal energy - beta=(y/therm)/invkl - - x= Ns*Lfactor(y)*(coth(beta)-1.0/beta)+Ns*y/Ks - - return x - - #STEP 3: plotting the fit - #obtain domain to plot the fit - from contact point to last_index plus 20 points - thule_index=last_index+10 - if thule_index > len(xvector): #for rare cases in which we fit something at the END of whole curve. - thule_index = len(xvector) - #reverse etc. the domain - ychunk=yvector[clicked_points[0].index:thule_index] - - if len(ychunk)>0: - y_evalchunk=np.linspace(min(ychunk),max(ychunk),100) - else: - #Empty y-chunk. It happens whenever we set the contact point after a recognized peak, - #or other buggy situations. Kludge to live with it now... - ychunk=yvector[:thule_index] - y_evalchunk=np.linspace(min(ychunk),max(ychunk),100) - - yfit_down=[-y for y in y_evalchunk] - yfit_corr_down=[y+clicked_points[0].graph_coords[1] for y in yfit_down] - yfit_corr_down=scipy.array(yfit_corr_down) - - #the fitted curve: reflip, re-uncorrect - xfit=efjc_eval(1e12*yfit_corr_down, out.beta, pl_value,T)*1e-9 - xfit=list(xfit) - xfit.reverse() - xfit_chunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xfit] - - #xfit_chunk_corr_up=scipy.array(xfit_chunk_corr_up) - #deltay=yfit_down[0]-yvector[clicked_points[0].index] - - #This is a terrible, terrible kludge to find the point where it should normalize (and from where it should plot) - xxxdists=[] - for index in scipy.arange(1,len(xfit_chunk_corr_up),1): - xxxdists.append((clicked_points[0].graph_coords[0]-xfit_chunk_corr_up[index])**2) - normalize_index=xxxdists.index(min(xxxdists)) - #End of kludge - - deltay=yfit_down[normalize_index]-clicked_points[0].graph_coords[1] - yfit_corr_down=[y-deltay for y in yfit_down] - - #calculate fit quality - #creates dense y vector - yqeval=np.linspace(np.min(ychunk_corr_up_pn)/2,np.max(ychunk_corr_up_pn)*2,10*len(ychunk_corr_up_pn)) - #corresponding fitted x - xqeval=efjc_eval(yqeval,out.beta,pl_value) - - qsum=0 - for qindex in np.arange(0,len(ychunk_corr_up_pn)): - qsum+=dist(xchunk_corr_up_nm[qindex],ychunk_corr_up_pn[qindex],xqeval,yqeval) - qstd=1e-12*np.sqrt(qsum/len(ychunk_corr_up_pn)) - - if return_errors: - return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], fit_errors, qstd - else: - return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], None, qstd - - - - - def do_wlc(self,args): - ''' - WLC - (fit.py plugin) - - See the fit command - ''' - self.do_fit(args) - - def do_fjc(self,args): - ''' - FJC - (fit.py plugin) - - See the fit command - ''' - self.do_fit(args) - - def do_fit(self,args): - ''' - FIT - (fit.py plugin) - Fits an entropic elasticity function to a given chunk of the curve. - - First you have to click a contact point. - Then you have to click the two edges of the data you want to fit. - - Fit quality compares the distance to the fit with the thermal noise (a good fit should be close to 1) - - The fit function depends on the fit_function variable. You can set it with the command - "set fit_function wlc" or "set fit_function fjc" depending on the function you prefer. - - For WLC, the function is the simple polynomial worm-like chain as proposed by - C.Bustamante, J.F.Marko, E.D.Siggia and S.Smith (Science. 1994 - Sep 9;265(5178):1599-600.) - - For FJC, ref: - C.Ray and B.B. Akhremitchev; http://www.chem.duke.edu/~boris/research/force_spectroscopy/fit_efjc.pdf - - For eFJC, ref: - F Oesterhelt, M Rief and H E Gaub, New Journal of Physics 1 (1999) 6.1–6.11 (section 4.2) - NOTE: use fixed pl for better results. - - Arguments: - pl=[value] : Use a fixed persistent length (WLC) or Kuhn length (FJC) for the fit. If pl is not given, - the fit will be a 2-variable - fit. DO NOT put spaces between 'pl', '=' and the value. - The value must be in nanometers. - - t=[value] : Use a user-defined temperature. The value must be in - kelvins; by default it is 293 K. - DO NOT put spaces between 't', '=' and the value. - - noauto : allows for clicking the contact point by - hand (otherwise it is automatically estimated) the first time. - If subsequent measurements are made, the same contact point - clicked is used - - reclick : redefines by hand the contact point, if noauto has been used before - but the user is unsatisfied of the previously choosen contact point. - --------- - Syntax: fit [pl=(value)] [t=value] [noauto] - ''' - pl_value=None - T=self.config['temperature'] - for arg in args.split(): - #look for a persistent length argument. - if 'pl=' in arg: - pl_expression=arg.split('=') - pl_value=float(pl_expression[1]) #actual value - #look for a T argument. FIXME: spaces are not allowed between 'pl' and value - if ('t=' in arg[0:2]) or ('T=' in arg[0:2]): - t_expression=arg.split('=') - T=float(t_expression[1]) - - #use the currently displayed plot for the fit - displayed_plot=self._get_displayed_plot() - - #handle contact point arguments correctly - if 'reclick' in args.split(): - print 'Click contact point' - contact_point=self._measure_N_points(N=1, whatset=1)[0] - contact_point_index=contact_point.index - self.wlccontact_point=contact_point - self.wlccontact_index=contact_point.index - self.wlccurrent=self.current.path - elif 'noauto' in args.split(): - if self.wlccontact_index==None or self.wlccurrent != self.current.path: - print 'Click contact point' - contact_point=self._measure_N_points(N=1, whatset=1)[0] - contact_point_index=contact_point.index - self.wlccontact_point=contact_point - self.wlccontact_index=contact_point.index - self.wlccurrent=self.current.path - else: - contact_point=self.wlccontact_point - contact_point_index=self.wlccontact_index - else: - cindex=self.find_contact_point() - contact_point=ClickedPoint() - contact_point.absolute_coords=displayed_plot.vectors[1][0][cindex], displayed_plot.vectors[1][1][cindex] - contact_point.find_graph_coords(displayed_plot.vectors[1][0], displayed_plot.vectors[1][1]) - contact_point.is_marker=True - - print 'Click edges of chunk' - points=self._measure_N_points(N=2, whatset=1) - points=[contact_point]+points - - try: - if self.config['fit_function']=='wlc': - params, yfit, xfit, fit_errors,qstd = self.wlc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True ) - name_of_charlength='Persistent length' - elif self.config['fit_function']=='fjc': - params, yfit, xfit, fit_errors,qstd = self.fjc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True ) - name_of_charlength='Kuhn length' - elif self.config['fit_function']=='efjc': - params, yfit, xfit, fit_errors,qstd = self.efjc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True ) - name_of_charlength='Kuhn length (e)' - else: - print 'No recognized fit function defined!' - print 'Set your fit function to wlc, fjc or efjc.' - return - - except: - print 'Fit not possible. Probably wrong interval -did you click two *different* points?' - return - - #FIXME: print "Kuhn length" for FJC - print 'Fit function:',self.config['fit_function'] - print 'Contour length: ',params[0]*(1.0e+9),' nm' - to_dump='contour '+self.current.path+' '+str(params[0]*(1.0e+9))+' nm' - self.outlet.push(to_dump) - if len(params)==2: #if we did choose 2-value fit - print name_of_charlength+': ',params[1]*(1.0e+9),' nm' - to_dump='persistent '+self.current.path+' '+str(params[1]*(1.0e+9))+' nm' - self.outlet.push(to_dump) - - if fit_errors: - fit_nm=[i*(10**9) for i in fit_errors] - print 'Standard deviation (contour length)', fit_nm[0] - if len(fit_nm)>1: - print 'Standard deviation ('+name_of_charlength+')', fit_nm[1] - - print 'Fit quality: '+str(qstd/np.std(displayed_plot.vectors[1][1][-20:-1])) - - - #add the clicked points in the final PlotObject - clickvector_x, clickvector_y=[], [] - for item in points: - clickvector_x.append(item.graph_coords[0]) - clickvector_y.append(item.graph_coords[1]) - - #create a custom PlotObject to gracefully plot the fit along the curves - - fitplot=copy.deepcopy(displayed_plot) - fitplot.add_set(xfit,yfit) - fitplot.add_set(clickvector_x,clickvector_y) - - #FIXME: this colour/styles stuff must be solved at the root! - if fitplot.styles==[]: - fitplot.styles=[None,None,None,'scatter'] - else: - fitplot.styles+=[None,'scatter'] - - if fitplot.colors==[]: - fitplot.colors=[None,None,None,None] - else: - fitplot.colors+=[None,None] - - self._send_plot([fitplot]) diff --git a/hooke/plugin/polymer_fit.py b/hooke/plugin/polymer_fit.py new file mode 100644 index 0000000..d5b43db --- /dev/null +++ b/hooke/plugin/polymer_fit.py @@ -0,0 +1,1066 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2008-2010 Alberto Gomez-Casado +# Fabrizio Benedetti +# Massimo Sandal +# W. Trevor King +# +# This file is part of Hooke. +# +# Hooke is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# Hooke is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General +# Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with Hooke. If not, see +# . + +"""The ``polymer_fit`` module proviews :class:`PolymerFitPlugin` and +serveral associated :class:`hooke.command.Command`\s for Fitting +velocity-clamp data to various polymer models (WLC, FJC, etc.). +""" + +import copy +import Queue +import StringIO +import sys + +import numpy +from scipy.optimize import newton, bisect + +from ..command import Command, Argument, Failure +from ..config import Setting +from ..curve import Data +from ..plugin import Plugin +from ..util.fit import PoorFit, ModelFitter +from ..util.callback import is_iterable +from .curve import CurveArgument +from .vclamp import scale + + +kB = 1.3806503e-23 # Joules/Kelvin + + +def coth(z): + """Hyperbolic cotangent. + + Examples + -------- + >>> coth(1.19967874) # doctest: +ELLIPSIS + 1.199678... + + Notes + ----- + From `MathWorld`_ + + .. math:: + \coth(z) \equiv \frac{\exp(z) + \exp(-z)}{\exp(z) - \exp(-z)} + = \frac{1}{\tanh(z)} + + .. _MathWorld: + http://mathworld.wolfram.com/HyperbolicCotangent.html + """ + return 1.0/numpy.tanh(z) + +def arccoth(z): + """Inverse hyperbolic cotangent. + + Examples + -------- + >>> arccoth(1.19967874) # doctest: +ELLIPSIS + 1.199678... + >>> arccoth(coth(numpy.pi)) # doctest: +ELLIPSIS + 3.1415... + + Notes + ----- + Inverting from the :func:`definition `. + + .. math:: + z \equiv \atanh\left( \frac{1}{\coth(z)} \right) + """ + return numpy.arctanh(1.0/z) + +def Langevin(z): + """Langevin function. + + Examples + -------- + >>> Langevin(numpy.pi) # doctest: +ELLIPSIS + 0.685... + + Notes + ----- + From `Wikipedia`_ or Hatfield [#]_ + + .. math:: + L(z) \equiv \coth(z) - \frac{1}{z} + + .. _Wikipedia: + http://en.wikipedia.org/wiki/Brillouin_and_Langevin_functions#Langevin_Function + + .. [#]: J.W. Hatfield and S.R. Quake + "Dynamic Properties of an Extended Polymer in Solution." + Phys. Rev. Lett. 1999. + doi: `10.1103/PhysRevLett.82.3548 ` + """ + return coth(z) - 1.0/z + +def inverse_Langevin(z, extreme=1.0 - 1e-8): + """Inverse Langevin function. + + Parameters + ---------- + z : float or array_like + object whose inverse Langevin will be returned + extreme : float + value (close to one) for which we assume the inverse is +/- + infinity. This avoids problems with Newton-Raphson + convergence in regions with low slope. + + Examples + -------- + >>> inverse_Langevin(Langevin(numpy.pi)) # doctest: +ELLIPSIS + 3.14159... + >>> inverse_Langevin(Langevin(numpy.array([1,2,3]))) # doctest: +ELLIPSIS + array([ 1., 2., 3.]) + + Notes + ----- + We approximate the inverse Langevin via Newton's method + (:func:`scipy.optimize.newton`). For the starting point, we take + the first three terms from the Taylor series (from `Wikipedia`_). + + .. math:: + L^{-1}(z) = 3z + \frac{9}{5}z^3 + \frac{297}{175}z^5 + \dots + + .. _Wikipedia: + http://en.wikipedia.org/wiki/Brillouin_and_Langevin_functions#Langevin_Function + """ + if is_iterable(z): + ret = numpy.ndarray(shape=z.shape, dtype=z.dtype) + for i,z_i in enumerate(z): + ret[i] = inverse_Langevin(z_i) + return ret + if z > extreme: + return numpy.inf + elif z < -extreme: + return -numpy.inf + # Catch stdout since sometimes the newton function prints exit + # messages to stdout, e.g. "Tolerance of %s reached". + stdout = sys.stdout + tmp_stdout = StringIO.StringIO() + sys.stdout = tmp_stdout + ret = newton(func=lambda x: Langevin(x)-z, + x0=3*z + 9./5.*z**3 + 297./175.*z**5,) + sys.stdout = stdout + output = tmp_stdout.getvalue() + return ret + +def FJC_fn(x_data, T, L, a): + """The freely jointed chain model. + + Parameters + ---------- + x_data : array_like + x values for which the WLC tension should be calculated. + T : float + temperature in Kelvin. + L : float + contour length in meters. + a : float + Kuhn length in meters. + + Returns + ------- + f_data : array + `F(x_data)`. + + Examples + -------- + >>> FJC_fn(numpy.array([1e-9, 5e-9, 10e-9], dtype=numpy.float), + ... T=300, L=15e-9, a=2.5e-10) # doctest: +ELLIPSIS + array([ 3.322...-12, 1.78...e-11, 4.889...e-11]) + + Notes + ----- + The freely jointed chain model is + + .. math:: + F(x) = \frac{k_B T}{a} L^{-1}\left( \frac{x}{L} \right) + + where :math:`L^{-1}` is the :func:`inverse_Langevin`, :math:`a` is + the Kuhn length, and :math:`L` is the contour length [#]_. For + the inverse formulation (:math:`x(F)`), see Ray and Akhremitchev [#]_. + + + .. [#]: J.W. Hatfield and S.R. Quake + "Dynamic Properties of an Extended Polymer in Solution." + Phys. Rev. Lett. 1999. + doi: `10.1103/PhysRevLett.82.3548 ` + + .. [#] C. Ray and B.B. Akhremitchev. + `"Fitting Force Curves by the Extended Freely Jointed Chain Model" ` + """ + return kB*T / a * inverse_Langevin(x_data/L) + +class FJC (ModelFitter): + """Fit the data to a freely jointed chain. + + Examples + -------- + Generate some example data + + >>> T = 300 # Kelvin + >>> L = 35e-9 # meters + >>> a = 2.5e-10 # meters + >>> x_data = numpy.linspace(10e-9, 30e-9, num=20) + >>> d_data = FJC_fn(x_data, T=T, L=L, a=a) + + Fit the example data with a two-parameter fit (`L` and `a`). + + >>> info = { + ... 'temperature (K)': T, + ... 'x data (m)': x_data, + ... } + >>> model = FJC(d_data, info=info, rescale=True) + >>> outqueue = Queue.Queue() + >>> Lp,a = model.fit(outqueue=outqueue) + >>> fit_info = outqueue.get(block=False) + >>> model.L(Lp) # doctest: +ELLIPSIS + 3.500...e-08 + >>> a # doctest: +ELLIPSIS + 2.499...e-10 + + Fit the example data with a one-parameter fit (`L`). We introduce + some error in our fixed Kuhn length for fun. + + >>> info['Kuhn length (m)'] = 2*a + >>> model = FJC(d_data, info=info, rescale=True) + >>> Lp = model.fit(outqueue=outqueue) + >>> fit_info = outqueue.get(block=False) + >>> model.L(Lp) # doctest: +ELLIPSIS + 3.199...e-08 + """ + def Lp(self, L): + """To avoid invalid values of `L`, we reparametrize with `Lp`. + + Parameters + ---------- + L : float + contour length in meters + + Returns + ------- + Lp : float + rescaled version of `L`. + + Examples + -------- + >>> x_data = numpy.linspace(1e-9, 20e-9, num=10) + >>> model = FJC(data=x_data, info={'x data (m)':x_data}) + >>> model.Lp(20e-9) # doctest: +ELLIPSIS + -inf + >>> model.Lp(19e-9) # doctest: +ELLIPSIS + nan + >>> model.Lp(21e-9) # doctest: +ELLIPSIS + -2.99... + >>> model.Lp(100e-9) # doctest: +ELLIPSIS + 1.386... + + Notes + ----- + The rescaling is designed to limit `L` to values strictly + greater than the maximum `x` data value, so we use + + .. math:: + L = [\exp(L_p))+1] * x_\text{max} + + which has the inverse + + .. math:: + L_p = \ln(L/x_\text{max}-1) + + This will obviously effect the interpretation of the fit's covariance matrix. + """ + x_max = self.info['x data (m)'].max() + return numpy.log(L/x_max - 1) + + def L(self, Lp): + """Inverse of :meth:`Lp`. + + Parameters + ---------- + Lp : float + rescaled version of `L` + + Returns + ------- + L : float + contour length in meters + + Examples + -------- + >>> x_data = numpy.linspace(1e-9, 20e-9, num=10) + >>> model = FJC(data=x_data, info={'x data (m)':x_data}) + >>> model.L(model.Lp(21e-9)) # doctest: +ELLIPSIS + 2.100...e-08 + >>> model.L(model.Lp(100e-9)) # doctest: +ELLIPSIS + 9.999...e-08 + """ + x_max = self.info['x data (m)'].max() + return (numpy.exp(Lp)+1)*x_max + + def model(self, params): + """Extract the relavant arguments and use :func:`FJC_fn`. + + Parameters + ---------- + params : list of floats + `[Lp, a]`, where the presence of `a` is optional. + """ + # setup convenient aliases + T = self.info['temperature (K)'] + x_data = self.info['x data (m)'] + L = self.L(params[0]) + if len(params) > 1: + a = abs(params[1]) + else: + a = self.info['Kuhn length (m)'] + # compute model data + self._model_data[:] = FJC_fn(x_data, T, L, a) + return self._model_data + + def guess_initial_params(self, outqueue=None): + """Guess initial fitting parameters. + + Returns + ------- + Lp : float + A guess at the reparameterized contour length in meters. + a : float (optional) + A guess at the Kuhn length in meters. If `.info` has a + setting for `'Kuhn length (m)'`, `a` is not returned. + """ + x_max = self.info['x data (m)'].max() + a_given = 'Kuhn length (m)' in self.info + if a_given: + a = self.info['Kuhn length (m)'] + else: + a = x_max / 10.0 + L = 1.5 * x_max + Lp = self.Lp(L) + if a_given: + return [Lp] + return [Lp, a] + + def guess_scale(self, params, outqueue=None): + """Guess parameter scales. + + Returns + ------- + Lp_scale : float + A guess at the reparameterized contour length scale in meters. + a_scale : float (optional) + A guess at the Kuhn length in meters. If the length of + `params` is less than 2, `a_scale` is not returned. + """ + Lp_scale = 1.0 + if len(params) == 1: + return [Lp_scale] + return [Lp_scale] + [x/10.0 for x in params[1:]] + + +def inverse_FJC_PEG_fn(F_data, T=300, N=1, k=150., Lp=3.58e-10, Lh=2.8e-10, dG=3., a=7e-10): + """Inverse poly(ethylene-glycol) adjusted extended FJC model. + + Examples + -------- + >>> kwargs = {'T':300.0, 'N':1, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10, + ... 'dG':3.0, 'a':7e-10} + >>> inverse_FJC_PEG_fn(F_data=200e-12, **kwargs) # doctest: +ELLIPSIS + 3.487...e-10 + + Notes + ----- + The function is that proposed by F. Oesterhelt, et al. [#]_. + + .. math:: + x(F) = N_\text{S} \cdot \left[ + \left( + \frac{L_\text{planar}}{ + \exp\left(-\Delta G/k_B T\right) + 1} + + \frac{L_\text{helical}}{ + \exp\left(+\Delta G/k_B T\right) + 1} + \right) \cdot \left( + \coth\left(\frac{Fa}{k_B T}\right) + - \frac{k_B T}{Fa} + \right) + + \frac{F}{K_\text{S}} + + where :math:`N_\text{S}` is the number of segments, + :math:`K_\text{S}` is the segment elasticicty, + :math:`L_\text{planar}` is the ttt contour length, + :math:`L_\text{helical}` is the ttg contour length, + :math:`\Delta G` is the Gibbs free energy difference + :math:`G_\text{planar}-G_\text{helical}`, + :math:`a` is the Kuhn length, + and :math:`F` is the chain tension. + + .. [#]: F. Oesterhelt, M. Rief, and H.E. Gaub. + "Single molecule force spectroscopy by AFM indicates helical + structure of poly(ethylene-glycol) in water." + New Journal of Physics. 1999. + doi: `10.1088/1367-2630/1/1/006 ` + (section 4.2) + """ + kBT = kB * T + g = (dG - F_data*(Lp-Lh)) / kBT + z = F_data*a/kBT + return N * ((Lp/(numpy.exp(-g)+1) + Lh/(numpy.exp(+g)+1)) * (coth(z)-1./z) + + F_data/k) + +def FJC_PEG_fn(x_data, T, N, k, Lp, Lh, dG, a): + """Poly(ethylene-glycol) adjusted extended FJC model. + + Parameters + ---------- + z : float or array_like + object whose inverse Langevin will be returned + extreme : float + value (close to one) for which we assume the inverse is +/- + infinity. This avoids problems with Newton-Raphson + convergence in regions with low slope. + + Examples + -------- + >>> kwargs = {'T':300.0, 'N':1, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10, + ... 'dG':3.0, 'a':7e-10} + >>> FJC_PEG_fn(inverse_FJC_PEG_fn(1e-11, **kwargs), **kwargs) # doctest: +ELLIPSIS + 9.999...e-12 + >>> FJC_PEG_fn(inverse_FJC_PEG_fn(3.4e-10, **kwargs), **kwargs) # doctest: +ELLIPSIS + 3.400...e-10 + >>> FJC_PEG_fn(numpy.array([1e-10,2e-10,3e-10]), **kwargs) # doctest: +ELLIPSIS + array([ 5.207...e-12, 1.257...e-11, 3.636...e-11]) + >>> kwargs['N'] = 123 + >>> FJC_PEG_fn(numpy.array([1e-8,2e-8,3e-8]), **kwargs) # doctest: +ELLIPSIS + array([ 4.160...e-12, 9.302...e-12, 1.830...e-11]) + + Notes + ----- + We approximate the PEG adjusted eJPG via bisection + (:func:`scipy.optimize.bisect`) Because it is more stable than + Newton's algorithm. For the starting point, we use the standard + JPG function with an averaged contour length. + """ + kwargs = {'T':T, 'N':N, 'k':k, 'Lp':Lp, 'Lh':Lh, 'dG':dG, 'a':a} + if is_iterable(x_data): + ret = numpy.ndarray(shape=x_data.shape, dtype=x_data.dtype) + for i,x in enumerate(x_data): + ret[i] = FJC_PEG_fn(x, **kwargs) + return ret + if x_data == 0: + return 0 + # Approximate with the simple FJC_fn() + guess = numpy.inf + L = N*max(Lp, Lh) + while guess == numpy.inf: + guess = FJC_fn(x_data, T=T, L=L, a=a) + L *= 2.0 + + if False: # Bisection + while inverse_FJC_PEG_fn(guess, **kwargs) < x_data: + guess *= 2.0 + lower = guess + while inverse_FJC_PEG_fn(lower, **kwargs) > x_data: + lower /= 2.0 + + fn = lambda f: inverse_FJC_PEG_fn(guess*abs(f), **kwargs) - x_data + return guess*abs(bisect(f=fn, a=lower/guess, b=1.0)) + else: # Newton + fn = lambda f: inverse_FJC_PEG_fn(guess*abs(f), **kwargs) - x_data + ret = guess*abs(newton(func=fn, x0=1.0)) + return ret + +class FJC_PEG (ModelFitter): + """Fit the data to an poly(ethylene-glycol) adjusted extended FJC. + + Examples + -------- + Generate some example data + + >>> kwargs = {'T':300.0, 'N':123, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10, + ... 'dG':3.0, 'a':7e-10} + >>> x_data = numpy.linspace(10e-9, 30e-9, num=20) + >>> d_data = FJC_PEG_fn(x_data, **kwargs) + + Fit the example data with a two-parameter fit (`N` and `a`). + + >>> info = { + ... 'temperature (K)': kwargs['T'], + ... 'x data (m)': x_data, + ... 'section elasticity (N/m)': kwargs['k'], + ... 'planar section length (m)': kwargs['Lp'], + ... 'helical section length (m)': kwargs['Lh'], + ... 'Gibbs free energy difference (Gp - Gh) (kBT)': kwargs['dG'], + ... } + >>> model = FJC_PEG(d_data, info=info, rescale=True) + >>> outqueue = Queue.Queue() + >>> Nr,a = model.fit(outqueue=outqueue) + >>> fit_info = outqueue.get(block=False) + >>> model.L(Nr) # doctest: +ELLIPSIS + 122.999... + >>> a # doctest: +ELLIPSIS + 6.999...e-10 + + Fit the example data with a one-parameter fit (`N`). We introduce + some error in our fixed Kuhn length for fun. + + >>> info['Kuhn length (m)'] = 2*kwargs['a'] + >>> model = FJC_PEG(d_data, info=info, rescale=True) + >>> Nr = model.fit(outqueue=outqueue) + >>> fit_info = outqueue.get(block=False) + >>> model.L(Nr) # doctest: +ELLIPSIS + 96.931... + """ + def Lr(self, L): + """To avoid invalid values of `L`, we reparametrize with `Lr`. + + Parameters + ---------- + L : float + contour length in meters + + Returns + ------- + Lr : float + rescaled version of `L`. + + Examples + -------- + >>> x_data = numpy.linspace(1e-9, 20e-9, num=10) + >>> model = FJC_PEG(data=x_data, info={'x data (m)':x_data}) + >>> model.Lr(20e-9) # doctest: +ELLIPSIS + 0.0 + >>> model.Lr(19e-9) # doctest: +ELLIPSIS + -0.0512... + >>> model.Lr(21e-9) # doctest: +ELLIPSIS + 0.0487... + >>> model.Lr(100e-9) # doctest: +ELLIPSIS + 1.609... + + Notes + ----- + The rescaling is designed to limit `L` to values strictly + greater than zero, so we use + + .. math:: + L = \exp(L_p) * x_\text{max} + + which has the inverse + + .. math:: + L_p = \ln(L/x_\text{max}) + + This will obviously effect the interpretation of the fit's covariance matrix. + """ + x_max = self.info['x data (m)'].max() + return numpy.log(L/x_max) + + def L(self, Lr): + """Inverse of :meth:`Lr`. + + Parameters + ---------- + Lr : float + rescaled version of `L` + + Returns + ------- + L : float + contour length in meters + + Examples + -------- + >>> x_data = numpy.linspace(1e-9, 20e-9, num=10) + >>> model = FJC_PEG(data=x_data, info={'x data (m)':x_data}) + >>> model.L(model.Lr(21e-9)) # doctest: +ELLIPSIS + 2.100...e-08 + >>> model.L(model.Lr(100e-9)) # doctest: +ELLIPSIS + 9.999...e-08 + """ + x_max = self.info['x data (m)'].max() + return numpy.exp(Lr)*x_max + + def _kwargs(self): + return { + 'T': self.info['temperature (K)'], + 'k': self.info['section elasticity (N/m)'], + 'Lp': self.info['planar section length (m)'], + 'Lh': self.info['helical section length (m)'], + 'dG': self.info['Gibbs free energy difference (Gp - Gh) (kBT)'], + } + + def model(self, params): + """Extract the relavant arguments and use :func:`FJC_PEG_fn`. + + Parameters + ---------- + params : list of floats + `[N, a]`, where the presence of `a` is optional. + """ + # setup convenient aliases + T = self.info['temperature (K)'] + x_data = self.info['x data (m)'] + N = self.L(params[0]) + if len(params) > 1: + a = abs(params[1]) + else: + a = self.info['Kuhn length (m)'] + kwargs = self._kwargs() + # compute model data + self._model_data[:] = FJC_PEG_fn(x_data, N=N, a=a, **kwargs) + return self._model_data + + def guess_initial_params(self, outqueue=None): + """Guess initial fitting parameters. + + Returns + ------- + N : float + A guess at the section count. + a : float (optional) + A guess at the Kuhn length in meters. If `.info` has a + setting for `'Kuhn length (m)'`, `a` is not returned. + """ + x_max = self.info['x data (m)'].max() + a_given = 'Kuhn length (m)' in self.info + if a_given: + a = self.info['Kuhn length (m)'] + else: + a = x_max / 10.0 + f_max = self._data.max() + kwargs = self._kwargs() + kwargs['a'] = a + x_section = inverse_FJC_PEG_fn(F_data=f_max, **kwargs) + N = x_max / x_section; + Nr = self.Lr(N) + if a_given: + return [Nr] + return [Nr, a] + + def guess_scale(self, params, outqueue=None): + """Guess parameter scales. + + Returns + ------- + N_scale : float + A guess at the section count scale in meters. + a_scale : float (optional) + A guess at the Kuhn length in meters. If the length of + `params` is less than 2, `a_scale` is not returned. + """ + return [x/10.0 for x in params] + + +def WLC_fn(x_data, T, L, p): + """The worm like chain interpolation model. + + Parameters + ---------- + x_data : array_like + x values for which the WLC tension should be calculated. + T : float + temperature in Kelvin. + L : float + contour length in meters. + p : float + persistence length in meters. + + Returns + ------- + f_data : array + `F(x_data)`. + + Examples + -------- + >>> WLC_fn(numpy.array([1e-9, 5e-9, 10e-9], dtype=numpy.float), + ... T=300, L=15e-9, p=2.5e-10) # doctest: +ELLIPSIS + array([ 1.717...e-12, 1.070...e-11, 4.418...e-11]) + + Notes + ----- + The function is the simple polynomial worm like chain + interpolation forumula proposed by C. Bustamante, et al. [#]_. + + .. math:: + F(x) = \frac{k_B T}{p} \left[ + \frac{1}{4}\left( \frac{1}{(1-x/L)^2} - 1 \right) + + \frac{x}{L} + \right] + + .. [#] C. Bustamante, J.F. Marko, E.D. Siggia, and S.Smith. + "Entropic elasticity of lambda-phage DNA." + Science. 1994. + doi: `10.1126/science.8079175 ` + """ + a = kB * T / p + scaled_data = x_data / L + return a * (0.25*((1-scaled_data)**-2 - 1) + scaled_data) + +class WLC (ModelFitter): + """Fit the data to a worm like chain. + + Examples + -------- + Generate some example data + + >>> T = 300 # Kelvin + >>> L = 35e-9 # meters + >>> p = 2.5e-10 # meters + >>> x_data = numpy.linspace(10e-9, 30e-9, num=20) + >>> d_data = WLC_fn(x_data, T=T, L=L, p=p) + + Fit the example data with a two-parameter fit (`L` and `p`). + + >>> info = { + ... 'temperature (K)': T, + ... 'x data (m)': x_data, + ... } + >>> model = WLC(d_data, info=info, rescale=True) + >>> outqueue = Queue.Queue() + >>> Lp,p = model.fit(outqueue=outqueue) + >>> fit_info = outqueue.get(block=False) + >>> model.L(Lp) # doctest: +ELLIPSIS + 3.500...e-08 + >>> p # doctest: +ELLIPSIS + 2.500...e-10 + + Fit the example data with a one-parameter fit (`L`). We introduce + some error in our fixed persistence length for fun. + + >>> info['persistence length (m)'] = 2*p + >>> model = WLC(d_data, info=info, rescale=True) + >>> Lp = model.fit(outqueue=outqueue) + >>> fit_info = outqueue.get(block=False) + >>> model.L(Lp) # doctest: +ELLIPSIS + 3.318...e-08 + """ + def Lp(self, L): + """To avoid invalid values of `L`, we reparametrize with `Lp`. + + Parameters + ---------- + L : float + contour length in meters + + Returns + ------- + Lp : float + rescaled version of `L`. + + Examples + -------- + >>> x_data = numpy.linspace(1e-9, 20e-9, num=10) + >>> model = WLC(data=x_data, info={'x data (m)':x_data}) + >>> model.Lp(20e-9) # doctest: +ELLIPSIS + -inf + >>> model.Lp(19e-9) # doctest: +ELLIPSIS + nan + >>> model.Lp(21e-9) # doctest: +ELLIPSIS + -2.99... + >>> model.Lp(100e-9) # doctest: +ELLIPSIS + 1.386... + + Notes + ----- + The rescaling is designed to limit `L` to values strictly + greater than the maximum `x` data value, so we use + + .. math:: + L = [\exp(L_p))+1] * x_\text{max} + + which has the inverse + + .. math:: + L_p = \ln(L/x_\text{max}-1) + + This will obviously effect the interpretation of the fit's covariance matrix. + """ + x_max = self.info['x data (m)'].max() + return numpy.log(L/x_max - 1) + + def L(self, Lp): + """Inverse of :meth:`Lp`. + + Parameters + ---------- + Lp : float + rescaled version of `L` + + Returns + ------- + L : float + contour length in meters + + Examples + -------- + >>> x_data = numpy.linspace(1e-9, 20e-9, num=10) + >>> model = WLC(data=x_data, info={'x data (m)':x_data}) + >>> model.L(model.Lp(21e-9)) # doctest: +ELLIPSIS + 2.100...e-08 + >>> model.L(model.Lp(100e-9)) # doctest: +ELLIPSIS + 9.999...e-08 + """ + x_max = self.info['x data (m)'].max() + return (numpy.exp(Lp)+1)*x_max + + def model(self, params): + """Extract the relavant arguments and use :func:`WLC_fn`. + + Parameters + ---------- + params : list of floats + `[Lp, p]`, where the presence of `p` is optional. + """ + # setup convenient aliases + T = self.info['temperature (K)'] + x_data = self.info['x data (m)'] + L = self.L(params[0]) + if len(params) > 1: + p = abs(params[1]) + else: + p = self.info['persistence length (m)'] + # compute model data + self._model_data[:] = WLC_fn(x_data, T, L, p) + return self._model_data + + def guess_initial_params(self, outqueue=None): + """Guess initial fitting parameters. + + Returns + ------- + Lp : float + A guess at the reparameterized contour length in meters. + p : float (optional) + A guess at the persistence length in meters. If `.info` + has a setting for `'persistence length (m)'`, `p` is not + returned. + """ + x_max = self.info['x data (m)'].max() + p_given = 'persistence length (m)' in self.info + if p_given: + p = self.info['persistence length (m)'] + else: + p = x_max / 10.0 + L = 1.5 * x_max + Lp = self.Lp(L) + if p_given: + return [Lp] + return [Lp, p] + + def guess_scale(self, params, outqueue=None): + """Guess parameter scales. + + Returns + ------- + Lp_scale : float + A guess at the reparameterized contour length scale in meters. + p_scale : float (optional) + A guess at the persistence length in meters. If the + length of `params` is less than 2, `p_scale` is not + returned. + """ + Lp_scale = 1.0 + if len(params) == 1: + return [Lp_scale] + return [Lp_scale] + [x/10.0 for x in params[1:]] + + +class PolymerFitPlugin (Plugin): + """Polymer model (WLC, FJC, etc.) fitting. + """ + def __init__(self): + super(PolymerFitPlugin, self).__init__(name='polymer_fit') + self._commands = [PolymerFitCommand(self),] + + def dependencies(self): + return ['vclamp'] + + def default_settings(self): + return [ + Setting(section=self.setting_section, help=self.__doc__), + Setting(section=self.setting_section, + option='polymer model', + value='WLC', + help="Select the default polymer model for 'polymer fit'. See the documentation for descriptions of available algorithms."), + Setting(section=self.setting_section, + option='FJC Kuhn length', + value=4e-10, type='float', + help='Kuhn length in meters'), + Setting(section=self.setting_section, + option='FJC-PEG Kuhn length', + value=4e-10, type='float', + help='Kuhn length in meters'), + Setting(section=self.setting_section, + option='FJC-PEG elasticity', + value=150.0, type='float', + help='Elasticity of a PEG segment in Newtons per meter.'), + Setting(section=self.setting_section, + option='FJC-PEG delta G', + value=3.0, type='float', + help='Gibbs free energy difference between trans-trans-trans (ttt) and trans-trans-gauche (ttg) PEG states in units of kBT.'), + Setting(section=self.setting_section, + option='FJC-PEG L_helical', + value=2.8e-10, type='float', + help='Contour length of PEG in the ttg state.'), + Setting(section=self.setting_section, + option='FJC-PEG L_planar', + value=3.58e-10, type='float', + help='Contour length of PEG in the ttt state.'), + Setting(section=self.setting_section, + option='WLC persistence length', + value=4e-10, type='float', + help='Persistence length in meters'), + ] + + +class PolymerFitCommand (Command): + """Polymer model (WLC, FJC, etc.) fitting. + + Fits an entropic elasticity function to a given chunk of the + curve. Fit quality compares the residual with the thermal noise + (a good fit should be 1 or less). + + Because the models are complicated and varied, you should + configure the command by setting configuration options instead of + using command arguments. TODO: argument_to_setting() + """ + def __init__(self, plugin): + super(PolymerFitCommand, self).__init__( + name='polymer fit', + arguments=[ + CurveArgument, + Argument(name='block', aliases=['set'], type='int', default=0, + help=""" +Data block for which the fit should be calculated. For an +approach/retract force curve, `0` selects the approaching curve and +`1` selects the retracting curve. +""".strip()), + Argument(name='bounds', type='point', optional=False, count=2, + help=""" +Indicies of points bounding the selected data. +""".strip()), + ], + help=self.__doc__, plugin=plugin) + + def _run(self, hooke, inqueue, outqueue, params): + scale(hooke, params['curve'], params['block']) # TODO: is autoscaling a good idea? (explicit is better than implicit) + data = params['curve'].data[params['block']] + # HACK? rely on params['curve'] being bound to the local hooke + # playlist (i.e. not a copy, as you would get by passing a + # curve through the queue). Ugh. Stupid queues. As an + # alternative, we could pass lookup information through the + # queue... + model = self.plugin.config['polymer model'] + new = Data((data.shape[0], data.shape[1]+1), dtype=data.dtype) + new.info = copy.deepcopy(data.info) + new[:,:-1] = data + new.info['columns'].append('%s tension (N)' % model) # TODO: WLC fit for each peak, etc. + z_data = data[:,data.info['columns'].index('surface distance (m)')] + d_data = data[:,data.info['columns'].index('deflection (N)')] + start,stop = params['bounds'] + tension_data,ps = self.fit_polymer_model( + params['curve'], z_data, d_data, start, stop, outqueue) + new.info['%s polymer fit parameters' % model] = ps + new[:,-1] = tension_data + params['curve'].data[params['block']] = new + + def fit_polymer_model(self, curve, z_data, d_data, start, stop, + outqueue=None): + """Railyard for the `fit_*_model` family. + + Uses the `polymer model` configuration setting to call the + appropriate backend algorithm. + """ + fn = getattr(self, 'fit_%s_model' + % self.plugin.config['polymer model'].replace('-','_')) + return fn(curve, z_data, d_data, start, stop, outqueue) + + def fit_FJC_model(self, curve, z_data, d_data, start, stop, + outqueue=None): + """Fit the data with :class:`FJC`. + """ + """Fit the data with :class:`WLC`. + """ + info = { + 'temperature (K)': self.plugin.config['temperature'], + 'x data (m)': z_data, + } + if True: # TODO: optionally free persistence length + info['Kuhn length (m)'] = ( + self.plugin.config['FJC Kuhn length']) + model = FJC(d_data, info=info) + queue = Queue.Queue() + params = model.fit(outqueue=queue) + if True: # TODO: if Kuhn length fixed + params = [params] + a = info['Kuhn length (m)'] + else: + a = params[1] + Lp = params[0] + L = model.L(Lp) + T = info['temperature (K)'] + fit_info = queue.get(block=False) + mask = numpy.zeros(z_data.shape, dtype=numpy.bool) + mask[start:stop] = True + return [FJC_fn(z_data, T=T, L=L, a=a) * mask, + fit_info] + + def fit_FJC_PEG_model(self, curve, z_data, d_data, start, stop, + outqueue=None): + """Fit the data with :class:`FJC_PEG`. + """ + pass + + def fit_WLC_model(self, curve, z_data, d_data, start, stop, + outqueue=None): + """Fit the data with :class:`WLC`. + """ + info = { + 'temperature (K)': self.plugin.config['temperature'], + 'x data (m)': z_data, + } + if True: # TODO: optionally free persistence length + info['persistence length (m)'] = ( + self.plugin.config['WLC persistence length']) + model = WLC(d_data, info=info) + queue = Queue.Queue() + params = model.fit(outqueue=queue) + if True: # TODO: if persistence length fixed + params = [params] + p = info['persistence length (m)'] + else: + p = params[1] + Lp = params[0] + L = model.L(Lp) + T = info['temperature (K)'] + fit_info = queue.get(block=False) + mask = numpy.zeros(z_data.shape, dtype=numpy.bool) + mask[start:stop] = True + return [WLC_fn(z_data, T=T, L=L, p=p) * mask, + fit_info] + + +# TODO: +# def dist2fit(self): +# '''Calculates the average distance from data to fit, scaled by +# the standard deviation of the free cantilever area (thermal +# noise). +# ''' diff --git a/hooke/ui/gui/__init__.py b/hooke/ui/gui/__init__.py index ed03c2f..3bc29d5 100644 --- a/hooke/ui/gui/__init__.py +++ b/hooke/ui/gui/__init__.py @@ -123,6 +123,10 @@ class HookeFrame (wx.Frame): command=self._command_by_name('load playlist'), args={'input':'test/data/vclamp_picoforce/playlist'}, ) + self.execute_command( + command=self._command_by_name('polymer fit'), + args={'block':1, 'bounds':[400, 1000]}, + ) return # TODO: cleanup self.playlists = self._c['playlist'].Playlists self._displayed_plot = None diff --git a/hooke/util/fit.py b/hooke/util/fit.py index 4ae0e18..d5e4d03 100644 --- a/hooke/util/fit.py +++ b/hooke/util/fit.py @@ -21,6 +21,7 @@ from numpy import arange, ndarray from scipy.optimize import leastsq +import scipy.optimize class PoorFit (ValueError): @@ -29,6 +30,10 @@ class PoorFit (ValueError): class ModelFitter (object): """A convenient wrapper around :func:`scipy.optimize.leastsq`. + TODO: look into :mod:`scipy.odr` as an alternative fitting + algorithm (minimizing the sum of squares of orthogonal distances, + vs. minimizing y distances). + Parameters ---------- d_data : array_like @@ -217,3 +222,54 @@ class ModelFitter (object): 'convergence flag': ier, }) return params + +# Example ORD code from the old fit.py +# def dist(px,py,linex,liney): +# distancesx=scipy.array([(px-x)**2 for x in linex]) +# minindex=numpy.argmin(distancesx) +# print px, linex[0], linex[-1] +# return (py-liney[minindex])**2 +# +# +# def f_wlc(params,x,T=T): +# ''' +# wlc function for ODR fitting +# ''' +# lambd,pii=params +# Kb=(1.38065e-23) +# therm=Kb*T +# y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd)) +# return y +# +# def f_wlc_plfix(params,x,pl_value=pl_value,T=T): +# ''' +# wlc function for ODR fitting +# ''' +# lambd=params +# pii=1/pl_value +# Kb=(1.38065e-23) +# therm=Kb*T +# y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd)) +# return y +# +# #make the ODR fit +# realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up) +# if pl_value: +# model=scipy.odr.Model(f_wlc_plfix) +# o = scipy.odr.ODR(realdata, model, p0_plfix) +# else: +# model=scipy.odr.Model(f_wlc) +# o = scipy.odr.ODR(realdata, model, p0) +# +# o.set_job(fit_type=2) +# out=o.run() +# fit_out=[(1/i) for i in out.beta] +# +# #Calculate fit errors from output standard deviations. +# #We must propagate the error because we fit the *inverse* parameters! +# #The error = (error of the inverse)*(value**2) +# fit_errors=[] +# for sd,value in zip(out.sd_beta, fit_out): +# err_real=sd*(value**2) +# fit_errors.append(err_real) + -- 2.26.2