WLC and FJC output are now with 2 decimal precision. Added a comment on autopeak...
[hooke.git] / fit.py
diff --git a/fit.py b/fit.py
index e1e78d78fd2ad0219d5ba23ee8ad390315cbf659..12f04e9d2c9f0a65fe7154882206bad441570272 100755 (executable)
--- a/fit.py
+++ b/fit.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 '''
 FIT
@@ -35,6 +36,13 @@ class fitCommands:
         self.wlccurrent=None
         self.wlccontact_point=None
         self.wlccontact_index=None
+        
+    def dist2fit(self):
+        '''Calculates the average distance from data to fit, scaled by the standard deviation
+        of the free cantilever area (thermal noise)
+        '''
+                
+        
     
     def wlc_fit(self,clicked_points,xvector,yvector, pl_value, T=293, return_errors=False):
         '''
@@ -43,11 +51,12 @@ class fitCommands:
         and S.Smith (Science. 1994 Sep 9;265(5178):1599-600.)
         '''
     
-        '''clicked_points[0] = contact point (calculated or hand-clicked)
-        clicked_points[1] and [2] are edges of chunk'''
+        '''
+        clicked_points[0] = contact point (calculated or hand-clicked)
+        clicked_points[1] and [2] are edges of chunk
+        '''
     
         #STEP 1: Prepare the vectors to apply the fit.
-        
         if pl_value is not None:
             pl_value=pl_value/(10**9)
         
@@ -84,7 +93,11 @@ class fitCommands:
         ODR STUFF
         fixme: remove these comments after testing
         '''
-        
+        def dist(px,py,linex,liney):
+            distancesx=scipy.array([(px-x)**2 for x in linex])
+            minindex=np.argmin(distancesx)
+            print px, linex[0], linex[-1]
+            return (py-liney[minindex])**2
         
         def f_wlc(params,x,T=T):
             '''
@@ -145,6 +158,7 @@ class fitCommands:
         
             return ( (therm*pii/4.0) * (((1-(x*lambd))**-2.0) - 1 + (4.0*x*lambd)) )
         
+        
         #STEP 3: plotting the fit
         
         #obtain domain to plot the fit - from contact point to last_index plus 20 points
@@ -161,21 +175,32 @@ class fitCommands:
         yfit=wlc_eval(xfit_chunk_corr_up, out.beta, pl_value,T)
         yfit_down=[-y for y in yfit]
         yfit_corr_down=[y+clicked_points[0].graph_coords[1] for y in yfit_down]
+        
+        
+        #calculate fit quality 
+        qsum=0
+        yqeval=wlc_eval(xchunk_corr_up,out.beta,pl_value,T)
+        #we need to cut the extra from thuleindex
+        for qindex in np.arange(0,len(yqeval)):
+            qsum+=(yqeval[qindex]-ychunk_corr_up[qindex])**2        
+        qstd=np.sqrt(qsum/len(ychunk_corr_up))        
+        
     
         if return_errors:
-            return fit_out, yfit_corr_down, xfit_chunk, fit_errors
+            return fit_out, yfit_corr_down, xfit_chunk, fit_errors, qstd
         else:
-            return fit_out, yfit_corr_down, xfit_chunk, None
+            return fit_out, yfit_corr_down, xfit_chunk, None, qstd
     
     def fjc_fit(self,clicked_points,xvector,yvector, pl_value, T=293, return_errors=False):
         '''
         Freely-jointed chain function
         ref: C.Ray and B.B. Akhremitchev; http://www.chem.duke.edu/~boris/research/force_spectroscopy/fit_efjc.pdf
         '''
-    
-        '''clicked_points[0] = contact point (calculated or hand-clicked)
-        clicked_points[1] and [2] are edges of chunk'''
-    
+        '''
+        clicked_points[0] is the contact point (calculated or hand-clicked)
+        clicked_points[1] and [2] are edges of chunk
+        '''
+        
         #STEP 1: Prepare the vectors to apply the fit.
         if pl_value is not None:
             pl_value=pl_value/(10**9)
@@ -183,7 +208,7 @@ class fitCommands:
         #indexes of the selected chunk
         first_index=min(clicked_points[1].index, clicked_points[2].index)
         last_index=max(clicked_points[1].index, clicked_points[2].index)
-               
+        
         #getting the chunk and reverting it
         xchunk,ychunk=xvector[first_index:last_index],yvector[first_index:last_index]
         xchunk.reverse()
@@ -192,6 +217,7 @@ class fitCommands:
         xchunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xchunk]
         ychunk_corr_up=[-(y-clicked_points[0].graph_coords[1]) for y in ychunk]
         
+        
         #make them arrays
         xchunk_corr_up=scipy.array(xchunk_corr_up)
         ychunk_corr_up=scipy.array(ychunk_corr_up)
@@ -213,6 +239,12 @@ class fitCommands:
         ODR STUFF
         fixme: remove these comments after testing
         '''
+        def dist(px,py,linex,liney):
+            distancesx=scipy.array([(px-x)**2 for x in linex])
+            minindex=np.argmin(distancesx)
+            print minindex, px, linex[0], linex[-1]
+            return (py-liney[minindex])**2
+        
         def coth(z):
             '''
             hyperbolic cotangent
@@ -280,32 +312,24 @@ class fitCommands:
             therm=Kb*T #so we have thermal energy
             #return ( (therm*pii/4.0) * (((1-(x*lambd))**-2.0) - 1 + (4.0*x*lambd)) )
             return (1/lambd)*(coth(y*(1/pii)/therm) - (therm*pii)/y)
-            
+           
         
         #STEP 3: plotting the fit
-        
         #obtain domain to plot the fit - from contact point to last_index plus 20 points
         thule_index=last_index+10
         if thule_index > len(xvector): #for rare cases in which we fit something at the END of whole curve.
             thule_index = len(xvector)
         #reverse etc. the domain
-        '''
-        xfit_chunk=xvector[clicked_points[0].index:thule_index]
-        xfit_chunk.reverse()
-        xfit_chunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xfit_chunk]
-        xfit_chunk_corr_up=scipy.array(xfit_chunk_corr_up)
-    
-        #the fitted curve: reflip, re-uncorrect
-        yfit=wlc_eval(xfit_chunk_corr_up, out.beta, pl_value,T)
-        yfit_down=[-y for y in yfit]
-        yfit_corr_down=[y+clicked_points[0].graph_coords[1] for y in yfit_down]
-        '''
-        #xfit_chunk=xvector[clicked_points[0].index:thule_index]
-        
-        #yfit=wlc_eval(xfit_chunk_corr_up, out.beta, pl_value,T)
         ychunk=yvector[clicked_points[0].index:thule_index]
-        #print clicked_points[0].graph_coords[1]
-        y_evalchunk=np.linspace(min(ychunk),max(ychunk),100)
+
+        if len(ychunk)>0:
+            y_evalchunk=np.linspace(min(ychunk),max(ychunk),100)
+        else:
+            #Empty y-chunk. It happens whenever we set the contact point after a recognized peak,
+            #or other buggy situations. Kludge to live with it now...
+            ychunk=yvector[:thule_index]
+            y_evalchunk=np.linspace(min(ychunk),max(ychunk),100)
+            
         yfit_down=[-y for y in y_evalchunk]
         yfit_corr_down=[y+clicked_points[0].graph_coords[1] for y in yfit_down]
         yfit_corr_down=scipy.array(yfit_corr_down)
@@ -317,32 +341,300 @@ class fitCommands:
         xfit_chunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xfit]
         
         #xfit_chunk_corr_up=scipy.array(xfit_chunk_corr_up)
-               
+        #deltay=yfit_down[0]-yvector[clicked_points[0].index]
         
+        #This is a terrible, terrible kludge to find the point where it should normalize (and from where it should plot)
+        xxxdists=[]
+        for index in scipy.arange(1,len(xfit_chunk_corr_up),1):
+            xxxdists.append((clicked_points[0].graph_coords[0]-xfit_chunk_corr_up[index])**2)           
+        normalize_index=xxxdists.index(min(xxxdists))
+        #End of kludge
+        
+        deltay=yfit_down[normalize_index]-clicked_points[0].graph_coords[1]
+        yfit_corr_down=[y-deltay for y in yfit_down]
+        
+        
+        #calculate fit quality
+        #creates dense y vector
+        yqeval=np.linspace(np.min(ychunk_corr_up)/2,np.max(ychunk_corr_up)*2,10*len(ychunk_corr_up))
+        #corresponding fitted x
+        xqeval=fjc_eval(yqeval,out.beta,pl_value,T)
+        
+        qsum=0
+        for qindex in np.arange(0,len(ychunk_corr_up)):
+            qsum+=dist(xchunk_corr_up[qindex],ychunk_corr_up[qindex],xqeval,yqeval)        
+        qstd=np.sqrt(qsum/len(ychunk_corr_up))        
         
-        #print yfit_chunk_corr_up
-        #print xfit_corr_down
             
         if return_errors:
-            return fit_out, yfit_corr_down, xfit_chunk_corr_up, fit_errors
+            return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], fit_errors, qstd
         else:
-            return fit_out, yfit_corr_down, xfit_chunk_corr_up, None
+            return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], None, qstd
+    
+    def efjc_fit(self,clicked_points,xvector,yvector, pl_value, T=293.0, return_errors=False):
+        '''
+        Extended Freely-jointed chain function
+        ref: F Oesterhelt, M Rief and H E Gaub, New Journal of Physics 1 (1999) 6.1–6.11 
+        Please note that 2-parameter fitting (length and kl) usually does not converge, use fixed kl
+        '''
+        '''
+        clicked_points[0] is the contact point (calculated or hand-clicked)
+        clicked_points[1] and [2] are edges of chunk
+        
+        '''
+        #Fixed parameters from reference
+        Kb=(1.38065e-2) #in pN.nm
+        Lp=0.358 #planar, nm
+        Lh=0.280 #helical, nm
+        Ks=150e3  #pN/nm
+        
+       
+        #STEP 1: Prepare the vectors to apply the fit.
+        
+        #indexes of the selected chunk
+        first_index=min(clicked_points[1].index, clicked_points[2].index)
+        last_index=max(clicked_points[1].index, clicked_points[2].index)
+        
+        #getting the chunk and reverting it
+        xchunk,ychunk=xvector[first_index:last_index],yvector[first_index:last_index]
+        xchunk.reverse()
+        ychunk.reverse()    
+        #put contact point at zero and flip around the contact point (the fit wants a positive growth for extension and force)
+        xchunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xchunk]
+        ychunk_corr_up=[-(y-clicked_points[0].graph_coords[1]) for y in ychunk]
+        
+        
+        #make them arrays
+        xchunk_corr_up=scipy.array(xchunk_corr_up)
+        ychunk_corr_up=scipy.array(ychunk_corr_up)
+        
+        xchunk_corr_up_nm=xchunk_corr_up*1e9
+        ychunk_corr_up_pn=ychunk_corr_up*1e12
     
+        
+        #STEP 2: actually do the fit
+    
+        #Find furthest point of chunk and add it a bit; the fit must converge
+        #from an excess!
+        xchunk_high=max(xchunk_corr_up_nm)
+        xchunk_high+=(xchunk_high/10.0)
+    
+        #Here are the linearized start parameters for the WLC.
+        #[Ns , pii=1/P]
+        #max number of monomers (all helical)for a contour length xchunk_high
+        excessNs=xchunk_high/(Lp) 
+        p0=[excessNs,(1.0/(0.7))]
+        p0_plfix=[(excessNs)]
+    
+        def dist(px,py,linex,liney):
+            distancesx=scipy.array([(px-x)**2 for x in linex])
+            minindex=np.argmin(distancesx)
+            return (py-liney[minindex])**2
+    
+        def deltaG(f):
+            dG0=12.4242 #3kt in pN.nm
+            dL=0.078 #planar-helical
+            return dG0-f*dL
+        
+        def Lfactor(f,T=T):
+            Lp=0.358 #planar, nm
+            Lh=0.280 #helical, nm
+            Kb=(1.38065e-2)
+            therm=Kb*T
+            dG=deltaG(f)
+            
+            return Lp/(np.exp(dG/therm)+1)+Lh/(np.exp(-dG/therm)+1)
+        
+        def coth(z):
+            '''
+            hyperbolic cotangent
+            '''
+            return 1.0/np.tanh(z)
+        
+        def x_efjc(params,f,T=T,Ks=Ks):
+            '''
+            efjc function for ODR fitting
+            '''
+            
+            Ns=params[0]
+            invkl=params[1]
+            Kb=(1.38065e-2)
+            therm=Kb*T            
+            beta=(f/therm)/invkl
+                        
+            x=Ns*Lfactor(f)*(coth(beta)-1.0/beta)+Ns*f/Ks
+            return x
+        
+        def x_efjc_plfix(params,f,kl_value=pl_value,T=T,Ks=Ks):
+            '''
+            efjc function for ODR fitting
+            '''
+            
+            Ns=params
+            invkl=1.0/kl_value
+            Kb=(1.38065e-2)
+            therm=Kb*T
+            beta=(f/therm)/invkl
+            
+            x=Ns*Lfactor(f)*(coth(beta)-1.0/beta)+Ns*f/Ks
+            return x
+        
+        #make the ODR fit
+        realdata=scipy.odr.RealData(ychunk_corr_up_pn,xchunk_corr_up_nm)
+        if pl_value:
+            model=scipy.odr.Model(x_efjc_plfix)
+            o = scipy.odr.ODR(realdata, model, p0_plfix)
+        else:
+            print 'WARNING eFJC fit with free pl sometimes does not converge'
+            model=scipy.odr.Model(x_efjc)
+            o = scipy.odr.ODR(realdata, model, p0)
+        
+        o.set_job(fit_type=2)
+        out=o.run()
+    
+        
+        Ns=out.beta[0]
+        Lc=Ns*Lp*1e-9 
+        if len(out.beta)>1:
+            kfit=1e-9/out.beta[1]
+            kfitnm=1/out.beta[1]
+        else:
+            kfit=1e-9*pl_value
+            kfitnm=pl_value
+        
+        fit_out=[Lc, kfit]
+        
+        #Calculate fit errors from output standard deviations.
+        fit_errors=[]
+        fit_errors.append(out.sd_beta[0]*Lp*1e-9)
+        if len(out.beta)>1:
+            fit_errors.append(1e9*out.sd_beta[1]*kfit**2)
+            
+  
+        
+        def efjc_eval(y,params,pl_value,T=T,Lfactor=Lfactor,Ks=Ks):    
+            '''
+            Evaluates the eFJC function
+            '''
+            if not pl_value:
+                Ns, invkl = params
+            else:
+                Ns = params
+        
+            if pl_value:
+                invkl=1.0/pl_value
+        
+            Kb=(1.38065e-2) #boltzmann constant
+            therm=Kb*T #so we have thermal energy
+            beta=(y/therm)/invkl
+
+            x= Ns*Lfactor(y)*(coth(beta)-1.0/beta)+Ns*y/Ks
+            
+            return x
+            
+        #STEP 3: plotting the fit
+        #obtain domain to plot the fit - from contact point to last_index plus 20 points
+        thule_index=last_index+10
+        if thule_index > len(xvector): #for rare cases in which we fit something at the END of whole curve.
+            thule_index = len(xvector)
+        #reverse etc. the domain
+        ychunk=yvector[clicked_points[0].index:thule_index]
+
+        if len(ychunk)>0:
+            y_evalchunk=np.linspace(min(ychunk),max(ychunk),100)
+        else:
+            #Empty y-chunk. It happens whenever we set the contact point after a recognized peak,
+            #or other buggy situations. Kludge to live with it now...
+            ychunk=yvector[:thule_index]
+            y_evalchunk=np.linspace(min(ychunk),max(ychunk),100)
+            
+        yfit_down=[-y for y in y_evalchunk]
+        yfit_corr_down=[y+clicked_points[0].graph_coords[1] for y in yfit_down]
+        yfit_corr_down=scipy.array(yfit_corr_down)
+        
+        #the fitted curve: reflip, re-uncorrect
+        xfit=efjc_eval(1e12*yfit_corr_down, out.beta, pl_value,T)*1e-9
+        xfit=list(xfit)
+        xfit.reverse()
+        xfit_chunk_corr_up=[-(x-clicked_points[0].graph_coords[0]) for x in xfit]
+        
+        #xfit_chunk_corr_up=scipy.array(xfit_chunk_corr_up)
+        #deltay=yfit_down[0]-yvector[clicked_points[0].index]
+        
+        #This is a terrible, terrible kludge to find the point where it should normalize (and from where it should plot)
+        xxxdists=[]
+        for index in scipy.arange(1,len(xfit_chunk_corr_up),1):
+            xxxdists.append((clicked_points[0].graph_coords[0]-xfit_chunk_corr_up[index])**2)           
+        normalize_index=xxxdists.index(min(xxxdists))
+        #End of kludge
+        
+        deltay=yfit_down[normalize_index]-clicked_points[0].graph_coords[1]
+        yfit_corr_down=[y-deltay for y in yfit_down]
+        
+        #calculate fit quality
+        #creates dense y vector
+        yqeval=np.linspace(np.min(ychunk_corr_up_pn)/2,np.max(ychunk_corr_up_pn)*2,10*len(ychunk_corr_up_pn))
+        #corresponding fitted x
+        xqeval=efjc_eval(yqeval,out.beta,pl_value)
+        
+        qsum=0
+        for qindex in np.arange(0,len(ychunk_corr_up_pn)):
+            qsum+=dist(xchunk_corr_up_nm[qindex],ychunk_corr_up_pn[qindex],xqeval,yqeval)        
+        qstd=1e-12*np.sqrt(qsum/len(ychunk_corr_up_pn))
+            
+        if return_errors:
+            return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], fit_errors, qstd
+        else:
+            return fit_out, yfit_corr_down[normalize_index+1:], xfit_chunk_corr_up[normalize_index+1:], None, qstd
+            
+    
+   
     
     def do_wlc(self,args):
         '''
         WLC
-        (fit plugin)
-        Fits a worm-like chain entropic rise to a given chunk of the curve.
+        (fit.py plugin)
+        
+        See the fit command
+        '''
+        self.do_fit(args)
+    
+    def do_fjc(self,args):
+        '''
+        FJC
+        (fit.py plugin)
+        
+        See the fit command
+        '''
+        self.do_fit(args)
+    
+    def do_fit(self,args):
+        '''
+        FIT
+        (fit.py plugin)
+        Fits an entropic elasticity function to a given chunk of the curve.
 
         First you have to click a contact point.
         Then you have to click the two edges of the data you want to fit.
-        The function is the simple polynomial worm-like chain as proposed by 
+        
+        Fit quality compares the distance to the fit with the thermal noise (a good fit should be close to 1)
+        
+        The fit function depends on the fit_function variable. You can set it with the command
+        "set fit_function wlc" or  "set fit_function fjc" depending on the function you prefer.
+        
+        For WLC, the function is the simple polynomial worm-like chain as proposed by 
         C.Bustamante, J.F.Marko, E.D.Siggia and S.Smith (Science. 1994 
         Sep 9;265(5178):1599-600.)
+        
+        For FJC, ref: 
+        C.Ray and B.B. Akhremitchev; http://www.chem.duke.edu/~boris/research/force_spectroscopy/fit_efjc.pdf
+        
+        For eFJC, ref:
+        F Oesterhelt, M Rief and H E Gaub, New Journal of Physics 1 (1999) 6.1–6.11 (section 4.2)
+        NOTE: use fixed pl for better results.
 
         Arguments:
-        pl=[value] : Use a fixed persistent length for the fit. If pl is not given, 
+        pl=[value] : Use a fixed persistent length (WLC) or Kuhn length (FJC) for the fit. If pl is not given, 
                      the fit will be a 2-variable  
                      fit. DO NOT put spaces between 'pl', '=' and the value.
                      The value must be in nanometers. 
@@ -359,7 +651,7 @@ class fitCommands:
         reclick : redefines by hand the contact point, if noauto has been used before
                   but the user is unsatisfied of the previously choosen contact point.
         ---------
-        Syntax: wlc [pl=(value)] [t=value] [noauto]
+        Syntax: fit [pl=(value)] [t=value] [noauto]
         '''
         pl_value=None
         T=self.config['temperature']
@@ -405,14 +697,20 @@ class fitCommands:
         print 'Click edges of chunk'
         points=self._measure_N_points(N=2, whatset=1)
         points=[contact_point]+points
+      
         try:
             if self.config['fit_function']=='wlc':
-                params, yfit, xfit, fit_errors = self.wlc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True )
+                params, yfit, xfit, fit_errors,qstd = self.wlc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True )
+                name_of_charlength='Persistent length'
             elif self.config['fit_function']=='fjc':
-                params, yfit, xfit, fit_errors = self.fjc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True )
+                params, yfit, xfit, fit_errors,qstd = self.fjc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True )
+                name_of_charlength='Kuhn length'
+            elif self.config['fit_function']=='efjc':
+                params, yfit, xfit, fit_errors,qstd = self.efjc_fit(points, displayed_plot.vectors[1][0], displayed_plot.vectors[1][1],pl_value,T, return_errors=True )
+                name_of_charlength='Kuhn length (e)'                    
             else:
                 print 'No recognized fit function defined!'
-                print 'Set your fit function to wlc or fjc.'
+                print 'Set your fit function to wlc, fjc or efjc.'
                 return
             
         except:
@@ -421,19 +719,21 @@ class fitCommands:
         
         #FIXME: print "Kuhn length" for FJC
         print 'Fit function:',self.config['fit_function']
-        print 'Contour length: ',params[0]*(1.0e+9),' nm'
-        to_dump='contour '+self.current.path+' '+str(params[0]*(1.0e+9))+' nm'
+        print 'Contour length: %.2f nm' %(params[0]*(1.0e+9))
+        to_dump='contour '+self.current.path+' %.2f nm'%(params[0]*(1.0e+9))
         self.outlet.push(to_dump)
         if len(params)==2: #if we did choose 2-value fit
-            print 'Persistent length: ',params[1]*(1.0e+9),' nm'
-            to_dump='persistent '+self.current.path+' '+str(params[1]*(1.0e+9))+' nm'
+            print name_of_charlength+': %.2f nm' %(params[1]*(1.0e+9))
+            to_dump='persistent '+self.current.path+' %.2f nm' %(params[1]*(1.0e+9))
             self.outlet.push(to_dump)
         
         if fit_errors:
             fit_nm=[i*(10**9) for i in fit_errors]
-            print 'Standard deviation (contour length)', fit_nm[0]
+            print 'Standard deviation (contour length) %.2f' %fit_nm[0]
             if len(fit_nm)>1:
-                print 'Standard deviation (persistent length)', fit_nm[1]
+                print 'Standard deviation ('+name_of_charlength+') %.2f' %fit_nm[1]
+        
+        print 'Fit quality: %.3f ' %(qstd/np.std(displayed_plot.vectors[1][1][-20:-1]))
             
             
         #add the clicked points in the final PlotObject
@@ -462,10 +762,7 @@ class fitCommands:
         self._send_plot([fitplot])
     
   
-    
-    
-    
-    
+
     #----------