Run update-copyright.py.
[hooke.git] / hooke / plugin / pcluster.py
index 5084b83ed31adeee853cf813f1cee12866cba794..45bb9c7e5b8b3db63207b426266fd43b57989075 100644 (file)
@@ -1,7 +1,25 @@
-#!/usr/bin/env python
+# Copyright (C) 2009-2012 Massimo Sandal <devicerandom@gmail.com>
+#                         Pancaldi Paolo <pancaldi.paolo@gmail.com>
+#                         W. Trevor King <wking@drexel.edu>
+#
+# This file is part of Hooke.
+#
+# Hooke is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# Hooke is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Hooke.  If not, see <http://www.gnu.org/licenses/>.
+
+from ..libhooke import WX_GOOD, ClickedPoint, config_file_path
 
 from mdp import pca
-from libhooke import WX_GOOD, ClickedPoint
 import wxversion
 wxversion.select(WX_GOOD)
 from wx import PostEvent
@@ -10,15 +28,17 @@ import scipy as sp
 import copy
 import os.path
 import time
-import libhookecurve as lhc
 import pylab as pyl
 
+from .. import curve as lhc
+
+
 import warnings
 warnings.simplefilter('ignore',np.RankWarning)
 
 
-class pclusterCommands:
-    
+class pclusterCommands(object):
+
     def _plug_init(self):
         self.clustplot1=None
         self.clustplot2=None
@@ -30,16 +50,12 @@ class pclusterCommands:
         Automatically measures peaks and extracts informations for further clustering
         (c)Paolo Pancaldi, Massimo Sandal 2009
         '''
-        if self.config['hookedir'][0]=='/':
-            slash='/' #a Unix or Unix-like system
-        else:
-            slash='\\'
         blindw = str(self.convfilt_config['blindwindow'])
         pclus_dir = "pCluster_blind"+blindw+"_"+time.strftime("%Y%m%d_%H%M")
-        self.my_work_dir = os.getcwd()+slash+pclus_dir+slash
+        self.my_work_dir = os.path.join(os.getcwd(), pclus_dir)
         self.my_curr_dir = os.path.basename(os.getcwd())
         os.mkdir(self.my_work_dir)
-        
+
         #--Custom persistent length
         pl_value=None
         for arg in args.split():
@@ -49,57 +65,33 @@ class pclusterCommands:
                 pl_value=float(pl_expression[1]) #actual value
             else:
                 pl_value=None
-                
+
         #configuration variables
         min_npks = self.convfilt_config['minpeaks']
         min_deviation = self.convfilt_config['mindeviation']
-        
+
         pclust_filename = "automeasure_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Automeasure filename? ')
         realclust_filename = "coordinate_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Coordinates filename? ')
         peackforce_filename = "peakforce_"+self.my_curr_dir+"_blind"+blindw+".txt"  #raw_input('Peacks and Forces filename? ')
-        
+
         f=open(self.my_work_dir+pclust_filename,'w+')
         f.write('Analysis started '+time.asctime()+'\n')
         f.write('----------------------------------------\n')
         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
         f.close()
-        
+
         f=open(self.my_work_dir+realclust_filename,'w+')
         f.write('Analysis started '+time.asctime()+'\n')
         f.write('----------------------------------------\n')
         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm) ; Peaks Diff\n')
         f.close()
-        
+
         f=open(self.my_work_dir+peackforce_filename,'w+')
         f.write('Analysis started '+time.asctime()+'\n')
         f.write('----------------------------------------\n')
         f.write('; Peak number  ;  1 peak Length (nm) ; 1 peak Force (pN) ;  2 peak Length (nm) ; 2 peak Force (pN) ;  3 peak Length (nm) ; 3 peak Force (pN) ;  4 peak Length (nm) ; 4 peak Force (pN) ;  5 peak Length (nm) ; 5 peak Force (pN) ;  6 peak Length (nm) ; 6 peak Force (pN) ;  7 peak Length (nm) ; 7 peak Force (pN) ;  8 peak Length (nm) ; 8 peak Force (pN)\n')
         f.close()
         
-        # ------ FUNCTION ------
-        def fit_interval_nm(start_index,plot,nm,backwards):
-            '''
-            Calculates the number of points to fit, given a fit interval in nm
-            start_index: index of point
-            plot: plot to use
-            backwards: if true, finds a point backwards.
-            '''
-            whatset=1 #FIXME: should be decidable
-            x_vect=plot.vectors[1][0]
-            
-            c=0
-            i=start_index
-            start=x_vect[start_index]
-            maxlen=len(x_vect)
-            while abs(x_vect[i]-x_vect[start_index])*(10**9) < nm:
-                if i==0 or i==maxlen-1: #we reached boundaries of vector!
-                    return c
-                if backwards:
-                    i-=1
-                else:
-                    i+=1
-                c+=1
-            return c
 
         def plot_informations(itplot,pl_value):
             '''
@@ -113,14 +105,14 @@ class pclusterCommands:
             fit_errors                                                                                 [6.5817195369767644e-010, 2.4415923138871498e-011]
             '''
             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
-            
+
             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
             self.basepoints=[]
-            base_index_0=peak_location[-1]+fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
+            base_index_0=peak_location[-1]+self.fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
-            base_index_1=self.basepoints[0].index+fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
+            base_index_1=self.basepoints[0].index+self.fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
             self.basecurrent=self.current.path
             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
@@ -131,7 +123,7 @@ class pclusterCommands:
 
         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
             '''
-            calculate informations for each peak and add they in 
+            calculate informations for each peak and add they in
             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
             '''
             c_leng=None
@@ -140,17 +132,17 @@ class pclusterCommands:
             sigma_p_leng=None
             force=None
             slope=None
-            
+
             delta_force=10
             slope_span=int(self.config['auto_slope_span'])
-            
+
             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
-            
+
             points=[contact_point, peak_point, other_fit_point]
-            
+
             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
-            
+
             #Measure forces
             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
             y=min(delta_to_measure)
@@ -168,12 +160,12 @@ class pclusterCommands:
                 #check if persistent length makes sense. otherwise, discard peak.
                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
                     '''
-                    p_lengths.append(p_leng)       
+                    p_lengths.append(p_leng)
                     c_lengths.append(params[0]*(1.0e+9))
                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
                     forces.append(abs(y-avg)*(1.0e+12))
-                    slopes.append(slope)     
+                    slopes.append(slope)
                     '''
                     c_leng=params[0]*(1.0e+9)
                     sigma_c_leng=fit_errors[0]*(1.0e+9)
@@ -196,18 +188,18 @@ class pclusterCommands:
             itplot[0]=flatten(itplot[0], item, customvalue=1)
             try:
                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
-            except: 
+            except:
                 #We have troubles with exec_has_peaks (bad curve, whatever).
                 #Print info and go to next cycle.
                 print 'Cannot process ',item.path
-                continue 
+                continue
 
             if len(peak_location)==0:
                 print 'No peaks!'
                 continue
 
             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
-            
+
             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 
             #initialize output data vectors
@@ -231,7 +223,7 @@ class pclusterCommands:
                 if len(vect)==0:
                     for i in range(len(c_lengths)):
                         vect.append(0)
-            
+
             print 'Measurements for all peaks detected:'
             print 'contour (nm)', c_lengths
             print 'sigma contour (nm)',sigma_c_lengths
@@ -239,7 +231,7 @@ class pclusterCommands:
             print 'sigma p (nm)',sigma_p_lengths
             print 'forces (pN)',forces
             print 'slopes (N/m)',slopes
-            
+
             '''
             write automeasure text file
             '''
@@ -249,9 +241,9 @@ class pclusterCommands:
             for i in range(len(c_lengths)):
                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
             f.close()
-            
+
             peak_number=len(c_lengths)
-            
+
             '''
             write peackforce text file
             '''
@@ -263,7 +255,7 @@ class pclusterCommands:
                 peackforce_info = peackforce_info + ' ; ' + str(c_lengths[i]) + ' ; ' + str(forces[i])
             f.write(' ; '+str(peak_number)+peackforce_info+'\n')
             f.close()
-            
+
             '''
             calculate clustering coordinates
             '''
@@ -271,27 +263,27 @@ class pclusterCommands:
                 deltas=[]
                 for i in range(len(c_lengths)-1):
                     deltas.append(c_lengths[i+1]-c_lengths[i])
-                
+
                 delta_mean=np.mean(deltas)
                 delta_median=np.median(deltas)
-                
+
                 force_mean=np.mean(forces)
                 force_median=np.median(forces)
-                
+
                 first_peak_cl=c_lengths[0]
                 last_peak_cl=c_lengths[-1]
-                
+
                 max_force=max(forces[:-1])
                 min_force=min(forces)
-                
+
                 max_delta=max(deltas)
                 min_delta=min(deltas)
-                
+
                 delta_stdev=np.std(deltas)
                 forces_stdev=np.std(forces[:-1])
-                    
+
                 peaks_diff=(last_peak_cl-first_peak_cl)/peak_number
-                
+
                 print 'Coordinates'
                 print 'Peaks',peak_number
                 print 'Mean delta',delta_mean
@@ -307,7 +299,7 @@ class pclusterCommands:
                 print 'Delta stdev',delta_stdev
                 print 'Forces stdev',forces_stdev
                 print 'Peaks difference',peaks_diff
-                
+
                 '''
                 write clustering coordinates
                 '''
@@ -329,33 +321,26 @@ class pclusterCommands:
                         ' ; '+str(peaks_diff)+      # 12
                         '\n')
                 f.close()
-                
+
         # start PCA
         self.do_pca(pclus_dir+"/"+realclust_filename)
-        
-                
+
+
     def do_pca(self,args):
         '''
         PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
         Automatically measures pca from coordinates filename and shows two interactives plots
-        With the second argument (arbitrary) you can select the columns and the multiplier factor 
+        With the second argument (arbitrary) you can select the columns and the multiplier factor
         to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
         Without second argument it reads pca_config.txt file
         (c)Paolo Pancaldi, Massimo Sandal 2009
         '''
-        
+
         # reads the columns of pca
-        if self.config['hookedir'][0]=='/':
-            slash='/' #a Unix or Unix-like system
-        else:
-            slash='\\'
-        self.my_hooke_dir = self.config['hookedir']+slash
-        #self.my_work_dir = os.getcwd()+slash+"pCluster_"+time.strftime("%Y%m%d_%H%M")+slash
-        #self.my_curr_dir = os.path.basename(os.getcwd())
-        conf=open(self.my_hooke_dir+"pca_config.txt")
+        conf=open(config_file_path("pca_config.txt"), 'r')
         config = conf.readlines()
         conf.close()
-        
+
         self.plot_myCoord = []          # tiene le coordinate prese direttamente dal file creato con pCluster
         self.plot_origCoord = []        # tiene le coordinate solo delle colonne scelte e moltiplicate per i valori scelti
         self.plot_pcaCoord = []         # tiene le due colonne della pca
@@ -366,8 +351,8 @@ class pclusterCommands:
         self.plot_NewPcaCoord = []      # tiene le due colonne della pca filtrate per densita
         self.plot_NewPcaCoordTr=[]      # tiene le due colonne della pca trasposta filtrate per densita
         plot_path_temp = ""
-        
-        # prende in inpunt un arg (nome del file) 
+
+        # prende in inpunt un arg (nome del file)
         # e il secondo le colonne su cui lavorare (e' arbitrario, riceve x es "1,2,3")
         arg = args.split(" ")
         if arg[0]==args:
@@ -375,7 +360,7 @@ class pclusterCommands:
         else:
             file_name=arg[0]
             config[0] = arg[1]
-        
+
         # creo l'array "plot_myCoord" con tutte le coordinate dei plots
         # e l'array plot_paths con tutti i percorsi dei plots
         nPlotTot = -3 #tolgo le prime 3 righe iniziali del file
@@ -388,7 +373,7 @@ class pclusterCommands:
             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
                 row = row[row.index(";",2)+2:].split(" ; ")    # non considero la prima colonna col #picchi
                 row = [float(i) for i in row]
-                
+
                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
@@ -397,8 +382,8 @@ class pclusterCommands:
                         self.plot_myCoord.append(row)
                         self.plot_paths.append(plot_path_temp)
         f.close()
-        
-        # creo l'array con alcune colonne e pure moltiplicate 
+
+        # creo l'array con alcune colonne e pure moltiplicate
         for row in self.plot_myCoord:
             res=[]
             for cols in config[0].split(","):
@@ -413,7 +398,7 @@ class pclusterCommands:
                     molt = 1
                 res.append(row[col]*molt)
             self.plot_origCoord.append(res)
-        
+
         # array convert, calculate PCA, transpose
         self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
         #print self.plot_origCoord.shape
@@ -421,11 +406,11 @@ class pclusterCommands:
         self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
         pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
         pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
-        
+
         '''
         # Start section of testing with good plots                                  # 4 TESTING!
         Xsyn_1=[]
-        Ysyn_1=[]        
+        Ysyn_1=[]
         Xgb1_1=[]
         Ygb1_1=[]
         Xbad_1=[]
@@ -434,7 +419,7 @@ class pclusterCommands:
         goodnames=goodnamefile.readlines()
         nPlotGood = len(goodnames)-2 #tolgo prima e ultima riga
         goodnames=[i.split()[0] for i in goodnames[1:]]
-        
+
         for index in range(len(self.plot_paths)):
             if self.plot_paths[index][:-1] in goodnames:
                 Xsyn_1.append(pca_X[index])
@@ -444,7 +429,7 @@ class pclusterCommands:
                 Ybad_1.append(pca_Y[index])
         # Stop section of testing with good plots                                   # 4 TESTING!
         '''
-        
+
         # print first plot
         clustplot1=lhc.PlotObject()
         clustplot1.add_set(pca_X,pca_Y)
@@ -456,7 +441,7 @@ class pclusterCommands:
         clustplot1.destination=0
         self._send_plot([clustplot1])
         self.clustplot1=clustplot1
-        
+
         # density and filer estimation
         kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
         tallest = 0
@@ -479,7 +464,7 @@ class pclusterCommands:
         axis_X = np.linspace(xmin,xmax,num=100)
         axis_Y = np.linspace(ymin,ymax,num=100)
         '''
-        
+
         # density filtering:
         # tramite "kernel.evaluate" trovo lo score (altezza) di ogni coordinata e decido se mantenerla o no
         filtered_pca_X = []
@@ -494,16 +479,16 @@ class pclusterCommands:
         filtered_PcaCoordTr.append(filtered_pca_X)
         filtered_PcaCoordTr.append(filtered_pca_Y)
         filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
-        
+
         # creo i due array "plot_FiltOrigCoord" e "plot_FiltPaths" contenenti solo i dati filtrati con alta densita
         for index in range(len(self.plot_pcaCoord)):
             if self.plot_pcaCoord[index] in filtered_PcaCoord:
                 self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
                 self.plot_FiltPaths.append(self.plot_paths[index])
-        
+
         '''
         # START PCA#2: USELESS!!!
-        
+
         # creo l array con alcune colonne e pure moltiplicate
         temp_coord = []
         for row in self.plot_FiltOrigCoord:
@@ -521,7 +506,7 @@ class pclusterCommands:
                 res.append(row[col]*molt)
             temp_coord.append(res)
         self.plot_FiltOrigCoord = temp_coord
-                
+
         # ricalcolo la PCA: array convert, calculate PCA, transpose
         self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
         #print self.plot_FiltOrigCoord.shape
@@ -529,7 +514,7 @@ class pclusterCommands:
         self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
         pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
         pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
-        
+
         # Start section of testing with good plots                              # 4 TESTING!
         Xsyn_2=[]
         Ysyn_2=[]
@@ -542,7 +527,7 @@ class pclusterCommands:
             else:
                 Xbad_2.append(pca_X2[index])
                 Ybad_2.append(pca_Y2[index])
-        
+
         # print second plot
         clustplot2=lhc.PlotObject()
         #clustplot2.add_set(pca_X2,pca_Y2)
@@ -555,7 +540,7 @@ class pclusterCommands:
         self._send_plot([clustplot2])
         self.clustplot2=clustplot2
         '''
-        
+
         # PRINT density plot
         clustplot2=lhc.PlotObject()
         clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
@@ -565,7 +550,7 @@ class pclusterCommands:
         clustplot2.destination=1
         self._send_plot([clustplot2])
         self.clustplot2=clustplot2
-        
+
         # printing results
         config_pca1 = config[0].replace("*", "x").rstrip("\n")
         config_pca2 = config[2].replace("*", "x").rstrip("\n")
@@ -582,7 +567,7 @@ class pclusterCommands:
         print "Piu alta: ", tallest
         #print "- FILTRO 3: 2'PCA:"+config_pca2
         print ""
-        
+
         # -- exporting coordinates and plot of PCA in debug mode! --
         if config[3].find("true")!=-1:
             #1' PCA: save plot and build coordinate s file
@@ -610,7 +595,7 @@ class pclusterCommands:
                     f.write (" ; " + str(cel))
                 f.write ("\n")
             f.close()
-        
+
         # pCLUSTER SAVING!!!
         import shutil
         pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
@@ -622,12 +607,12 @@ class pclusterCommands:
             f.write (myfile+"\n")
             shutil.copy2(myfile, pcl_name)
         f.close()
-        
-        
+
+
     def do_multipca(self,args):
         '''
         MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
-        Automatically multiply the column suggest in second argument for value between 1-100 (step of 2), 
+        Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
         measures pca from coordinates filename and save the png plots.
         (c)Paolo Pancaldi, Massimo Sandal 2009
         '''
@@ -655,7 +640,7 @@ class pclusterCommands:
             for j in range(1, 13):
                 if i!=j:
                     self.do_pca(file_name + " " + str(i) + "," + str(j))
-                    
+
     def do_triplepca(self,args):
         '''
         TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
@@ -675,7 +660,7 @@ class pclusterCommands:
         '''
         It returns id, coordinates and file name of a clicked dot on a PCA graphic
         '''
-        
+
         self._send_plot([self.clustplot1]) #quick workaround for BAD problems in the GUI
         print 'Click point'
         point = self._measure_N_points(N=1, whatset=0)
@@ -687,7 +672,7 @@ class pclusterCommands:
         print "coord: " + str(dot_coord)
         self.do_genlist(str(plot_file))
         #self.do_jump(str(plot_file))
-        
+
     # indea iniziata e messa da parte...
     def do_peakforce(self, args):
         '''
@@ -695,14 +680,14 @@ class pclusterCommands:
         Automatically measures peack and force plots
         (c)Paolo Pancaldi, Massimo Sandal 2009
         '''
-        
+
         # prende in inpunt un arg (nome del file)
         file_name=args
         f=open(file_name)
-        
+
         # scrivo un file temp
         g = open('_prove.txt','w')
-        
+
         plot_file = '';
         rows = f.readlines()
         for row in rows:
@@ -731,5 +716,3 @@ class pclusterCommands:
                         g.write(writeme)
         g.close()
         f.close()
-        
-        
\ No newline at end of file