Added a small feature in the 'autopeak' command. If the variable 'baseline_clicks...
[hooke.git] / pcluster.py
index 583edad306525aaa02b611270c97d17a2f8a17a6..392d4f09d36ced339a87b011f4e575cac5c92fd4 100644 (file)
@@ -97,7 +97,7 @@ class pclusterCommands:
             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
             
             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
-            cindex=self.find_contact_point() #Automatically find contact point <158, libhooke.ClickedPoint>
+            cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
             self.basepoints=[]
             base_index_0=peak_location[-1]+fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
@@ -142,8 +142,8 @@ class pclusterCommands:
             if len(params)==1: #if we did choose 1-value fit
                 p_leng=pl_value
                 c_leng=params[0]*(1.0e+9)
-                sigma_p_lengths=0
-                sigma_c_lengths=fit_errors[0]*(1.0e+9)
+                sigma_p_leng=0
+                sigma_c_leng=fit_errors[0]*(1.0e+9)
                 force = abs(y-avg)*(1.0e+12)
             else: #2-value fit
                 p_leng=params[1]*(1.0e+9)
@@ -174,6 +174,8 @@ class pclusterCommands:
             c+=1
             item.identify(self.drivers)
             itplot=item.curve.default_plots()
+            flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
+            itplot[0]=flatten(itplot[0], item, customvalue=1)
             try:
                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
             except: 
@@ -183,9 +185,11 @@ class pclusterCommands:
                 continue 
 
             if len(peak_location)==0:
+                print 'No peaks!'
                 continue
 
             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
+            
             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 
             #initialize output data vectors
@@ -275,40 +279,54 @@ class pclusterCommands:
                 '''
                 f=open(realclust_filename,'a+')
                 f.write(item.path+'\n')
-                f.write(' ; '+str(peak_number)+' ; '+str(delta_mean)+' ; '+str(delta_median)+' ; '+str(force_mean)+' ; '+str(force_median)+' ; '+str(first_peak_cl)+' ; '+str(last_peak_cl)+ ' ; '+str(max_force)+' ; '
-                +str(min_force)+' ; '+str(max_delta)+' ; '+str(min_delta)+ ' ; '+str(delta_stdev)+ ' ; '+str(forces_stdev)+'\n')
+                f.write(' ; '+str(peak_number)+     # non considerato
+                        ' ; '+str(delta_mean)+      # 0
+                        ' ; '+str(delta_median)+    # 1 -
+                        ' ; '+str(force_mean)+      # 2
+                        ' ; '+str(force_median)+    # 3 -
+                        ' ; '+str(first_peak_cl)+   # 4 -
+                        ' ; '+str(last_peak_cl)+    # 5 -
+                        ' ; '+str(max_force)+       # 6
+                        ' ; '+str(min_force)+       # 7
+                        ' ; '+str(max_delta)+       # 8
+                        ' ; '+str(min_delta)+       # 9
+                        ' ; '+str(delta_stdev)+     # 10
+                        ' ; '+str(forces_stdev)+    # 11
+                        '\n')
                 f.close()
             else:
                 pass
                 
     def do_pca(self,args):
         '''
-        PCA
+        PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
         Automatically measures pca from coordinates filename and shows two interactives plots
+        With the second (optional) argument you can select the columns and the multiplier factors
+        to use for the pca (e.g. "1,3*50,6,8x10,9"). Don't use spaces. "*" and "x" are equivalent.
+        Without the second argument, the columns are taken from the first line of pca_config.txt.
         (c)Paolo Pancaldi, Massimo Sandal 2009
         '''
         
+        # reads the columns of pca
+        conf=open("pca_config.txt")
+        config = conf.readlines()
+        conf.close()
+        
         self.pca_myArray = []
         self.pca_paths = {}
         plot_path_temp = ""
         nPlotTot = 0
         nPlotGood = 0
         
-        file_name=args
+        # takes one argument as input (the file name);
+        # the second one is optional and receives e.g. "1,3*50,6"
+        arg = args.split(" ")
+        if arg[0]==args:
+            file_name=args
+        else:
+            file_name=arg[0]
+            config[0] = arg[1]
         
-        for arg in args.split():
-            #look for a file argument.
-            if 'f=' in arg:
-                file_temp=arg.split('=')[1] #actual coordinates filename
-                try:
-                    f=open(file_temp)
-                    f.close()
-                    file_name = file_temp
-                    print "Coordinates filename used: " + file_name
-                except:
-                    print "Impossibile to find " + file_temp + " in current directory"
-                    print "Coordinates filename used: " + file_name
-            
         f=open(file_name)
         rows = f.readlines()
         for row in rows:
@@ -316,16 +334,29 @@ class pclusterCommands:
                 nPlotTot = nPlotTot+1
                 #plot_path_temp = row.split("/")[6][:-1]
                 plot_path_temp = row
-            if row[0]==" " and row.find('nan')==-1:
+            if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
                 row = row[row.index(";",2)+2:].split(" ; ")    # non considero la prima colonna col #picchi
                 row = [float(i) for i in row]
                         
                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
-                        #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
-                if (row[0]<9000 and row[1]<9000 and row[2]<9000 and row[3]<9000 and row[4]<9000 and row[5]<9000):
-                    if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0):
+                #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
+                if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
+                    if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
                         self.pca_paths[nPlotGood] = plot_path_temp
-                        self.pca_myArray.append(row)
+                        #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
+                        res=[]
+                        for cols in config[0].split(","):
+                            if cols.find("*")!=-1:
+                                col = int(cols.split("*")[0])
+                                molt = int(cols.split("*")[1])
+                            elif cols.find("x")!=-1:
+                                col = int(cols.split("x")[0])
+                                molt = int(cols.split("x")[1])
+                            else:
+                                col = int(cols)
+                                molt = 1
+                            res.append(row[col]*molt)
+                        self.pca_myArray.append(res)
                         nPlotGood = nPlotGood+1
                         
         f.close()
@@ -334,29 +365,130 @@ class pclusterCommands:
         # array convert, calculate PCA, transpose
         self.pca_myArray = np.array(self.pca_myArray,dtype='float')
         print self.pca_myArray.shape
-        '''for i in range(len(self.pca_myArray)):
-            print i, self.pca_paths[i]
-            print i, self.pca_myArray[i]'''
         self.pca_myArray = pca(self.pca_myArray, output_dim=2) #other way -> y = mdp.nodes.PCANode(output_dim=2)(gigi)
         myArrayTr = np.transpose(self.pca_myArray)
         
-        '''for i in range(len(self.pca_myArray)):
-            print i, self.pca_paths[i]
-            print i, self.pca_myArray[i]'''
-        
         # plotting
         X=myArrayTr[0]
         Y=myArrayTr[1]
+        
+        X=list(X)
+        Y=list(Y)
+        
         clustplot=lhc.PlotObject()
-        clustplot.add_set(X,Y)
-        #clustplot.add_set(X[:14],Y[:14])
+        
+        #FIXME
+        #our dataset-specific stuff
+        #This will go away after testing :)
+        Xsyn=[]
+        Ysyn=[]
+        
+        Xgb1=[]
+        Ygb1=[]
+        
+        Xbad=[]
+        Ybad=[]
+        
+        goodnamefile=open('dataset_s3sT45base_good_blind50.log','r')
+        #goodnamefile=open('/home/massimo/python/hooke/dataset_clust/roslin_blind50.log','r')
+        goodnames=goodnamefile.readlines()
+        goodnames=[i.split()[0] for i in goodnames[1:]]
+        
+        
+        for index in range(len(self.pca_paths)):
+            '''
+            if '3s3' in self.pca_paths[index] and not 'bad' in self.pca_paths[index]:
+                Xsyn.append(X[index])
+                Ysyn.append(Y[index])
+            elif 'bad' in self.pca_paths[index]:
+                Xbad.append(X[index])
+                Ybad.append(Y[index])
+            else:
+                Xgb1.append(X[index])
+                Ygb1.append(Y[index])
+            '''
+            #print self.pca_paths
+            if self.pca_paths[index][:-1] in goodnames:
+                Xsyn.append(X[index])
+                Ysyn.append(Y[index])
+            else:
+                Xbad.append(X[index])
+                Ybad.append(Y[index])
+            
+        print 'blath',len(Xsyn),len(Ysyn)
+        
+        #clustplot.add_set(Xgb1,Ygb1)
+        clustplot.add_set(Xbad,Ybad)
+        clustplot.add_set(Xsyn,Ysyn)
         clustplot.normalize_vectors()
-        clustplot.styles=['scatter']
+        clustplot.styles=['scatter', 'scatter','scatter']
+        clustplot.colors=[None,'red','green']
         #clustplot.styles=['scatter',None]
         clustplot.destination=1
         self._send_plot([clustplot])
         self.clustplot=clustplot
         
+        # -- exporting coordinates and plot! --
+        
+        #builds the coordinates file
+        
+        f = open('coordinate_punti.txt','w')
+        for i in range(len(X)):
+            f.write (str(i) + "\t" + str(X[i]) + "\t" + str(Y[i]) + "\n")
+        f.close()
+        
+        #save plot
+        config = config[0].replace("*", "x")
+        self.do_export("png/" + config + " 1")
+            
+    def do_multipca(self,args):
+        '''
+        MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
+        Automatically multiplies the column given in the second argument by each value from 1 to 50 (step of 1),
+        measures pca from the coordinates file and saves the png plots.
+        (c)Paolo Pancaldi, Massimo Sandal 2009
+        '''
+        # reads the columns of pca
+        conf=open("pca_config.txt")
+        config = conf.readlines() # config[0] = "1,2,3"
+        conf.close()
+        # cycling pca
+        arg = args.split(" ")
+        file_name=arg[0]
+        column=str(arg[1])
+        for i in range(1, 51, 1):
+            self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
+
+    def do_doublepca(self,args):
+        '''
+        DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
+        Automatically runs pca on every ordered pair of distinct columns (i,j), with i and j between 1 and 11,
+        measuring pca from the coordinates file and saving the png plots.
+        (c)Paolo Pancaldi, Massimo Sandal 2009
+        '''
+        # cycling pca
+        arg = args.split(" ")
+        file_name=arg[0]
+        for i in range(1, 12):
+            for j in range(1, 12):
+                if i!=j:
+                    self.do_pca(file_name + " " + str(i) + "," + str(j))
+                    
+    def do_triplepca(self,args):
+        '''
+        TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
+        Automatically runs pca on every ordered triple of distinct columns (i,j,k), each between 1 and 11,
+        measuring pca from the coordinates file and saving the png plots.
+        (c)Paolo Pancaldi, Massimo Sandal 2009
+        '''
+        # cycling pca
+        arg = args.split(" ")
+        file_name=arg[0]
+        for i in range(1, 12):
+            for j in range(1, 12):
+                for k in range(1, 12):
+                    if i!=j and i!=k and j!=k:
+                        self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
         
     def do_pclick(self,args):