(hooke.py) scatter_red patch to have red scatterplot
[hooke.git] / pcluster.py
index b3de929bfbb4230b2141dabb4884163212927b4a..e38e4aedd9f3c5397cf76a9b764a70aff7417078 100644 (file)
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+from mdp import pca
 from libhooke import WX_GOOD, ClickedPoint
 import wxversion
 wxversion.select(WX_GOOD)
@@ -9,12 +10,18 @@ import scipy as sp
 import copy
 import os.path
 import time
+import libhookecurve as lhc
 
 import warnings
 warnings.simplefilter('ignore',np.RankWarning)
 
 
 class pclusterCommands:
+    
+    def _plug_init(self):
+        self.pca_paths=[]
+        self.pca_myArray=None
+        self.clustplot=None
 
     def do_pcluster(self,args):
         '''
@@ -32,7 +39,7 @@ class pclusterCommands:
                 pl_value=float(pl_expression[1]) #actual value
             else:
                 pl_value=None
-                           
+                
         #configuration variables
         min_npks = self.convfilt_config['minpeaks']
         min_deviation = self.convfilt_config['mindeviation']
@@ -245,6 +252,9 @@ class pclusterCommands:
                 max_delta=max(deltas)
                 min_delta=min(deltas)
                 
+                delta_stdev=np.std(deltas)
+                forces_stdev=np.std(forces[:-1])
+                
                 print 'Coordinates'
                 print 'Peaks',peak_number
                 print 'Mean delta',delta_mean
@@ -257,6 +267,8 @@ class pclusterCommands:
                 print 'Min force',min_force
                 print 'Max delta',max_delta
                 print 'Min delta',min_delta
+                print 'Delta stdev',delta_stdev
+                print 'Forces stdev',forces_stdev
                 
                 '''
                 write clustering coordinates
@@ -264,7 +276,99 @@ class pclusterCommands:
                 f=open(realclust_filename,'a+')
                 f.write(item.path+'\n')
                 f.write(' ; '+str(peak_number)+' ; '+str(delta_mean)+' ; '+str(delta_median)+' ; '+str(force_mean)+' ; '+str(force_median)+' ; '+str(first_peak_cl)+' ; '+str(last_peak_cl)+ ' ; '+str(max_force)+' ; '
-                +str(min_force)+' ; '+str(max_delta)+' ; '+str(min_delta)+ '\n')
+                +str(min_force)+' ; '+str(max_delta)+' ; '+str(min_delta)+ ' ; '+str(delta_stdev)+ ' ; '+str(forces_stdev)+'\n')
                 f.close()
             else:
                 pass
+                
+    def do_pca(self,args):
+        '''
+        PCA
+        Automatically measures pca from coordinates filename and shows two interactives plots
+        (c)Paolo Pancaldi, Massimo Sandal 2009
+        '''
+        
+        self.pca_myArray = []
+        self.pca_paths = {}
+        plot_path_temp = ""
+        nPlotTot = 0
+        nPlotGood = 0
+        
+        file_name=args
+        
+        for arg in args.split():
+            #look for a file argument.
+            if 'f=' in arg:
+                file_temp=arg.split('=')[1] #actual coordinates filename
+                try:
+                    f=open(file_temp)
+                    f.close()
+                    file_name = file_temp
+                    print "Coordinates filename used: " + file_name
+                except:
+                    print "Impossibile to find " + file_temp + " in current directory"
+                    print "Coordinates filename used: " + file_name
+            
+        f=open(file_name)
+        rows = f.readlines()
+        for row in rows:
+            if row[0]=="/":
+                nPlotTot = nPlotTot+1
+                #plot_path_temp = row.split("/")[6][:-1]
+                plot_path_temp = row
+            if row[0]==" " and row.find('nan')==-1:
+                row = row[row.index(";",2)+2:].split(" ; ")    # non considero la prima colonna col #picchi
+                row = [float(i) for i in row]
+                        
+                #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
+                        #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
+                if (row[0]<9000 and row[1]<9000 and row[2]<9000 and row[3]<9000 and row[4]<9000 and row[5]<9000):
+                    if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0):
+                        self.pca_paths[nPlotGood] = plot_path_temp
+                        row=[row[1],row[2]]
+                        self.pca_myArray.append(row)
+                        nPlotGood = nPlotGood+1
+                        
+        f.close()
+        print nPlotGood, "of", nPlotTot
+        
+        # array convert, calculate PCA, transpose
+        self.pca_myArray = np.array(self.pca_myArray,dtype='float')
+        print self.pca_myArray.shape
+        '''for i in range(len(self.pca_myArray)):
+            print i, self.pca_paths[i]
+            print i, self.pca_myArray[i]'''
+        self.pca_myArray = pca(self.pca_myArray, output_dim=2) #other way -> y = mdp.nodes.PCANode(output_dim=2)(gigi)
+        myArrayTr = np.transpose(self.pca_myArray)
+        
+        '''for i in range(len(self.pca_myArray)):
+            print i, self.pca_paths[i]
+            print i, self.pca_myArray[i]'''
+        
+        # plotting
+        X=myArrayTr[0]
+        Y=myArrayTr[1]
+        clustplot=lhc.PlotObject()
+        clustplot.add_set(X,Y)
+        clustplot.add_set(X[:15],Y[:15])
+        clustplot.normalize_vectors()
+        clustplot.styles=['scatter', 'scatter_red']
+        #clustplot.styles=['scatter',None]
+        clustplot.destination=1
+        self._send_plot([clustplot])
+        self.clustplot=clustplot
+        
+        
+    def do_pclick(self,args):        
+        
+        self._send_plot([self.clustplot]) #quick workaround for BAD problems in the GUI
+        print 'Click point'
+        point = self._measure_N_points(N=1, whatset=0)
+        indice = point[0].index
+        plot_file = self.pca_paths[indice]
+        dot_coord = self.pca_myArray[indice]
+        print "file: " + str(plot_file).rstrip()
+        print "id: " + str(indice)
+        print "coord: " + str(dot_coord)
+        self.do_genlist(str(plot_file))
+        #self.do_jump(str(plot_file))
\ No newline at end of file