pcluster.py

   1 #!/usr/bin/env python
   2
   3 from mdp import pca
   4 from libhooke import WX_GOOD, ClickedPoint
   5 import wxversion
   6 wxversion.select(WX_GOOD)
   7 from wx import PostEvent
   8 import numpy as np
   9 import scipy as sp
  10 import copy
  11 import os.path
  12 import time
  13 import libhookecurve as lhc
  14 import pylab as pyl
  15
  16 import warnings
  17 warnings.simplefilter('ignore',np.RankWarning)
  18
  19
  20 class pclusterCommands:
  21
  22     def _plug_init(self):
  23         self.clustplot1=None
  24         self.clustplot2=None
  25
  26     def do_pcluster(self,args):
  27         '''
  28         pCLUSTER
  29         (pcluster.py)
  30         Automatically measures peaks and extracts informations for further clustering
  31         (c)Paolo Pancaldi, Massimo Sandal 2009
  32         '''
  33         if self.config['hookedir'][0]=='/':
  34             slash='/' #a Unix or Unix-like system
  35         else:
  36             slash='\\'
  37         blindw = str(self.convfilt_config['blindwindow'])
  38         pclus_dir = "pCluster_blind"+blindw+"_"+time.strftime("%Y%m%d_%H%M")
  39         self.my_work_dir = os.getcwd()+slash+pclus_dir+slash
  40         self.my_curr_dir = os.path.basename(os.getcwd())
  41         os.mkdir(self.my_work_dir)
  42
  43         #--Custom persistent length
  44         pl_value=None
  45         for arg in args.split():
  46             #look for a persistent length argument.
  47             if 'pl=' in arg:
  48                 pl_expression=arg.split('=')
  49                 pl_value=float(pl_expression[1]) #actual value
  50             else:
  51                 pl_value=None
  52
  53         #configuration variables
  54         min_npks = self.convfilt_config['minpeaks']
  55         min_deviation = self.convfilt_config['mindeviation']
  56
  57         pclust_filename = "automeasure_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Automeasure filename? ')
  58         realclust_filename = "coordinate_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Coordinates filename? ')
  59         peackforce_filename = "peakforce_"+self.my_curr_dir+"_blind"+blindw+".txt"  #raw_input('Peacks and Forces filename? ')
  60
  61         f=open(self.my_work_dir+pclust_filename,'w+')
  62         f.write('Analysis started '+time.asctime()+'\n')
  63         f.write('----------------------------------------\n')
  64         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
  65         f.close()
  66
  67         f=open(self.my_work_dir+realclust_filename,'w+')
  68         f.write('Analysis started '+time.asctime()+'\n')
  69         f.write('----------------------------------------\n')
  70         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm) ; Peaks Diff\n')
  71         f.close()
  72
  73         f=open(self.my_work_dir+peackforce_filename,'w+')
  74         f.write('Analysis started '+time.asctime()+'\n')
  75         f.write('----------------------------------------\n')
  76         f.write('; Peak number  ;  1 peak Length (nm) ; 1 peak Force (pN) ;  2 peak Length (nm) ; 2 peak Force (pN) ;  3 peak Length (nm) ; 3 peak Force (pN) ;  4 peak Length (nm) ; 4 peak Force (pN) ;  5 peak Length (nm) ; 5 peak Force (pN) ;  6 peak Length (nm) ; 6 peak Force (pN) ;  7 peak Length (nm) ; 7 peak Force (pN) ;  8 peak Length (nm) ; 8 peak Force (pN)\n')
  77         f.close()
  78
  79         # ------ FUNCTION ------
  80         def fit_interval_nm(start_index,plot,nm,backwards):
  81             '''
  82             Calculates the number of points to fit, given a fit interval in nm
  83             start_index: index of point
  84             plot: plot to use
  85             backwards: if true, finds a point backwards.
  86             '''
  87             whatset=1 #FIXME: should be decidable
  88             x_vect=plot.vectors[1][0]
  89
  90             c=0
  91             i=start_index
  92             start=x_vect[start_index]
  93             maxlen=len(x_vect)
  94             while abs(x_vect[i]-x_vect[start_index])*(10**9) < nm:
  95                 if i==0 or i==maxlen-1: #we reached boundaries of vector!
  96                     return c
  97                 if backwards:
  98                     i-=1
  99                 else:
 100                     i+=1
 101                 c+=1
 102             return c
 103
 104         def plot_informations(itplot,pl_value):
 105             '''
 106             OUR VARIABLES
 107             contact_point.absolute_coords               (2.4584142802103689e-007, -6.9647135616234017e-009)
 108             peak_point.absolute_coords                  (3.6047748250571423e-008, -7.7142802788854212e-009)
 109             other_fit_point.absolute_coords     (4.1666139243838867e-008, -7.3759393477579707e-009)
 110             peak_location                                                                               [510, 610, 703, 810, 915, 1103]
 111             peak_size                                                                                           [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
 112             params                                                                                                      [2.2433999931959462e-007, 3.3230248825175678e-010]
 113             fit_errors                                                                                  [6.5817195369767644e-010, 2.4415923138871498e-011]
 114             '''
 115             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
 116
 117             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
 118             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
 119             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
 120             self.basepoints=[]
 121             base_index_0=peak_location[-1]+fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
 122             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
 123             base_index_1=self.basepoints[0].index+fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
 124             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
 125             self.basecurrent=self.current.path
 126             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
 127             boundaries.sort()
 128             to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
 129             avg=np.mean(to_average)
 130             return fit_points, contact_point, pl_value, T, cindex, avg
 131
 132         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
 133             '''
 134             calculate informations for each peak and add they in
 135             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 136             '''
 137             c_leng=None
 138             p_leng=None
 139             sigma_c_leng=None
 140             sigma_p_leng=None
 141             force=None
 142             slope=None
 143
 144             delta_force=10
 145             slope_span=int(self.config['auto_slope_span'])
 146
 147             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
 148             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
 149
 150             points=[contact_point, peak_point, other_fit_point]
 151
 152             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
 153
 154             #Measure forces
 155             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
 156             y=min(delta_to_measure)
 157             #Measure slopes
 158             slope=self.linefit_between(peak-slope_span,peak)[0]
 159             #check fitted data and, if right, add peak to the measurement
 160             if len(params)==1: #if we did choose 1-value fit
 161                 p_leng=pl_value
 162                 c_leng=params[0]*(1.0e+9)
 163                 sigma_p_leng=0
 164                 sigma_c_leng=fit_errors[0]*(1.0e+9)
 165                 force = abs(y-avg)*(1.0e+12)
 166             else: #2-value fit
 167                 p_leng=params[1]*(1.0e+9)
 168                 #check if persistent length makes sense. otherwise, discard peak.
 169                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
 170                     '''
 171                     p_lengths.append(p_leng)
 172                     c_lengths.append(params[0]*(1.0e+9))
 173                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
 174                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
 175                     forces.append(abs(y-avg)*(1.0e+12))
 176                     slopes.append(slope)
 177                     '''
 178                     c_leng=params[0]*(1.0e+9)
 179                     sigma_c_leng=fit_errors[0]*(1.0e+9)
 180                     sigma_p_leng=fit_errors[1]*(1.0e+9)
 181                     force=abs(y-avg)*(1.0e+12)
 182                 else:
 183                     p_leng=None
 184                     slope=None
 185             #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 186             return  c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
 187
 188
 189         # ------ PROGRAM -------
 190         c=0
 191         for item in self.current_list:
 192             c+=1
 193             item.identify(self.drivers)
 194             itplot=item.curve.default_plots()
 195             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
 196             itplot[0]=flatten(itplot[0], item, customvalue=1)
 197             try:
 198                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
 199             except:
 200                 #We have troubles with exec_has_peaks (bad curve, whatever).
 201                 #Print info and go to next cycle.
 202                 print 'Cannot process ',item.path
 203                 continue
 204
 205             if len(peak_location)==0:
 206                 print 'No peaks!'
 207                 continue
 208
 209             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
 210
 211             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 212
 213             #initialize output data vectors
 214             c_lengths=[]
 215             p_lengths=[]
 216             sigma_c_lengths=[]
 217             sigma_p_lengths=[]
 218             forces=[]
 219             slopes=[]
 220
 221             #loop each peak of my curve
 222             for peak in peak_location:
 223                 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
 224                 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
 225                     if var is not None:
 226                         vector.append(var)
 227
 228             #FIXME: We need a dictionary here...
 229             allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
 230             for vect in allvects:
 231                 if len(vect)==0:
 232                     for i in range(len(c_lengths)):
 233                         vect.append(0)
 234
 235             print 'Measurements for all peaks detected:'
 236             print 'contour (nm)', c_lengths
 237             print 'sigma contour (nm)',sigma_c_lengths
 238             print 'p (nm)',p_lengths
 239             print 'sigma p (nm)',sigma_p_lengths
 240             print 'forces (pN)',forces
 241             print 'slopes (N/m)',slopes
 242
 243             '''
 244             write automeasure text file
 245             '''
 246             print 'Saving automatic measurement...'
 247             f=open(self.my_work_dir+pclust_filename,'a+')
 248             f.write(item.path+'\n')
 249             for i in range(len(c_lengths)):
 250                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
 251             f.close()
 252
 253             peak_number=len(c_lengths)
 254
 255             '''
 256             write peackforce text file
 257             '''
 258             print 'Saving automatic measurement...'
 259             f=open(self.my_work_dir+peackforce_filename,'a+')
 260             f.write(item.path+'\n')
 261             peackforce_info = ''
 262             for i in range(len(c_lengths)):
 263                 peackforce_info = peackforce_info + ' ; ' + str(c_lengths[i]) + ' ; ' + str(forces[i])
 264             f.write(' ; '+str(peak_number)+peackforce_info+'\n')
 265             f.close()
 266
 267             '''
 268             calculate clustering coordinates
 269             '''
 270             if peak_number > 1:
 271                 deltas=[]
 272                 for i in range(len(c_lengths)-1):
 273                     deltas.append(c_lengths[i+1]-c_lengths[i])
 274
 275                 delta_mean=np.mean(deltas)
 276                 delta_median=np.median(deltas)
 277
 278                 force_mean=np.mean(forces)
 279                 force_median=np.median(forces)
 280
 281                 first_peak_cl=c_lengths[0]
 282                 last_peak_cl=c_lengths[-1]
 283
 284                 max_force=max(forces[:-1])
 285                 min_force=min(forces)
 286
 287                 max_delta=max(deltas)
 288                 min_delta=min(deltas)
 289
 290                 delta_stdev=np.std(deltas)
 291                 forces_stdev=np.std(forces[:-1])
 292
 293                 peaks_diff=(last_peak_cl-first_peak_cl)/peak_number
 294
 295                 print 'Coordinates'
 296                 print 'Peaks',peak_number
 297                 print 'Mean delta',delta_mean
 298                 print 'Median delta',delta_median
 299                 print 'Mean force',force_mean
 300                 print 'Median force',force_median
 301                 print 'First peak',first_peak_cl
 302                 print 'Last peak',last_peak_cl
 303                 print 'Max force',max_force
 304                 print 'Min force',min_force
 305                 print 'Max delta',max_delta
 306                 print 'Min delta',min_delta
 307                 print 'Delta stdev',delta_stdev
 308                 print 'Forces stdev',forces_stdev
 309                 print 'Peaks difference',peaks_diff
 310
 311                 '''
 312                 write clustering coordinates
 313                 '''
 314                 f=open(self.my_work_dir+realclust_filename,'a+')
 315                 f.write(item.path+'\n')
 316                 f.write(' ; '+str(peak_number)+     # non considerato
 317                         ' ; '+str(delta_mean)+      # 0
 318                         ' ; '+str(delta_median)+    # 1 -
 319                         ' ; '+str(force_mean)+      # 2
 320                         ' ; '+str(force_median)+    # 3 -
 321                         ' ; '+str(first_peak_cl)+   # 4 -
 322                         ' ; '+str(last_peak_cl)+    # 5 -
 323                         ' ; '+str(max_force)+       # 6
 324                         ' ; '+str(min_force)+       # 7
 325                         ' ; '+str(max_delta)+       # 8
 326                         ' ; '+str(min_delta)+       # 9
 327                         ' ; '+str(delta_stdev)+     # 10
 328                         ' ; '+str(forces_stdev)+    # 11
 329                         ' ; '+str(peaks_diff)+      # 12
 330                         '\n')
 331                 f.close()
 332
 333         # start PCA
 334         self.do_pca(pclus_dir+"/"+realclust_filename)
 335
 336
 337     def do_pca(self,args):
 338         '''
 339         PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
 340         Automatically measures pca from coordinates filename and shows two interactives plots
 341         With the second argument (arbitrary) you can select the columns and the multiplier factor
 342         to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
 343         Without second argument it reads pca_config.txt file
 344         (c)Paolo Pancaldi, Massimo Sandal 2009
 345         '''
 346
 347         # reads the columns of pca
 348         if self.config['hookedir'][0]=='/':
 349             slash='/' #a Unix or Unix-like system
 350         else:
 351             slash='\\'
 352         self.my_hooke_dir = self.config['hookedir']+slash
 353         #self.my_work_dir = os.getcwd()+slash+"pCluster_"+time.strftime("%Y%m%d_%H%M")+slash
 354         #self.my_curr_dir = os.path.basename(os.getcwd())
 355         conf=open(self.my_hooke_dir+"pca_config.txt")
 356         config = conf.readlines()
 357         conf.close()
 358
 359         self.plot_myCoord = []          # tiene le coordinate prese direttamente dal file creato con pCluster
 360         self.plot_origCoord = []        # tiene le coordinate solo delle colonne scelte e moltiplicate per i valori scelti
 361         self.plot_pcaCoord = []         # tiene le due colonne della pca
 362         self.plot_pcaCoordTr = []       # tiene le due colonne della pca trasposta
 363         self.plot_FiltOrigCoord = []    # tiene le coordinate solo dei punti filtrati per densita
 364         self.plot_FiltPaths = []        # tiene i paths dei plot solo dei punti filtrati per densita
 365         self.plot_paths = []            # tiene i paths di tutti i plots
 366         self.plot_NewPcaCoord = []      # tiene le due colonne della pca filtrate per densita
 367         self.plot_NewPcaCoordTr=[]      # tiene le due colonne della pca trasposta filtrate per densita
 368         plot_path_temp = ""
 369
 370         # prende in inpunt un arg (nome del file)
 371         # e il secondo le colonne su cui lavorare (e' arbitrario, riceve x es "1,2,3")
 372         arg = args.split(" ")
 373         if arg[0]==args:
 374             file_name=args
 375         else:
 376             file_name=arg[0]
 377             config[0] = arg[1]
 378
 379         # creo l'array "plot_myCoord" con tutte le coordinate dei plots
 380         # e l'array plot_paths con tutti i percorsi dei plots
 381         nPlotTot = -3 #tolgo le prime 3 righe iniziali del file
 382         f=open(file_name)
 383         rows = f.readlines()
 384         for row in rows:
 385             if row[0]!=" " and row[0]!="":
 386                 nPlotTot = nPlotTot+1
 387                 plot_path_temp = row
 388             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 389                 row = row[row.index(";",2)+2:].split(" ; ")     # non considero la prima colonna col #picchi
 390                 row = [float(i) for i in row]
 391
 392                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
 393                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
 394                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
 395                     if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
 396                         #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
 397                         self.plot_myCoord.append(row)
 398                         self.plot_paths.append(plot_path_temp)
 399         f.close()
 400
 401         # creo l'array con alcune colonne e pure moltiplicate
 402         for row in self.plot_myCoord:
 403             res=[]
 404             for cols in config[0].split(","):
 405                 if cols.find("*")!=-1:
 406                     col = int(cols.split("*")[0])
 407                     molt = int(cols.split("*")[1])
 408                 elif cols.find("x")!=-1:
 409                     col = int(cols.split("x")[0])
 410                     molt = int(cols.split("x")[1])
 411                 else:
 412                     col = int(cols)
 413                     molt = 1
 414                 res.append(row[col]*molt)
 415             self.plot_origCoord.append(res)
 416
 417         # array convert, calculate PCA, transpose
 418         self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
 419         #print self.plot_origCoord.shape
 420         self.plot_pcaCoord = pca(self.plot_origCoord, output_dim=2)     #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 421         self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
 422         pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
 423         pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
 424
 425         '''
 426         # Start section of testing with good plots                                  # 4 TESTING!
 427         Xsyn_1=[]
 428         Ysyn_1=[]
 429         Xgb1_1=[]
 430         Ygb1_1=[]
 431         Xbad_1=[]
 432         Ybad_1=[]
 433         goodnamefile=open(file_name.replace("coordinate", "good"),'r')
 434         goodnames=goodnamefile.readlines()
 435         nPlotGood = len(goodnames)-2 #tolgo prima e ultima riga
 436         goodnames=[i.split()[0] for i in goodnames[1:]]
 437
 438         for index in range(len(self.plot_paths)):
 439             if self.plot_paths[index][:-1] in goodnames:
 440                 Xsyn_1.append(pca_X[index])
 441                 Ysyn_1.append(pca_Y[index])
 442             else:
 443                 Xbad_1.append(pca_X[index])
 444                 Ybad_1.append(pca_Y[index])
 445         # Stop section of testing with good plots                                   # 4 TESTING!
 446         '''
 447
 448         # print first plot
 449         clustplot1=lhc.PlotObject()
 450         clustplot1.add_set(pca_X,pca_Y)
 451         #clustplot1.add_set(Xbad_1,Ybad_1) # 4 TESTING!
 452         #clustplot1.add_set(Xsyn_1,Ysyn_1) # 4 TESTING!
 453         clustplot1.normalize_vectors()
 454         clustplot1.styles=['scatter', 'scatter','scatter']
 455         clustplot1.colors=[None,'red','green']
 456         clustplot1.destination=0
 457         self._send_plot([clustplot1])
 458         self.clustplot1=clustplot1
 459
 460         # density and filer estimation
 461         kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
 462         tallest = 0
 463         for i in range(len(pca_X)):
 464             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 465             if tallest < kern_value:
 466                     tallest = float(kern_value)
 467         if float(config[1]) == 0:
 468             my_filter = float(tallest / 3.242311147)
 469         else:
 470             my_filter = float(config[1])
 471         '''
 472         # section useful only for graphic printing
 473         xmin = pca_X.min()
 474         xmax = pca_X.max()
 475         ymin = pca_Y.min()
 476         ymax = pca_Y.max()
 477         mX, mY = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
 478         Z = sp.rot90(sp.fliplr(sp.reshape(kernel(sp.c_[mX.ravel(), mY.ravel()].T).T, mX.T.shape)))
 479         axis_X = np.linspace(xmin,xmax,num=100)
 480         axis_Y = np.linspace(ymin,ymax,num=100)
 481         '''
 482
 483         # density filtering:
 484         # tramite "kernel.evaluate" trovo lo score (altezza) di ogni coordinata e decido se mantenerla o no
 485         filtered_pca_X = []
 486         filtered_pca_Y = []
 487         filtered_PcaCoordTr = []
 488         filtered_PcaCoord = []
 489         for i in range(len(pca_X)):
 490             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 491             if kern_value > my_filter:
 492                 filtered_pca_X.append(pca_X[i])
 493                 filtered_pca_Y.append(pca_Y[i])
 494         filtered_PcaCoordTr.append(filtered_pca_X)
 495         filtered_PcaCoordTr.append(filtered_pca_Y)
 496         filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
 497
 498         # creo i due array "plot_FiltOrigCoord" e "plot_FiltPaths" contenenti solo i dati filtrati con alta densita
 499         for index in range(len(self.plot_pcaCoord)):
 500             if self.plot_pcaCoord[index] in filtered_PcaCoord:
 501                 self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
 502                 self.plot_FiltPaths.append(self.plot_paths[index])
 503
 504         '''
 505         # START PCA#2: USELESS!!!
 506
 507         # creo l array con alcune colonne e pure moltiplicate
 508         temp_coord = []
 509         for row in self.plot_FiltOrigCoord:
 510             res=[]
 511             for cols in config[2].split(","):
 512                 if cols.find("*")!=-1:
 513                     col = int(cols.split("*")[0])
 514                     molt = int(cols.split("*")[1])
 515                 elif cols.find("x")!=-1:
 516                     col = int(cols.split("x")[0])
 517                     molt = int(cols.split("x")[1])
 518                 else:
 519                     col = int(cols)
 520                     molt = 1
 521                 res.append(row[col]*molt)
 522             temp_coord.append(res)
 523         self.plot_FiltOrigCoord = temp_coord
 524
 525         # ricalcolo la PCA: array convert, calculate PCA, transpose
 526         self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
 527         #print self.plot_FiltOrigCoord.shape
 528         self.plot_NewPcaCoord = pca(self.plot_FiltOrigCoord, output_dim=2)      #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 529         self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
 530         pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
 531         pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
 532
 533         # Start section of testing with good plots                              # 4 TESTING!
 534         Xsyn_2=[]
 535         Ysyn_2=[]
 536         Xbad_2=[]
 537         Ybad_2=[]
 538         for index in range(len(self.plot_FiltPaths)):
 539             if self.plot_FiltPaths[index][:-1] in goodnames:
 540                 Xsyn_2.append(pca_X2[index])
 541                 Ysyn_2.append(pca_Y2[index])
 542             else:
 543                 Xbad_2.append(pca_X2[index])
 544                 Ybad_2.append(pca_Y2[index])
 545
 546         # print second plot
 547         clustplot2=lhc.PlotObject()
 548         #clustplot2.add_set(pca_X2,pca_Y2)
 549         clustplot2.add_set(Xbad_2,Ybad_2)                                       # 4 TESTING!
 550         clustplot2.add_set(Xsyn_2,Ysyn_2)                                       # 4 TESTING!
 551         clustplot2.normalize_vectors()
 552         clustplot2.styles=['scatter', 'scatter','scatter']
 553         clustplot2.colors=[None,'red','green']
 554         clustplot2.destination=1
 555         self._send_plot([clustplot2])
 556         self.clustplot2=clustplot2
 557         '''
 558
 559         # PRINT density plot
 560         clustplot2=lhc.PlotObject()
 561         clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
 562         clustplot2.normalize_vectors()
 563         clustplot2.styles=['scatter', 'scatter','scatter']
 564         clustplot2.colors=[None,'red','green']
 565         clustplot2.destination=1
 566         self._send_plot([clustplot2])
 567         self.clustplot2=clustplot2
 568
 569         # printing results
 570         config_pca1 = config[0].replace("*", "x").rstrip("\n")
 571         config_pca2 = config[2].replace("*", "x").rstrip("\n")
 572         print ""
 573         print "- START: "+file_name
 574         print "Curve totali: ", nPlotTot
 575         #print "Curve totali good: ", nPlotGood                                  # 4 TESTING!
 576         print "- FILTRO 1: 0-500 e NaN"
 577         print "Curve totali rimaste: ", len(self.plot_origCoord)
 578         #print 'Curve good rimaste: ', len(Xsyn_1)                               # 4 TESTING!
 579         print "- FILTRO 2: PCA:"+config_pca1+" e DENSITA:"+str(my_filter)
 580         print "Curve totali rimaste: ", len(self.plot_FiltOrigCoord)
 581         #print 'Curve good rimaste: ', len(Xsyn_2)                               # 4 TESTING!
 582         print "Piu alta: ", tallest
 583         #print "- FILTRO 3: 2'PCA:"+config_pca2
 584         print ""
 585
 586         # -- exporting coordinates and plot of PCA in debug mode! --
 587         if config[3].find("true")!=-1:
 588             #1' PCA: save plot and build coordinate s file
 589             self.do_export(file_name.replace("coordinate_", "debug_pca1graph_").replace('.txt','_'+config_pca1) + " 0")
 590             f = open(file_name.replace("coordinate_", "debug_pca1coor_").replace('.txt','_'+config_pca1+'.txt'),'w')
 591             for i in range(len(pca_X)):
 592                 f.write (str(i) + "\t" + str(pca_X[i]) + "\t" + str(pca_Y[i]) + "\n")
 593             f.close()
 594             #2' PCA: save plot and build coordinate s file
 595             #self.do_export(file_name.replace("coordinate_", "debug_pca2graph_").replace('.txt','_'+config_pca2) + " 1")
 596             #f = open(file_name.replace("coordinate_", "debug_pca2coor_").replace('.txt','_'+config_pca2+'.txt'),'w')
 597             #for i in range(len(pca_X2)):
 598             #    f.write (str(i) + "\t" + str(pca_X2[i]) + "\t" + str(pca_Y2[i]) + "\n")
 599             #f.close()
 600             #DENSITY: save plot
 601             self.do_export(file_name.replace("coordinate_", "debug_densitygraph_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")) + " 1")
 602             f = open(file_name.replace("coordinate_", "debug_densitycoor_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")+'.txt'),'w')
 603             for i in range(len(filtered_pca_X)):
 604                 f.write (str(i) + "\t" + str(filtered_pca_X[i]) + "\t" + str(filtered_pca_Y[i]) + "\n")
 605             f.close()
 606             #ALL GOOD COORDINATES (without NaN and 0<x<500)
 607             f = open(file_name.replace("coordinate_", "debug_allgoodcoor_"),'w')
 608             for i in range(len(self.plot_myCoord)):
 609                 for cel in self.plot_myCoord[i]:
 610                     f.write (" ; " + str(cel))
 611                 f.write ("\n")
 612             f.close()
 613
 614         # pCLUSTER SAVING!!!
 615         import shutil
 616         pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
 617         if os.path.exists(pcl_name+slash): shutil.rmtree(pcl_name)
 618         os.mkdir(pcl_name+slash)
 619         f = open(pcl_name+'.txt','w')
 620         for i in range(len(self.plot_FiltPaths)):
 621             myfile = str(self.plot_FiltPaths[i]).rstrip("\n")
 622             f.write (myfile+"\n")
 623             shutil.copy2(myfile, pcl_name)
 624         f.close()
 625
 626
 627     def do_multipca(self,args):
 628         '''
 629         MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
 630         Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
 631         measures pca from coordinates filename and save the png plots.
 632         (c)Paolo Pancaldi, Massimo Sandal 2009
 633         '''
 634         # reads the columns of pca
 635         conf=open("pca_config.txt")
 636         config = conf.readlines() # config[0] = "1,2,3"
 637         conf.close()
 638         # cycling pca
 639         arg = args.split(" ")
 640         file_name=arg[0]
 641         column=str(arg[1])
 642         for i in range(1, 51, 1):
 643             self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
 644
 645     def do_doublepca(self,args):
 646         '''
 647         DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
 648         Automatically it launches the pca command for all combinations with two column
 649         (c)Paolo Pancaldi, Massimo Sandal 2009
 650         '''
 651         # cycling pca
 652         arg = args.split(" ")
 653         file_name=arg[0]
 654         for i in range(1, 13):
 655             for j in range(1, 13):
 656                 if i!=j:
 657                     self.do_pca(file_name + " " + str(i) + "," + str(j))
 658
 659     def do_triplepca(self,args):
 660         '''
 661         TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
 662         Automatically it launches the pca command for all combinations with three column
 663         (c)Paolo Pancaldi, Massimo Sandal 2009
 664         '''
 665         # cycling pca
 666         arg = args.split(" ")
 667         file_name=arg[0]
 668         for i in range(1, 13):
 669             for j in range(1, 13):
 670                 for k in range(1, 13):
 671                     if i!=j and i!=k and j!=k:
 672                         self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
 673
 674     def do_pclick(self,args):
 675         '''
 676         It returns id, coordinates and file name of a clicked dot on a PCA graphic
 677         '''
 678
 679         self._send_plot([self.clustplot1]) #quick workaround for BAD problems in the GUI
 680         print 'Click point'
 681         point = self._measure_N_points(N=1, whatset=0)
 682         indice = point[0].index
 683         plot_file = self.plot_paths[indice]
 684         dot_coord = self.plot_pcaCoord[indice]
 685         print "file: " + str(plot_file).rstrip()
 686         print "id: " + str(indice)
 687         print "coord: " + str(dot_coord)
 688         self.do_genlist(str(plot_file))
 689         #self.do_jump(str(plot_file))
 690
 691     # indea iniziata e messa da parte...
 692     def do_peakforce(self, args):
 693         '''
 694         peackforce -> "peackforce peackforce_file.txt"
 695         Automatically measures peack and force plots
 696         (c)Paolo Pancaldi, Massimo Sandal 2009
 697         '''
 698
 699         # prende in inpunt un arg (nome del file)
 700         file_name=args
 701         f=open(file_name)
 702
 703         # scrivo un file temp
 704         g = open('_prove.txt','w')
 705
 706         plot_file = '';
 707         rows = f.readlines()
 708         for row in rows:
 709             if row[0]=="/":
 710                 plot_file = row
 711             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 712                 # FILTRO SUI 7 PICCHI
 713                 num_pic = int(row.split(" ; ")[1])
 714                 if num_pic==7:
 715                     width_force = row.split(" ; ")
 716                     w1 = float(width_force[2]); f1 = float(width_force[3]);
 717                     w2 = float(width_force[4]); f2 = float(width_force[5]);
 718                     w3 = float(width_force[6]); f3 = float(width_force[7]);
 719                     w4 = float(width_force[8]); f4 = float(width_force[9]);
 720                     w5 = float(width_force[10]); f5 = float(width_force[11]);
 721                     w6 = float(width_force[12]); f6 = float(width_force[13]);
 722                     w7 = float(width_force[14]); f7 = float(width_force[15]);
 723                     if w1>0 and w1<1000 and w2>0 and w2<1000 and w3>0 and w3<1000 and w4>0 and w4<1000 and w5>0 and w5<1000 and w6>0 and w6<1000 and w7>0 and w7<1000:
 724                         score_76 = abs(32 - (w7 - w6))
 725                         score_65 = abs(32 - (w6 - w5))
 726                         score_54 = abs(32 - (w5 - w4))
 727                         score_43 = abs(32 - (w4 - w3))
 728                         score_32 = abs(32 - (w3 - w2))
 729                         score_21 = abs(32 - (w2 - w1))
 730                         writeme = str(score_76) + " --- " + str(row)
 731                         g.write(writeme)
 732         g.close()
 733         f.close()
 734
 735