pcluster.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from mdp import pca
   5 from libhooke import WX_GOOD, ClickedPoint
   6 import wxversion
   7 wxversion.select(WX_GOOD)
   8 from wx import PostEvent
   9 import numpy as np
  10 import scipy as sp
  11 import copy
  12 import os.path
  13 import time
  14 import libhookecurve as lhc
  15 import pylab as pyl
  16
  17 import warnings
  18 warnings.simplefilter('ignore',np.RankWarning)
  19
  20
  21 class pclusterCommands:
  22
  23     def _plug_init(self):
  24         self.clustplot1=None
  25         self.clustplot2=None
  26
  27     def do_pcluster(self,args):
  28         '''
  29         pCLUSTER
  30         (pcluster.py)
  31         Automatically measures peaks and extracts informations for further clustering
  32         (c)Paolo Pancaldi, Massimo Sandal 2009
  33         '''
  34         if self.config['hookedir'][0]=='/':
  35             slash='/' #a Unix or Unix-like system
  36         else:
  37             slash='\\'
  38         blindw = str(self.convfilt_config['blindwindow'])
  39         pclus_dir = "pCluster_blind"+blindw+"_"+time.strftime("%Y%m%d_%H%M")
  40         self.my_work_dir = os.getcwd()+slash+pclus_dir+slash
  41         self.my_curr_dir = os.path.basename(os.getcwd())
  42         os.mkdir(self.my_work_dir)
  43
  44         #--Custom persistent length
  45         pl_value=None
  46         for arg in args.split():
  47             #look for a persistent length argument.
  48             if 'pl=' in arg:
  49                 pl_expression=arg.split('=')
  50                 pl_value=float(pl_expression[1]) #actual value
  51             else:
  52                 pl_value=None
  53
  54         #configuration variables
  55         min_npks = self.convfilt_config['minpeaks']
  56         min_deviation = self.convfilt_config['mindeviation']
  57
  58         pclust_filename = "automeasure_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Automeasure filename? ')
  59         realclust_filename = "coordinate_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Coordinates filename? ')
  60         peackforce_filename = "peakforce_"+self.my_curr_dir+"_blind"+blindw+".txt"  #raw_input('Peacks and Forces filename? ')
  61
  62         f=open(self.my_work_dir+pclust_filename,'w+')
  63         f.write('Analysis started '+time.asctime()+'\n')
  64         f.write('----------------------------------------\n')
  65         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
  66         f.close()
  67
  68         f=open(self.my_work_dir+realclust_filename,'w+')
  69         f.write('Analysis started '+time.asctime()+'\n')
  70         f.write('----------------------------------------\n')
  71         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm) ; Peaks Diff\n')
  72         f.close()
  73
  74         f=open(self.my_work_dir+peackforce_filename,'w+')
  75         f.write('Analysis started '+time.asctime()+'\n')
  76         f.write('----------------------------------------\n')
  77         f.write('; Peak number  ;  1 peak Length (nm) ; 1 peak Force (pN) ;  2 peak Length (nm) ; 2 peak Force (pN) ;  3 peak Length (nm) ; 3 peak Force (pN) ;  4 peak Length (nm) ; 4 peak Force (pN) ;  5 peak Length (nm) ; 5 peak Force (pN) ;  6 peak Length (nm) ; 6 peak Force (pN) ;  7 peak Length (nm) ; 7 peak Force (pN) ;  8 peak Length (nm) ; 8 peak Force (pN)\n')
  78         f.close()
  79
  80
  81         def plot_informations(itplot,pl_value):
  82             '''
  83             OUR VARIABLES
  84             contact_point.absolute_coords               (2.4584142802103689e-007, -6.9647135616234017e-009)
  85             peak_point.absolute_coords                  (3.6047748250571423e-008, -7.7142802788854212e-009)
  86             other_fit_point.absolute_coords     (4.1666139243838867e-008, -7.3759393477579707e-009)
  87             peak_location                                                                               [510, 610, 703, 810, 915, 1103]
  88             peak_size                                                                                           [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
  89             params                                                                                                      [2.2433999931959462e-007, 3.3230248825175678e-010]
  90             fit_errors                                                                                  [6.5817195369767644e-010, 2.4415923138871498e-011]
  91             '''
  92             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
  93
  94             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
  95             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
  96             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
  97             self.basepoints=[]
  98             base_index_0=peak_location[-1]+self.fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
  99             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
 100             base_index_1=self.basepoints[0].index+self.fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
 101             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
 102             self.basecurrent=self.current.path
 103             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
 104             boundaries.sort()
 105             to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
 106             avg=np.mean(to_average)
 107             return fit_points, contact_point, pl_value, T, cindex, avg
 108
 109         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
 110             '''
 111             calculate informations for each peak and add they in
 112             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 113             '''
 114             c_leng=None
 115             p_leng=None
 116             sigma_c_leng=None
 117             sigma_p_leng=None
 118             force=None
 119             slope=None
 120
 121             delta_force=10
 122             slope_span=int(self.config['auto_slope_span'])
 123
 124             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
 125             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
 126
 127             points=[contact_point, peak_point, other_fit_point]
 128
 129             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
 130
 131             #Measure forces
 132             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
 133             y=min(delta_to_measure)
 134             #Measure slopes
 135             slope=self.linefit_between(peak-slope_span,peak)[0]
 136             #check fitted data and, if right, add peak to the measurement
 137             if len(params)==1: #if we did choose 1-value fit
 138                 p_leng=pl_value
 139                 c_leng=params[0]*(1.0e+9)
 140                 sigma_p_leng=0
 141                 sigma_c_leng=fit_errors[0]*(1.0e+9)
 142                 force = abs(y-avg)*(1.0e+12)
 143             else: #2-value fit
 144                 p_leng=params[1]*(1.0e+9)
 145                 #check if persistent length makes sense. otherwise, discard peak.
 146                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
 147                     '''
 148                     p_lengths.append(p_leng)
 149                     c_lengths.append(params[0]*(1.0e+9))
 150                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
 151                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
 152                     forces.append(abs(y-avg)*(1.0e+12))
 153                     slopes.append(slope)
 154                     '''
 155                     c_leng=params[0]*(1.0e+9)
 156                     sigma_c_leng=fit_errors[0]*(1.0e+9)
 157                     sigma_p_leng=fit_errors[1]*(1.0e+9)
 158                     force=abs(y-avg)*(1.0e+12)
 159                 else:
 160                     p_leng=None
 161                     slope=None
 162             #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 163             return  c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
 164
 165
 166         # ------ PROGRAM -------
 167         c=0
 168         for item in self.current_list:
 169             c+=1
 170             item.identify(self.drivers)
 171             itplot=item.curve.default_plots()
 172             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
 173             itplot[0]=flatten(itplot[0], item, customvalue=1)
 174             try:
 175                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
 176             except:
 177                 #We have troubles with exec_has_peaks (bad curve, whatever).
 178                 #Print info and go to next cycle.
 179                 print 'Cannot process ',item.path
 180                 continue
 181
 182             if len(peak_location)==0:
 183                 print 'No peaks!'
 184                 continue
 185
 186             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
 187
 188             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 189
 190             #initialize output data vectors
 191             c_lengths=[]
 192             p_lengths=[]
 193             sigma_c_lengths=[]
 194             sigma_p_lengths=[]
 195             forces=[]
 196             slopes=[]
 197
 198             #loop each peak of my curve
 199             for peak in peak_location:
 200                 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
 201                 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
 202                     if var is not None:
 203                         vector.append(var)
 204
 205             #FIXME: We need a dictionary here...
 206             allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
 207             for vect in allvects:
 208                 if len(vect)==0:
 209                     for i in range(len(c_lengths)):
 210                         vect.append(0)
 211
 212             print 'Measurements for all peaks detected:'
 213             print 'contour (nm)', c_lengths
 214             print 'sigma contour (nm)',sigma_c_lengths
 215             print 'p (nm)',p_lengths
 216             print 'sigma p (nm)',sigma_p_lengths
 217             print 'forces (pN)',forces
 218             print 'slopes (N/m)',slopes
 219
 220             '''
 221             write automeasure text file
 222             '''
 223             print 'Saving automatic measurement...'
 224             f=open(self.my_work_dir+pclust_filename,'a+')
 225             f.write(item.path+'\n')
 226             for i in range(len(c_lengths)):
 227                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
 228             f.close()
 229
 230             peak_number=len(c_lengths)
 231
 232             '''
 233             write peackforce text file
 234             '''
 235             print 'Saving automatic measurement...'
 236             f=open(self.my_work_dir+peackforce_filename,'a+')
 237             f.write(item.path+'\n')
 238             peackforce_info = ''
 239             for i in range(len(c_lengths)):
 240                 peackforce_info = peackforce_info + ' ; ' + str(c_lengths[i]) + ' ; ' + str(forces[i])
 241             f.write(' ; '+str(peak_number)+peackforce_info+'\n')
 242             f.close()
 243
 244             '''
 245             calculate clustering coordinates
 246             '''
 247             if peak_number > 1:
 248                 deltas=[]
 249                 for i in range(len(c_lengths)-1):
 250                     deltas.append(c_lengths[i+1]-c_lengths[i])
 251
 252                 delta_mean=np.mean(deltas)
 253                 delta_median=np.median(deltas)
 254
 255                 force_mean=np.mean(forces)
 256                 force_median=np.median(forces)
 257
 258                 first_peak_cl=c_lengths[0]
 259                 last_peak_cl=c_lengths[-1]
 260
 261                 max_force=max(forces[:-1])
 262                 min_force=min(forces)
 263
 264                 max_delta=max(deltas)
 265                 min_delta=min(deltas)
 266
 267                 delta_stdev=np.std(deltas)
 268                 forces_stdev=np.std(forces[:-1])
 269
 270                 peaks_diff=(last_peak_cl-first_peak_cl)/peak_number
 271
 272                 print 'Coordinates'
 273                 print 'Peaks',peak_number
 274                 print 'Mean delta',delta_mean
 275                 print 'Median delta',delta_median
 276                 print 'Mean force',force_mean
 277                 print 'Median force',force_median
 278                 print 'First peak',first_peak_cl
 279                 print 'Last peak',last_peak_cl
 280                 print 'Max force',max_force
 281                 print 'Min force',min_force
 282                 print 'Max delta',max_delta
 283                 print 'Min delta',min_delta
 284                 print 'Delta stdev',delta_stdev
 285                 print 'Forces stdev',forces_stdev
 286                 print 'Peaks difference',peaks_diff
 287
 288                 '''
 289                 write clustering coordinates
 290                 '''
 291                 f=open(self.my_work_dir+realclust_filename,'a+')
 292                 f.write(item.path+'\n')
 293                 f.write(' ; '+str(peak_number)+     # non considerato
 294                         ' ; '+str(delta_mean)+      # 0
 295                         ' ; '+str(delta_median)+    # 1 -
 296                         ' ; '+str(force_mean)+      # 2
 297                         ' ; '+str(force_median)+    # 3 -
 298                         ' ; '+str(first_peak_cl)+   # 4 -
 299                         ' ; '+str(last_peak_cl)+    # 5 -
 300                         ' ; '+str(max_force)+       # 6
 301                         ' ; '+str(min_force)+       # 7
 302                         ' ; '+str(max_delta)+       # 8
 303                         ' ; '+str(min_delta)+       # 9
 304                         ' ; '+str(delta_stdev)+     # 10
 305                         ' ; '+str(forces_stdev)+    # 11
 306                         ' ; '+str(peaks_diff)+      # 12
 307                         '\n')
 308                 f.close()
 309
 310         # start PCA
 311         self.do_pca(pclus_dir+"/"+realclust_filename)
 312
 313
 314     def do_pca(self,args):
 315         '''
 316         PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
 317         Automatically measures pca from coordinates filename and shows two interactives plots
 318         With the second argument (arbitrary) you can select the columns and the multiplier factor
 319         to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
 320         Without second argument it reads pca_config.txt file
 321         (c)Paolo Pancaldi, Massimo Sandal 2009
 322         '''
 323
 324         # reads the columns of pca
 325         if self.config['hookedir'][0]=='/':
 326             slash='/' #a Unix or Unix-like system
 327         else:
 328             slash='\\'
 329         self.my_hooke_dir = self.config['hookedir']+slash
 330         #self.my_work_dir = os.getcwd()+slash+"pCluster_"+time.strftime("%Y%m%d_%H%M")+slash
 331         #self.my_curr_dir = os.path.basename(os.getcwd())
 332         conf=open(self.my_hooke_dir+"pca_config.txt")
 333         config = conf.readlines()
 334         conf.close()
 335
 336         self.plot_myCoord = []          # tiene le coordinate prese direttamente dal file creato con pCluster
 337         self.plot_origCoord = []        # tiene le coordinate solo delle colonne scelte e moltiplicate per i valori scelti
 338         self.plot_pcaCoord = []         # tiene le due colonne della pca
 339         self.plot_pcaCoordTr = []       # tiene le due colonne della pca trasposta
 340         self.plot_FiltOrigCoord = []    # tiene le coordinate solo dei punti filtrati per densita
 341         self.plot_FiltPaths = []        # tiene i paths dei plot solo dei punti filtrati per densita
 342         self.plot_paths = []            # tiene i paths di tutti i plots
 343         self.plot_NewPcaCoord = []      # tiene le due colonne della pca filtrate per densita
 344         self.plot_NewPcaCoordTr=[]      # tiene le due colonne della pca trasposta filtrate per densita
 345         plot_path_temp = ""
 346
 347         # prende in inpunt un arg (nome del file)
 348         # e il secondo le colonne su cui lavorare (e' arbitrario, riceve x es "1,2,3")
 349         arg = args.split(" ")
 350         if arg[0]==args:
 351             file_name=args
 352         else:
 353             file_name=arg[0]
 354             config[0] = arg[1]
 355
 356         # creo l'array "plot_myCoord" con tutte le coordinate dei plots
 357         # e l'array plot_paths con tutti i percorsi dei plots
 358         nPlotTot = -3 #tolgo le prime 3 righe iniziali del file
 359         f=open(file_name)
 360         rows = f.readlines()
 361         for row in rows:
 362             if row[0]!=" " and row[0]!="":
 363                 nPlotTot = nPlotTot+1
 364                 plot_path_temp = row
 365             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 366                 row = row[row.index(";",2)+2:].split(" ; ")     # non considero la prima colonna col #picchi
 367                 row = [float(i) for i in row]
 368
 369                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
 370                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
 371                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
 372                     if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
 373                         #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
 374                         self.plot_myCoord.append(row)
 375                         self.plot_paths.append(plot_path_temp)
 376         f.close()
 377
 378         # creo l'array con alcune colonne e pure moltiplicate
 379         for row in self.plot_myCoord:
 380             res=[]
 381             for cols in config[0].split(","):
 382                 if cols.find("*")!=-1:
 383                     col = int(cols.split("*")[0])
 384                     molt = int(cols.split("*")[1])
 385                 elif cols.find("x")!=-1:
 386                     col = int(cols.split("x")[0])
 387                     molt = int(cols.split("x")[1])
 388                 else:
 389                     col = int(cols)
 390                     molt = 1
 391                 res.append(row[col]*molt)
 392             self.plot_origCoord.append(res)
 393
 394         # array convert, calculate PCA, transpose
 395         self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
 396         #print self.plot_origCoord.shape
 397         self.plot_pcaCoord = pca(self.plot_origCoord, output_dim=2)     #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 398         self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
 399         pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
 400         pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
 401
 402         '''
 403         # Start section of testing with good plots                                  # 4 TESTING!
 404         Xsyn_1=[]
 405         Ysyn_1=[]
 406         Xgb1_1=[]
 407         Ygb1_1=[]
 408         Xbad_1=[]
 409         Ybad_1=[]
 410         goodnamefile=open(file_name.replace("coordinate", "good"),'r')
 411         goodnames=goodnamefile.readlines()
 412         nPlotGood = len(goodnames)-2 #tolgo prima e ultima riga
 413         goodnames=[i.split()[0] for i in goodnames[1:]]
 414
 415         for index in range(len(self.plot_paths)):
 416             if self.plot_paths[index][:-1] in goodnames:
 417                 Xsyn_1.append(pca_X[index])
 418                 Ysyn_1.append(pca_Y[index])
 419             else:
 420                 Xbad_1.append(pca_X[index])
 421                 Ybad_1.append(pca_Y[index])
 422         # Stop section of testing with good plots                                   # 4 TESTING!
 423         '''
 424
 425         # print first plot
 426         clustplot1=lhc.PlotObject()
 427         clustplot1.add_set(pca_X,pca_Y)
 428         #clustplot1.add_set(Xbad_1,Ybad_1) # 4 TESTING!
 429         #clustplot1.add_set(Xsyn_1,Ysyn_1) # 4 TESTING!
 430         clustplot1.normalize_vectors()
 431         clustplot1.styles=['scatter', 'scatter','scatter']
 432         clustplot1.colors=[None,'red','green']
 433         clustplot1.destination=0
 434         self._send_plot([clustplot1])
 435         self.clustplot1=clustplot1
 436
 437         # density and filer estimation
 438         kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
 439         tallest = 0
 440         for i in range(len(pca_X)):
 441             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 442             if tallest < kern_value:
 443                     tallest = float(kern_value)
 444         if float(config[1]) == 0:
 445             my_filter = float(tallest / 3.242311147)
 446         else:
 447             my_filter = float(config[1])
 448         '''
 449         # section useful only for graphic printing
 450         xmin = pca_X.min()
 451         xmax = pca_X.max()
 452         ymin = pca_Y.min()
 453         ymax = pca_Y.max()
 454         mX, mY = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
 455         Z = sp.rot90(sp.fliplr(sp.reshape(kernel(sp.c_[mX.ravel(), mY.ravel()].T).T, mX.T.shape)))
 456         axis_X = np.linspace(xmin,xmax,num=100)
 457         axis_Y = np.linspace(ymin,ymax,num=100)
 458         '''
 459
 460         # density filtering:
 461         # tramite "kernel.evaluate" trovo lo score (altezza) di ogni coordinata e decido se mantenerla o no
 462         filtered_pca_X = []
 463         filtered_pca_Y = []
 464         filtered_PcaCoordTr = []
 465         filtered_PcaCoord = []
 466         for i in range(len(pca_X)):
 467             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 468             if kern_value > my_filter:
 469                 filtered_pca_X.append(pca_X[i])
 470                 filtered_pca_Y.append(pca_Y[i])
 471         filtered_PcaCoordTr.append(filtered_pca_X)
 472         filtered_PcaCoordTr.append(filtered_pca_Y)
 473         filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
 474
 475         # creo i due array "plot_FiltOrigCoord" e "plot_FiltPaths" contenenti solo i dati filtrati con alta densita
 476         for index in range(len(self.plot_pcaCoord)):
 477             if self.plot_pcaCoord[index] in filtered_PcaCoord:
 478                 self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
 479                 self.plot_FiltPaths.append(self.plot_paths[index])
 480
 481         '''
 482         # START PCA#2: USELESS!!!
 483
 484         # creo l array con alcune colonne e pure moltiplicate
 485         temp_coord = []
 486         for row in self.plot_FiltOrigCoord:
 487             res=[]
 488             for cols in config[2].split(","):
 489                 if cols.find("*")!=-1:
 490                     col = int(cols.split("*")[0])
 491                     molt = int(cols.split("*")[1])
 492                 elif cols.find("x")!=-1:
 493                     col = int(cols.split("x")[0])
 494                     molt = int(cols.split("x")[1])
 495                 else:
 496                     col = int(cols)
 497                     molt = 1
 498                 res.append(row[col]*molt)
 499             temp_coord.append(res)
 500         self.plot_FiltOrigCoord = temp_coord
 501
 502         # ricalcolo la PCA: array convert, calculate PCA, transpose
 503         self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
 504         #print self.plot_FiltOrigCoord.shape
 505         self.plot_NewPcaCoord = pca(self.plot_FiltOrigCoord, output_dim=2)      #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 506         self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
 507         pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
 508         pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
 509
 510         # Start section of testing with good plots                              # 4 TESTING!
 511         Xsyn_2=[]
 512         Ysyn_2=[]
 513         Xbad_2=[]
 514         Ybad_2=[]
 515         for index in range(len(self.plot_FiltPaths)):
 516             if self.plot_FiltPaths[index][:-1] in goodnames:
 517                 Xsyn_2.append(pca_X2[index])
 518                 Ysyn_2.append(pca_Y2[index])
 519             else:
 520                 Xbad_2.append(pca_X2[index])
 521                 Ybad_2.append(pca_Y2[index])
 522
 523         # print second plot
 524         clustplot2=lhc.PlotObject()
 525         #clustplot2.add_set(pca_X2,pca_Y2)
 526         clustplot2.add_set(Xbad_2,Ybad_2)                                       # 4 TESTING!
 527         clustplot2.add_set(Xsyn_2,Ysyn_2)                                       # 4 TESTING!
 528         clustplot2.normalize_vectors()
 529         clustplot2.styles=['scatter', 'scatter','scatter']
 530         clustplot2.colors=[None,'red','green']
 531         clustplot2.destination=1
 532         self._send_plot([clustplot2])
 533         self.clustplot2=clustplot2
 534         '''
 535
 536         # PRINT density plot
 537         clustplot2=lhc.PlotObject()
 538         clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
 539         clustplot2.normalize_vectors()
 540         clustplot2.styles=['scatter', 'scatter','scatter']
 541         clustplot2.colors=[None,'red','green']
 542         clustplot2.destination=1
 543         self._send_plot([clustplot2])
 544         self.clustplot2=clustplot2
 545
 546         # printing results
 547         config_pca1 = config[0].replace("*", "x").rstrip("\n")
 548         config_pca2 = config[2].replace("*", "x").rstrip("\n")
 549         print ""
 550         print "- START: "+file_name
 551         print "Curve totali: ", nPlotTot
 552         #print "Curve totali good: ", nPlotGood                                  # 4 TESTING!
 553         print "- FILTRO 1: 0-500 e NaN"
 554         print "Curve totali rimaste: ", len(self.plot_origCoord)
 555         #print 'Curve good rimaste: ', len(Xsyn_1)                               # 4 TESTING!
 556         print "- FILTRO 2: PCA:"+config_pca1+" e DENSITA:"+str(my_filter)
 557         print "Curve totali rimaste: ", len(self.plot_FiltOrigCoord)
 558         #print 'Curve good rimaste: ', len(Xsyn_2)                               # 4 TESTING!
 559         print "Piu alta: ", tallest
 560         #print "- FILTRO 3: 2'PCA:"+config_pca2
 561         print ""
 562
 563         # -- exporting coordinates and plot of PCA in debug mode! --
 564         if config[3].find("true")!=-1:
 565             #1' PCA: save plot and build coordinate s file
 566             self.do_export(file_name.replace("coordinate_", "debug_pca1graph_").replace('.txt','_'+config_pca1) + " 0")
 567             f = open(file_name.replace("coordinate_", "debug_pca1coor_").replace('.txt','_'+config_pca1+'.txt'),'w')
 568             for i in range(len(pca_X)):
 569                 f.write (str(i) + "\t" + str(pca_X[i]) + "\t" + str(pca_Y[i]) + "\n")
 570             f.close()
 571             #2' PCA: save plot and build coordinate s file
 572             #self.do_export(file_name.replace("coordinate_", "debug_pca2graph_").replace('.txt','_'+config_pca2) + " 1")
 573             #f = open(file_name.replace("coordinate_", "debug_pca2coor_").replace('.txt','_'+config_pca2+'.txt'),'w')
 574             #for i in range(len(pca_X2)):
 575             #    f.write (str(i) + "\t" + str(pca_X2[i]) + "\t" + str(pca_Y2[i]) + "\n")
 576             #f.close()
 577             #DENSITY: save plot
 578             self.do_export(file_name.replace("coordinate_", "debug_densitygraph_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")) + " 1")
 579             f = open(file_name.replace("coordinate_", "debug_densitycoor_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")+'.txt'),'w')
 580             for i in range(len(filtered_pca_X)):
 581                 f.write (str(i) + "\t" + str(filtered_pca_X[i]) + "\t" + str(filtered_pca_Y[i]) + "\n")
 582             f.close()
 583             #ALL GOOD COORDINATES (without NaN and 0<x<500)
 584             f = open(file_name.replace("coordinate_", "debug_allgoodcoor_"),'w')
 585             for i in range(len(self.plot_myCoord)):
 586                 for cel in self.plot_myCoord[i]:
 587                     f.write (" ; " + str(cel))
 588                 f.write ("\n")
 589             f.close()
 590
 591         # pCLUSTER SAVING!!!
 592         import shutil
 593         pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
 594         if os.path.exists(pcl_name+slash): shutil.rmtree(pcl_name)
 595         os.mkdir(pcl_name+slash)
 596         f = open(pcl_name+'.txt','w')
 597         for i in range(len(self.plot_FiltPaths)):
 598             myfile = str(self.plot_FiltPaths[i]).rstrip("\n")
 599             f.write (myfile+"\n")
 600             shutil.copy2(myfile, pcl_name)
 601         f.close()
 602
 603
 604     def do_multipca(self,args):
 605         '''
 606         MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
 607         Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
 608         measures pca from coordinates filename and save the png plots.
 609         (c)Paolo Pancaldi, Massimo Sandal 2009
 610         '''
 611         # reads the columns of pca
 612         conf=open("pca_config.txt")
 613         config = conf.readlines() # config[0] = "1,2,3"
 614         conf.close()
 615         # cycling pca
 616         arg = args.split(" ")
 617         file_name=arg[0]
 618         column=str(arg[1])
 619         for i in range(1, 51, 1):
 620             self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
 621
 622     def do_doublepca(self,args):
 623         '''
 624         DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
 625         Automatically it launches the pca command for all combinations with two column
 626         (c)Paolo Pancaldi, Massimo Sandal 2009
 627         '''
 628         # cycling pca
 629         arg = args.split(" ")
 630         file_name=arg[0]
 631         for i in range(1, 13):
 632             for j in range(1, 13):
 633                 if i!=j:
 634                     self.do_pca(file_name + " " + str(i) + "," + str(j))
 635
 636     def do_triplepca(self,args):
 637         '''
 638         TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
 639         Automatically it launches the pca command for all combinations with three column
 640         (c)Paolo Pancaldi, Massimo Sandal 2009
 641         '''
 642         # cycling pca
 643         arg = args.split(" ")
 644         file_name=arg[0]
 645         for i in range(1, 13):
 646             for j in range(1, 13):
 647                 for k in range(1, 13):
 648                     if i!=j and i!=k and j!=k:
 649                         self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
 650
 651     def do_pclick(self,args):
 652         '''
 653         It returns id, coordinates and file name of a clicked dot on a PCA graphic
 654         '''
 655
 656         self._send_plot([self.clustplot1]) #quick workaround for BAD problems in the GUI
 657         print 'Click point'
 658         point = self._measure_N_points(N=1, whatset=0)
 659         indice = point[0].index
 660         plot_file = self.plot_paths[indice]
 661         dot_coord = self.plot_pcaCoord[indice]
 662         print "file: " + str(plot_file).rstrip()
 663         print "id: " + str(indice)
 664         print "coord: " + str(dot_coord)
 665         self.do_genlist(str(plot_file))
 666         #self.do_jump(str(plot_file))
 667
 668     # indea iniziata e messa da parte...
 669     def do_peakforce(self, args):
 670         '''
 671         peackforce -> "peackforce peackforce_file.txt"
 672         Automatically measures peack and force plots
 673         (c)Paolo Pancaldi, Massimo Sandal 2009
 674         '''
 675
 676         # prende in inpunt un arg (nome del file)
 677         file_name=args
 678         f=open(file_name)
 679
 680         # scrivo un file temp
 681         g = open('_prove.txt','w')
 682
 683         plot_file = '';
 684         rows = f.readlines()
 685         for row in rows:
 686             if row[0]=="/":
 687                 plot_file = row
 688             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 689                 # FILTRO SUI 7 PICCHI
 690                 num_pic = int(row.split(" ; ")[1])
 691                 if num_pic==7:
 692                     width_force = row.split(" ; ")
 693                     w1 = float(width_force[2]); f1 = float(width_force[3]);
 694                     w2 = float(width_force[4]); f2 = float(width_force[5]);
 695                     w3 = float(width_force[6]); f3 = float(width_force[7]);
 696                     w4 = float(width_force[8]); f4 = float(width_force[9]);
 697                     w5 = float(width_force[10]); f5 = float(width_force[11]);
 698                     w6 = float(width_force[12]); f6 = float(width_force[13]);
 699                     w7 = float(width_force[14]); f7 = float(width_force[15]);
 700                     if w1>0 and w1<1000 and w2>0 and w2<1000 and w3>0 and w3<1000 and w4>0 and w4<1000 and w5>0 and w5<1000 and w6>0 and w6<1000 and w7>0 and w7<1000:
 701                         score_76 = abs(32 - (w7 - w6))
 702                         score_65 = abs(32 - (w6 - w5))
 703                         score_54 = abs(32 - (w5 - w4))
 704                         score_43 = abs(32 - (w4 - w3))
 705                         score_32 = abs(32 - (w3 - w2))
 706                         score_21 = abs(32 - (w2 - w1))
 707                         writeme = str(score_76) + " --- " + str(row)
 708                         g.write(writeme)
 709         g.close()
 710         f.close()
 711
 712