hooke/plugin/pcluster.py

   1 from mdp import pca
   2 from libhooke import WX_GOOD, ClickedPoint
   3 import wxversion
   4 wxversion.select(WX_GOOD)
   5 from wx import PostEvent
   6 import numpy as np
   7 import scipy as sp
   8 import copy
   9 import os.path
  10 import time
  11 import libhookecurve as lhc
  12 import pylab as pyl
  13
  14 import warnings
  15 warnings.simplefilter('ignore',np.RankWarning)
  16
  17
  18 class pclusterCommands:
  19
  20     def _plug_init(self):
  21         self.clustplot1=None
  22         self.clustplot2=None
  23
  24     def do_pcluster(self,args):
  25         '''
  26         pCLUSTER
  27         (pcluster.py)
  28         Automatically measures peaks and extracts informations for further clustering
  29         (c)Paolo Pancaldi, Massimo Sandal 2009
  30         '''
  31         if self.config['hookedir'][0]=='/':
  32             slash='/' #a Unix or Unix-like system
  33         else:
  34             slash='\\'
  35         blindw = str(self.convfilt_config['blindwindow'])
  36         pclus_dir = "pCluster_blind"+blindw+"_"+time.strftime("%Y%m%d_%H%M")
  37         self.my_work_dir = os.getcwd()+slash+pclus_dir+slash
  38         self.my_curr_dir = os.path.basename(os.getcwd())
  39         os.mkdir(self.my_work_dir)
  40
  41         #--Custom persistent length
  42         pl_value=None
  43         for arg in args.split():
  44             #look for a persistent length argument.
  45             if 'pl=' in arg:
  46                 pl_expression=arg.split('=')
  47                 pl_value=float(pl_expression[1]) #actual value
  48             else:
  49                 pl_value=None
  50
  51         #configuration variables
  52         min_npks = self.convfilt_config['minpeaks']
  53         min_deviation = self.convfilt_config['mindeviation']
  54
  55         pclust_filename = "automeasure_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Automeasure filename? ')
  56         realclust_filename = "coordinate_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Coordinates filename? ')
  57         peackforce_filename = "peakforce_"+self.my_curr_dir+"_blind"+blindw+".txt"  #raw_input('Peacks and Forces filename? ')
  58
  59         f=open(self.my_work_dir+pclust_filename,'w+')
  60         f.write('Analysis started '+time.asctime()+'\n')
  61         f.write('----------------------------------------\n')
  62         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
  63         f.close()
  64
  65         f=open(self.my_work_dir+realclust_filename,'w+')
  66         f.write('Analysis started '+time.asctime()+'\n')
  67         f.write('----------------------------------------\n')
  68         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm) ; Peaks Diff\n')
  69         f.close()
  70
  71         f=open(self.my_work_dir+peackforce_filename,'w+')
  72         f.write('Analysis started '+time.asctime()+'\n')
  73         f.write('----------------------------------------\n')
  74         f.write('; Peak number  ;  1 peak Length (nm) ; 1 peak Force (pN) ;  2 peak Length (nm) ; 2 peak Force (pN) ;  3 peak Length (nm) ; 3 peak Force (pN) ;  4 peak Length (nm) ; 4 peak Force (pN) ;  5 peak Length (nm) ; 5 peak Force (pN) ;  6 peak Length (nm) ; 6 peak Force (pN) ;  7 peak Length (nm) ; 7 peak Force (pN) ;  8 peak Length (nm) ; 8 peak Force (pN)\n')
  75         f.close()
  76
  77         # ------ FUNCTION ------
  78         def fit_interval_nm(start_index,plot,nm,backwards):
  79             '''
  80             Calculates the number of points to fit, given a fit interval in nm
  81             start_index: index of point
  82             plot: plot to use
  83             backwards: if true, finds a point backwards.
  84             '''
  85             whatset=1 #FIXME: should be decidable
  86             x_vect=plot.vectors[1][0]
  87
  88             c=0
  89             i=start_index
  90             start=x_vect[start_index]
  91             maxlen=len(x_vect)
  92             while abs(x_vect[i]-x_vect[start_index])*(10**9) < nm:
  93                 if i==0 or i==maxlen-1: #we reached boundaries of vector!
  94                     return c
  95                 if backwards:
  96                     i-=1
  97                 else:
  98                     i+=1
  99                 c+=1
 100             return c
 101
 102         def plot_informations(itplot,pl_value):
 103             '''
 104             OUR VARIABLES
 105             contact_point.absolute_coords               (2.4584142802103689e-007, -6.9647135616234017e-009)
 106             peak_point.absolute_coords                  (3.6047748250571423e-008, -7.7142802788854212e-009)
 107             other_fit_point.absolute_coords     (4.1666139243838867e-008, -7.3759393477579707e-009)
 108             peak_location                                                                               [510, 610, 703, 810, 915, 1103]
 109             peak_size                                                                                           [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
 110             params                                                                                                      [2.2433999931959462e-007, 3.3230248825175678e-010]
 111             fit_errors                                                                                  [6.5817195369767644e-010, 2.4415923138871498e-011]
 112             '''
 113             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
 114
 115             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
 116             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
 117             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
 118             self.basepoints=[]
 119             base_index_0=peak_location[-1]+fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
 120             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
 121             base_index_1=self.basepoints[0].index+fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
 122             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
 123             self.basecurrent=self.current.path
 124             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
 125             boundaries.sort()
 126             to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
 127             avg=np.mean(to_average)
 128             return fit_points, contact_point, pl_value, T, cindex, avg
 129
 130         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
 131             '''
 132             calculate informations for each peak and add they in
 133             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 134             '''
 135             c_leng=None
 136             p_leng=None
 137             sigma_c_leng=None
 138             sigma_p_leng=None
 139             force=None
 140             slope=None
 141
 142             delta_force=10
 143             slope_span=int(self.config['auto_slope_span'])
 144
 145             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
 146             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
 147
 148             points=[contact_point, peak_point, other_fit_point]
 149
 150             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
 151
 152             #Measure forces
 153             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
 154             y=min(delta_to_measure)
 155             #Measure slopes
 156             slope=self.linefit_between(peak-slope_span,peak)[0]
 157             #check fitted data and, if right, add peak to the measurement
 158             if len(params)==1: #if we did choose 1-value fit
 159                 p_leng=pl_value
 160                 c_leng=params[0]*(1.0e+9)
 161                 sigma_p_leng=0
 162                 sigma_c_leng=fit_errors[0]*(1.0e+9)
 163                 force = abs(y-avg)*(1.0e+12)
 164             else: #2-value fit
 165                 p_leng=params[1]*(1.0e+9)
 166                 #check if persistent length makes sense. otherwise, discard peak.
 167                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
 168                     '''
 169                     p_lengths.append(p_leng)
 170                     c_lengths.append(params[0]*(1.0e+9))
 171                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
 172                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
 173                     forces.append(abs(y-avg)*(1.0e+12))
 174                     slopes.append(slope)
 175                     '''
 176                     c_leng=params[0]*(1.0e+9)
 177                     sigma_c_leng=fit_errors[0]*(1.0e+9)
 178                     sigma_p_leng=fit_errors[1]*(1.0e+9)
 179                     force=abs(y-avg)*(1.0e+12)
 180                 else:
 181                     p_leng=None
 182                     slope=None
 183             #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 184             return  c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
 185
 186
 187         # ------ PROGRAM -------
 188         c=0
 189         for item in self.current_list:
 190             c+=1
 191             item.identify(self.drivers)
 192             itplot=item.curve.default_plots()
 193             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
 194             itplot[0]=flatten(itplot[0], item, customvalue=1)
 195             try:
 196                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
 197             except:
 198                 #We have troubles with exec_has_peaks (bad curve, whatever).
 199                 #Print info and go to next cycle.
 200                 print 'Cannot process ',item.path
 201                 continue
 202
 203             if len(peak_location)==0:
 204                 print 'No peaks!'
 205                 continue
 206
 207             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
 208
 209             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 210
 211             #initialize output data vectors
 212             c_lengths=[]
 213             p_lengths=[]
 214             sigma_c_lengths=[]
 215             sigma_p_lengths=[]
 216             forces=[]
 217             slopes=[]
 218
 219             #loop each peak of my curve
 220             for peak in peak_location:
 221                 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
 222                 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
 223                     if var is not None:
 224                         vector.append(var)
 225
 226             #FIXME: We need a dictionary here...
 227             allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
 228             for vect in allvects:
 229                 if len(vect)==0:
 230                     for i in range(len(c_lengths)):
 231                         vect.append(0)
 232
 233             print 'Measurements for all peaks detected:'
 234             print 'contour (nm)', c_lengths
 235             print 'sigma contour (nm)',sigma_c_lengths
 236             print 'p (nm)',p_lengths
 237             print 'sigma p (nm)',sigma_p_lengths
 238             print 'forces (pN)',forces
 239             print 'slopes (N/m)',slopes
 240
 241             '''
 242             write automeasure text file
 243             '''
 244             print 'Saving automatic measurement...'
 245             f=open(self.my_work_dir+pclust_filename,'a+')
 246             f.write(item.path+'\n')
 247             for i in range(len(c_lengths)):
 248                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
 249             f.close()
 250
 251             peak_number=len(c_lengths)
 252
 253             '''
 254             write peackforce text file
 255             '''
 256             print 'Saving automatic measurement...'
 257             f=open(self.my_work_dir+peackforce_filename,'a+')
 258             f.write(item.path+'\n')
 259             peackforce_info = ''
 260             for i in range(len(c_lengths)):
 261                 peackforce_info = peackforce_info + ' ; ' + str(c_lengths[i]) + ' ; ' + str(forces[i])
 262             f.write(' ; '+str(peak_number)+peackforce_info+'\n')
 263             f.close()
 264
 265             '''
 266             calculate clustering coordinates
 267             '''
 268             if peak_number > 1:
 269                 deltas=[]
 270                 for i in range(len(c_lengths)-1):
 271                     deltas.append(c_lengths[i+1]-c_lengths[i])
 272
 273                 delta_mean=np.mean(deltas)
 274                 delta_median=np.median(deltas)
 275
 276                 force_mean=np.mean(forces)
 277                 force_median=np.median(forces)
 278
 279                 first_peak_cl=c_lengths[0]
 280                 last_peak_cl=c_lengths[-1]
 281
 282                 max_force=max(forces[:-1])
 283                 min_force=min(forces)
 284
 285                 max_delta=max(deltas)
 286                 min_delta=min(deltas)
 287
 288                 delta_stdev=np.std(deltas)
 289                 forces_stdev=np.std(forces[:-1])
 290
 291                 peaks_diff=(last_peak_cl-first_peak_cl)/peak_number
 292
 293                 print 'Coordinates'
 294                 print 'Peaks',peak_number
 295                 print 'Mean delta',delta_mean
 296                 print 'Median delta',delta_median
 297                 print 'Mean force',force_mean
 298                 print 'Median force',force_median
 299                 print 'First peak',first_peak_cl
 300                 print 'Last peak',last_peak_cl
 301                 print 'Max force',max_force
 302                 print 'Min force',min_force
 303                 print 'Max delta',max_delta
 304                 print 'Min delta',min_delta
 305                 print 'Delta stdev',delta_stdev
 306                 print 'Forces stdev',forces_stdev
 307                 print 'Peaks difference',peaks_diff
 308
 309                 '''
 310                 write clustering coordinates
 311                 '''
 312                 f=open(self.my_work_dir+realclust_filename,'a+')
 313                 f.write(item.path+'\n')
 314                 f.write(' ; '+str(peak_number)+     # non considerato
 315                         ' ; '+str(delta_mean)+      # 0
 316                         ' ; '+str(delta_median)+    # 1 -
 317                         ' ; '+str(force_mean)+      # 2
 318                         ' ; '+str(force_median)+    # 3 -
 319                         ' ; '+str(first_peak_cl)+   # 4 -
 320                         ' ; '+str(last_peak_cl)+    # 5 -
 321                         ' ; '+str(max_force)+       # 6
 322                         ' ; '+str(min_force)+       # 7
 323                         ' ; '+str(max_delta)+       # 8
 324                         ' ; '+str(min_delta)+       # 9
 325                         ' ; '+str(delta_stdev)+     # 10
 326                         ' ; '+str(forces_stdev)+    # 11
 327                         ' ; '+str(peaks_diff)+      # 12
 328                         '\n')
 329                 f.close()
 330
 331         # start PCA
 332         self.do_pca(pclus_dir+"/"+realclust_filename)
 333
 334
 335     def do_pca(self,args):
 336         '''
 337         PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
 338         Automatically measures pca from coordinates filename and shows two interactives plots
 339         With the second argument (arbitrary) you can select the columns and the multiplier factor
 340         to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
 341         Without second argument it reads pca_config.txt file
 342         (c)Paolo Pancaldi, Massimo Sandal 2009
 343         '''
 344
 345         # reads the columns of pca
 346         if self.config['hookedir'][0]=='/':
 347             slash='/' #a Unix or Unix-like system
 348         else:
 349             slash='\\'
 350         self.my_hooke_dir = self.config['hookedir']+slash
 351         #self.my_work_dir = os.getcwd()+slash+"pCluster_"+time.strftime("%Y%m%d_%H%M")+slash
 352         #self.my_curr_dir = os.path.basename(os.getcwd())
 353         conf=open(self.my_hooke_dir+"pca_config.txt")
 354         config = conf.readlines()
 355         conf.close()
 356
 357         self.plot_myCoord = []          # tiene le coordinate prese direttamente dal file creato con pCluster
 358         self.plot_origCoord = []        # tiene le coordinate solo delle colonne scelte e moltiplicate per i valori scelti
 359         self.plot_pcaCoord = []         # tiene le due colonne della pca
 360         self.plot_pcaCoordTr = []       # tiene le due colonne della pca trasposta
 361         self.plot_FiltOrigCoord = []    # tiene le coordinate solo dei punti filtrati per densita
 362         self.plot_FiltPaths = []        # tiene i paths dei plot solo dei punti filtrati per densita
 363         self.plot_paths = []            # tiene i paths di tutti i plots
 364         self.plot_NewPcaCoord = []      # tiene le due colonne della pca filtrate per densita
 365         self.plot_NewPcaCoordTr=[]      # tiene le due colonne della pca trasposta filtrate per densita
 366         plot_path_temp = ""
 367
 368         # prende in inpunt un arg (nome del file)
 369         # e il secondo le colonne su cui lavorare (e' arbitrario, riceve x es "1,2,3")
 370         arg = args.split(" ")
 371         if arg[0]==args:
 372             file_name=args
 373         else:
 374             file_name=arg[0]
 375             config[0] = arg[1]
 376
 377         # creo l'array "plot_myCoord" con tutte le coordinate dei plots
 378         # e l'array plot_paths con tutti i percorsi dei plots
 379         nPlotTot = -3 #tolgo le prime 3 righe iniziali del file
 380         f=open(file_name)
 381         rows = f.readlines()
 382         for row in rows:
 383             if row[0]!=" " and row[0]!="":
 384                 nPlotTot = nPlotTot+1
 385                 plot_path_temp = row
 386             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 387                 row = row[row.index(";",2)+2:].split(" ; ")     # non considero la prima colonna col #picchi
 388                 row = [float(i) for i in row]
 389
 390                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
 391                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
 392                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
 393                     if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
 394                         #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
 395                         self.plot_myCoord.append(row)
 396                         self.plot_paths.append(plot_path_temp)
 397         f.close()
 398
 399         # creo l'array con alcune colonne e pure moltiplicate
 400         for row in self.plot_myCoord:
 401             res=[]
 402             for cols in config[0].split(","):
 403                 if cols.find("*")!=-1:
 404                     col = int(cols.split("*")[0])
 405                     molt = int(cols.split("*")[1])
 406                 elif cols.find("x")!=-1:
 407                     col = int(cols.split("x")[0])
 408                     molt = int(cols.split("x")[1])
 409                 else:
 410                     col = int(cols)
 411                     molt = 1
 412                 res.append(row[col]*molt)
 413             self.plot_origCoord.append(res)
 414
 415         # array convert, calculate PCA, transpose
 416         self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
 417         #print self.plot_origCoord.shape
 418         self.plot_pcaCoord = pca(self.plot_origCoord, output_dim=2)     #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 419         self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
 420         pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
 421         pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
 422
 423         '''
 424         # Start section of testing with good plots                                  # 4 TESTING!
 425         Xsyn_1=[]
 426         Ysyn_1=[]
 427         Xgb1_1=[]
 428         Ygb1_1=[]
 429         Xbad_1=[]
 430         Ybad_1=[]
 431         goodnamefile=open(file_name.replace("coordinate", "good"),'r')
 432         goodnames=goodnamefile.readlines()
 433         nPlotGood = len(goodnames)-2 #tolgo prima e ultima riga
 434         goodnames=[i.split()[0] for i in goodnames[1:]]
 435
 436         for index in range(len(self.plot_paths)):
 437             if self.plot_paths[index][:-1] in goodnames:
 438                 Xsyn_1.append(pca_X[index])
 439                 Ysyn_1.append(pca_Y[index])
 440             else:
 441                 Xbad_1.append(pca_X[index])
 442                 Ybad_1.append(pca_Y[index])
 443         # Stop section of testing with good plots                                   # 4 TESTING!
 444         '''
 445
 446         # print first plot
 447         clustplot1=lhc.PlotObject()
 448         clustplot1.add_set(pca_X,pca_Y)
 449         #clustplot1.add_set(Xbad_1,Ybad_1) # 4 TESTING!
 450         #clustplot1.add_set(Xsyn_1,Ysyn_1) # 4 TESTING!
 451         clustplot1.normalize_vectors()
 452         clustplot1.styles=['scatter', 'scatter','scatter']
 453         clustplot1.colors=[None,'red','green']
 454         clustplot1.destination=0
 455         self._send_plot([clustplot1])
 456         self.clustplot1=clustplot1
 457
 458         # density and filer estimation
 459         kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
 460         tallest = 0
 461         for i in range(len(pca_X)):
 462             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 463             if tallest < kern_value:
 464                     tallest = float(kern_value)
 465         if float(config[1]) == 0:
 466             my_filter = float(tallest / 3.242311147)
 467         else:
 468             my_filter = float(config[1])
 469         '''
 470         # section useful only for graphic printing
 471         xmin = pca_X.min()
 472         xmax = pca_X.max()
 473         ymin = pca_Y.min()
 474         ymax = pca_Y.max()
 475         mX, mY = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
 476         Z = sp.rot90(sp.fliplr(sp.reshape(kernel(sp.c_[mX.ravel(), mY.ravel()].T).T, mX.T.shape)))
 477         axis_X = np.linspace(xmin,xmax,num=100)
 478         axis_Y = np.linspace(ymin,ymax,num=100)
 479         '''
 480
 481         # density filtering:
 482         # tramite "kernel.evaluate" trovo lo score (altezza) di ogni coordinata e decido se mantenerla o no
 483         filtered_pca_X = []
 484         filtered_pca_Y = []
 485         filtered_PcaCoordTr = []
 486         filtered_PcaCoord = []
 487         for i in range(len(pca_X)):
 488             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 489             if kern_value > my_filter:
 490                 filtered_pca_X.append(pca_X[i])
 491                 filtered_pca_Y.append(pca_Y[i])
 492         filtered_PcaCoordTr.append(filtered_pca_X)
 493         filtered_PcaCoordTr.append(filtered_pca_Y)
 494         filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
 495
 496         # creo i due array "plot_FiltOrigCoord" e "plot_FiltPaths" contenenti solo i dati filtrati con alta densita
 497         for index in range(len(self.plot_pcaCoord)):
 498             if self.plot_pcaCoord[index] in filtered_PcaCoord:
 499                 self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
 500                 self.plot_FiltPaths.append(self.plot_paths[index])
 501
 502         '''
 503         # START PCA#2: USELESS!!!
 504
 505         # creo l array con alcune colonne e pure moltiplicate
 506         temp_coord = []
 507         for row in self.plot_FiltOrigCoord:
 508             res=[]
 509             for cols in config[2].split(","):
 510                 if cols.find("*")!=-1:
 511                     col = int(cols.split("*")[0])
 512                     molt = int(cols.split("*")[1])
 513                 elif cols.find("x")!=-1:
 514                     col = int(cols.split("x")[0])
 515                     molt = int(cols.split("x")[1])
 516                 else:
 517                     col = int(cols)
 518                     molt = 1
 519                 res.append(row[col]*molt)
 520             temp_coord.append(res)
 521         self.plot_FiltOrigCoord = temp_coord
 522
 523         # ricalcolo la PCA: array convert, calculate PCA, transpose
 524         self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
 525         #print self.plot_FiltOrigCoord.shape
 526         self.plot_NewPcaCoord = pca(self.plot_FiltOrigCoord, output_dim=2)      #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 527         self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
 528         pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
 529         pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
 530
 531         # Start section of testing with good plots                              # 4 TESTING!
 532         Xsyn_2=[]
 533         Ysyn_2=[]
 534         Xbad_2=[]
 535         Ybad_2=[]
 536         for index in range(len(self.plot_FiltPaths)):
 537             if self.plot_FiltPaths[index][:-1] in goodnames:
 538                 Xsyn_2.append(pca_X2[index])
 539                 Ysyn_2.append(pca_Y2[index])
 540             else:
 541                 Xbad_2.append(pca_X2[index])
 542                 Ybad_2.append(pca_Y2[index])
 543
 544         # print second plot
 545         clustplot2=lhc.PlotObject()
 546         #clustplot2.add_set(pca_X2,pca_Y2)
 547         clustplot2.add_set(Xbad_2,Ybad_2)                                       # 4 TESTING!
 548         clustplot2.add_set(Xsyn_2,Ysyn_2)                                       # 4 TESTING!
 549         clustplot2.normalize_vectors()
 550         clustplot2.styles=['scatter', 'scatter','scatter']
 551         clustplot2.colors=[None,'red','green']
 552         clustplot2.destination=1
 553         self._send_plot([clustplot2])
 554         self.clustplot2=clustplot2
 555         '''
 556
 557         # PRINT density plot
 558         clustplot2=lhc.PlotObject()
 559         clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
 560         clustplot2.normalize_vectors()
 561         clustplot2.styles=['scatter', 'scatter','scatter']
 562         clustplot2.colors=[None,'red','green']
 563         clustplot2.destination=1
 564         self._send_plot([clustplot2])
 565         self.clustplot2=clustplot2
 566
 567         # printing results
 568         config_pca1 = config[0].replace("*", "x").rstrip("\n")
 569         config_pca2 = config[2].replace("*", "x").rstrip("\n")
 570         print ""
 571         print "- START: "+file_name
 572         print "Curve totali: ", nPlotTot
 573         #print "Curve totali good: ", nPlotGood                                  # 4 TESTING!
 574         print "- FILTRO 1: 0-500 e NaN"
 575         print "Curve totali rimaste: ", len(self.plot_origCoord)
 576         #print 'Curve good rimaste: ', len(Xsyn_1)                               # 4 TESTING!
 577         print "- FILTRO 2: PCA:"+config_pca1+" e DENSITA:"+str(my_filter)
 578         print "Curve totali rimaste: ", len(self.plot_FiltOrigCoord)
 579         #print 'Curve good rimaste: ', len(Xsyn_2)                               # 4 TESTING!
 580         print "Piu alta: ", tallest
 581         #print "- FILTRO 3: 2'PCA:"+config_pca2
 582         print ""
 583
 584         # -- exporting coordinates and plot of PCA in debug mode! --
 585         if config[3].find("true")!=-1:
 586             #1' PCA: save plot and build coordinate s file
 587             self.do_export(file_name.replace("coordinate_", "debug_pca1graph_").replace('.txt','_'+config_pca1) + " 0")
 588             f = open(file_name.replace("coordinate_", "debug_pca1coor_").replace('.txt','_'+config_pca1+'.txt'),'w')
 589             for i in range(len(pca_X)):
 590                 f.write (str(i) + "\t" + str(pca_X[i]) + "\t" + str(pca_Y[i]) + "\n")
 591             f.close()
 592             #2' PCA: save plot and build coordinate s file
 593             #self.do_export(file_name.replace("coordinate_", "debug_pca2graph_").replace('.txt','_'+config_pca2) + " 1")
 594             #f = open(file_name.replace("coordinate_", "debug_pca2coor_").replace('.txt','_'+config_pca2+'.txt'),'w')
 595             #for i in range(len(pca_X2)):
 596             #    f.write (str(i) + "\t" + str(pca_X2[i]) + "\t" + str(pca_Y2[i]) + "\n")
 597             #f.close()
 598             #DENSITY: save plot
 599             self.do_export(file_name.replace("coordinate_", "debug_densitygraph_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")) + " 1")
 600             f = open(file_name.replace("coordinate_", "debug_densitycoor_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")+'.txt'),'w')
 601             for i in range(len(filtered_pca_X)):
 602                 f.write (str(i) + "\t" + str(filtered_pca_X[i]) + "\t" + str(filtered_pca_Y[i]) + "\n")
 603             f.close()
 604             #ALL GOOD COORDINATES (without NaN and 0<x<500)
 605             f = open(file_name.replace("coordinate_", "debug_allgoodcoor_"),'w')
 606             for i in range(len(self.plot_myCoord)):
 607                 for cel in self.plot_myCoord[i]:
 608                     f.write (" ; " + str(cel))
 609                 f.write ("\n")
 610             f.close()
 611
 612         # pCLUSTER SAVING!!!
 613         import shutil
 614         pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
 615         if os.path.exists(pcl_name+slash): shutil.rmtree(pcl_name)
 616         os.mkdir(pcl_name+slash)
 617         f = open(pcl_name+'.txt','w')
 618         for i in range(len(self.plot_FiltPaths)):
 619             myfile = str(self.plot_FiltPaths[i]).rstrip("\n")
 620             f.write (myfile+"\n")
 621             shutil.copy2(myfile, pcl_name)
 622         f.close()
 623
 624
 625     def do_multipca(self,args):
 626         '''
 627         MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
 628         Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
 629         measures pca from coordinates filename and save the png plots.
 630         (c)Paolo Pancaldi, Massimo Sandal 2009
 631         '''
 632         # reads the columns of pca
 633         conf=open("pca_config.txt")
 634         config = conf.readlines() # config[0] = "1,2,3"
 635         conf.close()
 636         # cycling pca
 637         arg = args.split(" ")
 638         file_name=arg[0]
 639         column=str(arg[1])
 640         for i in range(1, 51, 1):
 641             self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
 642
 643     def do_doublepca(self,args):
 644         '''
 645         DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
 646         Automatically it launches the pca command for all combinations with two column
 647         (c)Paolo Pancaldi, Massimo Sandal 2009
 648         '''
 649         # cycling pca
 650         arg = args.split(" ")
 651         file_name=arg[0]
 652         for i in range(1, 13):
 653             for j in range(1, 13):
 654                 if i!=j:
 655                     self.do_pca(file_name + " " + str(i) + "," + str(j))
 656
 657     def do_triplepca(self,args):
 658         '''
 659         TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
 660         Automatically it launches the pca command for all combinations with three column
 661         (c)Paolo Pancaldi, Massimo Sandal 2009
 662         '''
 663         # cycling pca
 664         arg = args.split(" ")
 665         file_name=arg[0]
 666         for i in range(1, 13):
 667             for j in range(1, 13):
 668                 for k in range(1, 13):
 669                     if i!=j and i!=k and j!=k:
 670                         self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
 671
 672     def do_pclick(self,args):
 673         '''
 674         It returns id, coordinates and file name of a clicked dot on a PCA graphic
 675         '''
 676
 677         self._send_plot([self.clustplot1]) #quick workaround for BAD problems in the GUI
 678         print 'Click point'
 679         point = self._measure_N_points(N=1, whatset=0)
 680         indice = point[0].index
 681         plot_file = self.plot_paths[indice]
 682         dot_coord = self.plot_pcaCoord[indice]
 683         print "file: " + str(plot_file).rstrip()
 684         print "id: " + str(indice)
 685         print "coord: " + str(dot_coord)
 686         self.do_genlist(str(plot_file))
 687         #self.do_jump(str(plot_file))
 688
 689     # indea iniziata e messa da parte...
 690     def do_peakforce(self, args):
 691         '''
 692         peackforce -> "peackforce peackforce_file.txt"
 693         Automatically measures peack and force plots
 694         (c)Paolo Pancaldi, Massimo Sandal 2009
 695         '''
 696
 697         # prende in inpunt un arg (nome del file)
 698         file_name=args
 699         f=open(file_name)
 700
 701         # scrivo un file temp
 702         g = open('_prove.txt','w')
 703
 704         plot_file = '';
 705         rows = f.readlines()
 706         for row in rows:
 707             if row[0]=="/":
 708                 plot_file = row
 709             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 710                 # FILTRO SUI 7 PICCHI
 711                 num_pic = int(row.split(" ; ")[1])
 712                 if num_pic==7:
 713                     width_force = row.split(" ; ")
 714                     w1 = float(width_force[2]); f1 = float(width_force[3]);
 715                     w2 = float(width_force[4]); f2 = float(width_force[5]);
 716                     w3 = float(width_force[6]); f3 = float(width_force[7]);
 717                     w4 = float(width_force[8]); f4 = float(width_force[9]);
 718                     w5 = float(width_force[10]); f5 = float(width_force[11]);
 719                     w6 = float(width_force[12]); f6 = float(width_force[13]);
 720                     w7 = float(width_force[14]); f7 = float(width_force[15]);
 721                     if w1>0 and w1<1000 and w2>0 and w2<1000 and w3>0 and w3<1000 and w4>0 and w4<1000 and w5>0 and w5<1000 and w6>0 and w6<1000 and w7>0 and w7<1000:
 722                         score_76 = abs(32 - (w7 - w6))
 723                         score_65 = abs(32 - (w6 - w5))
 724                         score_54 = abs(32 - (w5 - w4))
 725                         score_43 = abs(32 - (w4 - w3))
 726                         score_32 = abs(32 - (w3 - w2))
 727                         score_21 = abs(32 - (w2 - w1))
 728                         writeme = str(score_76) + " --- " + str(row)
 729                         g.write(writeme)
 730         g.close()
 731         f.close()