hooke/plugin/pcluster.py

   1 # Copyright (C) 2009-2011 Fabrizio Benedetti
   2 #                         Massimo Sandal <devicerandom@gmail.com>
   3 #                         Pancaldi Paolo
   4 #                         W. Trevor King <wking@drexel.edu>
   5 #
   6 # This file is part of Hooke.
   7 #
   8 # Hooke is free software: you can redistribute it and/or modify it
   9 # under the terms of the GNU Lesser General Public License as
  10 # published by the Free Software Foundation, either version 3 of the
  11 # License, or (at your option) any later version.
  12 #
  13 # Hooke is distributed in the hope that it will be useful, but WITHOUT
  14 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
  16 # Public License for more details.
  17 #
  18 # You should have received a copy of the GNU Lesser General Public
  19 # License along with Hooke.  If not, see
  20 # <http://www.gnu.org/licenses/>.
  21
  22 from ..libhooke import WX_GOOD, ClickedPoint, config_file_path
  23
  24 from mdp import pca
  25 import wxversion
  26 wxversion.select(WX_GOOD)
  27 from wx import PostEvent
  28 import numpy as np
  29 import scipy as sp
  30 import copy
  31 import os.path
  32 import time
  33 import pylab as pyl
  34
  35 from .. import curve as lhc
  36
  37
  38 import warnings
  39 warnings.simplefilter('ignore',np.RankWarning)
  40
  41
  42 class pclusterCommands(object):
  43
  44     def _plug_init(self):
  45         self.clustplot1=None
  46         self.clustplot2=None
  47
  48     def do_pcluster(self,args):
  49         '''
  50         pCLUSTER
  51         (pcluster.py)
  52         Automatically measures peaks and extracts informations for further clustering
  53         (c)Paolo Pancaldi, Massimo Sandal 2009
  54         '''
  55         blindw = str(self.convfilt_config['blindwindow'])
  56         pclus_dir = "pCluster_blind"+blindw+"_"+time.strftime("%Y%m%d_%H%M")
  57         self.my_work_dir = os.path.join(os.getcwd(), pclus_dir)
  58         self.my_curr_dir = os.path.basename(os.getcwd())
  59         os.mkdir(self.my_work_dir)
  60
  61         #--Custom persistent length
  62         pl_value=None
  63         for arg in args.split():
  64             #look for a persistent length argument.
  65             if 'pl=' in arg:
  66                 pl_expression=arg.split('=')
  67                 pl_value=float(pl_expression[1]) #actual value
  68             else:
  69                 pl_value=None
  70
  71         #configuration variables
  72         min_npks = self.convfilt_config['minpeaks']
  73         min_deviation = self.convfilt_config['mindeviation']
  74
  75         pclust_filename = "automeasure_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Automeasure filename? ')
  76         realclust_filename = "coordinate_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Coordinates filename? ')
  77         peackforce_filename = "peakforce_"+self.my_curr_dir+"_blind"+blindw+".txt"  #raw_input('Peacks and Forces filename? ')
  78
  79         f=open(self.my_work_dir+pclust_filename,'w+')
  80         f.write('Analysis started '+time.asctime()+'\n')
  81         f.write('----------------------------------------\n')
  82         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
  83         f.close()
  84
  85         f=open(self.my_work_dir+realclust_filename,'w+')
  86         f.write('Analysis started '+time.asctime()+'\n')
  87         f.write('----------------------------------------\n')
  88         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm) ; Peaks Diff\n')
  89         f.close()
  90
  91         f=open(self.my_work_dir+peackforce_filename,'w+')
  92         f.write('Analysis started '+time.asctime()+'\n')
  93         f.write('----------------------------------------\n')
  94         f.write('; Peak number  ;  1 peak Length (nm) ; 1 peak Force (pN) ;  2 peak Length (nm) ; 2 peak Force (pN) ;  3 peak Length (nm) ; 3 peak Force (pN) ;  4 peak Length (nm) ; 4 peak Force (pN) ;  5 peak Length (nm) ; 5 peak Force (pN) ;  6 peak Length (nm) ; 6 peak Force (pN) ;  7 peak Length (nm) ; 7 peak Force (pN) ;  8 peak Length (nm) ; 8 peak Force (pN)\n')
  95         f.close()
  96
  97
  98         def plot_informations(itplot,pl_value):
  99             '''
 100             OUR VARIABLES
 101             contact_point.absolute_coords               (2.4584142802103689e-007, -6.9647135616234017e-009)
 102             peak_point.absolute_coords                  (3.6047748250571423e-008, -7.7142802788854212e-009)
 103             other_fit_point.absolute_coords     (4.1666139243838867e-008, -7.3759393477579707e-009)
 104             peak_location                                                                               [510, 610, 703, 810, 915, 1103]
 105             peak_size                                                                                           [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
 106             params                                                                                                      [2.2433999931959462e-007, 3.3230248825175678e-010]
 107             fit_errors                                                                                  [6.5817195369767644e-010, 2.4415923138871498e-011]
 108             '''
 109             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
 110
 111             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
 112             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
 113             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
 114             self.basepoints=[]
 115             base_index_0=peak_location[-1]+self.fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
 116             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
 117             base_index_1=self.basepoints[0].index+self.fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
 118             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
 119             self.basecurrent=self.current.path
 120             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
 121             boundaries.sort()
 122             to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
 123             avg=np.mean(to_average)
 124             return fit_points, contact_point, pl_value, T, cindex, avg
 125
 126         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
 127             '''
 128             calculate informations for each peak and add they in
 129             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 130             '''
 131             c_leng=None
 132             p_leng=None
 133             sigma_c_leng=None
 134             sigma_p_leng=None
 135             force=None
 136             slope=None
 137
 138             delta_force=10
 139             slope_span=int(self.config['auto_slope_span'])
 140
 141             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
 142             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
 143
 144             points=[contact_point, peak_point, other_fit_point]
 145
 146             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
 147
 148             #Measure forces
 149             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
 150             y=min(delta_to_measure)
 151             #Measure slopes
 152             slope=self.linefit_between(peak-slope_span,peak)[0]
 153             #check fitted data and, if right, add peak to the measurement
 154             if len(params)==1: #if we did choose 1-value fit
 155                 p_leng=pl_value
 156                 c_leng=params[0]*(1.0e+9)
 157                 sigma_p_leng=0
 158                 sigma_c_leng=fit_errors[0]*(1.0e+9)
 159                 force = abs(y-avg)*(1.0e+12)
 160             else: #2-value fit
 161                 p_leng=params[1]*(1.0e+9)
 162                 #check if persistent length makes sense. otherwise, discard peak.
 163                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
 164                     '''
 165                     p_lengths.append(p_leng)
 166                     c_lengths.append(params[0]*(1.0e+9))
 167                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
 168                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
 169                     forces.append(abs(y-avg)*(1.0e+12))
 170                     slopes.append(slope)
 171                     '''
 172                     c_leng=params[0]*(1.0e+9)
 173                     sigma_c_leng=fit_errors[0]*(1.0e+9)
 174                     sigma_p_leng=fit_errors[1]*(1.0e+9)
 175                     force=abs(y-avg)*(1.0e+12)
 176                 else:
 177                     p_leng=None
 178                     slope=None
 179             #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 180             return  c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
 181
 182
 183         # ------ PROGRAM -------
 184         c=0
 185         for item in self.current_list:
 186             c+=1
 187             item.identify(self.drivers)
 188             itplot=item.curve.default_plots()
 189             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
 190             itplot[0]=flatten(itplot[0], item, customvalue=1)
 191             try:
 192                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
 193             except:
 194                 #We have troubles with exec_has_peaks (bad curve, whatever).
 195                 #Print info and go to next cycle.
 196                 print 'Cannot process ',item.path
 197                 continue
 198
 199             if len(peak_location)==0:
 200                 print 'No peaks!'
 201                 continue
 202
 203             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
 204
 205             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 206
 207             #initialize output data vectors
 208             c_lengths=[]
 209             p_lengths=[]
 210             sigma_c_lengths=[]
 211             sigma_p_lengths=[]
 212             forces=[]
 213             slopes=[]
 214
 215             #loop each peak of my curve
 216             for peak in peak_location:
 217                 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
 218                 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
 219                     if var is not None:
 220                         vector.append(var)
 221
 222             #FIXME: We need a dictionary here...
 223             allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
 224             for vect in allvects:
 225                 if len(vect)==0:
 226                     for i in range(len(c_lengths)):
 227                         vect.append(0)
 228
 229             print 'Measurements for all peaks detected:'
 230             print 'contour (nm)', c_lengths
 231             print 'sigma contour (nm)',sigma_c_lengths
 232             print 'p (nm)',p_lengths
 233             print 'sigma p (nm)',sigma_p_lengths
 234             print 'forces (pN)',forces
 235             print 'slopes (N/m)',slopes
 236
 237             '''
 238             write automeasure text file
 239             '''
 240             print 'Saving automatic measurement...'
 241             f=open(self.my_work_dir+pclust_filename,'a+')
 242             f.write(item.path+'\n')
 243             for i in range(len(c_lengths)):
 244                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
 245             f.close()
 246
 247             peak_number=len(c_lengths)
 248
 249             '''
 250             write peackforce text file
 251             '''
 252             print 'Saving automatic measurement...'
 253             f=open(self.my_work_dir+peackforce_filename,'a+')
 254             f.write(item.path+'\n')
 255             peackforce_info = ''
 256             for i in range(len(c_lengths)):
 257                 peackforce_info = peackforce_info + ' ; ' + str(c_lengths[i]) + ' ; ' + str(forces[i])
 258             f.write(' ; '+str(peak_number)+peackforce_info+'\n')
 259             f.close()
 260
 261             '''
 262             calculate clustering coordinates
 263             '''
 264             if peak_number > 1:
 265                 deltas=[]
 266                 for i in range(len(c_lengths)-1):
 267                     deltas.append(c_lengths[i+1]-c_lengths[i])
 268
 269                 delta_mean=np.mean(deltas)
 270                 delta_median=np.median(deltas)
 271
 272                 force_mean=np.mean(forces)
 273                 force_median=np.median(forces)
 274
 275                 first_peak_cl=c_lengths[0]
 276                 last_peak_cl=c_lengths[-1]
 277
 278                 max_force=max(forces[:-1])
 279                 min_force=min(forces)
 280
 281                 max_delta=max(deltas)
 282                 min_delta=min(deltas)
 283
 284                 delta_stdev=np.std(deltas)
 285                 forces_stdev=np.std(forces[:-1])
 286
 287                 peaks_diff=(last_peak_cl-first_peak_cl)/peak_number
 288
 289                 print 'Coordinates'
 290                 print 'Peaks',peak_number
 291                 print 'Mean delta',delta_mean
 292                 print 'Median delta',delta_median
 293                 print 'Mean force',force_mean
 294                 print 'Median force',force_median
 295                 print 'First peak',first_peak_cl
 296                 print 'Last peak',last_peak_cl
 297                 print 'Max force',max_force
 298                 print 'Min force',min_force
 299                 print 'Max delta',max_delta
 300                 print 'Min delta',min_delta
 301                 print 'Delta stdev',delta_stdev
 302                 print 'Forces stdev',forces_stdev
 303                 print 'Peaks difference',peaks_diff
 304
 305                 '''
 306                 write clustering coordinates
 307                 '''
 308                 f=open(self.my_work_dir+realclust_filename,'a+')
 309                 f.write(item.path+'\n')
 310                 f.write(' ; '+str(peak_number)+     # non considerato
 311                         ' ; '+str(delta_mean)+      # 0
 312                         ' ; '+str(delta_median)+    # 1 -
 313                         ' ; '+str(force_mean)+      # 2
 314                         ' ; '+str(force_median)+    # 3 -
 315                         ' ; '+str(first_peak_cl)+   # 4 -
 316                         ' ; '+str(last_peak_cl)+    # 5 -
 317                         ' ; '+str(max_force)+       # 6
 318                         ' ; '+str(min_force)+       # 7
 319                         ' ; '+str(max_delta)+       # 8
 320                         ' ; '+str(min_delta)+       # 9
 321                         ' ; '+str(delta_stdev)+     # 10
 322                         ' ; '+str(forces_stdev)+    # 11
 323                         ' ; '+str(peaks_diff)+      # 12
 324                         '\n')
 325                 f.close()
 326
 327         # start PCA
 328         self.do_pca(pclus_dir+"/"+realclust_filename)
 329
 330
 331     def do_pca(self,args):
 332         '''
 333         PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
 334         Automatically measures pca from coordinates filename and shows two interactives plots
 335         With the second argument (arbitrary) you can select the columns and the multiplier factor
 336         to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
 337         Without second argument it reads pca_config.txt file
 338         (c)Paolo Pancaldi, Massimo Sandal 2009
 339         '''
 340
 341         # reads the columns of pca
 342         conf=open(config_file_path("pca_config.txt"), 'r')
 343         config = conf.readlines()
 344         conf.close()
 345
 346         self.plot_myCoord = []          # tiene le coordinate prese direttamente dal file creato con pCluster
 347         self.plot_origCoord = []        # tiene le coordinate solo delle colonne scelte e moltiplicate per i valori scelti
 348         self.plot_pcaCoord = []         # tiene le due colonne della pca
 349         self.plot_pcaCoordTr = []       # tiene le due colonne della pca trasposta
 350         self.plot_FiltOrigCoord = []    # tiene le coordinate solo dei punti filtrati per densita
 351         self.plot_FiltPaths = []        # tiene i paths dei plot solo dei punti filtrati per densita
 352         self.plot_paths = []            # tiene i paths di tutti i plots
 353         self.plot_NewPcaCoord = []      # tiene le due colonne della pca filtrate per densita
 354         self.plot_NewPcaCoordTr=[]      # tiene le due colonne della pca trasposta filtrate per densita
 355         plot_path_temp = ""
 356
 357         # prende in inpunt un arg (nome del file)
 358         # e il secondo le colonne su cui lavorare (e' arbitrario, riceve x es "1,2,3")
 359         arg = args.split(" ")
 360         if arg[0]==args:
 361             file_name=args
 362         else:
 363             file_name=arg[0]
 364             config[0] = arg[1]
 365
 366         # creo l'array "plot_myCoord" con tutte le coordinate dei plots
 367         # e l'array plot_paths con tutti i percorsi dei plots
 368         nPlotTot = -3 #tolgo le prime 3 righe iniziali del file
 369         f=open(file_name)
 370         rows = f.readlines()
 371         for row in rows:
 372             if row[0]!=" " and row[0]!="":
 373                 nPlotTot = nPlotTot+1
 374                 plot_path_temp = row
 375             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 376                 row = row[row.index(";",2)+2:].split(" ; ")     # non considero la prima colonna col #picchi
 377                 row = [float(i) for i in row]
 378
 379                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
 380                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
 381                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
 382                     if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
 383                         #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
 384                         self.plot_myCoord.append(row)
 385                         self.plot_paths.append(plot_path_temp)
 386         f.close()
 387
 388         # creo l'array con alcune colonne e pure moltiplicate
 389         for row in self.plot_myCoord:
 390             res=[]
 391             for cols in config[0].split(","):
 392                 if cols.find("*")!=-1:
 393                     col = int(cols.split("*")[0])
 394                     molt = int(cols.split("*")[1])
 395                 elif cols.find("x")!=-1:
 396                     col = int(cols.split("x")[0])
 397                     molt = int(cols.split("x")[1])
 398                 else:
 399                     col = int(cols)
 400                     molt = 1
 401                 res.append(row[col]*molt)
 402             self.plot_origCoord.append(res)
 403
 404         # array convert, calculate PCA, transpose
 405         self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
 406         #print self.plot_origCoord.shape
 407         self.plot_pcaCoord = pca(self.plot_origCoord, output_dim=2)     #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 408         self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
 409         pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
 410         pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
 411
 412         '''
 413         # Start section of testing with good plots                                  # 4 TESTING!
 414         Xsyn_1=[]
 415         Ysyn_1=[]
 416         Xgb1_1=[]
 417         Ygb1_1=[]
 418         Xbad_1=[]
 419         Ybad_1=[]
 420         goodnamefile=open(file_name.replace("coordinate", "good"),'r')
 421         goodnames=goodnamefile.readlines()
 422         nPlotGood = len(goodnames)-2 #tolgo prima e ultima riga
 423         goodnames=[i.split()[0] for i in goodnames[1:]]
 424
 425         for index in range(len(self.plot_paths)):
 426             if self.plot_paths[index][:-1] in goodnames:
 427                 Xsyn_1.append(pca_X[index])
 428                 Ysyn_1.append(pca_Y[index])
 429             else:
 430                 Xbad_1.append(pca_X[index])
 431                 Ybad_1.append(pca_Y[index])
 432         # Stop section of testing with good plots                                   # 4 TESTING!
 433         '''
 434
 435         # print first plot
 436         clustplot1=lhc.PlotObject()
 437         clustplot1.add_set(pca_X,pca_Y)
 438         #clustplot1.add_set(Xbad_1,Ybad_1) # 4 TESTING!
 439         #clustplot1.add_set(Xsyn_1,Ysyn_1) # 4 TESTING!
 440         clustplot1.normalize_vectors()
 441         clustplot1.styles=['scatter', 'scatter','scatter']
 442         clustplot1.colors=[None,'red','green']
 443         clustplot1.destination=0
 444         self._send_plot([clustplot1])
 445         self.clustplot1=clustplot1
 446
 447         # density and filer estimation
 448         kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
 449         tallest = 0
 450         for i in range(len(pca_X)):
 451             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 452             if tallest < kern_value:
 453                     tallest = float(kern_value)
 454         if float(config[1]) == 0:
 455             my_filter = float(tallest / 3.242311147)
 456         else:
 457             my_filter = float(config[1])
 458         '''
 459         # section useful only for graphic printing
 460         xmin = pca_X.min()
 461         xmax = pca_X.max()
 462         ymin = pca_Y.min()
 463         ymax = pca_Y.max()
 464         mX, mY = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
 465         Z = sp.rot90(sp.fliplr(sp.reshape(kernel(sp.c_[mX.ravel(), mY.ravel()].T).T, mX.T.shape)))
 466         axis_X = np.linspace(xmin,xmax,num=100)
 467         axis_Y = np.linspace(ymin,ymax,num=100)
 468         '''
 469
 470         # density filtering:
 471         # tramite "kernel.evaluate" trovo lo score (altezza) di ogni coordinata e decido se mantenerla o no
 472         filtered_pca_X = []
 473         filtered_pca_Y = []
 474         filtered_PcaCoordTr = []
 475         filtered_PcaCoord = []
 476         for i in range(len(pca_X)):
 477             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 478             if kern_value > my_filter:
 479                 filtered_pca_X.append(pca_X[i])
 480                 filtered_pca_Y.append(pca_Y[i])
 481         filtered_PcaCoordTr.append(filtered_pca_X)
 482         filtered_PcaCoordTr.append(filtered_pca_Y)
 483         filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
 484
 485         # creo i due array "plot_FiltOrigCoord" e "plot_FiltPaths" contenenti solo i dati filtrati con alta densita
 486         for index in range(len(self.plot_pcaCoord)):
 487             if self.plot_pcaCoord[index] in filtered_PcaCoord:
 488                 self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
 489                 self.plot_FiltPaths.append(self.plot_paths[index])
 490
 491         '''
 492         # START PCA#2: USELESS!!!
 493
 494         # creo l array con alcune colonne e pure moltiplicate
 495         temp_coord = []
 496         for row in self.plot_FiltOrigCoord:
 497             res=[]
 498             for cols in config[2].split(","):
 499                 if cols.find("*")!=-1:
 500                     col = int(cols.split("*")[0])
 501                     molt = int(cols.split("*")[1])
 502                 elif cols.find("x")!=-1:
 503                     col = int(cols.split("x")[0])
 504                     molt = int(cols.split("x")[1])
 505                 else:
 506                     col = int(cols)
 507                     molt = 1
 508                 res.append(row[col]*molt)
 509             temp_coord.append(res)
 510         self.plot_FiltOrigCoord = temp_coord
 511
 512         # ricalcolo la PCA: array convert, calculate PCA, transpose
 513         self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
 514         #print self.plot_FiltOrigCoord.shape
 515         self.plot_NewPcaCoord = pca(self.plot_FiltOrigCoord, output_dim=2)      #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 516         self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
 517         pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
 518         pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
 519
 520         # Start section of testing with good plots                              # 4 TESTING!
 521         Xsyn_2=[]
 522         Ysyn_2=[]
 523         Xbad_2=[]
 524         Ybad_2=[]
 525         for index in range(len(self.plot_FiltPaths)):
 526             if self.plot_FiltPaths[index][:-1] in goodnames:
 527                 Xsyn_2.append(pca_X2[index])
 528                 Ysyn_2.append(pca_Y2[index])
 529             else:
 530                 Xbad_2.append(pca_X2[index])
 531                 Ybad_2.append(pca_Y2[index])
 532
 533         # print second plot
 534         clustplot2=lhc.PlotObject()
 535         #clustplot2.add_set(pca_X2,pca_Y2)
 536         clustplot2.add_set(Xbad_2,Ybad_2)                                       # 4 TESTING!
 537         clustplot2.add_set(Xsyn_2,Ysyn_2)                                       # 4 TESTING!
 538         clustplot2.normalize_vectors()
 539         clustplot2.styles=['scatter', 'scatter','scatter']
 540         clustplot2.colors=[None,'red','green']
 541         clustplot2.destination=1
 542         self._send_plot([clustplot2])
 543         self.clustplot2=clustplot2
 544         '''
 545
 546         # PRINT density plot
 547         clustplot2=lhc.PlotObject()
 548         clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
 549         clustplot2.normalize_vectors()
 550         clustplot2.styles=['scatter', 'scatter','scatter']
 551         clustplot2.colors=[None,'red','green']
 552         clustplot2.destination=1
 553         self._send_plot([clustplot2])
 554         self.clustplot2=clustplot2
 555
 556         # printing results
 557         config_pca1 = config[0].replace("*", "x").rstrip("\n")
 558         config_pca2 = config[2].replace("*", "x").rstrip("\n")
 559         print ""
 560         print "- START: "+file_name
 561         print "Curve totali: ", nPlotTot
 562         #print "Curve totali good: ", nPlotGood                                  # 4 TESTING!
 563         print "- FILTRO 1: 0-500 e NaN"
 564         print "Curve totali rimaste: ", len(self.plot_origCoord)
 565         #print 'Curve good rimaste: ', len(Xsyn_1)                               # 4 TESTING!
 566         print "- FILTRO 2: PCA:"+config_pca1+" e DENSITA:"+str(my_filter)
 567         print "Curve totali rimaste: ", len(self.plot_FiltOrigCoord)
 568         #print 'Curve good rimaste: ', len(Xsyn_2)                               # 4 TESTING!
 569         print "Piu alta: ", tallest
 570         #print "- FILTRO 3: 2'PCA:"+config_pca2
 571         print ""
 572
 573         # -- exporting coordinates and plot of PCA in debug mode! --
 574         if config[3].find("true")!=-1:
 575             #1' PCA: save plot and build coordinate s file
 576             self.do_export(file_name.replace("coordinate_", "debug_pca1graph_").replace('.txt','_'+config_pca1) + " 0")
 577             f = open(file_name.replace("coordinate_", "debug_pca1coor_").replace('.txt','_'+config_pca1+'.txt'),'w')
 578             for i in range(len(pca_X)):
 579                 f.write (str(i) + "\t" + str(pca_X[i]) + "\t" + str(pca_Y[i]) + "\n")
 580             f.close()
 581             #2' PCA: save plot and build coordinate s file
 582             #self.do_export(file_name.replace("coordinate_", "debug_pca2graph_").replace('.txt','_'+config_pca2) + " 1")
 583             #f = open(file_name.replace("coordinate_", "debug_pca2coor_").replace('.txt','_'+config_pca2+'.txt'),'w')
 584             #for i in range(len(pca_X2)):
 585             #    f.write (str(i) + "\t" + str(pca_X2[i]) + "\t" + str(pca_Y2[i]) + "\n")
 586             #f.close()
 587             #DENSITY: save plot
 588             self.do_export(file_name.replace("coordinate_", "debug_densitygraph_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")) + " 1")
 589             f = open(file_name.replace("coordinate_", "debug_densitycoor_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")+'.txt'),'w')
 590             for i in range(len(filtered_pca_X)):
 591                 f.write (str(i) + "\t" + str(filtered_pca_X[i]) + "\t" + str(filtered_pca_Y[i]) + "\n")
 592             f.close()
 593             #ALL GOOD COORDINATES (without NaN and 0<x<500)
 594             f = open(file_name.replace("coordinate_", "debug_allgoodcoor_"),'w')
 595             for i in range(len(self.plot_myCoord)):
 596                 for cel in self.plot_myCoord[i]:
 597                     f.write (" ; " + str(cel))
 598                 f.write ("\n")
 599             f.close()
 600
 601         # pCLUSTER SAVING!!!
 602         import shutil
 603         pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
 604         if os.path.exists(pcl_name+slash): shutil.rmtree(pcl_name)
 605         os.mkdir(pcl_name+slash)
 606         f = open(pcl_name+'.txt','w')
 607         for i in range(len(self.plot_FiltPaths)):
 608             myfile = str(self.plot_FiltPaths[i]).rstrip("\n")
 609             f.write (myfile+"\n")
 610             shutil.copy2(myfile, pcl_name)
 611         f.close()
 612
 613
 614     def do_multipca(self,args):
 615         '''
 616         MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
 617         Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
 618         measures pca from coordinates filename and save the png plots.
 619         (c)Paolo Pancaldi, Massimo Sandal 2009
 620         '''
 621         # reads the columns of pca
 622         conf=open("pca_config.txt")
 623         config = conf.readlines() # config[0] = "1,2,3"
 624         conf.close()
 625         # cycling pca
 626         arg = args.split(" ")
 627         file_name=arg[0]
 628         column=str(arg[1])
 629         for i in range(1, 51, 1):
 630             self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
 631
 632     def do_doublepca(self,args):
 633         '''
 634         DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
 635         Automatically it launches the pca command for all combinations with two column
 636         (c)Paolo Pancaldi, Massimo Sandal 2009
 637         '''
 638         # cycling pca
 639         arg = args.split(" ")
 640         file_name=arg[0]
 641         for i in range(1, 13):
 642             for j in range(1, 13):
 643                 if i!=j:
 644                     self.do_pca(file_name + " " + str(i) + "," + str(j))
 645
 646     def do_triplepca(self,args):
 647         '''
 648         TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
 649         Automatically it launches the pca command for all combinations with three column
 650         (c)Paolo Pancaldi, Massimo Sandal 2009
 651         '''
 652         # cycling pca
 653         arg = args.split(" ")
 654         file_name=arg[0]
 655         for i in range(1, 13):
 656             for j in range(1, 13):
 657                 for k in range(1, 13):
 658                     if i!=j and i!=k and j!=k:
 659                         self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
 660
 661     def do_pclick(self,args):
 662         '''
 663         It returns id, coordinates and file name of a clicked dot on a PCA graphic
 664         '''
 665
 666         self._send_plot([self.clustplot1]) #quick workaround for BAD problems in the GUI
 667         print 'Click point'
 668         point = self._measure_N_points(N=1, whatset=0)
 669         indice = point[0].index
 670         plot_file = self.plot_paths[indice]
 671         dot_coord = self.plot_pcaCoord[indice]
 672         print "file: " + str(plot_file).rstrip()
 673         print "id: " + str(indice)
 674         print "coord: " + str(dot_coord)
 675         self.do_genlist(str(plot_file))
 676         #self.do_jump(str(plot_file))
 677
 678     # indea iniziata e messa da parte...
 679     def do_peakforce(self, args):
 680         '''
 681         peackforce -> "peackforce peackforce_file.txt"
 682         Automatically measures peack and force plots
 683         (c)Paolo Pancaldi, Massimo Sandal 2009
 684         '''
 685
 686         # prende in inpunt un arg (nome del file)
 687         file_name=args
 688         f=open(file_name)
 689
 690         # scrivo un file temp
 691         g = open('_prove.txt','w')
 692
 693         plot_file = '';
 694         rows = f.readlines()
 695         for row in rows:
 696             if row[0]=="/":
 697                 plot_file = row
 698             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 699                 # FILTRO SUI 7 PICCHI
 700                 num_pic = int(row.split(" ; ")[1])
 701                 if num_pic==7:
 702                     width_force = row.split(" ; ")
 703                     w1 = float(width_force[2]); f1 = float(width_force[3]);
 704                     w2 = float(width_force[4]); f2 = float(width_force[5]);
 705                     w3 = float(width_force[6]); f3 = float(width_force[7]);
 706                     w4 = float(width_force[8]); f4 = float(width_force[9]);
 707                     w5 = float(width_force[10]); f5 = float(width_force[11]);
 708                     w6 = float(width_force[12]); f6 = float(width_force[13]);
 709                     w7 = float(width_force[14]); f7 = float(width_force[15]);
 710                     if w1>0 and w1<1000 and w2>0 and w2<1000 and w3>0 and w3<1000 and w4>0 and w4<1000 and w5>0 and w5<1000 and w6>0 and w6<1000 and w7>0 and w7<1000:
 711                         score_76 = abs(32 - (w7 - w6))
 712                         score_65 = abs(32 - (w6 - w5))
 713                         score_54 = abs(32 - (w5 - w4))
 714                         score_43 = abs(32 - (w4 - w3))
 715                         score_32 = abs(32 - (w3 - w2))
 716                         score_21 = abs(32 - (w2 - w1))
 717                         writeme = str(score_76) + " --- " + str(row)
 718                         g.write(writeme)
 719         g.close()
 720         f.close()