hooke/plugin/pcluster.py

   1 # Copyright (C) 2009-2012 Massimo Sandal <devicerandom@gmail.com>
   2 #                         Pancaldi Paolo <pancaldi.paolo@gmail.com>
   3 #                         W. Trevor King <wking@tremily.us>
   4 #
   5 # This file is part of Hooke.
   6 #
   7 # Hooke is free software: you can redistribute it and/or modify it under the
   8 # terms of the GNU Lesser General Public License as published by the Free
   9 # Software Foundation, either version 3 of the License, or (at your option) any
  10 # later version.
  11 #
  12 # Hooke is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  14 # A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
  15 # details.
  16 #
  17 # You should have received a copy of the GNU Lesser General Public License
  18 # along with Hooke.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 from ..libhooke import WX_GOOD, ClickedPoint, config_file_path
  21
  22 from mdp import pca
  23 import wxversion
  24 wxversion.select(WX_GOOD)
  25 from wx import PostEvent
  26 import numpy as np
  27 import scipy as sp
  28 import copy
  29 import os.path
  30 import time
  31 import pylab as pyl
  32
  33 from .. import curve as lhc
  34
  35
  36 import warnings
  37 warnings.simplefilter('ignore',np.RankWarning)
  38
  39
  40 class pclusterCommands(object):
  41
  42     def _plug_init(self):
  43         self.clustplot1=None
  44         self.clustplot2=None
  45
  46     def do_pcluster(self,args):
  47         '''
  48         pCLUSTER
  49         (pcluster.py)
  50         Automatically measures peaks and extracts informations for further clustering
  51         (c)Paolo Pancaldi, Massimo Sandal 2009
  52         '''
  53         blindw = str(self.convfilt_config['blindwindow'])
  54         pclus_dir = "pCluster_blind"+blindw+"_"+time.strftime("%Y%m%d_%H%M")
  55         self.my_work_dir = os.path.join(os.getcwd(), pclus_dir)
  56         self.my_curr_dir = os.path.basename(os.getcwd())
  57         os.mkdir(self.my_work_dir)
  58
  59         #--Custom persistent length
  60         pl_value=None
  61         for arg in args.split():
  62             #look for a persistent length argument.
  63             if 'pl=' in arg:
  64                 pl_expression=arg.split('=')
  65                 pl_value=float(pl_expression[1]) #actual value
  66             else:
  67                 pl_value=None
  68
  69         #configuration variables
  70         min_npks = self.convfilt_config['minpeaks']
  71         min_deviation = self.convfilt_config['mindeviation']
  72
  73         pclust_filename = "automeasure_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Automeasure filename? ')
  74         realclust_filename = "coordinate_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Coordinates filename? ')
  75         peackforce_filename = "peakforce_"+self.my_curr_dir+"_blind"+blindw+".txt"  #raw_input('Peacks and Forces filename? ')
  76
  77         f=open(self.my_work_dir+pclust_filename,'w+')
  78         f.write('Analysis started '+time.asctime()+'\n')
  79         f.write('----------------------------------------\n')
  80         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
  81         f.close()
  82
  83         f=open(self.my_work_dir+realclust_filename,'w+')
  84         f.write('Analysis started '+time.asctime()+'\n')
  85         f.write('----------------------------------------\n')
  86         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm) ; Peaks Diff\n')
  87         f.close()
  88
  89         f=open(self.my_work_dir+peackforce_filename,'w+')
  90         f.write('Analysis started '+time.asctime()+'\n')
  91         f.write('----------------------------------------\n')
  92         f.write('; Peak number  ;  1 peak Length (nm) ; 1 peak Force (pN) ;  2 peak Length (nm) ; 2 peak Force (pN) ;  3 peak Length (nm) ; 3 peak Force (pN) ;  4 peak Length (nm) ; 4 peak Force (pN) ;  5 peak Length (nm) ; 5 peak Force (pN) ;  6 peak Length (nm) ; 6 peak Force (pN) ;  7 peak Length (nm) ; 7 peak Force (pN) ;  8 peak Length (nm) ; 8 peak Force (pN)\n')
  93         f.close()
  94
  95
  96         def plot_informations(itplot,pl_value):
  97             '''
  98             OUR VARIABLES
  99             contact_point.absolute_coords               (2.4584142802103689e-007, -6.9647135616234017e-009)
 100             peak_point.absolute_coords                  (3.6047748250571423e-008, -7.7142802788854212e-009)
 101             other_fit_point.absolute_coords     (4.1666139243838867e-008, -7.3759393477579707e-009)
 102             peak_location                                                                               [510, 610, 703, 810, 915, 1103]
 103             peak_size                                                                                           [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
 104             params                                                                                                      [2.2433999931959462e-007, 3.3230248825175678e-010]
 105             fit_errors                                                                                  [6.5817195369767644e-010, 2.4415923138871498e-011]
 106             '''
 107             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
 108
 109             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
 110             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
 111             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
 112             self.basepoints=[]
 113             base_index_0=peak_location[-1]+self.fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
 114             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
 115             base_index_1=self.basepoints[0].index+self.fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
 116             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
 117             self.basecurrent=self.current.path
 118             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
 119             boundaries.sort()
 120             to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
 121             avg=np.mean(to_average)
 122             return fit_points, contact_point, pl_value, T, cindex, avg
 123
 124         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
 125             '''
 126             calculate informations for each peak and add they in
 127             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 128             '''
 129             c_leng=None
 130             p_leng=None
 131             sigma_c_leng=None
 132             sigma_p_leng=None
 133             force=None
 134             slope=None
 135
 136             delta_force=10
 137             slope_span=int(self.config['auto_slope_span'])
 138
 139             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
 140             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
 141
 142             points=[contact_point, peak_point, other_fit_point]
 143
 144             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
 145
 146             #Measure forces
 147             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
 148             y=min(delta_to_measure)
 149             #Measure slopes
 150             slope=self.linefit_between(peak-slope_span,peak)[0]
 151             #check fitted data and, if right, add peak to the measurement
 152             if len(params)==1: #if we did choose 1-value fit
 153                 p_leng=pl_value
 154                 c_leng=params[0]*(1.0e+9)
 155                 sigma_p_leng=0
 156                 sigma_c_leng=fit_errors[0]*(1.0e+9)
 157                 force = abs(y-avg)*(1.0e+12)
 158             else: #2-value fit
 159                 p_leng=params[1]*(1.0e+9)
 160                 #check if persistent length makes sense. otherwise, discard peak.
 161                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
 162                     '''
 163                     p_lengths.append(p_leng)
 164                     c_lengths.append(params[0]*(1.0e+9))
 165                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
 166                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
 167                     forces.append(abs(y-avg)*(1.0e+12))
 168                     slopes.append(slope)
 169                     '''
 170                     c_leng=params[0]*(1.0e+9)
 171                     sigma_c_leng=fit_errors[0]*(1.0e+9)
 172                     sigma_p_leng=fit_errors[1]*(1.0e+9)
 173                     force=abs(y-avg)*(1.0e+12)
 174                 else:
 175                     p_leng=None
 176                     slope=None
 177             #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 178             return  c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
 179
 180
 181         # ------ PROGRAM -------
 182         c=0
 183         for item in self.current_list:
 184             c+=1
 185             item.identify(self.drivers)
 186             itplot=item.curve.default_plots()
 187             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
 188             itplot[0]=flatten(itplot[0], item, customvalue=1)
 189             try:
 190                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
 191             except:
 192                 #We have troubles with exec_has_peaks (bad curve, whatever).
 193                 #Print info and go to next cycle.
 194                 print 'Cannot process ',item.path
 195                 continue
 196
 197             if len(peak_location)==0:
 198                 print 'No peaks!'
 199                 continue
 200
 201             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
 202
 203             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 204
 205             #initialize output data vectors
 206             c_lengths=[]
 207             p_lengths=[]
 208             sigma_c_lengths=[]
 209             sigma_p_lengths=[]
 210             forces=[]
 211             slopes=[]
 212
 213             #loop each peak of my curve
 214             for peak in peak_location:
 215                 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
 216                 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
 217                     if var is not None:
 218                         vector.append(var)
 219
 220             #FIXME: We need a dictionary here...
 221             allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
 222             for vect in allvects:
 223                 if len(vect)==0:
 224                     for i in range(len(c_lengths)):
 225                         vect.append(0)
 226
 227             print 'Measurements for all peaks detected:'
 228             print 'contour (nm)', c_lengths
 229             print 'sigma contour (nm)',sigma_c_lengths
 230             print 'p (nm)',p_lengths
 231             print 'sigma p (nm)',sigma_p_lengths
 232             print 'forces (pN)',forces
 233             print 'slopes (N/m)',slopes
 234
 235             '''
 236             write automeasure text file
 237             '''
 238             print 'Saving automatic measurement...'
 239             f=open(self.my_work_dir+pclust_filename,'a+')
 240             f.write(item.path+'\n')
 241             for i in range(len(c_lengths)):
 242                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
 243             f.close()
 244
 245             peak_number=len(c_lengths)
 246
 247             '''
 248             write peackforce text file
 249             '''
 250             print 'Saving automatic measurement...'
 251             f=open(self.my_work_dir+peackforce_filename,'a+')
 252             f.write(item.path+'\n')
 253             peackforce_info = ''
 254             for i in range(len(c_lengths)):
 255                 peackforce_info = peackforce_info + ' ; ' + str(c_lengths[i]) + ' ; ' + str(forces[i])
 256             f.write(' ; '+str(peak_number)+peackforce_info+'\n')
 257             f.close()
 258
 259             '''
 260             calculate clustering coordinates
 261             '''
 262             if peak_number > 1:
 263                 deltas=[]
 264                 for i in range(len(c_lengths)-1):
 265                     deltas.append(c_lengths[i+1]-c_lengths[i])
 266
 267                 delta_mean=np.mean(deltas)
 268                 delta_median=np.median(deltas)
 269
 270                 force_mean=np.mean(forces)
 271                 force_median=np.median(forces)
 272
 273                 first_peak_cl=c_lengths[0]
 274                 last_peak_cl=c_lengths[-1]
 275
 276                 max_force=max(forces[:-1])
 277                 min_force=min(forces)
 278
 279                 max_delta=max(deltas)
 280                 min_delta=min(deltas)
 281
 282                 delta_stdev=np.std(deltas)
 283                 forces_stdev=np.std(forces[:-1])
 284
 285                 peaks_diff=(last_peak_cl-first_peak_cl)/peak_number
 286
 287                 print 'Coordinates'
 288                 print 'Peaks',peak_number
 289                 print 'Mean delta',delta_mean
 290                 print 'Median delta',delta_median
 291                 print 'Mean force',force_mean
 292                 print 'Median force',force_median
 293                 print 'First peak',first_peak_cl
 294                 print 'Last peak',last_peak_cl
 295                 print 'Max force',max_force
 296                 print 'Min force',min_force
 297                 print 'Max delta',max_delta
 298                 print 'Min delta',min_delta
 299                 print 'Delta stdev',delta_stdev
 300                 print 'Forces stdev',forces_stdev
 301                 print 'Peaks difference',peaks_diff
 302
 303                 '''
 304                 write clustering coordinates
 305                 '''
 306                 f=open(self.my_work_dir+realclust_filename,'a+')
 307                 f.write(item.path+'\n')
 308                 f.write(' ; '+str(peak_number)+     # non considerato
 309                         ' ; '+str(delta_mean)+      # 0
 310                         ' ; '+str(delta_median)+    # 1 -
 311                         ' ; '+str(force_mean)+      # 2
 312                         ' ; '+str(force_median)+    # 3 -
 313                         ' ; '+str(first_peak_cl)+   # 4 -
 314                         ' ; '+str(last_peak_cl)+    # 5 -
 315                         ' ; '+str(max_force)+       # 6
 316                         ' ; '+str(min_force)+       # 7
 317                         ' ; '+str(max_delta)+       # 8
 318                         ' ; '+str(min_delta)+       # 9
 319                         ' ; '+str(delta_stdev)+     # 10
 320                         ' ; '+str(forces_stdev)+    # 11
 321                         ' ; '+str(peaks_diff)+      # 12
 322                         '\n')
 323                 f.close()
 324
 325         # start PCA
 326         self.do_pca(pclus_dir+"/"+realclust_filename)
 327
 328
 329     def do_pca(self,args):
 330         '''
 331         PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
 332         Automatically measures pca from coordinates filename and shows two interactives plots
 333         With the second argument (arbitrary) you can select the columns and the multiplier factor
 334         to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
 335         Without second argument it reads pca_config.txt file
 336         (c)Paolo Pancaldi, Massimo Sandal 2009
 337         '''
 338
 339         # reads the columns of pca
 340         conf=open(config_file_path("pca_config.txt"), 'r')
 341         config = conf.readlines()
 342         conf.close()
 343
 344         self.plot_myCoord = []          # tiene le coordinate prese direttamente dal file creato con pCluster
 345         self.plot_origCoord = []        # tiene le coordinate solo delle colonne scelte e moltiplicate per i valori scelti
 346         self.plot_pcaCoord = []         # tiene le due colonne della pca
 347         self.plot_pcaCoordTr = []       # tiene le due colonne della pca trasposta
 348         self.plot_FiltOrigCoord = []    # tiene le coordinate solo dei punti filtrati per densita
 349         self.plot_FiltPaths = []        # tiene i paths dei plot solo dei punti filtrati per densita
 350         self.plot_paths = []            # tiene i paths di tutti i plots
 351         self.plot_NewPcaCoord = []      # tiene le due colonne della pca filtrate per densita
 352         self.plot_NewPcaCoordTr=[]      # tiene le due colonne della pca trasposta filtrate per densita
 353         plot_path_temp = ""
 354
 355         # prende in inpunt un arg (nome del file)
 356         # e il secondo le colonne su cui lavorare (e' arbitrario, riceve x es "1,2,3")
 357         arg = args.split(" ")
 358         if arg[0]==args:
 359             file_name=args
 360         else:
 361             file_name=arg[0]
 362             config[0] = arg[1]
 363
 364         # creo l'array "plot_myCoord" con tutte le coordinate dei plots
 365         # e l'array plot_paths con tutti i percorsi dei plots
 366         nPlotTot = -3 #tolgo le prime 3 righe iniziali del file
 367         f=open(file_name)
 368         rows = f.readlines()
 369         for row in rows:
 370             if row[0]!=" " and row[0]!="":
 371                 nPlotTot = nPlotTot+1
 372                 plot_path_temp = row
 373             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 374                 row = row[row.index(";",2)+2:].split(" ; ")     # non considero la prima colonna col #picchi
 375                 row = [float(i) for i in row]
 376
 377                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
 378                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
 379                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
 380                     if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
 381                         #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
 382                         self.plot_myCoord.append(row)
 383                         self.plot_paths.append(plot_path_temp)
 384         f.close()
 385
 386         # creo l'array con alcune colonne e pure moltiplicate
 387         for row in self.plot_myCoord:
 388             res=[]
 389             for cols in config[0].split(","):
 390                 if cols.find("*")!=-1:
 391                     col = int(cols.split("*")[0])
 392                     molt = int(cols.split("*")[1])
 393                 elif cols.find("x")!=-1:
 394                     col = int(cols.split("x")[0])
 395                     molt = int(cols.split("x")[1])
 396                 else:
 397                     col = int(cols)
 398                     molt = 1
 399                 res.append(row[col]*molt)
 400             self.plot_origCoord.append(res)
 401
 402         # array convert, calculate PCA, transpose
 403         self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
 404         #print self.plot_origCoord.shape
 405         self.plot_pcaCoord = pca(self.plot_origCoord, output_dim=2)     #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 406         self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
 407         pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
 408         pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
 409
 410         '''
 411         # Start section of testing with good plots                                  # 4 TESTING!
 412         Xsyn_1=[]
 413         Ysyn_1=[]
 414         Xgb1_1=[]
 415         Ygb1_1=[]
 416         Xbad_1=[]
 417         Ybad_1=[]
 418         goodnamefile=open(file_name.replace("coordinate", "good"),'r')
 419         goodnames=goodnamefile.readlines()
 420         nPlotGood = len(goodnames)-2 #tolgo prima e ultima riga
 421         goodnames=[i.split()[0] for i in goodnames[1:]]
 422
 423         for index in range(len(self.plot_paths)):
 424             if self.plot_paths[index][:-1] in goodnames:
 425                 Xsyn_1.append(pca_X[index])
 426                 Ysyn_1.append(pca_Y[index])
 427             else:
 428                 Xbad_1.append(pca_X[index])
 429                 Ybad_1.append(pca_Y[index])
 430         # Stop section of testing with good plots                                   # 4 TESTING!
 431         '''
 432
 433         # print first plot
 434         clustplot1=lhc.PlotObject()
 435         clustplot1.add_set(pca_X,pca_Y)
 436         #clustplot1.add_set(Xbad_1,Ybad_1) # 4 TESTING!
 437         #clustplot1.add_set(Xsyn_1,Ysyn_1) # 4 TESTING!
 438         clustplot1.normalize_vectors()
 439         clustplot1.styles=['scatter', 'scatter','scatter']
 440         clustplot1.colors=[None,'red','green']
 441         clustplot1.destination=0
 442         self._send_plot([clustplot1])
 443         self.clustplot1=clustplot1
 444
 445         # density and filer estimation
 446         kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
 447         tallest = 0
 448         for i in range(len(pca_X)):
 449             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 450             if tallest < kern_value:
 451                     tallest = float(kern_value)
 452         if float(config[1]) == 0:
 453             my_filter = float(tallest / 3.242311147)
 454         else:
 455             my_filter = float(config[1])
 456         '''
 457         # section useful only for graphic printing
 458         xmin = pca_X.min()
 459         xmax = pca_X.max()
 460         ymin = pca_Y.min()
 461         ymax = pca_Y.max()
 462         mX, mY = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
 463         Z = sp.rot90(sp.fliplr(sp.reshape(kernel(sp.c_[mX.ravel(), mY.ravel()].T).T, mX.T.shape)))
 464         axis_X = np.linspace(xmin,xmax,num=100)
 465         axis_Y = np.linspace(ymin,ymax,num=100)
 466         '''
 467
 468         # density filtering:
 469         # tramite "kernel.evaluate" trovo lo score (altezza) di ogni coordinata e decido se mantenerla o no
 470         filtered_pca_X = []
 471         filtered_pca_Y = []
 472         filtered_PcaCoordTr = []
 473         filtered_PcaCoord = []
 474         for i in range(len(pca_X)):
 475             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 476             if kern_value > my_filter:
 477                 filtered_pca_X.append(pca_X[i])
 478                 filtered_pca_Y.append(pca_Y[i])
 479         filtered_PcaCoordTr.append(filtered_pca_X)
 480         filtered_PcaCoordTr.append(filtered_pca_Y)
 481         filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
 482
 483         # creo i due array "plot_FiltOrigCoord" e "plot_FiltPaths" contenenti solo i dati filtrati con alta densita
 484         for index in range(len(self.plot_pcaCoord)):
 485             if self.plot_pcaCoord[index] in filtered_PcaCoord:
 486                 self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
 487                 self.plot_FiltPaths.append(self.plot_paths[index])
 488
 489         '''
 490         # START PCA#2: USELESS!!!
 491
 492         # creo l array con alcune colonne e pure moltiplicate
 493         temp_coord = []
 494         for row in self.plot_FiltOrigCoord:
 495             res=[]
 496             for cols in config[2].split(","):
 497                 if cols.find("*")!=-1:
 498                     col = int(cols.split("*")[0])
 499                     molt = int(cols.split("*")[1])
 500                 elif cols.find("x")!=-1:
 501                     col = int(cols.split("x")[0])
 502                     molt = int(cols.split("x")[1])
 503                 else:
 504                     col = int(cols)
 505                     molt = 1
 506                 res.append(row[col]*molt)
 507             temp_coord.append(res)
 508         self.plot_FiltOrigCoord = temp_coord
 509
 510         # ricalcolo la PCA: array convert, calculate PCA, transpose
 511         self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
 512         #print self.plot_FiltOrigCoord.shape
 513         self.plot_NewPcaCoord = pca(self.plot_FiltOrigCoord, output_dim=2)      #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 514         self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
 515         pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
 516         pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
 517
 518         # Start section of testing with good plots                              # 4 TESTING!
 519         Xsyn_2=[]
 520         Ysyn_2=[]
 521         Xbad_2=[]
 522         Ybad_2=[]
 523         for index in range(len(self.plot_FiltPaths)):
 524             if self.plot_FiltPaths[index][:-1] in goodnames:
 525                 Xsyn_2.append(pca_X2[index])
 526                 Ysyn_2.append(pca_Y2[index])
 527             else:
 528                 Xbad_2.append(pca_X2[index])
 529                 Ybad_2.append(pca_Y2[index])
 530
 531         # print second plot
 532         clustplot2=lhc.PlotObject()
 533         #clustplot2.add_set(pca_X2,pca_Y2)
 534         clustplot2.add_set(Xbad_2,Ybad_2)                                       # 4 TESTING!
 535         clustplot2.add_set(Xsyn_2,Ysyn_2)                                       # 4 TESTING!
 536         clustplot2.normalize_vectors()
 537         clustplot2.styles=['scatter', 'scatter','scatter']
 538         clustplot2.colors=[None,'red','green']
 539         clustplot2.destination=1
 540         self._send_plot([clustplot2])
 541         self.clustplot2=clustplot2
 542         '''
 543
 544         # PRINT density plot
 545         clustplot2=lhc.PlotObject()
 546         clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
 547         clustplot2.normalize_vectors()
 548         clustplot2.styles=['scatter', 'scatter','scatter']
 549         clustplot2.colors=[None,'red','green']
 550         clustplot2.destination=1
 551         self._send_plot([clustplot2])
 552         self.clustplot2=clustplot2
 553
 554         # printing results
 555         config_pca1 = config[0].replace("*", "x").rstrip("\n")
 556         config_pca2 = config[2].replace("*", "x").rstrip("\n")
 557         print ""
 558         print "- START: "+file_name
 559         print "Curve totali: ", nPlotTot
 560         #print "Curve totali good: ", nPlotGood                                  # 4 TESTING!
 561         print "- FILTRO 1: 0-500 e NaN"
 562         print "Curve totali rimaste: ", len(self.plot_origCoord)
 563         #print 'Curve good rimaste: ', len(Xsyn_1)                               # 4 TESTING!
 564         print "- FILTRO 2: PCA:"+config_pca1+" e DENSITA:"+str(my_filter)
 565         print "Curve totali rimaste: ", len(self.plot_FiltOrigCoord)
 566         #print 'Curve good rimaste: ', len(Xsyn_2)                               # 4 TESTING!
 567         print "Piu alta: ", tallest
 568         #print "- FILTRO 3: 2'PCA:"+config_pca2
 569         print ""
 570
 571         # -- exporting coordinates and plot of PCA in debug mode! --
 572         if config[3].find("true")!=-1:
 573             #1' PCA: save plot and build coordinate s file
 574             self.do_export(file_name.replace("coordinate_", "debug_pca1graph_").replace('.txt','_'+config_pca1) + " 0")
 575             f = open(file_name.replace("coordinate_", "debug_pca1coor_").replace('.txt','_'+config_pca1+'.txt'),'w')
 576             for i in range(len(pca_X)):
 577                 f.write (str(i) + "\t" + str(pca_X[i]) + "\t" + str(pca_Y[i]) + "\n")
 578             f.close()
 579             #2' PCA: save plot and build coordinate s file
 580             #self.do_export(file_name.replace("coordinate_", "debug_pca2graph_").replace('.txt','_'+config_pca2) + " 1")
 581             #f = open(file_name.replace("coordinate_", "debug_pca2coor_").replace('.txt','_'+config_pca2+'.txt'),'w')
 582             #for i in range(len(pca_X2)):
 583             #    f.write (str(i) + "\t" + str(pca_X2[i]) + "\t" + str(pca_Y2[i]) + "\n")
 584             #f.close()
 585             #DENSITY: save plot
 586             self.do_export(file_name.replace("coordinate_", "debug_densitygraph_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")) + " 1")
 587             f = open(file_name.replace("coordinate_", "debug_densitycoor_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")+'.txt'),'w')
 588             for i in range(len(filtered_pca_X)):
 589                 f.write (str(i) + "\t" + str(filtered_pca_X[i]) + "\t" + str(filtered_pca_Y[i]) + "\n")
 590             f.close()
 591             #ALL GOOD COORDINATES (without NaN and 0<x<500)
 592             f = open(file_name.replace("coordinate_", "debug_allgoodcoor_"),'w')
 593             for i in range(len(self.plot_myCoord)):
 594                 for cel in self.plot_myCoord[i]:
 595                     f.write (" ; " + str(cel))
 596                 f.write ("\n")
 597             f.close()
 598
 599         # pCLUSTER SAVING!!!
 600         import shutil
 601         pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
 602         if os.path.exists(pcl_name+slash): shutil.rmtree(pcl_name)
 603         os.mkdir(pcl_name+slash)
 604         f = open(pcl_name+'.txt','w')
 605         for i in range(len(self.plot_FiltPaths)):
 606             myfile = str(self.plot_FiltPaths[i]).rstrip("\n")
 607             f.write (myfile+"\n")
 608             shutil.copy2(myfile, pcl_name)
 609         f.close()
 610
 611
 612     def do_multipca(self,args):
 613         '''
 614         MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
 615         Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
 616         measures pca from coordinates filename and save the png plots.
 617         (c)Paolo Pancaldi, Massimo Sandal 2009
 618         '''
 619         # reads the columns of pca
 620         conf=open("pca_config.txt")
 621         config = conf.readlines() # config[0] = "1,2,3"
 622         conf.close()
 623         # cycling pca
 624         arg = args.split(" ")
 625         file_name=arg[0]
 626         column=str(arg[1])
 627         for i in range(1, 51, 1):
 628             self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
 629
 630     def do_doublepca(self,args):
 631         '''
 632         DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
 633         Automatically it launches the pca command for all combinations with two column
 634         (c)Paolo Pancaldi, Massimo Sandal 2009
 635         '''
 636         # cycling pca
 637         arg = args.split(" ")
 638         file_name=arg[0]
 639         for i in range(1, 13):
 640             for j in range(1, 13):
 641                 if i!=j:
 642                     self.do_pca(file_name + " " + str(i) + "," + str(j))
 643
 644     def do_triplepca(self,args):
 645         '''
 646         TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
 647         Automatically it launches the pca command for all combinations with three column
 648         (c)Paolo Pancaldi, Massimo Sandal 2009
 649         '''
 650         # cycling pca
 651         arg = args.split(" ")
 652         file_name=arg[0]
 653         for i in range(1, 13):
 654             for j in range(1, 13):
 655                 for k in range(1, 13):
 656                     if i!=j and i!=k and j!=k:
 657                         self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
 658
 659     def do_pclick(self,args):
 660         '''
 661         It returns id, coordinates and file name of a clicked dot on a PCA graphic
 662         '''
 663
 664         self._send_plot([self.clustplot1]) #quick workaround for BAD problems in the GUI
 665         print 'Click point'
 666         point = self._measure_N_points(N=1, whatset=0)
 667         indice = point[0].index
 668         plot_file = self.plot_paths[indice]
 669         dot_coord = self.plot_pcaCoord[indice]
 670         print "file: " + str(plot_file).rstrip()
 671         print "id: " + str(indice)
 672         print "coord: " + str(dot_coord)
 673         self.do_genlist(str(plot_file))
 674         #self.do_jump(str(plot_file))
 675
 676     # indea iniziata e messa da parte...
 677     def do_peakforce(self, args):
 678         '''
 679         peackforce -> "peackforce peackforce_file.txt"
 680         Automatically measures peack and force plots
 681         (c)Paolo Pancaldi, Massimo Sandal 2009
 682         '''
 683
 684         # prende in inpunt un arg (nome del file)
 685         file_name=args
 686         f=open(file_name)
 687
 688         # scrivo un file temp
 689         g = open('_prove.txt','w')
 690
 691         plot_file = '';
 692         rows = f.readlines()
 693         for row in rows:
 694             if row[0]=="/":
 695                 plot_file = row
 696             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 697                 # FILTRO SUI 7 PICCHI
 698                 num_pic = int(row.split(" ; ")[1])
 699                 if num_pic==7:
 700                     width_force = row.split(" ; ")
 701                     w1 = float(width_force[2]); f1 = float(width_force[3]);
 702                     w2 = float(width_force[4]); f2 = float(width_force[5]);
 703                     w3 = float(width_force[6]); f3 = float(width_force[7]);
 704                     w4 = float(width_force[8]); f4 = float(width_force[9]);
 705                     w5 = float(width_force[10]); f5 = float(width_force[11]);
 706                     w6 = float(width_force[12]); f6 = float(width_force[13]);
 707                     w7 = float(width_force[14]); f7 = float(width_force[15]);
 708                     if w1>0 and w1<1000 and w2>0 and w2<1000 and w3>0 and w3<1000 and w4>0 and w4<1000 and w5>0 and w5<1000 and w6>0 and w6<1000 and w7>0 and w7<1000:
 709                         score_76 = abs(32 - (w7 - w6))
 710                         score_65 = abs(32 - (w6 - w5))
 711                         score_54 = abs(32 - (w5 - w4))
 712                         score_43 = abs(32 - (w4 - w3))
 713                         score_32 = abs(32 - (w3 - w2))
 714                         score_21 = abs(32 - (w2 - w1))
 715                         writeme = str(score_76) + " --- " + str(row)
 716                         g.write(writeme)
 717         g.close()
 718         f.close()