hooke/plugin/pcluster.py

   1 from ..libhooke import WX_GOOD, ClickedPoint, config_file_path
   2
   3 from mdp import pca
   4 import wxversion
   5 wxversion.select(WX_GOOD)
   6 from wx import PostEvent
   7 import numpy as np
   8 import scipy as sp
   9 import copy
  10 import os.path
  11 import time
  12 import pylab as pyl
  13
  14 from .. import libhookecurve as lhc
  15
  16
  17 import warnings
  18 warnings.simplefilter('ignore',np.RankWarning)
  19
  20
  21 class pclusterCommands(object):
  22
  23     def _plug_init(self):
  24         self.clustplot1=None
  25         self.clustplot2=None
  26
  27     def do_pcluster(self,args):
  28         '''
  29         pCLUSTER
  30         (pcluster.py)
  31         Automatically measures peaks and extracts informations for further clustering
  32         (c)Paolo Pancaldi, Massimo Sandal 2009
  33         '''
  34         blindw = str(self.convfilt_config['blindwindow'])
  35         pclus_dir = "pCluster_blind"+blindw+"_"+time.strftime("%Y%m%d_%H%M")
  36         self.my_work_dir = os.path.join(os.getcwd(), pclus_dir)
  37         self.my_curr_dir = os.path.basename(os.getcwd())
  38         os.mkdir(self.my_work_dir)
  39
  40         #--Custom persistent length
  41         pl_value=None
  42         for arg in args.split():
  43             #look for a persistent length argument.
  44             if 'pl=' in arg:
  45                 pl_expression=arg.split('=')
  46                 pl_value=float(pl_expression[1]) #actual value
  47             else:
  48                 pl_value=None
  49
  50         #configuration variables
  51         min_npks = self.convfilt_config['minpeaks']
  52         min_deviation = self.convfilt_config['mindeviation']
  53
  54         pclust_filename = "automeasure_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Automeasure filename? ')
  55         realclust_filename = "coordinate_"+self.my_curr_dir+"_blind"+blindw+".txt" #raw_input('Coordinates filename? ')
  56         peackforce_filename = "peakforce_"+self.my_curr_dir+"_blind"+blindw+".txt"  #raw_input('Peacks and Forces filename? ')
  57
  58         f=open(self.my_work_dir+pclust_filename,'w+')
  59         f.write('Analysis started '+time.asctime()+'\n')
  60         f.write('----------------------------------------\n')
  61         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
  62         f.close()
  63
  64         f=open(self.my_work_dir+realclust_filename,'w+')
  65         f.write('Analysis started '+time.asctime()+'\n')
  66         f.write('----------------------------------------\n')
  67         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm) ; Peaks Diff\n')
  68         f.close()
  69
  70         f=open(self.my_work_dir+peackforce_filename,'w+')
  71         f.write('Analysis started '+time.asctime()+'\n')
  72         f.write('----------------------------------------\n')
  73         f.write('; Peak number  ;  1 peak Length (nm) ; 1 peak Force (pN) ;  2 peak Length (nm) ; 2 peak Force (pN) ;  3 peak Length (nm) ; 3 peak Force (pN) ;  4 peak Length (nm) ; 4 peak Force (pN) ;  5 peak Length (nm) ; 5 peak Force (pN) ;  6 peak Length (nm) ; 6 peak Force (pN) ;  7 peak Length (nm) ; 7 peak Force (pN) ;  8 peak Length (nm) ; 8 peak Force (pN)\n')
  74         f.close()
  75
  76         # ------ FUNCTION ------
  77         def fit_interval_nm(start_index,plot,nm,backwards):
  78             '''
  79             Calculates the number of points to fit, given a fit interval in nm
  80             start_index: index of point
  81             plot: plot to use
  82             backwards: if true, finds a point backwards.
  83             '''
  84             whatset=1 #FIXME: should be decidable
  85             x_vect=plot.vectors[1][0]
  86
  87             c=0
  88             i=start_index
  89             start=x_vect[start_index]
  90             maxlen=len(x_vect)
  91             while abs(x_vect[i]-x_vect[start_index])*(10**9) < nm:
  92                 if i==0 or i==maxlen-1: #we reached boundaries of vector!
  93                     return c
  94                 if backwards:
  95                     i-=1
  96                 else:
  97                     i+=1
  98                 c+=1
  99             return c
 100
 101         def plot_informations(itplot,pl_value):
 102             '''
 103             OUR VARIABLES
 104             contact_point.absolute_coords               (2.4584142802103689e-007, -6.9647135616234017e-009)
 105             peak_point.absolute_coords                  (3.6047748250571423e-008, -7.7142802788854212e-009)
 106             other_fit_point.absolute_coords     (4.1666139243838867e-008, -7.3759393477579707e-009)
 107             peak_location                                                                               [510, 610, 703, 810, 915, 1103]
 108             peak_size                                                                                           [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
 109             params                                                                                                      [2.2433999931959462e-007, 3.3230248825175678e-010]
 110             fit_errors                                                                                  [6.5817195369767644e-010, 2.4415923138871498e-011]
 111             '''
 112             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
 113
 114             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
 115             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
 116             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
 117             self.basepoints=[]
 118             base_index_0=peak_location[-1]+fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
 119             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
 120             base_index_1=self.basepoints[0].index+fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
 121             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
 122             self.basecurrent=self.current.path
 123             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
 124             boundaries.sort()
 125             to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
 126             avg=np.mean(to_average)
 127             return fit_points, contact_point, pl_value, T, cindex, avg
 128
 129         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
 130             '''
 131             calculate informations for each peak and add they in
 132             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 133             '''
 134             c_leng=None
 135             p_leng=None
 136             sigma_c_leng=None
 137             sigma_p_leng=None
 138             force=None
 139             slope=None
 140
 141             delta_force=10
 142             slope_span=int(self.config['auto_slope_span'])
 143
 144             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
 145             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
 146
 147             points=[contact_point, peak_point, other_fit_point]
 148
 149             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
 150
 151             #Measure forces
 152             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
 153             y=min(delta_to_measure)
 154             #Measure slopes
 155             slope=self.linefit_between(peak-slope_span,peak)[0]
 156             #check fitted data and, if right, add peak to the measurement
 157             if len(params)==1: #if we did choose 1-value fit
 158                 p_leng=pl_value
 159                 c_leng=params[0]*(1.0e+9)
 160                 sigma_p_leng=0
 161                 sigma_c_leng=fit_errors[0]*(1.0e+9)
 162                 force = abs(y-avg)*(1.0e+12)
 163             else: #2-value fit
 164                 p_leng=params[1]*(1.0e+9)
 165                 #check if persistent length makes sense. otherwise, discard peak.
 166                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
 167                     '''
 168                     p_lengths.append(p_leng)
 169                     c_lengths.append(params[0]*(1.0e+9))
 170                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
 171                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
 172                     forces.append(abs(y-avg)*(1.0e+12))
 173                     slopes.append(slope)
 174                     '''
 175                     c_leng=params[0]*(1.0e+9)
 176                     sigma_c_leng=fit_errors[0]*(1.0e+9)
 177                     sigma_p_leng=fit_errors[1]*(1.0e+9)
 178                     force=abs(y-avg)*(1.0e+12)
 179                 else:
 180                     p_leng=None
 181                     slope=None
 182             #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
 183             return  c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
 184
 185
 186         # ------ PROGRAM -------
 187         c=0
 188         for item in self.current_list:
 189             c+=1
 190             item.identify(self.drivers)
 191             itplot=item.curve.default_plots()
 192             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
 193             itplot[0]=flatten(itplot[0], item, customvalue=1)
 194             try:
 195                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
 196             except:
 197                 #We have troubles with exec_has_peaks (bad curve, whatever).
 198                 #Print info and go to next cycle.
 199                 print 'Cannot process ',item.path
 200                 continue
 201
 202             if len(peak_location)==0:
 203                 print 'No peaks!'
 204                 continue
 205
 206             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
 207
 208             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
 209
 210             #initialize output data vectors
 211             c_lengths=[]
 212             p_lengths=[]
 213             sigma_c_lengths=[]
 214             sigma_p_lengths=[]
 215             forces=[]
 216             slopes=[]
 217
 218             #loop each peak of my curve
 219             for peak in peak_location:
 220                 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
 221                 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
 222                     if var is not None:
 223                         vector.append(var)
 224
 225             #FIXME: We need a dictionary here...
 226             allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
 227             for vect in allvects:
 228                 if len(vect)==0:
 229                     for i in range(len(c_lengths)):
 230                         vect.append(0)
 231
 232             print 'Measurements for all peaks detected:'
 233             print 'contour (nm)', c_lengths
 234             print 'sigma contour (nm)',sigma_c_lengths
 235             print 'p (nm)',p_lengths
 236             print 'sigma p (nm)',sigma_p_lengths
 237             print 'forces (pN)',forces
 238             print 'slopes (N/m)',slopes
 239
 240             '''
 241             write automeasure text file
 242             '''
 243             print 'Saving automatic measurement...'
 244             f=open(self.my_work_dir+pclust_filename,'a+')
 245             f.write(item.path+'\n')
 246             for i in range(len(c_lengths)):
 247                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
 248             f.close()
 249
 250             peak_number=len(c_lengths)
 251
 252             '''
 253             write peackforce text file
 254             '''
 255             print 'Saving automatic measurement...'
 256             f=open(self.my_work_dir+peackforce_filename,'a+')
 257             f.write(item.path+'\n')
 258             peackforce_info = ''
 259             for i in range(len(c_lengths)):
 260                 peackforce_info = peackforce_info + ' ; ' + str(c_lengths[i]) + ' ; ' + str(forces[i])
 261             f.write(' ; '+str(peak_number)+peackforce_info+'\n')
 262             f.close()
 263
 264             '''
 265             calculate clustering coordinates
 266             '''
 267             if peak_number > 1:
 268                 deltas=[]
 269                 for i in range(len(c_lengths)-1):
 270                     deltas.append(c_lengths[i+1]-c_lengths[i])
 271
 272                 delta_mean=np.mean(deltas)
 273                 delta_median=np.median(deltas)
 274
 275                 force_mean=np.mean(forces)
 276                 force_median=np.median(forces)
 277
 278                 first_peak_cl=c_lengths[0]
 279                 last_peak_cl=c_lengths[-1]
 280
 281                 max_force=max(forces[:-1])
 282                 min_force=min(forces)
 283
 284                 max_delta=max(deltas)
 285                 min_delta=min(deltas)
 286
 287                 delta_stdev=np.std(deltas)
 288                 forces_stdev=np.std(forces[:-1])
 289
 290                 peaks_diff=(last_peak_cl-first_peak_cl)/peak_number
 291
 292                 print 'Coordinates'
 293                 print 'Peaks',peak_number
 294                 print 'Mean delta',delta_mean
 295                 print 'Median delta',delta_median
 296                 print 'Mean force',force_mean
 297                 print 'Median force',force_median
 298                 print 'First peak',first_peak_cl
 299                 print 'Last peak',last_peak_cl
 300                 print 'Max force',max_force
 301                 print 'Min force',min_force
 302                 print 'Max delta',max_delta
 303                 print 'Min delta',min_delta
 304                 print 'Delta stdev',delta_stdev
 305                 print 'Forces stdev',forces_stdev
 306                 print 'Peaks difference',peaks_diff
 307
 308                 '''
 309                 write clustering coordinates
 310                 '''
 311                 f=open(self.my_work_dir+realclust_filename,'a+')
 312                 f.write(item.path+'\n')
 313                 f.write(' ; '+str(peak_number)+     # non considerato
 314                         ' ; '+str(delta_mean)+      # 0
 315                         ' ; '+str(delta_median)+    # 1 -
 316                         ' ; '+str(force_mean)+      # 2
 317                         ' ; '+str(force_median)+    # 3 -
 318                         ' ; '+str(first_peak_cl)+   # 4 -
 319                         ' ; '+str(last_peak_cl)+    # 5 -
 320                         ' ; '+str(max_force)+       # 6
 321                         ' ; '+str(min_force)+       # 7
 322                         ' ; '+str(max_delta)+       # 8
 323                         ' ; '+str(min_delta)+       # 9
 324                         ' ; '+str(delta_stdev)+     # 10
 325                         ' ; '+str(forces_stdev)+    # 11
 326                         ' ; '+str(peaks_diff)+      # 12
 327                         '\n')
 328                 f.close()
 329
 330         # start PCA
 331         self.do_pca(pclus_dir+"/"+realclust_filename)
 332
 333
 334     def do_pca(self,args):
 335         '''
 336         PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
 337         Automatically measures pca from coordinates filename and shows two interactives plots
 338         With the second argument (arbitrary) you can select the columns and the multiplier factor
 339         to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
 340         Without second argument it reads pca_config.txt file
 341         (c)Paolo Pancaldi, Massimo Sandal 2009
 342         '''
 343
 344         # reads the columns of pca
 345         conf=open(config_file_path("pca_config.txt"), 'r')
 346         config = conf.readlines()
 347         conf.close()
 348
 349         self.plot_myCoord = []          # tiene le coordinate prese direttamente dal file creato con pCluster
 350         self.plot_origCoord = []        # tiene le coordinate solo delle colonne scelte e moltiplicate per i valori scelti
 351         self.plot_pcaCoord = []         # tiene le due colonne della pca
 352         self.plot_pcaCoordTr = []       # tiene le due colonne della pca trasposta
 353         self.plot_FiltOrigCoord = []    # tiene le coordinate solo dei punti filtrati per densita
 354         self.plot_FiltPaths = []        # tiene i paths dei plot solo dei punti filtrati per densita
 355         self.plot_paths = []            # tiene i paths di tutti i plots
 356         self.plot_NewPcaCoord = []      # tiene le due colonne della pca filtrate per densita
 357         self.plot_NewPcaCoordTr=[]      # tiene le due colonne della pca trasposta filtrate per densita
 358         plot_path_temp = ""
 359
 360         # prende in inpunt un arg (nome del file)
 361         # e il secondo le colonne su cui lavorare (e' arbitrario, riceve x es "1,2,3")
 362         arg = args.split(" ")
 363         if arg[0]==args:
 364             file_name=args
 365         else:
 366             file_name=arg[0]
 367             config[0] = arg[1]
 368
 369         # creo l'array "plot_myCoord" con tutte le coordinate dei plots
 370         # e l'array plot_paths con tutti i percorsi dei plots
 371         nPlotTot = -3 #tolgo le prime 3 righe iniziali del file
 372         f=open(file_name)
 373         rows = f.readlines()
 374         for row in rows:
 375             if row[0]!=" " and row[0]!="":
 376                 nPlotTot = nPlotTot+1
 377                 plot_path_temp = row
 378             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 379                 row = row[row.index(";",2)+2:].split(" ; ")     # non considero la prima colonna col #picchi
 380                 row = [float(i) for i in row]
 381
 382                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
 383                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
 384                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500 and row[6]<500 and row[7]<500 and row[8]<500 and row[9]<500 and row[10]<500 and row[11]<500):
 385                     if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0 and row[6]>0 and row[7]>0 and row[8]>0 and row[9]>0 and row[10]>0 and row[11]>0):
 386                         #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
 387                         self.plot_myCoord.append(row)
 388                         self.plot_paths.append(plot_path_temp)
 389         f.close()
 390
 391         # creo l'array con alcune colonne e pure moltiplicate
 392         for row in self.plot_myCoord:
 393             res=[]
 394             for cols in config[0].split(","):
 395                 if cols.find("*")!=-1:
 396                     col = int(cols.split("*")[0])
 397                     molt = int(cols.split("*")[1])
 398                 elif cols.find("x")!=-1:
 399                     col = int(cols.split("x")[0])
 400                     molt = int(cols.split("x")[1])
 401                 else:
 402                     col = int(cols)
 403                     molt = 1
 404                 res.append(row[col]*molt)
 405             self.plot_origCoord.append(res)
 406
 407         # array convert, calculate PCA, transpose
 408         self.plot_origCoord = np.array(self.plot_origCoord,dtype='float')
 409         #print self.plot_origCoord.shape
 410         self.plot_pcaCoord = pca(self.plot_origCoord, output_dim=2)     #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 411         self.plot_pcaCoordTr = np.transpose(self.plot_pcaCoord)
 412         pca_X=np.array(self.plot_pcaCoordTr[0],dtype='float')
 413         pca_Y=np.array(self.plot_pcaCoordTr[1],dtype='float')
 414
 415         '''
 416         # Start section of testing with good plots                                  # 4 TESTING!
 417         Xsyn_1=[]
 418         Ysyn_1=[]
 419         Xgb1_1=[]
 420         Ygb1_1=[]
 421         Xbad_1=[]
 422         Ybad_1=[]
 423         goodnamefile=open(file_name.replace("coordinate", "good"),'r')
 424         goodnames=goodnamefile.readlines()
 425         nPlotGood = len(goodnames)-2 #tolgo prima e ultima riga
 426         goodnames=[i.split()[0] for i in goodnames[1:]]
 427
 428         for index in range(len(self.plot_paths)):
 429             if self.plot_paths[index][:-1] in goodnames:
 430                 Xsyn_1.append(pca_X[index])
 431                 Ysyn_1.append(pca_Y[index])
 432             else:
 433                 Xbad_1.append(pca_X[index])
 434                 Ybad_1.append(pca_Y[index])
 435         # Stop section of testing with good plots                                   # 4 TESTING!
 436         '''
 437
 438         # print first plot
 439         clustplot1=lhc.PlotObject()
 440         clustplot1.add_set(pca_X,pca_Y)
 441         #clustplot1.add_set(Xbad_1,Ybad_1) # 4 TESTING!
 442         #clustplot1.add_set(Xsyn_1,Ysyn_1) # 4 TESTING!
 443         clustplot1.normalize_vectors()
 444         clustplot1.styles=['scatter', 'scatter','scatter']
 445         clustplot1.colors=[None,'red','green']
 446         clustplot1.destination=0
 447         self._send_plot([clustplot1])
 448         self.clustplot1=clustplot1
 449
 450         # density and filer estimation
 451         kernel = sp.stats.kde.gaussian_kde(sp.c_[pca_X,pca_Y].T)
 452         tallest = 0
 453         for i in range(len(pca_X)):
 454             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 455             if tallest < kern_value:
 456                     tallest = float(kern_value)
 457         if float(config[1]) == 0:
 458             my_filter = float(tallest / 3.242311147)
 459         else:
 460             my_filter = float(config[1])
 461         '''
 462         # section useful only for graphic printing
 463         xmin = pca_X.min()
 464         xmax = pca_X.max()
 465         ymin = pca_Y.min()
 466         ymax = pca_Y.max()
 467         mX, mY = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
 468         Z = sp.rot90(sp.fliplr(sp.reshape(kernel(sp.c_[mX.ravel(), mY.ravel()].T).T, mX.T.shape)))
 469         axis_X = np.linspace(xmin,xmax,num=100)
 470         axis_Y = np.linspace(ymin,ymax,num=100)
 471         '''
 472
 473         # density filtering:
 474         # tramite "kernel.evaluate" trovo lo score (altezza) di ogni coordinata e decido se mantenerla o no
 475         filtered_pca_X = []
 476         filtered_pca_Y = []
 477         filtered_PcaCoordTr = []
 478         filtered_PcaCoord = []
 479         for i in range(len(pca_X)):
 480             kern_value = kernel.evaluate([pca_X[i],pca_Y[i]])
 481             if kern_value > my_filter:
 482                 filtered_pca_X.append(pca_X[i])
 483                 filtered_pca_Y.append(pca_Y[i])
 484         filtered_PcaCoordTr.append(filtered_pca_X)
 485         filtered_PcaCoordTr.append(filtered_pca_Y)
 486         filtered_PcaCoord = np.transpose(filtered_PcaCoordTr)
 487
 488         # creo i due array "plot_FiltOrigCoord" e "plot_FiltPaths" contenenti solo i dati filtrati con alta densita
 489         for index in range(len(self.plot_pcaCoord)):
 490             if self.plot_pcaCoord[index] in filtered_PcaCoord:
 491                 self.plot_FiltOrigCoord.append(self.plot_myCoord[index])
 492                 self.plot_FiltPaths.append(self.plot_paths[index])
 493
 494         '''
 495         # START PCA#2: USELESS!!!
 496
 497         # creo l array con alcune colonne e pure moltiplicate
 498         temp_coord = []
 499         for row in self.plot_FiltOrigCoord:
 500             res=[]
 501             for cols in config[2].split(","):
 502                 if cols.find("*")!=-1:
 503                     col = int(cols.split("*")[0])
 504                     molt = int(cols.split("*")[1])
 505                 elif cols.find("x")!=-1:
 506                     col = int(cols.split("x")[0])
 507                     molt = int(cols.split("x")[1])
 508                 else:
 509                     col = int(cols)
 510                     molt = 1
 511                 res.append(row[col]*molt)
 512             temp_coord.append(res)
 513         self.plot_FiltOrigCoord = temp_coord
 514
 515         # ricalcolo la PCA: array convert, calculate PCA, transpose
 516         self.plot_FiltOrigCoord = np.array(self.plot_FiltOrigCoord,dtype='float')
 517         #print self.plot_FiltOrigCoord.shape
 518         self.plot_NewPcaCoord = pca(self.plot_FiltOrigCoord, output_dim=2)      #other way -> y = mdp.nodes.PCANode(output_dim=2)(array)
 519         self.plot_NewPcaCoordTr = np.transpose(self.plot_NewPcaCoord)
 520         pca_X2=np.array(self.plot_NewPcaCoordTr[0],dtype='float')
 521         pca_Y2=np.array(self.plot_NewPcaCoordTr[1],dtype='float')
 522
 523         # Start section of testing with good plots                              # 4 TESTING!
 524         Xsyn_2=[]
 525         Ysyn_2=[]
 526         Xbad_2=[]
 527         Ybad_2=[]
 528         for index in range(len(self.plot_FiltPaths)):
 529             if self.plot_FiltPaths[index][:-1] in goodnames:
 530                 Xsyn_2.append(pca_X2[index])
 531                 Ysyn_2.append(pca_Y2[index])
 532             else:
 533                 Xbad_2.append(pca_X2[index])
 534                 Ybad_2.append(pca_Y2[index])
 535
 536         # print second plot
 537         clustplot2=lhc.PlotObject()
 538         #clustplot2.add_set(pca_X2,pca_Y2)
 539         clustplot2.add_set(Xbad_2,Ybad_2)                                       # 4 TESTING!
 540         clustplot2.add_set(Xsyn_2,Ysyn_2)                                       # 4 TESTING!
 541         clustplot2.normalize_vectors()
 542         clustplot2.styles=['scatter', 'scatter','scatter']
 543         clustplot2.colors=[None,'red','green']
 544         clustplot2.destination=1
 545         self._send_plot([clustplot2])
 546         self.clustplot2=clustplot2
 547         '''
 548
 549         # PRINT density plot
 550         clustplot2=lhc.PlotObject()
 551         clustplot2.add_set(filtered_pca_X,filtered_pca_Y)
 552         clustplot2.normalize_vectors()
 553         clustplot2.styles=['scatter', 'scatter','scatter']
 554         clustplot2.colors=[None,'red','green']
 555         clustplot2.destination=1
 556         self._send_plot([clustplot2])
 557         self.clustplot2=clustplot2
 558
 559         # printing results
 560         config_pca1 = config[0].replace("*", "x").rstrip("\n")
 561         config_pca2 = config[2].replace("*", "x").rstrip("\n")
 562         print ""
 563         print "- START: "+file_name
 564         print "Curve totali: ", nPlotTot
 565         #print "Curve totali good: ", nPlotGood                                  # 4 TESTING!
 566         print "- FILTRO 1: 0-500 e NaN"
 567         print "Curve totali rimaste: ", len(self.plot_origCoord)
 568         #print 'Curve good rimaste: ', len(Xsyn_1)                               # 4 TESTING!
 569         print "- FILTRO 2: PCA:"+config_pca1+" e DENSITA:"+str(my_filter)
 570         print "Curve totali rimaste: ", len(self.plot_FiltOrigCoord)
 571         #print 'Curve good rimaste: ', len(Xsyn_2)                               # 4 TESTING!
 572         print "Piu alta: ", tallest
 573         #print "- FILTRO 3: 2'PCA:"+config_pca2
 574         print ""
 575
 576         # -- exporting coordinates and plot of PCA in debug mode! --
 577         if config[3].find("true")!=-1:
 578             #1' PCA: save plot and build coordinate s file
 579             self.do_export(file_name.replace("coordinate_", "debug_pca1graph_").replace('.txt','_'+config_pca1) + " 0")
 580             f = open(file_name.replace("coordinate_", "debug_pca1coor_").replace('.txt','_'+config_pca1+'.txt'),'w')
 581             for i in range(len(pca_X)):
 582                 f.write (str(i) + "\t" + str(pca_X[i]) + "\t" + str(pca_Y[i]) + "\n")
 583             f.close()
 584             #2' PCA: save plot and build coordinate s file
 585             #self.do_export(file_name.replace("coordinate_", "debug_pca2graph_").replace('.txt','_'+config_pca2) + " 1")
 586             #f = open(file_name.replace("coordinate_", "debug_pca2coor_").replace('.txt','_'+config_pca2+'.txt'),'w')
 587             #for i in range(len(pca_X2)):
 588             #    f.write (str(i) + "\t" + str(pca_X2[i]) + "\t" + str(pca_Y2[i]) + "\n")
 589             #f.close()
 590             #DENSITY: save plot
 591             self.do_export(file_name.replace("coordinate_", "debug_densitygraph_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")) + " 1")
 592             f = open(file_name.replace("coordinate_", "debug_densitycoor_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",",")+'.txt'),'w')
 593             for i in range(len(filtered_pca_X)):
 594                 f.write (str(i) + "\t" + str(filtered_pca_X[i]) + "\t" + str(filtered_pca_Y[i]) + "\n")
 595             f.close()
 596             #ALL GOOD COORDINATES (without NaN and 0<x<500)
 597             f = open(file_name.replace("coordinate_", "debug_allgoodcoor_"),'w')
 598             for i in range(len(self.plot_myCoord)):
 599                 for cel in self.plot_myCoord[i]:
 600                     f.write (" ; " + str(cel))
 601                 f.write ("\n")
 602             f.close()
 603
 604         # pCLUSTER SAVING!!!
 605         import shutil
 606         pcl_name = file_name.replace("coordinate_", "goodplots_").replace('.txt','_'+config_pca1+'_'+str(my_filter).replace(".",","))
 607         if os.path.exists(pcl_name+slash): shutil.rmtree(pcl_name)
 608         os.mkdir(pcl_name+slash)
 609         f = open(pcl_name+'.txt','w')
 610         for i in range(len(self.plot_FiltPaths)):
 611             myfile = str(self.plot_FiltPaths[i]).rstrip("\n")
 612             f.write (myfile+"\n")
 613             shutil.copy2(myfile, pcl_name)
 614         f.close()
 615
 616
 617     def do_multipca(self,args):
 618         '''
 619         MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
 620         Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
 621         measures pca from coordinates filename and save the png plots.
 622         (c)Paolo Pancaldi, Massimo Sandal 2009
 623         '''
 624         # reads the columns of pca
 625         conf=open("pca_config.txt")
 626         config = conf.readlines() # config[0] = "1,2,3"
 627         conf.close()
 628         # cycling pca
 629         arg = args.split(" ")
 630         file_name=arg[0]
 631         column=str(arg[1])
 632         for i in range(1, 51, 1):
 633             self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
 634
 635     def do_doublepca(self,args):
 636         '''
 637         DOUBLEPCA -> "doublepca gaeta_coor_blind50.txt"
 638         Automatically it launches the pca command for all combinations with two column
 639         (c)Paolo Pancaldi, Massimo Sandal 2009
 640         '''
 641         # cycling pca
 642         arg = args.split(" ")
 643         file_name=arg[0]
 644         for i in range(1, 13):
 645             for j in range(1, 13):
 646                 if i!=j:
 647                     self.do_pca(file_name + " " + str(i) + "," + str(j))
 648
 649     def do_triplepca(self,args):
 650         '''
 651         TRIPLEPCA -> "triplepca gaeta_coor_blind50.txt"
 652         Automatically it launches the pca command for all combinations with three column
 653         (c)Paolo Pancaldi, Massimo Sandal 2009
 654         '''
 655         # cycling pca
 656         arg = args.split(" ")
 657         file_name=arg[0]
 658         for i in range(1, 13):
 659             for j in range(1, 13):
 660                 for k in range(1, 13):
 661                     if i!=j and i!=k and j!=k:
 662                         self.do_pca(file_name + " " + str(i) + "," + str(j) + "," + str(k))
 663
 664     def do_pclick(self,args):
 665         '''
 666         It returns id, coordinates and file name of a clicked dot on a PCA graphic
 667         '''
 668
 669         self._send_plot([self.clustplot1]) #quick workaround for BAD problems in the GUI
 670         print 'Click point'
 671         point = self._measure_N_points(N=1, whatset=0)
 672         indice = point[0].index
 673         plot_file = self.plot_paths[indice]
 674         dot_coord = self.plot_pcaCoord[indice]
 675         print "file: " + str(plot_file).rstrip()
 676         print "id: " + str(indice)
 677         print "coord: " + str(dot_coord)
 678         self.do_genlist(str(plot_file))
 679         #self.do_jump(str(plot_file))
 680
 681     # indea iniziata e messa da parte...
 682     def do_peakforce(self, args):
 683         '''
 684         peackforce -> "peackforce peackforce_file.txt"
 685         Automatically measures peack and force plots
 686         (c)Paolo Pancaldi, Massimo Sandal 2009
 687         '''
 688
 689         # prende in inpunt un arg (nome del file)
 690         file_name=args
 691         f=open(file_name)
 692
 693         # scrivo un file temp
 694         g = open('_prove.txt','w')
 695
 696         plot_file = '';
 697         rows = f.readlines()
 698         for row in rows:
 699             if row[0]=="/":
 700                 plot_file = row
 701             if row[0]==" " and row.find('nan')==-1 and row.find("-1.#IND")==-1:
 702                 # FILTRO SUI 7 PICCHI
 703                 num_pic = int(row.split(" ; ")[1])
 704                 if num_pic==7:
 705                     width_force = row.split(" ; ")
 706                     w1 = float(width_force[2]); f1 = float(width_force[3]);
 707                     w2 = float(width_force[4]); f2 = float(width_force[5]);
 708                     w3 = float(width_force[6]); f3 = float(width_force[7]);
 709                     w4 = float(width_force[8]); f4 = float(width_force[9]);
 710                     w5 = float(width_force[10]); f5 = float(width_force[11]);
 711                     w6 = float(width_force[12]); f6 = float(width_force[13]);
 712                     w7 = float(width_force[14]); f7 = float(width_force[15]);
 713                     if w1>0 and w1<1000 and w2>0 and w2<1000 and w3>0 and w3<1000 and w4>0 and w4<1000 and w5>0 and w5<1000 and w6>0 and w6<1000 and w7>0 and w7<1000:
 714                         score_76 = abs(32 - (w7 - w6))
 715                         score_65 = abs(32 - (w6 - w5))
 716                         score_54 = abs(32 - (w5 - w4))
 717                         score_43 = abs(32 - (w4 - w3))
 718                         score_32 = abs(32 - (w3 - w2))
 719                         score_21 = abs(32 - (w2 - w1))
 720                         writeme = str(score_76) + " --- " + str(row)
 721                         g.write(writeme)
 722         g.close()
 723         f.close()