nothing of important (new function for building a file's coordinate)
[hooke.git] / pcluster.py
1 #!/usr/bin/env python
2
3 from mdp import pca
4 from libhooke import WX_GOOD, ClickedPoint
5 import wxversion
6 wxversion.select(WX_GOOD)
7 from wx import PostEvent
8 import numpy as np
9 import scipy as sp
10 import copy
11 import os.path
12 import time
13 import libhookecurve as lhc
14
15 import warnings
16 warnings.simplefilter('ignore',np.RankWarning)
17
18
19 class pclusterCommands:
20     
21     def _plug_init(self):
22         self.pca_paths=[]
23         self.pca_myArray=None
24         self.clustplot=None
25
26     def do_pcluster(self,args):
27         '''
28         pCLUSTER
29         (pcluster.py)
30         Automatically measures peaks and extracts informations for further clustering
31         (c)Paolo Pancaldi, Massimo Sandal 2009
32         '''
33         #--Custom persistent length
34         pl_value=None
35         for arg in args.split():
36             #look for a persistent length argument.
37             if 'pl=' in arg:
38                 pl_expression=arg.split('=')
39                 pl_value=float(pl_expression[1]) #actual value
40             else:
41                 pl_value=None
42                 
43         #configuration variables
44         min_npks = self.convfilt_config['minpeaks']
45         min_deviation = self.convfilt_config['mindeviation']
46         
47         pclust_filename=raw_input('Automeasure filename? ')
48         realclust_filename=raw_input('Coordinates filename? ')
49         
50         f=open(pclust_filename,'w+')
51         f.write('Analysis started '+time.asctime()+'\n')
52         f.write('----------------------------------------\n')
53         f.write('; Contour length (nm)  ;  Persistence length (nm) ;  Max.Force (pN)  ;  Slope (N/m) ;  Sigma contour (nm) ; Sigma persistence (nm)\n')
54         f.close()
55         
56         f=open(realclust_filename,'w+')
57         f.write('Analysis started '+time.asctime()+'\n')
58         f.write('----------------------------------------\n')
59         f.write('; Peak number ; Mean delta (nm)  ;  Median delta (nm) ;  Mean force (pN)  ;  Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm)\n')
60         f.close()
61         # ------ FUNCTION ------
62         def fit_interval_nm(start_index,plot,nm,backwards):
63             '''
64             Calculates the number of points to fit, given a fit interval in nm
65             start_index: index of point
66             plot: plot to use
67             backwards: if true, finds a point backwards.
68             '''
69             whatset=1 #FIXME: should be decidable
70             x_vect=plot.vectors[1][0]
71             
72             c=0
73             i=start_index
74             start=x_vect[start_index]
75             maxlen=len(x_vect)
76             while abs(x_vect[i]-x_vect[start_index])*(10**9) < nm:
77                 if i==0 or i==maxlen-1: #we reached boundaries of vector!
78                     return c
79                 if backwards:
80                     i-=1
81                 else:
82                     i+=1
83                 c+=1
84             return c
85
86         def plot_informations(itplot,pl_value):
87             '''
88             OUR VARIABLES
89             contact_point.absolute_coords               (2.4584142802103689e-007, -6.9647135616234017e-009)
90             peak_point.absolute_coords                  (3.6047748250571423e-008, -7.7142802788854212e-009)
91             other_fit_point.absolute_coords     (4.1666139243838867e-008, -7.3759393477579707e-009)
92             peak_location                                                                               [510, 610, 703, 810, 915, 1103]
93             peak_size                                                                                           [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
94             params                                                                                                      [2.2433999931959462e-007, 3.3230248825175678e-010]
95             fit_errors                                                                                  [6.5817195369767644e-010, 2.4415923138871498e-011]
96             '''
97             fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
98             
99             T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
100             cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
101             contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
102             self.basepoints=[]
103             base_index_0=peak_location[-1]+fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
104             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
105             base_index_1=self.basepoints[0].index+fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
106             self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
107             self.basecurrent=self.current.path
108             boundaries=[self.basepoints[0].index, self.basepoints[1].index]
109             boundaries.sort()
110             to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
111             avg=np.mean(to_average)
112             return fit_points, contact_point, pl_value, T, cindex, avg
113
114         def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
115             '''
116             calculate informations for each peak and add they in 
117             c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
118             '''
119             c_leng=None
120             p_leng=None
121             sigma_c_leng=None
122             sigma_p_leng=None
123             force=None
124             slope=None
125             
126             delta_force=10
127             slope_span=int(self.config['auto_slope_span'])
128             
129             peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
130             other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
131             
132             points=[contact_point, peak_point, other_fit_point]
133             
134             params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
135             
136             #Measure forces
137             delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
138             y=min(delta_to_measure)
139             #Measure slopes
140             slope=self.linefit_between(peak-slope_span,peak)[0]
141             #check fitted data and, if right, add peak to the measurement
142             if len(params)==1: #if we did choose 1-value fit
143                 p_leng=pl_value
144                 c_leng=params[0]*(1.0e+9)
145                 sigma_p_leng=0
146                 sigma_c_leng=fit_errors[0]*(1.0e+9)
147                 force = abs(y-avg)*(1.0e+12)
148             else: #2-value fit
149                 p_leng=params[1]*(1.0e+9)
150                 #check if persistent length makes sense. otherwise, discard peak.
151                 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
152                     '''
153                     p_lengths.append(p_leng)       
154                     c_lengths.append(params[0]*(1.0e+9))
155                     sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
156                     sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
157                     forces.append(abs(y-avg)*(1.0e+12))
158                     slopes.append(slope)     
159                     '''
160                     c_leng=params[0]*(1.0e+9)
161                     sigma_c_leng=fit_errors[0]*(1.0e+9)
162                     sigma_p_leng=fit_errors[1]*(1.0e+9)
163                     force=abs(y-avg)*(1.0e+12)
164                 else:
165                     p_leng=None
166                     slope=None
167             #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
168             return  c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
169
170
171         # ------ PROGRAM -------
172         c=0
173         for item in self.current_list:
174             c+=1
175             item.identify(self.drivers)
176             itplot=item.curve.default_plots()
177             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
178             itplot[0]=flatten(itplot[0], item, customvalue=1)
179             try:
180                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
181             except: 
182                 #We have troubles with exec_has_peaks (bad curve, whatever).
183                 #Print info and go to next cycle.
184                 print 'Cannot process ',item.path
185                 continue 
186
187             if len(peak_location)==0:
188                 print 'No peaks!'
189                 continue
190
191             fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
192             
193             print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
194
195             #initialize output data vectors
196             c_lengths=[]
197             p_lengths=[]
198             sigma_c_lengths=[]
199             sigma_p_lengths=[]
200             forces=[]
201             slopes=[]
202
203             #loop each peak of my curve
204             for peak in peak_location:
205                 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
206                 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
207                     if var is not None:
208                         vector.append(var)
209
210             #FIXME: We need a dictionary here...
211             allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
212             for vect in allvects:
213                 if len(vect)==0:
214                     for i in range(len(c_lengths)):
215                         vect.append(0)
216             
217             print 'Measurements for all peaks detected:'
218             print 'contour (nm)', c_lengths
219             print 'sigma contour (nm)',sigma_c_lengths
220             print 'p (nm)',p_lengths
221             print 'sigma p (nm)',sigma_p_lengths
222             print 'forces (pN)',forces
223             print 'slopes (N/m)',slopes
224             
225             '''
226             write automeasure text file
227             '''
228             print 'Saving automatic measurement...'
229             f=open(pclust_filename,'a+')
230             f.write(item.path+'\n')
231             for i in range(len(c_lengths)):
232                 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
233             f.close()
234             
235             '''
236             calculate clustering coordinates
237             '''
238             peak_number=len(c_lengths)
239             if peak_number > 1:
240                 deltas=[]
241                 for i in range(len(c_lengths)-1):
242                     deltas.append(c_lengths[i+1]-c_lengths[i])
243                 
244                 delta_mean=np.mean(deltas)
245                 delta_median=np.median(deltas)
246                 
247                 force_mean=np.mean(forces)
248                 force_median=np.median(forces)
249                 
250                 first_peak_cl=c_lengths[0]
251                 last_peak_cl=c_lengths[-1]
252                 
253                 max_force=max(forces[:-1])
254                 min_force=min(forces)
255                 
256                 max_delta=max(deltas)
257                 min_delta=min(deltas)
258                 
259                 delta_stdev=np.std(deltas)
260                 forces_stdev=np.std(forces[:-1])
261                 
262                 print 'Coordinates'
263                 print 'Peaks',peak_number
264                 print 'Mean delta',delta_mean
265                 print 'Median delta',delta_median
266                 print 'Mean force',force_mean
267                 print 'Median force',force_median
268                 print 'First peak',first_peak_cl
269                 print 'Last peak',last_peak_cl
270                 print 'Max force',max_force
271                 print 'Min force',min_force
272                 print 'Max delta',max_delta
273                 print 'Min delta',min_delta
274                 print 'Delta stdev',delta_stdev
275                 print 'Forces stdev',forces_stdev
276                 
277                 '''
278                 write clustering coordinates
279                 '''
280                 f=open(realclust_filename,'a+')
281                 f.write(item.path+'\n')
282                 f.write(' ; '+str(peak_number)+     # non considerato
283                         ' ; '+str(delta_mean)+      # 0
284                         ' ; '+str(delta_median)+    # 1 -
285                         ' ; '+str(force_mean)+      # 2
286                         ' ; '+str(force_median)+    # 3 -
287                         ' ; '+str(first_peak_cl)+   # 4 -
288                         ' ; '+str(last_peak_cl)+    # 5 -
289                         ' ; '+str(max_force)+       # 6
290                         ' ; '+str(min_force)+       # 7
291                         ' ; '+str(max_delta)+       # 8
292                         ' ; '+str(min_delta)+       # 9
293                         ' ; '+str(delta_stdev)+     # 10
294                         ' ; '+str(forces_stdev)+    # 11
295                         '\n')
296                 f.close()
297             else:
298                 pass
299                 
300     def do_pca(self,args):
301         '''
302         PCA
303         Automatically measures pca from coordinates filename and shows two interactives plots
304         (c)Paolo Pancaldi, Massimo Sandal 2009
305         '''
306         
307         # reads the columns of pca
308         conf=open("pca_config.txt")
309         config = conf.readlines()
310         conf.close()
311         
312         self.pca_myArray = []
313         self.pca_paths = {}
314         plot_path_temp = ""
315         nPlotTot = 0
316         nPlotGood = 0
317         
318         file_name=args
319         
320         for arg in args.split():
321             #look for a file argument.
322             if 'f=' in arg:
323                 file_temp=arg.split('=')[1] #actual coordinates filename
324                 try:
325                     f=open(file_temp)
326                     f.close()
327                     file_name = file_temp
328                     print "Coordinates filename used: " + file_name
329                 except:
330                     print "Impossibile to find " + file_temp + " in current directory"
331                     print "Coordinates filename used: " + file_name
332             
333         f=open(file_name)
334         rows = f.readlines()
335         for row in rows:
336             if row[0]=="/":
337                 nPlotTot = nPlotTot+1
338                 #plot_path_temp = row.split("/")[6][:-1]
339                 plot_path_temp = row
340             if row[0]==" " and row.find('nan')==-1:
341                 row = row[row.index(";",2)+2:].split(" ; ")     # non considero la prima colonna col #picchi
342                 row = [float(i) for i in row]
343                         
344                 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
345                 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
346                 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500):
347                     if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0):
348                         self.pca_paths[nPlotGood] = plot_path_temp
349                         #row = row[0], row[2], row[3], row[6], row[7], row[8]
350                         #row= row[0], row[1], row[2], row[3], row[6], row[7], row[8], row[9], row[10], row[11]
351                         #row= row[6], row[7], row[8], row[9]
352                         #row= row[1], row[3], row[6], row[7], row[8], row[9], row[10], row[11]*10
353                         row = eval(config[0])
354                         self.pca_myArray.append(row)
355                         nPlotGood = nPlotGood+1
356                         
357         f.close()
358         print nPlotGood, "of", nPlotTot
359         
360         # array convert, calculate PCA, transpose
361         self.pca_myArray = np.array(self.pca_myArray,dtype='float')
362         print self.pca_myArray.shape
363         self.pca_myArray = pca(self.pca_myArray, output_dim=2)  #other way -> y = mdp.nodes.PCANode(output_dim=2)(gigi)
364         myArrayTr = np.transpose(self.pca_myArray)
365         
366         # plotting
367         X=myArrayTr[0]
368         Y=myArrayTr[1]
369         
370         X=list(X)
371         Y=list(Y)
372         
373         '''#builds coordinate s file
374         f = open('coordinate_punti.txt','w')
375         for i in range(len(X)):
376             f.write (str(i) + "\t" + str(X[i]) + "\t" + str(Y[i]) + "\n")
377         f.close()
378         '''
379         
380         clustplot=lhc.PlotObject()
381         
382         #FIXME
383         #our dataset-specific stuff
384         #This will go away after testing :)
385         Xsyn=[]
386         Ysyn=[]
387         
388         Xgb1=[]
389         Ygb1=[]
390         
391         Xbad=[]
392         Ybad=[]
393         
394         goodnamefile=open('roslin_blind50.log','r')
395         #goodnamefile=open('/home/massimo/python/hooke/dataset_clust/roslin_blind50.log','r')
396         goodnames=goodnamefile.readlines()
397         goodnames=[i.split()[0] for i in goodnames[1:]]
398         
399         
400         for index in range(len(self.pca_paths)):
401             '''
402             if '3s3' in self.pca_paths[index] and not 'bad' in self.pca_paths[index]:
403                 Xsyn.append(X[index])
404                 Ysyn.append(Y[index])
405             elif 'bad' in self.pca_paths[index]:
406                 Xbad.append(X[index])
407                 Ybad.append(Y[index])
408             else:
409                 Xgb1.append(X[index])
410                 Ygb1.append(Y[index])
411             '''
412             #print self.pca_paths
413             if self.pca_paths[index][:-1] in goodnames:
414                 Xsyn.append(X[index])
415                 Ysyn.append(Y[index])
416             else:
417                 Xbad.append(X[index])
418                 Ybad.append(Y[index])
419             
420         print 'blath',len(Xsyn),len(Ysyn)
421         
422         #clustplot.add_set(Xgb1,Ygb1)
423         clustplot.add_set(Xbad,Ybad)
424         clustplot.add_set(Xsyn,Ysyn)
425         clustplot.normalize_vectors()
426         clustplot.styles=['scatter', 'scatter','scatter']
427         clustplot.colors=[None,'red','green']
428         #clustplot.styles=['scatter',None]
429         clustplot.destination=1
430         self._send_plot([clustplot])
431         self.clustplot=clustplot
432         
433         
434     def do_pclick(self,args):        
435         
436         self._send_plot([self.clustplot]) #quick workaround for BAD problems in the GUI
437         print 'Click point'
438         point = self._measure_N_points(N=1, whatset=0)
439         indice = point[0].index
440         plot_file = self.pca_paths[indice]
441         dot_coord = self.pca_myArray[indice]
442         print "file: " + str(plot_file).rstrip()
443         print "id: " + str(indice)
444         print "coord: " + str(dot_coord)
445         self.do_genlist(str(plot_file))
446         #self.do_jump(str(plot_file))