4 from libhooke import WX_GOOD, ClickedPoint
6 wxversion.select(WX_GOOD)
7 from wx import PostEvent
13 import libhookecurve as lhc
16 warnings.simplefilter('ignore',np.RankWarning)
19 class pclusterCommands:
26 def do_pcluster(self,args):
30 Automatically measures peaks and extracts informations for further clustering
31 (c)Paolo Pancaldi, Massimo Sandal 2009
33 #--Custom persistent length
35 for arg in args.split():
36 #look for a persistent length argument.
38 pl_expression=arg.split('=')
39 pl_value=float(pl_expression[1]) #actual value
43 #configuration variables
44 min_npks = self.convfilt_config['minpeaks']
45 min_deviation = self.convfilt_config['mindeviation']
47 pclust_filename=raw_input('Automeasure filename? ')
48 realclust_filename=raw_input('Coordinates filename? ')
50 f=open(pclust_filename,'w+')
51 f.write('Analysis started '+time.asctime()+'\n')
52 f.write('----------------------------------------\n')
53 f.write('; Contour length (nm) ; Persistence length (nm) ; Max.Force (pN) ; Slope (N/m) ; Sigma contour (nm) ; Sigma persistence (nm)\n')
56 f=open(realclust_filename,'w+')
57 f.write('Analysis started '+time.asctime()+'\n')
58 f.write('----------------------------------------\n')
59 f.write('; Peak number ; Mean delta (nm) ; Median delta (nm) ; Mean force (pN) ; Median force (pN) ; First peak length (nm) ; Last peak length (nm) ; Max force (pN) ; Min force (pN) ; Max delta (nm) ; Min delta (nm)\n')
61 # ------ FUNCTION ------
62 def fit_interval_nm(start_index,plot,nm,backwards):
64 Calculates the number of points to fit, given a fit interval in nm
65 start_index: index of point
67 backwards: if true, finds a point backwards.
69 whatset=1 #FIXME: should be decidable
70 x_vect=plot.vectors[1][0]
74 start=x_vect[start_index]
76 while abs(x_vect[i]-x_vect[start_index])*(10**9) < nm:
77 if i==0 or i==maxlen-1: #we reached boundaries of vector!
86 def plot_informations(itplot,pl_value):
89 contact_point.absolute_coords (2.4584142802103689e-007, -6.9647135616234017e-009)
90 peak_point.absolute_coords (3.6047748250571423e-008, -7.7142802788854212e-009)
91 other_fit_point.absolute_coords (4.1666139243838867e-008, -7.3759393477579707e-009)
92 peak_location [510, 610, 703, 810, 915, 1103]
93 peak_size [-1.2729111505202212e-009, -9.1632775347399312e-010, -8.1707438353929907e-010, -8.0335812578148904e-010, -8.7483955226387558e-010, -3.6269619757067322e-009]
94 params [2.2433999931959462e-007, 3.3230248825175678e-010]
95 fit_errors [6.5817195369767644e-010, 2.4415923138871498e-011]
97 fit_points=int(self.config['auto_fit_points']) # number of points to fit before the peak maximum <50>
99 T=self.config['temperature'] #temperature of the system in kelvins. By default it is 293 K. <301.0>
100 cindex=self.find_contact_point(itplot[0]) #Automatically find contact point <158, libhooke.ClickedPoint>
101 contact_point=self._clickize(itplot[0].vectors[1][0], itplot[0].vectors[1][1], cindex)
103 base_index_0=peak_location[-1]+fit_interval_nm(peak_location[-1], itplot[0], self.config['auto_right_baseline'],False)
104 self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_0))
105 base_index_1=self.basepoints[0].index+fit_interval_nm(self.basepoints[0].index, itplot[0], self.config['auto_left_baseline'],False)
106 self.basepoints.append(self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],base_index_1))
107 self.basecurrent=self.current.path
108 boundaries=[self.basepoints[0].index, self.basepoints[1].index]
110 to_average=itplot[0].vectors[1][1][boundaries[0]:boundaries[1]] #y points to average
111 avg=np.mean(to_average)
112 return fit_points, contact_point, pl_value, T, cindex, avg
114 def features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg):
116 calculate informations for each peak and add they in
117 c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
127 slope_span=int(self.config['auto_slope_span'])
129 peak_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak)
130 other_fit_point=self._clickize(itplot[0].vectors[1][0],itplot[0].vectors[1][1],peak-fit_points)
132 points=[contact_point, peak_point, other_fit_point]
134 params, yfit, xfit, fit_errors = self.wlc_fit(points, itplot[0].vectors[1][0], itplot[0].vectors[1][1], pl_value, T, return_errors=True)
137 delta_to_measure=itplot[0].vectors[1][1][peak-delta_force:peak+delta_force]
138 y=min(delta_to_measure)
140 slope=self.linefit_between(peak-slope_span,peak)[0]
141 #check fitted data and, if right, add peak to the measurement
142 if len(params)==1: #if we did choose 1-value fit
144 c_leng=params[0]*(1.0e+9)
146 sigma_c_leng=fit_errors[0]*(1.0e+9)
147 force = abs(y-avg)*(1.0e+12)
149 p_leng=params[1]*(1.0e+9)
150 #check if persistent length makes sense. otherwise, discard peak.
151 if p_leng>self.config['auto_min_p'] and p_leng<self.config['auto_max_p']:
153 p_lengths.append(p_leng)
154 c_lengths.append(params[0]*(1.0e+9))
155 sigma_c_lengths.append(fit_errors[0]*(1.0e+9))
156 sigma_p_lengths.append(fit_errors[1]*(1.0e+9))
157 forces.append(abs(y-avg)*(1.0e+12))
160 c_leng=params[0]*(1.0e+9)
161 sigma_c_leng=fit_errors[0]*(1.0e+9)
162 sigma_p_leng=fit_errors[1]*(1.0e+9)
163 force=abs(y-avg)*(1.0e+12)
167 #return c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes
168 return c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope
171 # ------ PROGRAM -------
173 for item in self.current_list:
175 item.identify(self.drivers)
176 itplot=item.curve.default_plots()
177 flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
178 itplot[0]=flatten(itplot[0], item, customvalue=1)
180 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
182 #We have troubles with exec_has_peaks (bad curve, whatever).
183 #Print info and go to next cycle.
184 print 'Cannot process ',item.path
187 if len(peak_location)==0:
191 fit_points, contact_point, pl_value, T, cindex, avg = plot_informations(itplot,pl_value)
193 print '\n\nCurve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'
195 #initialize output data vectors
203 #loop each peak of my curve
204 for peak in peak_location:
205 c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope = features_peaks(itplot, peak, fit_points, contact_point, pl_value, T, cindex, avg)
206 for var, vector in zip([c_leng, p_leng, sigma_c_leng, sigma_p_leng, force, slope],[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]):
210 #FIXME: We need a dictionary here...
211 allvects=[c_lengths, p_lengths, sigma_c_lengths, sigma_p_lengths, forces, slopes]
212 for vect in allvects:
214 for i in range(len(c_lengths)):
217 print 'Measurements for all peaks detected:'
218 print 'contour (nm)', c_lengths
219 print 'sigma contour (nm)',sigma_c_lengths
220 print 'p (nm)',p_lengths
221 print 'sigma p (nm)',sigma_p_lengths
222 print 'forces (pN)',forces
223 print 'slopes (N/m)',slopes
226 write automeasure text file
228 print 'Saving automatic measurement...'
229 f=open(pclust_filename,'a+')
230 f.write(item.path+'\n')
231 for i in range(len(c_lengths)):
232 f.write(' ; '+str(c_lengths[i])+' ; '+str(p_lengths[i])+' ; '+str(forces[i])+' ; '+str(slopes[i])+' ; '+str(sigma_c_lengths[i])+' ; '+str(sigma_p_lengths[i])+'\n')
236 calculate clustering coordinates
238 peak_number=len(c_lengths)
241 for i in range(len(c_lengths)-1):
242 deltas.append(c_lengths[i+1]-c_lengths[i])
244 delta_mean=np.mean(deltas)
245 delta_median=np.median(deltas)
247 force_mean=np.mean(forces)
248 force_median=np.median(forces)
250 first_peak_cl=c_lengths[0]
251 last_peak_cl=c_lengths[-1]
253 max_force=max(forces[:-1])
254 min_force=min(forces)
256 max_delta=max(deltas)
257 min_delta=min(deltas)
259 delta_stdev=np.std(deltas)
260 forces_stdev=np.std(forces[:-1])
263 print 'Peaks',peak_number
264 print 'Mean delta',delta_mean
265 print 'Median delta',delta_median
266 print 'Mean force',force_mean
267 print 'Median force',force_median
268 print 'First peak',first_peak_cl
269 print 'Last peak',last_peak_cl
270 print 'Max force',max_force
271 print 'Min force',min_force
272 print 'Max delta',max_delta
273 print 'Min delta',min_delta
274 print 'Delta stdev',delta_stdev
275 print 'Forces stdev',forces_stdev
278 write clustering coordinates
280 f=open(realclust_filename,'a+')
281 f.write(item.path+'\n')
282 f.write(' ; '+str(peak_number)+ # non considerato
283 ' ; '+str(delta_mean)+ # 0
284 ' ; '+str(delta_median)+ # 1 -
285 ' ; '+str(force_mean)+ # 2
286 ' ; '+str(force_median)+ # 3 -
287 ' ; '+str(first_peak_cl)+ # 4 -
288 ' ; '+str(last_peak_cl)+ # 5 -
289 ' ; '+str(max_force)+ # 6
290 ' ; '+str(min_force)+ # 7
291 ' ; '+str(max_delta)+ # 8
292 ' ; '+str(min_delta)+ # 9
293 ' ; '+str(delta_stdev)+ # 10
294 ' ; '+str(forces_stdev)+ # 11
300 def do_pca(self,args):
302 PCA -> "pca gaeta_coor_blind50.txt 1,3,6"
303 Automatically measures pca from coordinates filename and shows two interactives plots
304 With the second argument (arbitrary) you can select the columns and the multiplier factor
305 to use for the pca (for es "1,3*50,6,8x10,9"). Dont use spaces. "*" or "x" are the same thing.
306 Without second argument it reads pca_config.txt file
307 (c)Paolo Pancaldi, Massimo Sandal 2009
310 # reads the columns of pca
311 conf=open("pca_config.txt")
312 config = conf.readlines()
315 self.pca_myArray = []
321 # prende in inpunt un arg (nome del file)
322 # e il secondo e' arbitrario riceve x es "row[1],row2,row[3]"
323 arg = args.split(" ")
334 nPlotTot = nPlotTot+1
335 #plot_path_temp = row.split("/")[6][:-1]
337 if row[0]==" " and row.find('nan')==-1:
338 row = row[row.index(";",2)+2:].split(" ; ") # non considero la prima colonna col #picchi
339 row = [float(i) for i in row]
341 #0:Mean delta, 1:Median delta, 2:Mean force, 3:Median force, 4:First peak length, 5:Last peak length
342 #6:Max delta 7:Min delta 8:Max force 9:Min force 10:Std delta 11:Std force
343 if (row[0]<500 and row[1]<500 and row[2]<500 and row[3]<500 and row[4]<500 and row[5]<500):
344 if (row[0]>0 and row[1]>0 and row[2]>0 and row[3]>0 and row[4]>0 and row[5]>0):
345 self.pca_paths[nPlotGood] = plot_path_temp
346 #row = row[0], row[2], row[3]*3, row[6], row[7]*56, row[8]
348 for cols in config[0].split(","):
349 if cols.find("*")!=-1:
350 col = int(cols.split("*")[0])
351 molt = int(cols.split("*")[1])
352 elif cols.find("x")!=-1:
353 col = int(cols.split("x")[0])
354 molt = int(cols.split("x")[1])
358 res.append(row[col]*molt)
359 self.pca_myArray.append(res)
360 nPlotGood = nPlotGood+1
363 print nPlotGood, "of", nPlotTot
365 # array convert, calculate PCA, transpose
366 self.pca_myArray = np.array(self.pca_myArray,dtype='float')
367 print self.pca_myArray.shape
368 self.pca_myArray = pca(self.pca_myArray, output_dim=2) #other way -> y = mdp.nodes.PCANode(output_dim=2)(gigi)
369 myArrayTr = np.transpose(self.pca_myArray)
378 clustplot=lhc.PlotObject()
381 #our dataset-specific stuff
382 #This will go away after testing :)
392 goodnamefile=open('gaeta_good_blind50.log','r')
393 #goodnamefile=open('/home/massimo/python/hooke/dataset_clust/roslin_blind50.log','r')
394 goodnames=goodnamefile.readlines()
395 goodnames=[i.split()[0] for i in goodnames[1:]]
398 for index in range(len(self.pca_paths)):
400 if '3s3' in self.pca_paths[index] and not 'bad' in self.pca_paths[index]:
401 Xsyn.append(X[index])
402 Ysyn.append(Y[index])
403 elif 'bad' in self.pca_paths[index]:
404 Xbad.append(X[index])
405 Ybad.append(Y[index])
407 Xgb1.append(X[index])
408 Ygb1.append(Y[index])
410 #print self.pca_paths
411 if self.pca_paths[index][:-1] in goodnames:
412 Xsyn.append(X[index])
413 Ysyn.append(Y[index])
415 Xbad.append(X[index])
416 Ybad.append(Y[index])
418 print 'blath',len(Xsyn),len(Ysyn)
420 #clustplot.add_set(Xgb1,Ygb1)
421 clustplot.add_set(Xbad,Ybad)
422 clustplot.add_set(Xsyn,Ysyn)
423 clustplot.normalize_vectors()
424 clustplot.styles=['scatter', 'scatter','scatter']
425 clustplot.colors=[None,'red','green']
426 #clustplot.styles=['scatter',None]
427 clustplot.destination=1
428 self._send_plot([clustplot])
429 self.clustplot=clustplot
431 # -- exporting coordinates and plot! --
433 #builds coordinate s file
435 f = open('coordinate_punti.txt','w')
436 for i in range(len(X)):
437 f.write (str(i) + "\t" + str(X[i]) + "\t" + str(Y[i]) + "\n")
441 config = config[0].replace("*", "x")
442 self.do_export("png/" + config + " 1")
444 def do_multipca(self,args):
446 MULTIPCA -> "multipca gaeta_coor_blind50.txt 3"
447 Automatically multiply the column suggest in second argument for value between 1-100 (step of 2),
448 measures pca from coordinates filename and save the png plots.
449 (c)Paolo Pancaldi, Massimo Sandal 2009
451 # reads the columns of pca
452 conf=open("pca_config.txt")
453 config = conf.readlines() # config[0] = "1,2,3"
456 arg = args.split(" ")
459 for i in range(1, 101, 2):
460 self.do_pca(file_name + " " + config[0].replace(column,column+"*"+str(i),1))
462 def do_pclick(self,args):
464 self._send_plot([self.clustplot]) #quick workaround for BAD problems in the GUI
466 point = self._measure_N_points(N=1, whatset=0)
467 indice = point[0].index
468 plot_file = self.pca_paths[indice]
469 dot_coord = self.pca_myArray[indice]
470 print "file: " + str(plot_file).rstrip()
471 print "id: " + str(indice)
472 print "coord: " + str(dot_coord)
473 self.do_genlist(str(plot_file))
474 #self.do_jump(str(plot_file))