0843481310c67c2bc3d546e5306d7ee5b2917fe7
[hooke.git] / flatfilts.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 '''
5 FLATFILTS
6
7 Force spectroscopy curves filtering of flat curves
8 Licensed under the GNU LGPL version 2
9
10 Other plugin dependencies:
11 procplots.py (plot processing plugin)
12 '''
13 from libhooke import WX_GOOD
14 import wxversion
15 wxversion.select(WX_GOOD)
16
17 import xml.dom.minidom
18
19 import wx
20 import scipy
21 import numpy
22 from numpy import diff
23
24 #import pickle
25
26 import libpeakspot as lps
27 import libhookecurve as lhc
28
29
30 class flatfiltsCommands:
31     
32     def _plug_init(self):
33         #configurate convfilt variables
34         convfilt_configurator=ConvfiltConfig()
35         
36         #different OSes have different path conventions
37         if self.config['hookedir'][0]=='/':
38             slash='/' #a Unix or Unix-like system
39         else:
40             slash='\\' #it's a drive letter, we assume it's Windows
41         
42         self.convfilt_config=convfilt_configurator.load_config(self.config['hookedir']+slash+'convfilt.conf')
43     
44     def do_flatfilt(self,args):
45         '''
46         FLATFILT
47         (flatfilts.py)
48         Filters out flat (featureless) curves of the current playlist,
49         creating a playlist containing only the curves with potential
50         features.
51         ------------
52         Syntax:
53         flatfilt [min_npks min_deviation]
54
55         min_npks = minmum number of points over the deviation
56         (default=4)
57
58         min_deviation = minimum signal/noise ratio
59         (default=9)
60
61         If called without arguments, it uses default values, that
62         should work most of the times.
63         '''
64         median_filter=7
65         min_npks=4
66         min_deviation=9
67         
68         args=args.split(' ')
69         if len(args) == 2:
70             min_npks=int(args[0])
71             min_deviation=int(args[1])
72         else:
73             pass
74         
75         print 'Processing playlist...'
76         notflat_list=[]
77         
78         c=0
79         for item in self.current_list:
80             c+=1
81                                    
82             try:
83                 notflat=self.has_features(item, median_filter, min_npks, min_deviation)
84                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': features are ',notflat
85             except:
86                 notflat=False
87                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': cannot be filtered. Probably unable to retrieve force data from corrupt file.'
88             
89             if notflat:
90                 item.features=notflat
91                 item.curve=None #empty the item object, to further avoid memory leak
92                 notflat_list.append(item)
93         
94         if len(notflat_list)==0:
95             print 'Found nothing interesting. Check your playlist, could be a bug or criteria could be too much stringent'
96             return
97         else:
98             print 'Found ',len(notflat_list),' potentially interesting curves'
99             print 'Regenerating playlist...'
100             self.pointer=0
101             self.current_list=notflat_list
102             self.current=self.current_list[self.pointer]
103             self.do_plot(0)
104                  
105     def has_features(self,item,median_filter,min_npks,min_deviation):
106         '''
107         decides if a curve is flat enough to be rejected from analysis: it sees if there
108         are at least min_npks points that are higher than min_deviation times the absolute value
109         of noise.
110    
111         Algorithm original idea by Francesco Musiani, with my tweaks and corrections.
112         '''
113         retvalue=False
114         
115         item.identify(self.drivers)        
116         #we assume the first is the plot with the force curve
117         #do the median to better resolve features from noise
118         flat_plot=self.plotmanip_median(item.curve.default_plots()[0], item, customvalue=median_filter)
119         flat_vects=flat_plot.vectors 
120         item.curve.close_all()
121         #needed to avoid *big* memory leaks!
122         del item.curve
123         del item
124         
125         #absolute value of derivate        
126         yretdiff=diff(flat_vects[1][1])
127         yretdiff=[abs(value) for value in yretdiff]
128         #average of derivate values
129         diffmean=numpy.mean(yretdiff)
130         yretdiff.sort()
131         yretdiff.reverse()
132         c_pks=0
133         for value in yretdiff:
134             if value/diffmean > min_deviation:
135                 c_pks+=1
136             else:
137                 break
138                     
139         if c_pks>=min_npks:
140             retvalue = c_pks
141         
142         del flat_plot, flat_vects, yretdiff
143         
144         return retvalue
145
146     ################################################################
147     #-----CONVFILT-------------------------------------------------
148     #-----Convolution-based peak recognition and filtering.
149     #Requires the libpeakspot.py library
150     
151     def has_peaks(self, plot, abs_devs=None, maxpeak=True, window=10):
152         '''
153         Finds peak position in a force curve.
154         FIXME: should be moved in libpeakspot.py
155         '''
156         if abs_devs==None:
157             abs_devs=self.convfilt_config['mindeviation']
158         
159         
160         xret=plot.vectors[1][0]
161         yret=plot.vectors[1][1]
162         #Calculate convolution.
163         convoluted=lps.conv_dx(yret, self.convfilt_config['convolution'])
164         
165         #surely cut everything before the contact point
166         cut_index=self.find_contact_point(plot)
167         #cut even more, before the blind window
168         start_x=xret[cut_index]
169         blind_index=0
170         for value in xret[cut_index:]:
171             if abs((value) - (start_x)) > self.convfilt_config['blindwindow']*(10**-9):
172                 break
173             blind_index+=1
174         cut_index+=blind_index
175         #do the dirty convolution-peak finding stuff
176         noise_level=lps.noise_absdev(convoluted[cut_index:], self.convfilt_config['positive'], self.convfilt_config['maxcut'], self.convfilt_config['stable'])               
177         above=lps.abovenoise(convoluted,noise_level,cut_index,abs_devs)     
178         peak_location,peak_size=lps.find_peaks(above,seedouble=self.convfilt_config['seedouble'])
179                 
180         #take the minimum or the maximum of a peak
181         for i in range(len(peak_location)):
182             peak=peak_location[i]
183             valpk=min(yret[peak-window:peak+window])  #maximum in force (near the unfolding point)
184             index_pk=yret[peak-window:peak+window].index(valpk)+(peak-window)            
185
186             if maxpeak==False:
187                valpk=max(yret[peak:peak+window]) #minimum in force, near the baseline
188                index_pk=yret[peak:peak+window].index(valpk)+(peak)
189
190 #  Let's explain that for the minimum.  Immaging that we know that there is a peak at position/region 100 and you have found its y-value,
191 #  Now you look in the array, from 100-10 to 100+10  (if the window is 10).
192 #  This "100-10 to 100+10" is substancially a new array with its index. In this array you have 20
193 #  elements, so the index of your y-value will be 10.
194 #  Now to find the index in the TOTAL array you have to add the "position" of the "region" (that in this case
195 #  correspond to 100) and also substract the window size ---> (+100-10)
196
197             peak_location[i]=index_pk
198             
199         return peak_location,peak_size
200     
201     
202     def exec_has_peaks(self,item,abs_devs):
203         '''
204         encapsulates has_peaks for the purpose of correctly treating the curve objects in the convfilt loop,
205         to avoid memory leaks
206         '''
207         item.identify(self.drivers)        
208         #we assume the first is the plot with the force curve
209         plot=item.curve.default_plots()[0]
210         
211         if 'flatten' in self.config['plotmanips']:
212                     #If flatten is present, use it for better recognition of peaks...
213                     flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
214                     plot=flatten(plot, item, customvalue=1)
215         
216         peak_location,peak_size=self.has_peaks(plot,abs_devs)
217         #close all open files
218         item.curve.close_all()
219         #needed to avoid *big* memory leaks!
220         del item.curve
221         del item
222         return peak_location, peak_size
223         
224     #------------------------
225     #------commands----------
226     #------------------------    
227     def do_peaks(self,args):
228         '''
229         PEAKS
230         (flatfilts.py)
231         Test command for convolution filter / test.
232         ----
233         Syntax: peaks [deviations]
234         absolute deviation = number of times the convolution signal is above the noise absolute deviation.
235         Default is 5.
236         '''
237         if len(args)==0:
238             args=self.convfilt_config['mindeviation']
239         
240         try:
241             abs_devs=float(args)
242         except:
243             print 'Wrong argument, using config value'
244             abs_devs=float(self.convfilt_config['mindeviation'])
245                         
246         defplots=self.current.curve.default_plots()[0] #we need the raw, uncorrected plots
247         
248         if 'flatten' in self.config['plotmanips']:
249             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
250             defplots=flatten(defplots, self.current)
251         else:
252             print 'You have the flatten plot manipulator not loaded. Enabling it could give you better results.'
253         
254         peak_location,peak_size=self.has_peaks(defplots,abs_devs)
255         print 'Found '+str(len(peak_location))+' peaks.'
256         to_dump='peaks '+self.current.path+' '+str(len(peak_location))
257         self.outlet.push(to_dump)
258         #print peak_location
259         
260         #if no peaks, we have nothing to plot. exit.
261         if len(peak_location)==0:
262             return
263         
264         #otherwise, we plot the peak locations.
265         xplotted_ret=self.plots[0].vectors[1][0]
266         yplotted_ret=self.plots[0].vectors[1][1]
267         xgood=[xplotted_ret[index] for index in peak_location]
268         ygood=[yplotted_ret[index] for index in peak_location]
269         
270         recplot=self._get_displayed_plot()
271         recplot.vectors.append([xgood,ygood])
272         if recplot.styles==[]:
273             recplot.styles=[None,None,'scatter']
274             recplot.colors=[None,None,None]
275         else:
276             recplot.styles+=['scatter']
277             recplot.colors+=[None]
278         
279         self._send_plot([recplot])
280         
281     def do_convfilt(self,args):
282         '''
283         CONVFILT
284         (flatfilts.py)
285         Filters out flat (featureless) curves of the current playlist,
286         creating a playlist containing only the curves with potential
287         features.
288         ------------
289         Syntax:
290         convfilt [min_npks min_deviation]
291
292         min_npks = minmum number of peaks
293         (to set the default, see convfilt.conf file; CONVCONF and SETCONF commands)
294
295         min_deviation = minimum signal/noise ratio *in the convolution*
296         (to set the default, see convfilt.conf file; CONVCONF and SETCONF commands)
297
298         If called without arguments, it uses default values.
299         '''
300         
301         min_npks=self.convfilt_config['minpeaks']
302         min_deviation=self.convfilt_config['mindeviation']
303         
304         args=args.split(' ')
305         if len(args) == 2:
306             min_npks=int(args[0])
307             min_deviation=int(args[1])
308         else:
309             pass
310         
311         print 'Processing playlist...'
312         print '(Please wait)'
313         notflat_list=[]
314         
315         c=0
316         for item in self.current_list:
317             c+=1
318                                    
319             try:    
320                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
321                 if len(peak_location)>=min_npks:
322                     isok='+'
323                 else:
324                     isok=''
325                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'+isok
326             except:
327                 peak_location,peak_size=[],[]
328                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': cannot be filtered. Probably unable to retrieve force data from corrupt file.'
329             
330             if len(peak_location)>=min_npks:
331                 item.peak_location=peak_location
332                 item.peak_size=peak_size
333                 item.curve=None #empty the item object, to further avoid memory leak
334                 notflat_list.append(item)
335
336             for i in range(1000):
337                  k=0
338
339         #Warn that no flattening had been done.
340         if not ('flatten' in self.config['plotmanips']):
341             print 'Flatten manipulator was not found. Processing was done without flattening.'
342             print 'Try to enable it in your configuration file for better results.'
343         
344         if len(notflat_list)==0:
345             print 'Found nothing interesting. Check your playlist, could be a bug or criteria could be too much stringent'
346             return
347         else:
348             print 'Found ',len(notflat_list),' potentially interesting curves'
349             print 'Regenerating playlist...'
350             self.pointer=0
351             self.current_list=notflat_list
352             self.current=self.current_list[self.pointer]
353             self.do_plot(0)
354         
355         
356     def do_setconv(self,args):
357         '''
358         SETCONV
359         (flatfilts.py)
360         Sets the convfilt configuration variables
361         ------
362         Syntax: setconv variable value
363         '''
364         args=args.split()
365         #FIXME: a general "set dictionary" function has to be built
366         if len(args)==0:
367             print self.convfilt_config
368         else:
369             if not (args[0] in self.convfilt_config.keys()):
370                 print 'This is not an internal convfilt variable!'
371                 print 'Run "setconv" without arguments to see a list of defined variables.'
372                 return
373             
374             if len(args)==1:
375                 print self.convfilt_config[args[0]]
376             elif len(args)>1:
377                 try:
378                     self.convfilt_config[args[0]]=eval(args[1])
379                 except NameError: #we have a string argument
380                     self.convfilt_config[args[0]]=args[1]
381
382
383 #########################
384 #HANDLING OF CONFIGURATION FILE
385 class ConvfiltConfig:
386     '''
387     Handling of convfilt configuration file
388     
389     Mostly based on the simple-yet-useful examples of the Python Library Reference
390     about xml.dom.minidom
391     
392     FIXME: starting to look a mess, should require refactoring
393     '''
394     
395     def __init__(self):
396         self.config={}
397         
398                 
399     def load_config(self, filename):
400         myconfig=file(filename)                    
401         #the following 3 lines are needed to strip newlines. otherwise, since newlines
402         #are XML elements too, the parser would read them (and re-save them, multiplying
403         #newlines...)
404         #yes, I'm an XML n00b
405         the_file=myconfig.read()
406         the_file_lines=the_file.split('\n')
407         the_file=''.join(the_file_lines)
408                        
409         self.config_tree=xml.dom.minidom.parseString(the_file)  
410         
411         def getText(nodelist):
412             #take the text from a nodelist
413             #from Python Library Reference 13.7.2
414             rc = ''
415             for node in nodelist:
416                 if node.nodeType == node.TEXT_NODE:
417                     rc += node.data
418             return rc
419         
420         def handleConfig(config):
421             noiseabsdev_elements=config.getElementsByTagName("noise_absdev")
422             convfilt_elements=config.getElementsByTagName("convfilt")
423             handleAbsdev(noiseabsdev_elements)
424             handleConvfilt(convfilt_elements)
425                         
426         def handleAbsdev(noiseabsdev_elements):
427             for element in noiseabsdev_elements:
428                 for attribute in element.attributes.keys():
429                     self.config[attribute]=element.getAttribute(attribute)
430                     
431         def handleConvfilt(convfilt_elements):
432             for element in convfilt_elements:
433                 for attribute in element.attributes.keys():
434                     self.config[attribute]=element.getAttribute(attribute)
435             
436         handleConfig(self.config_tree)
437         #making items in the dictionary machine-readable
438         for item in self.config.keys():
439             try:
440                 self.config[item]=eval(self.config[item])
441             except NameError: #if it's an unreadable string, keep it as a string
442                 pass
443             
444         return self.config