Merged with trunk
[hooke.git] / hooke / plugin / flatfilts.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 '''
5 FLATFILTS
6
7 Force spectroscopy curves filtering of flat curves
8 Licensed under the GNU LGPL version 2
9
10 Other plugin dependencies:
11 procplots.py (plot processing plugin)
12 '''
13
14 from hooke.libhooke import WX_GOOD
15
16 import wxversion
17 wxversion.select(WX_GOOD)
18 import xml.dom.minidom
19 import wx
20 import scipy
21 import numpy
22 from numpy import diff
23 #import pickle
24
25 from .. import libpeakspot as lps
26 from .. import libhookecurve as lhc
27
28
29 class flatfiltsCommands(object):
30
31     def _plug_init(self):
32         #configurate convfilt variables
33         convfilt_configurator=ConvfiltConfig()
34         self.convfilt_config=convfilt_configurator.load_config('convfilt.conf')
35
36     def do_flatfilt(self,args):
37         '''
38         FLATFILT
39         (flatfilts.py)
40         Filters out flat (featureless) curves of the current playlist,
41         creating a playlist containing only the curves with potential
42         features.
43         ------------
44         Syntax:
45         flatfilt [min_npks min_deviation]
46
47         min_npks = minmum number of points over the deviation
48         (default=4)
49
50         min_deviation = minimum signal/noise ratio
51         (default=9)
52
53         If called without arguments, it uses default values, that
54         should work most of the times.
55         '''
56         median_filter=7
57         min_npks=4
58         min_deviation=9
59
60         args=args.split(' ')
61         if len(args) == 2:
62             min_npks=int(args[0])
63             min_deviation=int(args[1])
64         else:
65             pass
66
67         print 'Processing playlist...'
68         notflat_list=[]
69
70         c=0
71         for item in self.current_list:
72             c+=1
73
74             try:
75                 notflat=self.has_features(item, median_filter, min_npks, min_deviation)
76                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': features are ',notflat
77             except:
78                 notflat=False
79                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': cannot be filtered. Probably unable to retrieve force data from corrupt file.'
80
81             if notflat:
82                 item.features=notflat
83                 item.curve=None #empty the item object, to further avoid memory leak
84                 notflat_list.append(item)
85
86         if len(notflat_list)==0:
87             print 'Found nothing interesting. Check your playlist, could be a bug or criteria could be too much stringent'
88             return
89         else:
90             print 'Found ',len(notflat_list),' potentially interesting curves'
91             print 'Regenerating playlist...'
92             self.pointer=0
93             self.current_list=notflat_list
94             self.current=self.current_list[self.pointer]
95             self.do_plot(0)
96
97     def has_features(self,item,median_filter,min_npks,min_deviation):
98         '''
99         decides if a curve is flat enough to be rejected from analysis: it sees if there
100         are at least min_npks points that are higher than min_deviation times the absolute value
101         of noise.
102
103         Algorithm original idea by Francesco Musiani, with my tweaks and corrections.
104         '''
105         retvalue=False
106
107         item.identify(self.drivers)
108         #we assume the first is the plot with the force curve
109         #do the median to better resolve features from noise
110         flat_plot=self.plotmanip_median(item.curve.default_plots()[0], item, customvalue=median_filter)
111         flat_vects=flat_plot.vectors
112         item.curve.close_all()
113         #needed to avoid *big* memory leaks!
114         del item.curve
115         del item
116
117         #absolute value of derivate
118         yretdiff=diff(flat_vects[1][1])
119         yretdiff=[abs(value) for value in yretdiff]
120         #average of derivate values
121         diffmean=numpy.mean(yretdiff)
122         yretdiff.sort()
123         yretdiff.reverse()
124         c_pks=0
125         for value in yretdiff:
126             if value/diffmean > min_deviation:
127                 c_pks+=1
128             else:
129                 break
130
131         if c_pks>=min_npks:
132             retvalue = c_pks
133
134         del flat_plot, flat_vects, yretdiff
135
136         return retvalue
137
138     ################################################################
139     #-----CONVFILT-------------------------------------------------
140     #-----Convolution-based peak recognition and filtering.
141     #Requires the libpeakspot.py library
142     
143     def has_peaks(self, plot, abs_devs=None, maxpeak=True, window=10):
144         '''
145         Finds peak position in a force curve.
146         FIXME: should be moved in libpeakspot.py
147         '''
148         if abs_devs==None:
149             abs_devs=self.convfilt_config['mindeviation']
150
151
152         xret=plot.vectors[1][0]
153         yret=plot.vectors[1][1]
154         #Calculate convolution.
155         convoluted=lps.conv_dx(yret, self.convfilt_config['convolution'])
156
157         #surely cut everything before the contact point
158         cut_index=self.find_contact_point(plot)
159         #cut even more, before the blind window
160         start_x=xret[cut_index]
161         blind_index=0
162         for value in xret[cut_index:]:
163             if abs((value) - (start_x)) > self.convfilt_config['blindwindow']*(10**-9):
164                 break
165             blind_index+=1
166         cut_index+=blind_index
167         #do the dirty convolution-peak finding stuff
168         noise_level=lps.noise_absdev(convoluted[cut_index:], self.convfilt_config['positive'], self.convfilt_config['maxcut'], self.convfilt_config['stable'])
169         above=lps.abovenoise(convoluted,noise_level,cut_index,abs_devs)
170         peak_location,peak_size=lps.find_peaks(above,seedouble=self.convfilt_config['seedouble'])
171                 
172         #take the minimum or the maximum of a peak
173         for i in range(len(peak_location)):
174             peak=peak_location[i]
175             valpk=min(yret[peak-window:peak+window])  #maximum in force (near the unfolding point)
176             index_pk=yret[peak-window:peak+window].index(valpk)+(peak-window)            
177
178             if maxpeak==False:
179                valpk=max(yret[peak:peak+window]) #minimum in force, near the baseline
180                index_pk=yret[peak:peak+window].index(valpk)+(peak)
181
182 #  Let's explain that for the minimum.  Immaging that we know that there is a peak at position/region 100 and you have found its y-value,
183 #  Now you look in the array, from 100-10 to 100+10  (if the window is 10).
184 #  This "100-10 to 100+10" is substancially a new array with its index. In this array you have 20
185 #  elements, so the index of your y-value will be 10.
186 #  Now to find the index in the TOTAL array you have to add the "position" of the "region" (that in this case
187 #  correspond to 100) and also substract the window size ---> (+100-10)
188
189             peak_location[i]=index_pk
190             
191         return peak_location,peak_size
192
193
194     def exec_has_peaks(self,item,abs_devs):
195         '''
196         encapsulates has_peaks for the purpose of correctly treating the curve objects in the convfilt loop,
197         to avoid memory leaks
198         '''
199         item.identify(self.drivers)
200         #we assume the first is the plot with the force curve
201         plot=item.curve.default_plots()[0]
202
203         if 'flatten' in self.config['plotmanips']:
204                     #If flatten is present, use it for better recognition of peaks...
205                     flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
206                     plot=flatten(plot, item, customvalue=1)
207
208         peak_location,peak_size=self.has_peaks(plot,abs_devs)
209         #close all open files
210         item.curve.close_all()
211         #needed to avoid *big* memory leaks!
212         del item.curve
213         del item
214         return peak_location, peak_size
215
216     #------------------------
217     #------commands----------
218     #------------------------
219     def do_peaks(self,args):
220         '''
221         PEAKS
222         (flatfilts.py)
223         Test command for convolution filter / test.
224         ----
225         Syntax: peaks [deviations]
226         absolute deviation = number of times the convolution signal is above the noise absolute deviation.
227         Default is 5.
228         '''
229         if len(args)==0:
230             args=self.convfilt_config['mindeviation']
231
232         try:
233             abs_devs=float(args)
234         except:
235             print 'Wrong argument, using config value'
236             abs_devs=float(self.convfilt_config['mindeviation'])
237
238         defplots=self.current.curve.default_plots()[0] #we need the raw, uncorrected plots
239
240         if 'flatten' in self.config['plotmanips']:
241             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
242             defplots=flatten(defplots, self.current)
243         else:
244             print 'You have the flatten plot manipulator not loaded. Enabling it could give you better results.'
245
246         peak_location,peak_size=self.has_peaks(defplots,abs_devs)
247         print 'Found '+str(len(peak_location))+' peaks.'
248         to_dump='peaks '+self.current.path+' '+str(len(peak_location))
249         self.outlet.push(to_dump)
250         #print peak_location
251
252         #if no peaks, we have nothing to plot. exit.
253         if len(peak_location)==0:
254             return
255
256         #otherwise, we plot the peak locations.
257         xplotted_ret=self.plots[0].vectors[1][0]
258         yplotted_ret=self.plots[0].vectors[1][1]
259         xgood=[xplotted_ret[index] for index in peak_location]
260         ygood=[yplotted_ret[index] for index in peak_location]
261
262         recplot=self._get_displayed_plot()
263         recplot.vectors.append([xgood,ygood])
264         if recplot.styles==[]:
265             recplot.styles=[None,None,'scatter']
266             recplot.colors=[None,None,None]
267         else:
268             recplot.styles+=['scatter']
269             recplot.colors+=[None]
270
271         self._send_plot([recplot])
272
273     def do_convfilt(self,args):
274         '''
275         CONVFILT
276         (flatfilts.py)
277         Filters out flat (featureless) curves of the current playlist,
278         creating a playlist containing only the curves with potential
279         features.
280         ------------
281         Syntax:
282         convfilt [min_npks min_deviation]
283
284         min_npks = minmum number of peaks
285         (to set the default, see convfilt.conf file; CONVCONF and SETCONF commands)
286
287         min_deviation = minimum signal/noise ratio *in the convolution*
288         (to set the default, see convfilt.conf file; CONVCONF and SETCONF commands)
289
290         If called without arguments, it uses default values.
291         '''
292
293         min_npks=self.convfilt_config['minpeaks']
294         min_deviation=self.convfilt_config['mindeviation']
295
296         args=args.split(' ')
297         if len(args) == 2:
298             min_npks=int(args[0])
299             min_deviation=int(args[1])
300         else:
301             pass
302
303         print 'Processing playlist...'
304         print '(Please wait)'
305         notflat_list=[]
306
307         c=0
308         for item in self.current_list:
309             c+=1
310
311             try:
312                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
313                 if len(peak_location)>=min_npks:
314                     isok='+'
315                 else:
316                     isok=''
317                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'+isok
318             except:
319                 peak_location,peak_size=[],[]
320                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': cannot be filtered. Probably unable to retrieve force data from corrupt file.'
321
322             if len(peak_location)>=min_npks:
323                 item.peak_location=peak_location
324                 item.peak_size=peak_size
325                 item.curve=None #empty the item object, to further avoid memory leak
326                 notflat_list.append(item)
327
328             for i in range(1000):
329                  k=0
330
331         #Warn that no flattening had been done.
332         if not ('flatten' in self.config['plotmanips']):
333             print 'Flatten manipulator was not found. Processing was done without flattening.'
334             print 'Try to enable it in your configuration file for better results.'
335
336         if len(notflat_list)==0:
337             print 'Found nothing interesting. Check your playlist, could be a bug or criteria could be too much stringent'
338             return
339         else:
340             print 'Found ',len(notflat_list),' potentially interesting curves'
341             print 'Regenerating playlist...'
342             self.pointer=0
343             self.current_list=notflat_list
344             self.current=self.current_list[self.pointer]
345             self.do_plot(0)
346
347
348     def do_setconv(self,args):
349         '''
350         SETCONV
351         (flatfilts.py)
352         Sets the convfilt configuration variables
353         ------
354         Syntax: setconv variable value
355         '''
356         args=args.split()
357         #FIXME: a general "set dictionary" function has to be built
358         if len(args)==0:
359             print self.convfilt_config
360         else:
361             if not (args[0] in self.convfilt_config.keys()):
362                 print 'This is not an internal convfilt variable!'
363                 print 'Run "setconv" without arguments to see a list of defined variables.'
364                 return
365
366             if len(args)==1:
367                 print self.convfilt_config[args[0]]
368             elif len(args)>1:
369                 try:
370                     self.convfilt_config[args[0]]=eval(args[1])
371                 except NameError: #we have a string argument
372                     self.convfilt_config[args[0]]=args[1]
373
374
375 #########################
376 #HANDLING OF CONFIGURATION FILE
377 class ConvfiltConfig(object):
378     '''
379     Handling of convfilt configuration file
380
381     Mostly based on the simple-yet-useful examples of the Python Library Reference
382     about xml.dom.minidom
383
384     FIXME: starting to look a mess, should require refactoring
385     '''
386
387     def __init__(self):
388         self.config={}
389
390
391     def load_config(self, filename):
392         myconfig=file(filename)
393         #the following 3 lines are needed to strip newlines. otherwise, since newlines
394         #are XML elements too, the parser would read them (and re-save them, multiplying
395         #newlines...)
396         #yes, I'm an XML n00b
397         the_file=myconfig.read()
398         the_file_lines=the_file.split('\n')
399         the_file=''.join(the_file_lines)
400
401         self.config_tree=xml.dom.minidom.parseString(the_file)
402
403         def getText(nodelist):
404             #take the text from a nodelist
405             #from Python Library Reference 13.7.2
406             rc = ''
407             for node in nodelist:
408                 if node.nodeType == node.TEXT_NODE:
409                     rc += node.data
410             return rc
411
412         def handleConfig(config):
413             noiseabsdev_elements=config.getElementsByTagName("noise_absdev")
414             convfilt_elements=config.getElementsByTagName("convfilt")
415             handleAbsdev(noiseabsdev_elements)
416             handleConvfilt(convfilt_elements)
417
418         def handleAbsdev(noiseabsdev_elements):
419             for element in noiseabsdev_elements:
420                 for attribute in element.attributes.keys():
421                     self.config[attribute]=element.getAttribute(attribute)
422
423         def handleConvfilt(convfilt_elements):
424             for element in convfilt_elements:
425                 for attribute in element.attributes.keys():
426                     self.config[attribute]=element.getAttribute(attribute)
427
428         handleConfig(self.config_tree)
429         #making items in the dictionary machine-readable
430         for item in self.config.keys():
431             try:
432                 self.config[item]=eval(self.config[item])
433             except NameError: #if it's an unreadable string, keep it as a string
434                 pass
435
436         return self.config