156531bfad80daabf03df8e239b6cf4f37ccb766
[hooke.git] / hooke / plugin / flatfilts.py
1 # Copyright
2
3 """Force spectroscopy curves filtering of flat curves
4
5 Other plugin dependencies:
6 procplots.py (plot processing plugin)
7 """
8
9 from hooke.libhooke import WX_GOOD
10
11 import wxversion
12 wxversion.select(WX_GOOD)
13 import xml.dom.minidom
14 import wx
15 import scipy
16 import numpy
17 from numpy import diff
18 #import pickle
19
20 from .. import libpeakspot as lps
21 from .. import curve as lhc
22
23
24 class flatfiltsCommands(object):
25
26     def _plug_init(self):
27         #configurate convfilt variables
28         convfilt_configurator=ConvfiltConfig()
29         self.convfilt_config=convfilt_configurator.load_config('convfilt.conf')
30
31     def do_flatfilt(self,args):
32         '''
33         FLATFILT
34         (flatfilts.py)
35         Filters out flat (featureless) curves of the current playlist,
36         creating a playlist containing only the curves with potential
37         features.
38         ------------
39         Syntax:
40         flatfilt [min_npks min_deviation]
41
42         min_npks = minmum number of points over the deviation
43         (default=4)
44
45         min_deviation = minimum signal/noise ratio
46         (default=9)
47
48         If called without arguments, it uses default values, that
49         should work most of the times.
50         '''
51         median_filter=7
52         min_npks=4
53         min_deviation=9
54
55         args=args.split(' ')
56         if len(args) == 2:
57             min_npks=int(args[0])
58             min_deviation=int(args[1])
59         else:
60             pass
61
62         print 'Processing playlist...'
63         notflat_list=[]
64
65         c=0
66         for item in self.current_list:
67             c+=1
68
69             try:
70                 notflat=self.has_features(item, median_filter, min_npks, min_deviation)
71                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': features are ',notflat
72             except:
73                 notflat=False
74                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': cannot be filtered. Probably unable to retrieve force data from corrupt file.'
75
76             if notflat:
77                 item.features=notflat
78                 item.curve=None #empty the item object, to further avoid memory leak
79                 notflat_list.append(item)
80
81         if len(notflat_list)==0:
82             print 'Found nothing interesting. Check your playlist, could be a bug or criteria could be too much stringent'
83             return
84         else:
85             print 'Found ',len(notflat_list),' potentially interesting curves'
86             print 'Regenerating playlist...'
87             self.pointer=0
88             self.current_list=notflat_list
89             self.current=self.current_list[self.pointer]
90             self.do_plot(0)
91
92     def has_features(self,item,median_filter,min_npks,min_deviation):
93         '''
94         decides if a curve is flat enough to be rejected from analysis: it sees if there
95         are at least min_npks points that are higher than min_deviation times the absolute value
96         of noise.
97
98         Algorithm original idea by Francesco Musiani, with my tweaks and corrections.
99         '''
100         retvalue=False
101
102         item.identify(self.drivers)
103         #we assume the first is the plot with the force curve
104         #do the median to better resolve features from noise
105         flat_plot=self.plotmanip_median(item.curve.default_plots()[0], item, customvalue=median_filter)
106         flat_vects=flat_plot.vectors
107         item.curve.close_all()
108         #needed to avoid *big* memory leaks!
109         del item.curve
110         del item
111
112         #absolute value of derivate
113         yretdiff=diff(flat_vects[1][1])
114         yretdiff=[abs(value) for value in yretdiff]
115         #average of derivate values
116         diffmean=numpy.mean(yretdiff)
117         yretdiff.sort()
118         yretdiff.reverse()
119         c_pks=0
120         for value in yretdiff:
121             if value/diffmean > min_deviation:
122                 c_pks+=1
123             else:
124                 break
125
126         if c_pks>=min_npks:
127             retvalue = c_pks
128
129         del flat_plot, flat_vects, yretdiff
130
131         return retvalue
132
133     ################################################################
134     #-----CONVFILT-------------------------------------------------
135     #-----Convolution-based peak recognition and filtering.
136     #Requires the libpeakspot.py library
137     
138     def has_peaks(self, plot, abs_devs=None, maxpeak=True, window=10):
139         '''
140         Finds peak position in a force curve.
141         FIXME: should be moved in libpeakspot.py
142         '''
143         if abs_devs==None:
144             abs_devs=self.convfilt_config['mindeviation']
145
146
147         xret=plot.vectors[1][0]
148         yret=plot.vectors[1][1]
149         #Calculate convolution.
150         convoluted=lps.conv_dx(yret, self.convfilt_config['convolution'])
151
152         #surely cut everything before the contact point
153         cut_index=self.find_contact_point(plot)
154         #cut even more, before the blind window
155         start_x=xret[cut_index]
156         blind_index=0
157         for value in xret[cut_index:]:
158             if abs((value) - (start_x)) > self.convfilt_config['blindwindow']*(10**-9):
159                 break
160             blind_index+=1
161         cut_index+=blind_index
162         #do the dirty convolution-peak finding stuff
163         noise_level=lps.noise_absdev(convoluted[cut_index:], self.convfilt_config['positive'], self.convfilt_config['maxcut'], self.convfilt_config['stable'])
164         above=lps.abovenoise(convoluted,noise_level,cut_index,abs_devs)
165         peak_location,peak_size=lps.find_peaks(above,seedouble=self.convfilt_config['seedouble'])
166                 
167         #take the minimum or the maximum of a peak
168         for i in range(len(peak_location)):
169             peak=peak_location[i]
170             valpk=min(yret[peak-window:peak+window])  #maximum in force (near the unfolding point)
171             index_pk=yret[peak-window:peak+window].index(valpk)+(peak-window)            
172
173             if maxpeak==False:
174                valpk=max(yret[peak:peak+window]) #minimum in force, near the baseline
175                index_pk=yret[peak:peak+window].index(valpk)+(peak)
176
177 #  Let's explain that for the minimum.  Immaging that we know that there is a peak at position/region 100 and you have found its y-value,
178 #  Now you look in the array, from 100-10 to 100+10  (if the window is 10).
179 #  This "100-10 to 100+10" is substancially a new array with its index. In this array you have 20
180 #  elements, so the index of your y-value will be 10.
181 #  Now to find the index in the TOTAL array you have to add the "position" of the "region" (that in this case
182 #  correspond to 100) and also substract the window size ---> (+100-10)
183
184             peak_location[i]=index_pk
185             
186         return peak_location,peak_size
187
188
189     def exec_has_peaks(self,item,abs_devs):
190         '''
191         encapsulates has_peaks for the purpose of correctly treating the curve objects in the convfilt loop,
192         to avoid memory leaks
193         '''
194         item.identify(self.drivers)
195         #we assume the first is the plot with the force curve
196         plot=item.curve.default_plots()[0]
197
198         if 'flatten' in self.config['plotmanips']:
199                     #If flatten is present, use it for better recognition of peaks...
200                     flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
201                     plot=flatten(plot, item, customvalue=1)
202
203         peak_location,peak_size=self.has_peaks(plot,abs_devs)
204         #close all open files
205         item.curve.close_all()
206         #needed to avoid *big* memory leaks!
207         del item.curve
208         del item
209         return peak_location, peak_size
210
211     #------------------------
212     #------commands----------
213     #------------------------
214     def do_peaks(self,args):
215         '''
216         PEAKS
217         (flatfilts.py)
218         Test command for convolution filter / test.
219         ----
220         Syntax: peaks [deviations]
221         absolute deviation = number of times the convolution signal is above the noise absolute deviation.
222         Default is 5.
223         '''
224         if len(args)==0:
225             args=self.convfilt_config['mindeviation']
226
227         try:
228             abs_devs=float(args)
229         except:
230             print 'Wrong argument, using config value'
231             abs_devs=float(self.convfilt_config['mindeviation'])
232
233         defplots=self.current.curve.default_plots()[0] #we need the raw, uncorrected plots
234
235         if 'flatten' in self.config['plotmanips']:
236             flatten=self._find_plotmanip('flatten') #extract flatten plot manipulator
237             defplots=flatten(defplots, self.current)
238         else:
239             print 'You have the flatten plot manipulator not loaded. Enabling it could give you better results.'
240
241         peak_location,peak_size=self.has_peaks(defplots,abs_devs)
242         print 'Found '+str(len(peak_location))+' peaks.'
243         to_dump='peaks '+self.current.path+' '+str(len(peak_location))
244         self.outlet.push(to_dump)
245         #print peak_location
246
247         #if no peaks, we have nothing to plot. exit.
248         if len(peak_location)==0:
249             return
250
251         #otherwise, we plot the peak locations.
252         xplotted_ret=self.plots[0].vectors[1][0]
253         yplotted_ret=self.plots[0].vectors[1][1]
254         xgood=[xplotted_ret[index] for index in peak_location]
255         ygood=[yplotted_ret[index] for index in peak_location]
256
257         recplot=self._get_displayed_plot()
258         recplot.vectors.append([xgood,ygood])
259         if recplot.styles==[]:
260             recplot.styles=[None,None,'scatter']
261             recplot.colors=[None,None,None]
262         else:
263             recplot.styles+=['scatter']
264             recplot.colors+=[None]
265
266         self._send_plot([recplot])
267
268     def do_convfilt(self,args):
269         '''
270         CONVFILT
271         (flatfilts.py)
272         Filters out flat (featureless) curves of the current playlist,
273         creating a playlist containing only the curves with potential
274         features.
275         ------------
276         Syntax:
277         convfilt [min_npks min_deviation]
278
279         min_npks = minmum number of peaks
280         (to set the default, see convfilt.conf file; CONVCONF and SETCONF commands)
281
282         min_deviation = minimum signal/noise ratio *in the convolution*
283         (to set the default, see convfilt.conf file; CONVCONF and SETCONF commands)
284
285         If called without arguments, it uses default values.
286         '''
287
288         min_npks=self.convfilt_config['minpeaks']
289         min_deviation=self.convfilt_config['mindeviation']
290
291         args=args.split(' ')
292         if len(args) == 2:
293             min_npks=int(args[0])
294             min_deviation=int(args[1])
295         else:
296             pass
297
298         print 'Processing playlist...'
299         print '(Please wait)'
300         notflat_list=[]
301
302         c=0
303         for item in self.current_list:
304             c+=1
305
306             try:
307                 peak_location,peak_size=self.exec_has_peaks(item,min_deviation)
308                 if len(peak_location)>=min_npks:
309                     isok='+'
310                 else:
311                     isok=''
312                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': found '+str(len(peak_location))+' peaks.'+isok
313             except:
314                 peak_location,peak_size=[],[]
315                 print 'Curve',item.path, 'is',c,'of',len(self.current_list),': cannot be filtered. Probably unable to retrieve force data from corrupt file.'
316
317             if len(peak_location)>=min_npks:
318                 item.peak_location=peak_location
319                 item.peak_size=peak_size
320                 item.curve=None #empty the item object, to further avoid memory leak
321                 notflat_list.append(item)
322
323         #Warn that no flattening had been done.
324         if not ('flatten' in self.config['plotmanips']):
325             print 'Flatten manipulator was not found. Processing was done without flattening.'
326             print 'Try to enable it in your configuration file for better results.'
327
328         if len(notflat_list)==0:
329             print 'Found nothing interesting. Check your playlist, could be a bug or criteria could be too much stringent'
330             return
331         else:
332             print 'Found ',len(notflat_list),' potentially interesting curves'
333             print 'Regenerating playlist...'
334             self.pointer=0
335             self.current_list=notflat_list
336             self.current=self.current_list[self.pointer]
337             self.do_plot(0)
338
339
340     def do_setconv(self,args):
341         '''
342         SETCONV
343         (flatfilts.py)
344         Sets the convfilt configuration variables
345         ------
346         Syntax: setconv variable value
347         '''
348         args=args.split()
349         #FIXME: a general "set dictionary" function has to be built
350         if len(args)==0:
351             print self.convfilt_config
352         else:
353             if not (args[0] in self.convfilt_config.keys()):
354                 print 'This is not an internal convfilt variable!'
355                 print 'Run "setconv" without arguments to see a list of defined variables.'
356                 return
357
358             if len(args)==1:
359                 print self.convfilt_config[args[0]]
360             elif len(args)>1:
361                 try:
362                     self.convfilt_config[args[0]]=eval(args[1])
363                 except NameError: #we have a string argument
364                     self.convfilt_config[args[0]]=args[1]
365
366
367 #########################
368 #HANDLING OF CONFIGURATION FILE
369 class ConvfiltConfig(object):
370     '''
371     Handling of convfilt configuration file
372
373     Mostly based on the simple-yet-useful examples of the Python Library Reference
374     about xml.dom.minidom
375
376     FIXME: starting to look a mess, should require refactoring
377     '''
378
379     def __init__(self):
380         self.config={}
381
382
383     def load_config(self, filename):
384         myconfig=file(filename)
385         #the following 3 lines are needed to strip newlines. otherwise, since newlines
386         #are XML elements too, the parser would read them (and re-save them, multiplying
387         #newlines...)
388         #yes, I'm an XML n00b
389         the_file=myconfig.read()
390         the_file_lines=the_file.split('\n')
391         the_file=''.join(the_file_lines)
392
393         self.config_tree=xml.dom.minidom.parseString(the_file)
394
395         def getText(nodelist):
396             #take the text from a nodelist
397             #from Python Library Reference 13.7.2
398             rc = ''
399             for node in nodelist:
400                 if node.nodeType == node.TEXT_NODE:
401                     rc += node.data
402             return rc
403
404         def handleConfig(config):
405             noiseabsdev_elements=config.getElementsByTagName("noise_absdev")
406             convfilt_elements=config.getElementsByTagName("convfilt")
407             handleAbsdev(noiseabsdev_elements)
408             handleConvfilt(convfilt_elements)
409
410         def handleAbsdev(noiseabsdev_elements):
411             for element in noiseabsdev_elements:
412                 for attribute in element.attributes.keys():
413                     self.config[attribute]=element.getAttribute(attribute)
414
415         def handleConvfilt(convfilt_elements):
416             for element in convfilt_elements:
417                 for attribute in element.attributes.keys():
418                     self.config[attribute]=element.getAttribute(attribute)
419
420         handleConfig(self.config_tree)
421         #making items in the dictionary machine-readable
422         for item in self.config.keys():
423             try:
424                 self.config[item]=eval(self.config[item])
425             except NameError: #if it's an unreadable string, keep it as a string
426                 pass
427
428         return self.config