# Copyright (C) 2008-2010 W. Trevor King <wking@drexel.edu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# The author may be contacted at <wking@drexel.edu> on the Internet, or
# write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
# Philadelphia PA 19104, USA.
"""Provide :class:`ModelFitter` to make arbitrary model fitting easy."""
from copy import deepcopy

import numpy
from numpy import arange, array, exp, ndarray, pi, sqrt
from scipy.optimize import leastsq
class ModelFitter (object):
    """A convenient wrapper around :func:`scipy.optimize.leastsq`.

    For minimizing fit error: returns the normalized rms difference
    between a dataset and a function `y(X)`.  In order to use this
    class, you must subclass it and overwrite `ModelFitter.model()`.
    It is recommended that you also overwrite
    `ModelFitter.guess_initial_params()`, and then not pass
    initializing params to `ModelFitter.fit()`.  This helps ensure
    good automation for unsupervised testing.  Developing good
    heuristics for `ModelFitter.guess_initial_params()` is usually the
    hardest part of writing a theoretical histogram test.

    For bonus points, you can override `ModelFitter.model_string()`
    and `ModelFitter.param_string()`, to give your users an easy to
    understand idea of what's going on.  It's better to be verbose and
    clear.  Remember, this is the testing code.  It needs to be solid.
    Save your wonderful, sneaky hacks for the main code ;).

    TODO: look into :mod:`scipy.odr` as an alternative fitting
    algorithm (minimizing the sum of squares of orthogonal distances,
    vs. minimizing y distances).

    Parameters
    ----------
    data : ndarray
        The data to be fit (e.g. deflection data to be analyzed for
        the contact position).
    info :
        Store any extra information useful inside your overridden
        methods.
    rescale : bool
        Rescale parameters so the guess for each is 1.0.  Also rescale
        the data so ``data.std() == 1.0``.
    """
    def __init__(self, *args, **kwargs):
        # All construction work happens in set_data() so instances can
        # be recycled on new data without re-instantiation.
        self.set_data(*args, **kwargs)

    def set_data(self, data, info=None, rescale=False):
        """Store `data` and preallocate a same-shape model buffer.

        See the class docstring for parameter descriptions.
        """
        self._data = data
        # Preallocated buffer which model() fills in place each iteration.
        self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
        self.info = info
        self._rescale = rescale
        if rescale:
            # Use the first nonzero candidate as the data scale; the
            # trailing 1.0 guarantees a nonzero fallback for flat data.
            for x in [data.std(), data.max() - data.min(),
                      abs(data.max()), 1.0]:
                if x != 0:
                    self._data_scale_factor = x
                    break
        else:
            self._data_scale_factor = 1.0

    def model(self, params):
        """OVERRIDE: Compute the model data for parameters `params`.

        Fill `self._model_data` in place and return it.
        """
        p = params  # convenient alias
        self._model_data[:] = arange(len(self._data))
        raise NotImplementedError

    def model_string(self):
        """OVERRIDE: Give the user some idea of what model we're using."""
        return 'model string not defined'

    def param_string(self, params):
        """OVERRIDE: Give the user nicely named format for a parameter vector."""
        pstrings = []
        for i, p in enumerate(params):
            pstrings.append('param_%d=%g' % (i, p))
        return ', '.join(pstrings)

    def guess_initial_params(self):
        """OVERRIDE: Provide some intelligent heuristic to pick a
        starting point for minimizing your model."""
        raise NotImplementedError

    def guess_scale(self, params):
        """Guess the problem length scale in each parameter dimension.

        From the :func:`scipy.optimize.leastsq` documentation, `diag`
        (which we refer to as `scale`) sets ``factor * || diag * x ||``
        as the initial step.  If ``x == 0``, then `factor` is used
        instead (from `lmdif.f`_)::

            do 70 j = 1, n
               wa3(j) = diag(j)*x(j)
         70    continue
            xnorm = enorm(n,wa3)
            delta = factor*xnorm
            if (delta .eq. zero) delta = factor

        For most situations then, you don't need to do anything fancy.
        The default scaling (if you don't set a scale) is::

            c        on the first iteration and if mode is 1, scale according
            c        to the norms of the columns of the initial jacobian.

        (i.e. ``diag(j) = acnorm(j)``, where ``acnorm(j)`` is the norm
        of the `j`\\th column of the initial Jacobian).

        Returning `None` (the default here) lets `leastsq` fall back
        to that column-norm scaling.

        .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
        """
        return None

    def residual(self, params):
        """Return the (possibly rescaled) difference data - model(params).

        This is the objective function handed to `leastsq`.
        """
        if self._rescale:
            # Undo the parameter normalization before evaluating the model.
            params = [p * s for p, s in zip(params,
                                            self._param_scale_factors)]
        residual = self._data - self.model(params)
        if False:  # fit debugging: dump data/model pairs to numbered files
            if not hasattr(self, '_i_'):
                self._i_ = 0
            self._data.tofile('data.%d' % self._i_, sep='\n')
            self._model_data.tofile('model.%d' % self._i_, sep='\n')
            self._i_ += 1
        if self._rescale:
            residual /= self._data_scale_factor
        return residual

    def fit(self, initial_params=None, scale=None, **kwargs):
        """Fit the model to the data.

        Parameters
        ----------
        initial_params : iterable or None
            Initial parameter values for residual minimization.  If
            `None`, they are estimated from the data using
            :meth:`guess_initial_params`.
        scale : iterable or None
            Parameter length scales for residual minimization.  If
            `None`, they are estimated from the data using
            :meth:`guess_scale`.
        kwargs :
            Any additional arguments are passed through to `leastsq`.

        Returns
        -------
        params : list
            Fitted parameter values.  Minimization details are stored
            in the :attr:`fit_info` dictionary.
        """
        if initial_params is None:
            initial_params = self.guess_initial_params()
        if scale is None:
            scale = self.guess_scale(initial_params)
        if scale is not None:
            # leastsq requires strictly positive diag entries.
            assert min(scale) > 0, scale
        if self._rescale:
            # Copy so we never mutate the caller's parameter list when
            # zeroing out degenerate scale factors below.
            self._param_scale_factors = list(initial_params)
            for i, s in enumerate(self._param_scale_factors):
                if s == 0:
                    self._param_scale_factors[i] = 1.0
            active_params = [p / s for p, s in zip(
                    initial_params, self._param_scale_factors)]
        else:
            active_params = initial_params
        params, cov, info, mesg, ier = leastsq(
            func=self.residual, x0=active_params, full_output=True,
            diag=scale, **kwargs)
        if len(initial_params) == 1:
            # leastsq unwraps single-parameter vectors; re-wrap.
            params = [params]
        if self._rescale:
            active_params = params
            params = [p * s for p, s in zip(params,
                                            self._param_scale_factors)]
        else:
            active_params = params
        self.fit_info = {
            'rescaled': self._rescale,
            'initial parameters': initial_params,
            'active parameters': active_params,
            'scale': scale,
            'data scale factor': self._data_scale_factor,
            'active fitted parameters': active_params,
            'fitted parameters': params,
            'covariance matrix': cov,
            'info': info,
            'message': mesg,
            'convergence flag': ier,
            }
        return params
287 class HistogramModelFitter (ModelFitter):
288 """Adapt ModelFitter for easy Histogram() comparison.
290 TODO: look into :func:`scipy.optimize.fmin_powell` as way to
291 minimize a single residul number. Useful for fitting `Histograms`
292 with their assorted residal methods.
296 >>> from random import gauss
297 >>> from numpy import array
298 >>> from .histogram import Histogram
300 >>> class GaussianModel (HistogramModelFitter):
301 ... '''Simple gaussian model.
303 ... def model(self, params):
304 ... '''A gaussian model.
308 ... .. math:: y \\propto e^{-\\frac{(x-p_0)^2}{2 p_1^2}}
310 ... p = params # convenient alias
311 ... p[1] = abs(p[1]) # cannot have negative std. dev.
312 ... self._model_data.counts = (
313 ... self.info['binwidth']*self.info['N']/(p[1]*sqrt(2*pi)) *
314 ... exp(-((self._model_data.bin_centers - p[0])/p[1])**2 / 2))
315 ... return self._model_data
316 ... def guess_initial_params(self):
317 ... return [self._data.mean, self._data.std_dev]
318 ... def guess_scale(self, params):
319 ... return [self._data.std_dev, self._data.std_dev]
320 ... def model_string(self):
321 ... return 'p(x) = A exp((x-mu)^2 / (2 sigma^2))'
322 ... def param_string(self, params):
324 ... for name,param in zip(['mu', 'sigma'], params):
325 ... pstrings.append('%s=%.3f' % (name, param))
326 ... return ', '.join(pstrings)
330 >>> data = array([gauss(mu,sigma) for i in range(int(1e4))])
332 >>> h.from_data(data, h.calculate_bin_edges(data, 1))
333 >>> m = GaussianModel(h)
335 >>> print m.model_string()
336 p(x) = A exp((x-mu)^2 / (2 sigma^2))
337 >>> print m.param_string(params) # doctest: +ELLIPSIS
338 mu=3.1..., sigma=1.4...
340 def set_data(self, data, info=None, rescale=False):
341 self._data = data # Histogram() instance
342 self._model_data = deepcopy(data)
343 self._model_data.bin_edges = array(self._model_data.bin_edges)
344 self._model_data.counts = array(self._model_data.counts)
348 self._rescale = rescale
349 self._data_scale_factor = 1.0 # bo need to rescale normalized hists.
350 self.info['N'] = self._data.total
351 # assume constant binwidth
352 self.info['binwidth'] = (
353 self._data.bin_edges[1] - self._data.bin_edges[0])
354 self._model_data.bin_centers = (
355 self._model_data.bin_edges[:-1] + self.info['binwidth']/2)
357 def residual(self, params):
358 if self._rescale == True:
359 params = [p*s for p,s in zip(params, self._param_scale_factors)]
360 residual = self._data.counts - self.model(params).counts
361 if False: # fit debugging
362 if not hasattr(self, '_i_'):
364 self._data.to_stream(open('hist-data.%d' % self._i_, 'w'))
365 self._model_data.headings = [str(x) for x in params]
366 self._model_data.to_stream(open('hist-model.%d' % self._i_, 'w'))
368 if self._rescale == True:
369 residual /= self._data_scale_factor