# Copyright (C) 2010-2011 W. Trevor King <wking@drexel.edu>
#
# This file is part of Hooke.
#
# Hooke is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Hooke is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
# Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with Hooke.  If not, see
# <http://www.gnu.org/licenses/>.
"""Provide :class:`ModelFitter` to make arbitrary model fitting easy."""
22 from numpy import arange, ndarray
23 from scipy import __version__ as _scipy_version
24 from scipy.optimize import leastsq
26 _strings = _scipy_version.split('.')
27 # Don't convert third string to an integer in case of (for example) '0.7.2rc3'
28 _SCIPY_VERSION = (int(_strings[0]), int(_strings[1]), _strings[2])
class PoorFit (ValueError):
    """Raised when a fit is judged unacceptably poor."""
    pass
class ModelFitter (object):
    """A convenient wrapper around :func:`scipy.optimize.leastsq`.

    TODO: look into :mod:`scipy.odr` as an alternative fitting
    algorithm (minimizing the sum of squares of orthogonal distances,
    vs. minimizing y distances).

    Parameters
    ----------
    d_data : array_like
        Deflection data to be analyzed for the contact position.
    info :
        Store any extra information useful inside your overridden
        methods.
    rescale : boolean
        Rescale parameters so the guess for each is 1.0.  Also rescale
        the data so data.std() == 1.0.

    Examples
    --------

    >>> from pprint import pprint
    >>> from Queue import Queue
    >>> import numpy

    You'll want to subclass `ModelFitter`, overriding at least
    `.model` and potentially the parameter and scale guessing
    methods.

    >>> class LinearModel (ModelFitter):
    ...     '''Simple linear model.
    ...
    ...     Levenberg-Marquardt is not how you want to solve this problem
    ...     for real systems, but it's a simple test case.
    ...     '''
    ...     def model(self, params):
    ...         '''A linear model.
    ...
    ...         Notes
    ...         -----
    ...         .. math:: y = p_0 x + p_1
    ...         '''
    ...         p = params  # convenient alias
    ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
    ...         return self._model_data
    ...     def guess_initial_params(self, outqueue=None):
    ...         return [float(self._data[-1] - self._data[0])/len(self._data),
    ...                 self._data[0]]
    ...     def guess_scale(self, params, outqueue=None):
    ...         slope_scale = 0.1
    ...         if params[1] == 0:
    ...             offset_scale = 1
    ...         else:
    ...             offset_scale = 0.1*self._data.std()/abs(params[1])
    ...         if offset_scale == 0:  # data is completely flat
    ...             offset_scale = 1.
    ...         return [slope_scale, offset_scale]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
    >>> m = LinearModel(data)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> info = outqueue.get(block=False)
    >>> pprint(info)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
    {'active fitted parameters': array([  6.999..., -32.889...]),
     'active parameters': array([  6.999..., -32.889...]),
     'convergence flag': ...,
     'covariance matrix': array([[  1.199...e-08,  -5.993...e-06],
                                 [ -5.993...e-06,   3.994...e-03]]),
     'data scale factor': 1.0,
     'fitted parameters': array([  6.999..., -32.889...]),
     'info': {'fjac': array([[...]]),
              'fvec': array([...]),
              'ipvt': array([1, 2]),
              'nfev': ...,
              'qtf': array([...])},
     'initial parameters': [6.992..., -33.0],
     'message': '...relative error between two consecutive iterates is at most 0.000...',
     'rescaled': False,
     'scale': [0.100..., 6.123...]}

    We round the outputs to protect the doctest against differences in
    machine rounding during computation.  We expect the values to be close
    to the input settings (slope 7, offset -33).

    >>> print '%.3f' % slope
    7.000
    >>> print '%.3f' % offset
    -32.890

    The offset is a bit off because, the range is not a multiple of
    :math:`2\pi`.

    We could also use rescaled parameters:

    >>> m = LinearModel(data, rescale=True)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> print '%.3f' % slope
    7.000
    >>> print '%.3f' % offset
    -32.890

    Test single-parameter models:

    >>> class SingleParameterModel (LinearModel):
    ...     '''Simple linear model.
    ...     '''
    ...     def model(self, params):
    ...         return super(SingleParameterModel, self).model([params[0], 0.])
    ...     def guess_initial_params(self, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_initial_params(outqueue)[:1]
    ...     def guess_scale(self, params, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_scale([params[0], 0.], outqueue)[:1]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000)
    >>> m = SingleParameterModel(data)
    >>> slope, = m.fit(outqueue=outqueue)
    >>> print '%.3f' % slope
    7.000
    """
156 def __init__(self, *args, **kwargs):
157 self.set_data(*args, **kwargs)
159 def set_data(self, data, info=None, rescale=False):
161 self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
163 self._rescale = rescale
165 for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
167 self._data_scale_factor = x
170 self._data_scale_factor = 1.0
172 def model(self, params):
173 p = params # convenient alias
174 self._model_data[:] = arange(len(self._data))
175 raise NotImplementedError
177 def guess_initial_params(self, outqueue=None):
180 def guess_scale(self, params, outqueue=None):
181 """Guess the problem length scale in each parameter dimension.
185 From the :func:`scipy.optimize.leastsq` documentation, `diag`
186 (which we refer to as `scale`, sets `factor * || diag * x||`
187 as the initial step. If `x == 0`, then `factor` is used
188 instead (from `lmdif.f`_)::
191 wa3(j) = diag(j)*x(j)
195 if (delta .eq. zero) delta = factor
197 For most situations then, you don't need to do anything fancy.
198 The default scaling (if you don't set a scale) is::
200 c on the first iteration and if mode is 1, scale according
201 c to the norms of the columns of the initial jacobian.
203 (i.e. `diag(j) = acnorm(j)`, where `acnorm(j) is the norm of the `j`th column
204 of the initial Jacobian).
206 .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
210 def residual(self, params):
211 if self._rescale == True:
212 params = [p*s for p,s in zip(params, self._param_scale_factors)]
213 residual = self._data - self.model(params)
214 if False: # fit debugging
215 if not hasattr(self, '_i_'):
217 self._data.tofile('data.%d' % self._i_, sep='\n')
218 self.model(params).tofile('model.%d' % self._i_, sep='\n')
220 if self._rescale == True:
221 residual /= self._data_scale_factor
224 def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
228 initial_params : iterable or None
229 Initial parameter values for residual minimization. If
230 `None`, they are estimated from the data using
231 :meth:`guess_initial_params`.
232 scale : iterable or None
233 Parameter length scales for residual minimization. If
234 `None`, they are estimated from the data using
236 outqueue : Queue or None
237 If given, will be used to output the data and fitted model
238 for user verification.
240 Any additional arguments are passed through to `leastsq`.
242 if initial_params == None:
243 initial_params = self.guess_initial_params(outqueue)
245 scale = self.guess_scale(initial_params, outqueue)
247 assert min(scale) > 0, scale
248 if self._rescale == True:
249 self._param_scale_factors = initial_params
250 for i,s in enumerate(self._param_scale_factors):
252 self._param_scale_factors[i] = 1.0
253 active_params = [p/s for p,s in zip(initial_params,
254 self._param_scale_factors)]
256 active_params = initial_params
257 params,cov,info,mesg,ier = leastsq(
258 func=self.residual, x0=active_params, full_output=True,
259 diag=scale, **kwargs)
260 if len(initial_params) == 1 and _SCIPY_VERSION < (0, 8, '0'):
261 # params is a float for scipy < 0.8.0. Convert to list.
263 if self._rescale == True:
264 active_params = params
265 params = [p*s for p,s in zip(params,
266 self._param_scale_factors)]
268 active_params = params
271 'rescaled': self._rescale,
272 'initial parameters': initial_params,
273 'active parameters': active_params,
275 'data scale factor': self._data_scale_factor,
276 'active fitted parameters': active_params,
277 'fitted parameters': params,
278 'covariance matrix': cov,
281 'convergence flag': ier,
# Example ODR code from the old fit.py
# def dist(px,py,linex,liney):
#     distancesx=scipy.array([(px-x)**2 for x in linex])
#     minindex=numpy.argmin(distancesx)
#     print px, linex[0], linex[-1]
#     return (py-liney[minindex])**2
#
# def f_wlc(params,x,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#
# def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#
# realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
# if pl_value:
#     model=scipy.odr.Model(f_wlc_plfix)
#     o = scipy.odr.ODR(realdata, model, p0_plfix)
# else:
#     model=scipy.odr.Model(f_wlc)
#     o = scipy.odr.ODR(realdata, model, p0)
#
# o.set_job(fit_type=2)
# out=o.run()
# fit_out=[(1/i) for i in out.beta]
#
# #Calculate fit errors from output standard deviations.
# #We must propagate the error because we fit the *inverse* parameters!
# #The error = (error of the inverse)*(value**2)
# fit_errors=[]
# for sd,value in zip(out.sd_beta, fit_out):
#     err_real=sd*(value**2)
#     fit_errors.append(err_real)