# Copyright (C) 2010-2012 W. Trevor King <wking@tremily.us>
#
# This file is part of Hooke.
#
# Hooke is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# Hooke is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Hooke.  If not, see <http://www.gnu.org/licenses/>.

"""Provide :class:`ModelFitter` to make arbitrary model fitting easy.
"""

import numpy
from numpy import arange, ndarray
from scipy import __version__ as _scipy_version
from scipy.optimize import leastsq

_strings = _scipy_version.split('.')
# Don't convert the third string to an integer, in case of (for example) '0.7.2rc3'
_SCIPY_VERSION = (int(_strings[0]), int(_strings[1]), _strings[2])
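# For example:
#   '0.7.2rc3' -> (0, 7, '2rc3')
#   '0.10.1'   -> (0, 10, '1')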


class PoorFit (ValueError):
    pass


class ModelFitter (object):
    """A convenient wrapper around :func:`scipy.optimize.leastsq`.

    TODO: look into :mod:`scipy.odr` as an alternative fitting
    algorithm (minimizing the sum of squares of orthogonal distances,
    vs. minimizing y distances).

    Parameters
    ----------
    data : array_like
        Deflection data to be analyzed for the contact position.
    info :
        Store any extra information useful inside your overridden
        methods.
    rescale : boolean
        Rescale parameters so the guess for each is 1.0.  Also rescale
        the data so data.std() == 1.0.

    Examples
    --------

    >>> from pprint import pprint
    >>> from Queue import Queue

    You'll want to subclass `ModelFitter`, overriding at least
    `.model` and potentially the parameter and scale guessing
    methods.

    >>> class LinearModel (ModelFitter):
    ...     '''Simple linear model.
    ...
    ...     Levenberg-Marquardt is not how you want to solve this problem
    ...     for real systems, but it's a simple test case.
    ...     '''
    ...     def model(self, params):
    ...         '''A linear model.
    ...
    ...         .. math:: y = p_0 x + p_1
    ...         '''
    ...         p = params  # convenient alias
    ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
    ...         return self._model_data
    ...     def guess_initial_params(self, outqueue=None):
    ...         return [float(self._data[-1] - self._data[0])/len(self._data),
    ...                 self._data[0]]
    ...     def guess_scale(self, params, outqueue=None):
    ...         slope_scale = 0.1
    ...         if params[1] == 0:
    ...             offset_scale = 1.
    ...         else:
    ...             offset_scale = 0.1*self._data.std()/abs(params[1])
    ...         if offset_scale == 0:  # data is completely flat
    ...             offset_scale = 1.
    ...         return [slope_scale, offset_scale]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
    >>> m = LinearModel(data)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> info = outqueue.get(block=False)
    >>> pprint(info)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
    {'active fitted parameters': array([ 6.999..., -32.889...]),
     'active parameters': array([ 6.999..., -32.889...]),
     'convergence flag': ...,
     'covariance matrix': array([[ 1.199...e-08, -5.993...e-06],
            [ -5.993...e-06, 3.994...e-03]]),
     'data scale factor': 1.0,
     'fitted parameters': array([ 6.999..., -32.889...]),
     'info': {'fjac': array([[...]]),
              'fvec': array([...]),
              'ipvt': array([1, 2]),
              'nfev': ...,
              'qtf': array([...])},
     'initial parameters': [6.992..., -33.0],
     'message': '...relative error between two consecutive iterates is at most 0.000...',
     'rescaled': False,
     'scale': [0.100..., 6.123...]}

    We round the outputs to protect the doctest against differences in
    machine rounding during computation.  We expect the values to be close
    to the input settings (slope 7, offset -33).

    >>> print('{:.3f}'.format(slope))
    >>> print('{:.3f}'.format(offset))

    The offset is a bit off because the range is not a multiple of
    :math:`2\pi`.

    We could also use rescaled parameters:

    >>> m = LinearModel(data, rescale=True)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> print('{:.3f}'.format(slope))
    >>> print('{:.3f}'.format(offset))

    Test single-parameter models:

    >>> class SingleParameterModel (LinearModel):
    ...     '''Simple linear model.
    ...     '''
    ...     def model(self, params):
    ...         return super(SingleParameterModel, self).model([params[0], 0.])
    ...     def guess_initial_params(self, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_initial_params(outqueue)[:1]
    ...     def guess_scale(self, params, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_scale([params[0], 0.], outqueue)[:1]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000)
    >>> m = SingleParameterModel(data)
    >>> slope, = m.fit(outqueue=outqueue)
    >>> print('{:.3f}'.format(slope))
    """

    def __init__(self, *args, **kwargs):
        self.set_data(*args, **kwargs)

    def set_data(self, data, info=None, rescale=False):
        self._data = data
        self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
        self.info = info
        self._rescale = rescale
        if rescale:
            # Use the first nonzero candidate as the data scale factor.
            for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
                if x != 0:
                    self._data_scale_factor = x
                    break
        else:
            self._data_scale_factor = 1.0

    def model(self, params):
        # Subclasses must override this method, filling self._model_data
        # in place and returning it (see the class docstring for an example).
        p = params  # convenient alias
        self._model_data[:] = arange(len(self._data))
        raise NotImplementedError

    def guess_initial_params(self, outqueue=None):
        return []

    def guess_scale(self, params, outqueue=None):
        """Guess the problem length scale in each parameter dimension.

        Notes
        -----
        From the :func:`scipy.optimize.leastsq` documentation, `diag`
        (which we refer to as `scale`) sets `factor * || diag * x||`
        as the initial step.  If `x == 0`, then `factor` is used
        instead (from `lmdif.f`_)::

            do 60 j = 1, n
               wa3(j) = diag(j)*x(j)
     60        continue
            xnorm = enorm(n,wa3)
            delta = factor*xnorm
            if (delta .eq. zero) delta = factor

        For most situations then, you don't need to do anything fancy.
        The default scaling (if you don't set a scale) is::

            c        on the first iteration and if mode is 1, scale according
            c        to the norms of the columns of the initial jacobian.

        (i.e. `diag(j) = acnorm(j)`, where `acnorm(j)` is the norm of the
        `j`th column of the initial Jacobian).

        .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
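
        If the default column-norm scaling misbehaves for a particular
        model, a subclass can return explicit scales instead.  The
        following is only a rough sketch of one possible override (it is
        not part of this class's API)::

            def guess_scale(self, params, outqueue=None):
                # Step roughly a tenth of each initial guess, falling
                # back to 1.0 for parameters whose guess is exactly zero.
                return [0.1*abs(p) if p != 0 else 1.0 for p in params]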
        """
        return None

    def residual(self, params):
        if self._rescale:
            params = [p*s for p,s in zip(params, self._param_scale_factors)]
        residual = self._data - self.model(params)
        if False:  # fit debugging: dump the data and model to files
            if not hasattr(self, '_i_'):
                self._i_ = 0
            self._data.tofile('data.%d' % self._i_, sep='\n')
            self.model(params).tofile('model.%d' % self._i_, sep='\n')
            self._i_ += 1
        if self._rescale:
            residual /= self._data_scale_factor
        return residual

    def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
        """Fit the model to the data.

        Parameters
        ----------
        initial_params : iterable or None
            Initial parameter values for residual minimization.  If
            `None`, they are estimated from the data using
            :meth:`guess_initial_params`.
        scale : iterable or None
            Parameter length scales for residual minimization.  If
            `None`, they are estimated from the data using
            :meth:`guess_scale`.
        outqueue : Queue or None
            If given, will be used to output the data and fitted model
            for user verification.
        **kwargs :
            Any additional arguments are passed through to `leastsq`.
        """
        if initial_params is None:
            initial_params = self.guess_initial_params(outqueue)
        if scale is None:
            scale = self.guess_scale(initial_params, outqueue)
        if scale is not None:
            assert min(scale) > 0, scale
        if self._rescale:
            self._param_scale_factors = initial_params
            for i,s in enumerate(self._param_scale_factors):
                if s == 0:
                    self._param_scale_factors[i] = 1.0
            active_params = [p/s for p,s in zip(initial_params,
                                                self._param_scale_factors)]
        else:
            active_params = initial_params
        params,cov,info,mesg,ier = leastsq(
            func=self.residual, x0=active_params, full_output=True,
            diag=scale, **kwargs)
        if len(initial_params) == 1 and _SCIPY_VERSION < (0, 8, '0'):
            # params is a float for scipy < 0.8.0.  Convert to list.
            params = [params]
        if self._rescale:
            active_params = params
            params = [p*s for p,s in zip(params,
                                         self._param_scale_factors)]
        else:
            active_params = params
        if outqueue is not None:
            outqueue.put({
                'rescaled': self._rescale,
                'initial parameters': initial_params,
                'active parameters': active_params,
                'scale': scale,
                'data scale factor': self._data_scale_factor,
                'active fitted parameters': active_params,
                'fitted parameters': params,
                'covariance matrix': cov,
                'info': info,
                'message': mesg,
                'convergence flag': ier,
                })
        return params


# Example ODR code from the old fit.py
# def dist(px,py,linex,liney):
#     distancesx=scipy.array([(px-x)**2 for x in linex])
#     minindex=numpy.argmin(distancesx)
#     print(px, linex[0], linex[-1])
#     return (py-liney[minindex])**2
#
# def f_wlc(params,x,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#     return y
#
# def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#     return y
#
# realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
# if pl_value is not None:
#     model=scipy.odr.Model(f_wlc_plfix)
#     o = scipy.odr.ODR(realdata, model, p0_plfix)
# else:
#     model=scipy.odr.Model(f_wlc)
#     o = scipy.odr.ODR(realdata, model, p0)
#
# o.set_job(fit_type=2)
# out=o.run()
# fit_out=[(1/i) for i in out.beta]
#
# # Calculate fit errors from output standard deviations.
# # We must propagate the error because we fit the *inverse* parameters!
# # The error = (error of the inverse)*(value**2)
# fit_errors=[]
# for sd,value in zip(out.sd_beta, fit_out):
#     err_real=sd*(value**2)
#     fit_errors.append(err_real)
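#
# As a rough, untested sketch of what the scipy.odr alternative mentioned in
# the class TODO might look like today (the `linear`, `x`, and `y` names below
# are illustrative only, not part of Hooke):
#
#     import numpy
#     import scipy.odr
#
#     def linear(beta, x):
#         return beta[0]*x + beta[1]
#
#     x = numpy.arange(1000.)
#     y = 7.*x - 33.
#     data = scipy.odr.RealData(x, y)
#     model = scipy.odr.Model(linear)
#     odr = scipy.odr.ODR(data, model, beta0=[1., 0.])
#     output = odr.run()
#     # output.beta holds the fitted parameters; output.sd_beta their
#     # standard deviations.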