1 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
3 # This file is part of Hooke.
5 # Hooke is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Lesser General Public License as
7 # published by the Free Software Foundation, either version 3 of the
8 # License, or (at your option) any later version.
10 # Hooke is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
13 # Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with Hooke. If not, see
17 # <http://www.gnu.org/licenses/>.
19 """Provide :class:`ModelFitter` to make arbitrary model fitting easy.
22 from numpy import arange, ndarray
23 from scipy.optimize import leastsq
27 class PoorFit (ValueError):
# NOTE(review): the class body is not visible in this excerpt (the embedded
# original line numbering jumps from 27 to 30).  Presumably raised when a
# completed fit fails some quality heuristic -- confirm against full source.
30 class ModelFitter (object):
# NOTE(review): this listing is an excerpt -- the embedded original line
# numbers skip values, so parts of the class docstring, several return
# statements, and some branches are missing from view.  Comments below only
# describe what the visible lines establish.
31 """A convenient wrapper around :func:`scipy.optimize.leastsq`.
33 TODO: look into :mod:`scipy.odr` as an alternative fitting
34 algorithm (minimizing the sum of squares of orthogonal distances,
35 vs. minimizing y distances).
40 Deflection data to be analyzed for the contact position.
42 Store any extra information useful inside your overridden
45 Rescale parameters so the guess for each is 1.0. Also rescale
46 the data so data.std() == 1.0.
51 >>> from pprint import pprint
52 >>> from Queue import Queue
55 You'll want to subclass `ModelFitter`, overriding at least
56 `.model` and potentially the parameter and scale guessing
59 >>> class LinearModel (ModelFitter):
60 ... '''Simple linear model.
62 ... Levenberg-Marquardt is not how you want to solve this problem
63 ... for real systems, but it's a simple test case.
65 ... def model(self, params):
66 ... '''A linear model.
70 ... .. math:: y = p_0 x + p_1
72 ... p = params # convenient alias
73 ... self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
74 ... return self._model_data
75 ... def guess_initial_params(self, outqueue=None):
76 ... return [float(self._data[-1] - self._data[0])/len(self._data),
78 ... def guess_scale(self, params, outqueue=None):
80 ... if params[1] == 0:
83 ... offset_scale = 0.1*self._data.std()/abs(params[1])
84 ... if offset_scale == 0: # data is completely flat
86 ... return [slope_scale, offset_scale]
87 >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
88 >>> m = LinearModel(data)
89 >>> outqueue = Queue()
90 >>> slope,offset = m.fit(outqueue=outqueue)
91 >>> info = outqueue.get(block=False)
92 >>> pprint(info) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
93 {'active fitted parameters': array([ 6.999..., -32.889...]),
94 'active parameters': array([ 6.999..., -32.889...]),
95 'convergence flag': ...,
96 'covariance matrix': array([[ 1.199...e-08, -5.993...e-06],
97 [ -5.993...e-06, 3.994...e-03]]),
98 'data scale factor': 1.0,
99 'fitted parameters': array([ 6.999..., -32.889...]),
100 'info': {'fjac': array([[...]]),
101 'fvec': array([...]),
102 'ipvt': array([1, 2]),
104 'qtf': array([...])},
105 'initial parameters': [6.992..., -33.0],
106 'message': '...relative error between two consecutive iterates is at most 0.000...',
108 'scale': [0.100..., 6.123...]}
110 We round the outputs to protect the doctest against differences in
111 machine rounding during computation. We expect the values to be close
112 to the input settings (slope 7, offset -33).
114 >>> print '%.3f' % slope
116 >>> print '%.3f' % offset
119 The offset is a bit off because the range is not a multiple of
122 We could also use rescaled parameters:
124 >>> m = LinearModel(data, rescale=True)
125 >>> outqueue = Queue()
126 >>> slope,offset = m.fit(outqueue=outqueue)
127 >>> print '%.3f' % slope
129 >>> print '%.3f' % offset
# __init__: delegates everything to set_data() so the same instance can be
# re-pointed at new data later.
132 def __init__(self, *args, **kwargs):
133 self.set_data(*args, **kwargs)
# set_data: stores the data, allocates a model buffer with matching
# shape/dtype, records the rescale flag, and chooses a data scale factor
# from the first candidate in [std, range, abs(max), 1.0] (the selection
# condition on each candidate is missing from this excerpt -- presumably
# "first nonzero"; confirm against full source).
135 def set_data(self, data, info=None, rescale=False):
137 self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
139 self._rescale = rescale
141 for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
143 self._data_scale_factor = x
146 self._data_scale_factor = 1.0
# model: abstract hook -- subclasses must override.  The stub fills the
# preallocated buffer, then unconditionally raises NotImplementedError.
148 def model(self, params):
149 p = params # convenient alias
150 self._model_data[:] = arange(len(self._data))
151 raise NotImplementedError
# guess_initial_params: subclass hook; its body is not visible in this
# excerpt (presumably raises or returns a default -- confirm).
153 def guess_initial_params(self, outqueue=None):
# guess_scale: subclass hook; the docstring below explains leastsq's `diag`
# scaling, but the method's return statement is not visible in this excerpt.
156 def guess_scale(self, params, outqueue=None):
157 """Guess the problem length scale in each parameter dimension.
161 From the :func:`scipy.optimize.leastsq` documentation, `diag`
162 (which we refer to as `scale`, sets `factor * || diag * x||`
163 as the initial step. If `x == 0`, then `factor` is used
164 instead (from `lmdif.f`_)::
167 wa3(j) = diag(j)*x(j)
171 if (delta .eq. zero) delta = factor
173 For most situations then, you don't need to do anything fancy.
174 The default scaling (if you don't set a scale) is::
176 c on the first iteration and if mode is 1, scale according
177 c to the norms of the columns of the initial jacobian.
179 (i.e. `diag(j) = acnorm(j)`, where `acnorm(j) is the norm of the `j`th column
180 of the initial Jacobian).
182 .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
# residual: data minus model, with parameters unscaled first (and the
# residual divided by the data scale factor afterwards) when rescaling is
# active.  The `if False` branch is permanently-disabled fit debugging that
# dumps data/model arrays to numbered files.  The final return statement is
# not visible in this excerpt.
186 def residual(self, params):
187 if self._rescale == True:
188 params = [p*s for p,s in zip(params, self._param_scale_factors)]
189 residual = self._data - self.model(params)
190 if False: # fit debugging
191 if not hasattr(self, '_i_'):
193 self._data.tofile('data.%d' % self._i_, sep='\n')
194 self.model(params).tofile('model.%d' % self._i_, sep='\n')
196 if self._rescale == True:
197 residual /= self._data_scale_factor
# fit: main entry point.  Guesses missing initial params/scale, optionally
# normalizes params to ~1.0 via per-parameter scale factors, runs
# scipy.optimize.leastsq with diag=scale, then undoes the rescaling on the
# result (scalar vs. sequence handled separately) and assembles an info
# dict (presumably pushed onto `outqueue`; the enqueue/return lines are not
# visible in this excerpt).
200 def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
204 initial_params : iterable or None
205 Initial parameter values for residual minimization. If
206 `None`, they are estimated from the data using
207 :meth:`guess_initial_params`.
208 scale : iterable or None
209 Parameter length scales for residual minimization. If
210 `None`, they are estimated from the data using
212 outqueue : Queue or None
213 If given, will be used to output the data and fitted model
214 for user verification.
216 Any additional arguments are passed through to `leastsq`.
218 if initial_params == None:
219 initial_params = self.guess_initial_params(outqueue)
221 scale = self.guess_scale(initial_params, outqueue)
223 assert min(scale) > 0, scale
224 if self._rescale == True:
225 self._param_scale_factors = initial_params
226 for i,s in enumerate(self._param_scale_factors):
228 self._param_scale_factors[i] = 1.0
229 active_params = [p/s for p,s in zip(initial_params,
230 self._param_scale_factors)]
232 active_params = initial_params
233 params,cov,info,mesg,ier = leastsq(
234 func=self.residual, x0=active_params, full_output=True,
235 diag=scale, **kwargs)
236 if self._rescale == True:
237 active_params = params
238 if len(initial_params) == 1: # params is a float
239 params = params * self._param_scale_factors[0]
241 params = [p*s for p,s in zip(params,
242 self._param_scale_factors)]
244 active_params = params
247 'rescaled': self._rescale,
248 'initial parameters': initial_params,
249 'active parameters': active_params,
251 'data scale factor': self._data_scale_factor,
252 'active fitted parameters': active_params,
253 'fitted parameters': params,
254 'covariance matrix': cov,
257 'convergence flag': ier,
261 # Example ODR code from the old fit.py
262 # def dist(px,py,linex,liney):
263 # distancesx=scipy.array([(px-x)**2 for x in linex])
264 # minindex=numpy.argmin(distancesx)
265 # print px, linex[0], linex[-1]
266 # return (py-liney[minindex])**2
269 # def f_wlc(params,x,T=T):
271 # wlc function for ODR fitting
276 # y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
279 # def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
281 # wlc function for ODR fitting
287 # y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
291 # realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
293 # model=scipy.odr.Model(f_wlc_plfix)
294 # o = scipy.odr.ODR(realdata, model, p0_plfix)
296 # model=scipy.odr.Model(f_wlc)
297 # o = scipy.odr.ODR(realdata, model, p0)
299 # o.set_job(fit_type=2)
301 # fit_out=[(1/i) for i in out.beta]
303 # #Calculate fit errors from output standard deviations.
304 # #We must propagate the error because we fit the *inverse* parameters!
305 # #The error = (error of the inverse)*(value**2)
307 # for sd,value in zip(out.sd_beta, fit_out):
308 # err_real=sd*(value**2)
309 # fit_errors.append(err_real)