# Copyright (C) 2011-2012 W. Trevor King <wking@tremily.us>
#
# This file is part of pypid.
#
# pypid is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# pypid is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# pypid.  If not, see <http://www.gnu.org/licenses/>.

"""Provide :class:`ModelFitter` to make arbitrary model fitting easy.
"""

from numpy import arange, ndarray
from scipy import __version__ as _scipy_version
from scipy.optimize import leastsq

_strings = _scipy_version.split('.')
# Don't convert the third component to an integer in case of (for example) '0.7.2rc3'
_SCIPY_VERSION = (int(_strings[0]), int(_strings[1]), _strings[2])
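# For example, '0.7.2rc3' parses to (0, 7, '2rc3') and '0.10.1' to (0, 10, '1').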


class PoorFit (ValueError):
    pass

class ModelFitter (object):
    """A convenient wrapper around :func:`scipy.optimize.leastsq`.

    TODO: look into :mod:`scipy.odr` as an alternative fitting
    algorithm (minimizing the sum of squares of orthogonal distances,
    vs. minimizing y distances).

    Parameters
    ----------
    data : array_like
        The data to be fit.
    info :
        Store any extra information useful inside your overridden
        methods.
    rescale : boolean
        Rescale parameters so the guess for each is 1.0.  Also rescale
        the data so data.std() == 1.0.

    Examples
    --------

    >>> from pprint import pprint
    >>> from Queue import Queue
    >>> import numpy

    You'll want to subclass `ModelFitter`, overriding at least
    `.model` and potentially the parameter and scale guessing
    methods.

    >>> class LinearModel (ModelFitter):
    ...     '''Simple linear model.
    ...
    ...     Levenberg-Marquardt is not how you want to solve this problem
    ...     for real systems, but it's a simple test case.
    ...     '''
    ...     def model(self, params):
    ...         '''A linear model.
    ...
    ...         Notes
    ...         -----
    ...         .. math:: y = p_0 x + p_1
    ...         '''
    ...         p = params  # convenient alias
    ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
    ...         return self._model_data
    ...     def guess_initial_params(self, outqueue=None):
    ...         return [float(self._data[-1] - self._data[0])/len(self._data),
    ...                 self._data[0]]
    ...     def guess_scale(self, params, outqueue=None):
    ...         slope_scale = 0.1
    ...         if params[1] == 0:
    ...             offset_scale = 1
    ...         else:
    ...             offset_scale = 0.1*self._data.std()/abs(params[1])
    ...         if offset_scale == 0:  # data is completely flat
    ...             offset_scale = 1.
    ...         return [slope_scale, offset_scale]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
    >>> m = LinearModel(data)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> info = outqueue.get(block=False)
    >>> pprint(info)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
    {'active fitted parameters': array([  6.999..., -32.889...]),
     'active parameters': array([  6.999..., -32.889...]),
     'convergence flag': ...,
     'covariance matrix': array([[  1.199...e-08,  -5.993...e-06],
                                 [ -5.993...e-06,   3.994...e-03]]),
     'data scale factor': 1.0,
     'fitted parameters': array([  6.999..., -32.889...]),
     'info': {'fjac': array([[...]]),
              'fvec': array([...]),
              'ipvt': array([1, 2]),
              'nfev': ...,
              'qtf': array([...])},
     'initial parameters': [6.992..., -33.0],
     'message': '...relative error between two consecutive iterates is at most 0.000...',
     'rescaled': False,
     'scale': [0.100..., 6.123...]}

    We round the outputs to protect the doctest against differences in
    machine rounding during computation.  We expect the values to be close
    to the input settings (slope 7, offset -33).

    >>> print '%.3f' % slope
    7.000
    >>> print '%.3f' % offset
    -32.890

    The offset is a bit off because the range is not a multiple of
    :math:`2\pi`.

    We could also use rescaled parameters:

    >>> m = LinearModel(data, rescale=True)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> print '%.3f' % slope
    7.000
    >>> print '%.3f' % offset
    -32.890

    Test single-parameter models:

    >>> class SingleParameterModel (LinearModel):
    ...     '''Simple linear model with a fixed zero offset.
    ...     '''
    ...     def model(self, params):
    ...         return super(SingleParameterModel, self).model([params[0], 0.])
    ...     def guess_initial_params(self, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_initial_params(outqueue)[:1]
    ...     def guess_scale(self, params, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_scale([params[0], 0.], outqueue)[:1]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000)
    >>> m = SingleParameterModel(data)
    >>> slope, = m.fit(outqueue=outqueue)
    >>> print '%.3f' % slope
    6.961
    """
    def __init__(self, *args, **kwargs):
        self.set_data(*args, **kwargs)

    def set_data(self, data, info=None, rescale=False):
        self._data = data
        self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
        self.info = info
        self._rescale = rescale
        if rescale:
            # Use the first nonzero candidate as the data scale factor.
            for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
                if x != 0:
                    self._data_scale_factor = x
                    break
        else:
            self._data_scale_factor = 1.0

    def model(self, params):
        """Write the model data for `params` into `self._model_data` and return it."""
        p = params  # convenient alias
        self._model_data[:] = arange(len(self._data))
        raise NotImplementedError

    def guess_initial_params(self, outqueue=None):
        """Guess initial fitting parameters (override in subclasses)."""
        return []

    def guess_scale(self, params, outqueue=None):
        """Guess the problem length scale in each parameter dimension.

        Returning `None` (the default) lets `leastsq` pick its own scaling.

        Notes
        -----
        From the :func:`scipy.optimize.leastsq` documentation, `diag`
        (which we refer to as `scale`) sets `factor * || diag * x||`
        as the initial step.  If `x == 0`, then `factor` is used
        instead (from `lmdif.f`_)::

            do 70 j = 1, n
               wa3(j) = diag(j)*x(j)
            70 continue
            xnorm = enorm(n,wa3)
            delta = factor*xnorm
            if (delta .eq. zero) delta = factor

        For most situations then, you don't need to do anything fancy.
        The default scaling (if you don't set a scale) is::

            c        on the first iteration and if mode is 1, scale according
            c        to the norms of the columns of the initial jacobian.

        (i.e. `diag(j) = acnorm(j)`, where `acnorm(j)` is the norm of the
        `j`-th column of the initial Jacobian).

        .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
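
        One simple approach (a sketch; the 10% factor is arbitrary) is to
        step each parameter by a fraction of its guessed magnitude, falling
        back to 1.0 for zero-valued guesses::

            def guess_scale(self, params, outqueue=None):
                return [0.1 * (abs(p) or 1.0) for p in params]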
        """
        return None

    def residual(self, params):
        if self._rescale:
            params = [p*s for p,s in zip(params, self._param_scale_factors)]
        residual = self._data - self.model(params)
        if False:  # fit debugging
            if not hasattr(self, '_i_'):
                self._i_ = 0
            self._data.tofile('data.%d' % self._i_, sep='\n')
            self.model(params).tofile('model.%d' % self._i_, sep='\n')
            self._i_ += 1
        if self._rescale:
            residual /= self._data_scale_factor
        return residual

    def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
        """Fit the model to the stored data and return the fitted parameters.

        Parameters
        ----------
        initial_params : iterable or None
            Initial parameter values for residual minimization.  If
            `None`, they are estimated from the data using
            :meth:`guess_initial_params`.
        scale : iterable or None
            Parameter length scales for residual minimization.  If
            `None`, they are estimated from the data using
            :meth:`guess_scale`.
        outqueue : Queue or None
            If given, a dictionary of fit information (initial and fitted
            parameters, scale, covariance, and the raw `leastsq` output)
            is pushed onto the queue for user verification.
        kwargs :
            Any additional arguments are passed through to `leastsq`.
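
        For example (a sketch; `MyModel` stands in for any concrete
        subclass of `ModelFitter`)::

            m = MyModel(data)
            params = m.fit()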
        """
        if initial_params is None:
            initial_params = self.guess_initial_params(outqueue)
        if scale is None:
            scale = self.guess_scale(initial_params, outqueue)
        if scale is not None:
            assert min(scale) > 0, scale
        if self._rescale:
            # Copy so the zero-scale fixups below don't alter initial_params.
            self._param_scale_factors = list(initial_params)
            for i,s in enumerate(self._param_scale_factors):
                if s == 0:
                    self._param_scale_factors[i] = 1.0
            active_params = [p/s for p,s in zip(initial_params,
                                                self._param_scale_factors)]
        else:
            active_params = initial_params
        params, cov, info, mesg, ier = leastsq(
            func=self.residual, x0=active_params, full_output=True,
            diag=scale, **kwargs)
        if len(initial_params) == 1 and _SCIPY_VERSION < (0, 8, '0'):
            # params is a float for scipy < 0.8.0.  Convert to list.
            params = [params]
        if self._rescale:
            active_params = params
            params = [p*s for p,s in zip(params,
                                         self._param_scale_factors)]
        else:
            active_params = params
        if outqueue is not None:
            outqueue.put({
                'rescaled': self._rescale,
                'initial parameters': initial_params,
                'active parameters': active_params,
                'scale': scale,
                'data scale factor': self._data_scale_factor,
                'active fitted parameters': active_params,
                'fitted parameters': params,
                'covariance matrix': cov,
                'info': info,
                'message': mesg,
                'convergence flag': ier,
                })
        return params


# Example ODR code from the old fit.py
# def dist(px,py,linex,liney):
#     distancesx=scipy.array([(px-x)**2 for x in linex])
#     minindex=numpy.argmin(distancesx)
#     print px, linex[0], linex[-1]
#     return (py-liney[minindex])**2
#
# def f_wlc(params,x,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#     return y
#
# def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#     return y
#
# realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
# if pl_value is not None:
#     model=scipy.odr.Model(f_wlc_plfix)
#     o = scipy.odr.ODR(realdata, model, p0_plfix)
# else:
#     model=scipy.odr.Model(f_wlc)
#     o = scipy.odr.ODR(realdata, model, p0)
# o.set_job(fit_type=2)
# out=o.run()
# fit_out=[(1/i) for i in out.beta]
#
# # Calculate fit errors from output standard deviations.
# # We must propagate the error because we fit the *inverse* parameters!
# # The error = (error of the inverse)*(value**2)
# fit_errors=[]
# for sd,value in zip(out.sd_beta, fit_out):
#     err_real=sd*(value**2)
#     fit_errors.append(err_real)
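
# A rough sketch (names here are illustrative, not part of pypid) of how the
# scipy.odr TODO in `ModelFitter` might look with the current `scipy.odr`
# API: orthogonal-distance regression instead of `leastsq`'s y-distance
# minimization.
#
#     import numpy
#     import scipy.odr
#
#     def linear(beta, x):
#         # scipy.odr model functions take (beta, x) and return y
#         return beta[0]*x + beta[1]
#
#     x = numpy.arange(1000.)
#     y = 20*numpy.sin(x) + 7.*x - 33.0
#     guess = [(y[-1] - y[0])/len(y), y[0]]   # same guess as LinearModel above
#     model = scipy.odr.Model(linear)
#     data = scipy.odr.RealData(x, y)
#     odr = scipy.odr.ODR(data, model, beta0=guess)
#     output = odr.run()
#     slope, offset = output.beta             # fitted parameters
#     slope_err, offset_err = output.sd_beta  # their standard deviations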