hooke/util/fit.py

   1 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
   2 #
   3 # This file is part of Hooke.
   4 #
   5 # Hooke is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Lesser General Public License as
   7 # published by the Free Software Foundation, either version 3 of the
   8 # License, or (at your option) any later version.
   9 #
  10 # Hooke is distributed in the hope that it will be useful, but WITHOUT
  11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
  13 # Public License for more details.
  14 #
  15 # You should have received a copy of the GNU Lesser General Public
  16 # License along with Hooke.  If not, see
  17 # <http://www.gnu.org/licenses/>.
  18
  19 """Provide :class:`ModelFitter` to make arbitrary model fitting easy.
  20 """
  21
  22 from numpy import arange, ndarray
  23 from scipy.optimize import leastsq
  24 import scipy.optimize
  25
  26
  27 class PoorFit (ValueError):
  28     pass
  29
  30 class ModelFitter (object):
  31     """A convenient wrapper around :func:`scipy.optimize.leastsq`.
  32
  33     TODO: look into :mod:`scipy.odr` as an alternative fitting
  34     algorithm (minimizing the sum of squares of orthogonal distances,
  35     vs. minimizing y distances).
  36
  37     Parameters
  38     ----------
  39     d_data : array_like
  40         Deflection data to be analyzed for the contact position.
  41     info :
  42         Store any extra information useful inside your overridden
  43         methods.
  44     rescale : boolean
  45         Rescale parameters so the guess for each is 1.0.  Also rescale
  46         the data so data.std() == 1.0.
  47
  48     Examples
  49     --------
  50
  51     >>> from pprint import pprint
  52     >>> from Queue import Queue
  53     >>> import numpy
  54
  55     You'll want to subclass `ModelFitter`, overriding at least
  56     `.model` and potentially the parameter and scale guessing
  57     methods.
  58
  59     >>> class LinearModel (ModelFitter):
  60     ...     '''Simple linear model.
  61     ...
  62     ...     Levenberg-Marquardt is not how you want to solve this problem
  63     ...     for real systems, but it's a simple test case.
  64     ...     '''
  65     ...     def model(self, params):
  66     ...         '''A linear model.
  67     ...
  68     ...         Notes
  69     ...         -----
  70     ...         .. math:: y = p_0 x + p_1
  71     ...         '''
  72     ...         p = params  # convenient alias
  73     ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
  74     ...         return self._model_data
  75     ...     def guess_initial_params(self, outqueue=None):
  76     ...         return [float(self._data[-1] - self._data[0])/len(self._data),
  77     ...                 self._data[0]]
  78     ...     def guess_scale(self, params, outqueue=None):
  79     ...         slope_scale = 0.1
  80     ...         if params[1] == 0:
  81     ...             offset_scale = 1
  82     ...         else:
  83     ...             offset_scale = 0.1*self._data.std()/abs(params[1])
  84     ...             if offset_scale == 0:  # data is completely flat
  85     ...                 offset_scale = 1.
  86     ...         return [slope_scale, offset_scale]
  87     >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
  88     >>> m = LinearModel(data)
  89     >>> outqueue = Queue()
  90     >>> slope,offset = m.fit(outqueue=outqueue)
  91     >>> info = outqueue.get(block=False)
  92     >>> pprint(info)  # doctest: +ELLIPSIS, +REPORT_UDIFF
  93     {'active fitted parameters': array([  6.999..., -32.889...]),
  94      'active parameters': array([  6.999..., -32.889...]),
  95      'convergence flag': 2,
  96      'covariance matrix': array([[  1.199...e-08,  -5.993...e-06],
  97            [ -5.993...e-06,   3.994...e-03]]),
  98      'data scale factor': 1.0,
  99      'fitted parameters': array([  6.999..., -32.889...]),
 100      'info': {'fjac': array([[...]]),
 101               'fvec': array([...]),
 102               'ipvt': array([1, 2]),
 103               'nfev': 7,
 104               'qtf': array([  2.851...e-07,   1.992...e-06])},
 105      'initial parameters': [6.992..., -33.0],
 106      'message': 'The relative error between two consecutive iterates is at most 0.000...',
 107      'rescaled': False,
 108      'scale': [0.100..., 6.123...]}
 109
 110     We round the outputs to protect the doctest against differences in
 111     machine rounding during computation.  We expect the values to be close
 112     to the input settings (slope 7, offset -33).
 113
 114     >>> print '%.3f' % slope
 115     7.000
 116     >>> print '%.3f' % offset
 117     -32.890
 118
 119     The offset is a bit off because, the range is not a multiple of
 120     :math:`2\pi`.
 121
 122     We could also use rescaled parameters:
 123
 124     >>> m = LinearModel(data, rescale=True)
 125     >>> outqueue = Queue()
 126     >>> slope,offset = m.fit(outqueue=outqueue)
 127     >>> print '%.3f' % slope
 128     7.000
 129     >>> print '%.3f' % offset
 130     -32.890
 131     """
 132     def __init__(self, *args, **kwargs):
 133         self.set_data(*args, **kwargs)
 134
 135     def set_data(self, data, info=None, rescale=False):
 136         self._data = data
 137         self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
 138         self.info = info
 139         self._rescale = rescale
 140         if rescale == True:
 141             for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
 142                 if x != 0:
 143                     self._data_scale_factor = x
 144                     break
 145         else:
 146             self._data_scale_factor = 1.0
 147
 148     def model(self, params):
 149         p = params  # convenient alias
 150         self._model_data[:] = arange(len(self._data))
 151         raise NotImplementedError
 152
 153     def guess_initial_params(self, outqueue=None):
 154         return []
 155
 156     def guess_scale(self, params, outqueue=None):
 157         """Guess the problem length scale in each parameter dimension.
 158
 159         Notes
 160         -----
 161         From the :func:`scipy.optimize.leastsq` documentation, `diag`
 162         (which we refer to as `scale`, sets `factor * || diag * x||`
 163         as the initial step.  If `x == 0`, then `factor` is used
 164         instead (from `lmdif.f`_)::
 165
 166                       do 70 j = 1, n
 167                         wa3(j) = diag(j)*x(j)
 168                70       continue
 169                       xnorm = enorm(n,wa3)
 170                       delta = factor*xnorm
 171                       if (delta .eq. zero) delta = factor
 172
 173         For most situations then, you don't need to do anything fancy.
 174         The default scaling (if you don't set a scale) is::
 175
 176             c        on the first iteration and if mode is 1, scale according
 177             c        to the norms of the columns of the initial jacobian.
 178
 179         (i.e. `diag(j) = acnorm(j)`, where `acnorm(j) is the norm of the `j`th column
 180         of the initial Jacobian).
 181
 182         .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
 183         """
 184         return None
 185
 186     def residual(self, params):
 187         if self._rescale == True:
 188             params = [p*s for p,s in zip(params, self._param_scale_factors)]
 189         residual = self._data - self.model(params)
 190         if False:  # fit debugging
 191             if not hasattr(self, '_i_'):
 192                 self._i_ = 0
 193             self._data.tofile('data.%d' % self._i_, sep='\n')
 194             self.model(params).tofile('model.%d' % self._i_, sep='\n')
 195             self._i_ += 1
 196         if self._rescale == True:
 197             residual /= self._data_scale_factor
 198         return residual
 199
 200     def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
 201         """
 202         Parameters
 203         ----------
 204         initial_params : iterable or None
 205             Initial parameter values for residual minimization.  If
 206             `None`, they are estimated from the data using
 207             :meth:`guess_initial_params`.
 208         scale : iterable or None
 209             Parameter length scales for residual minimization.  If
 210             `None`, they are estimated from the data using
 211             :meth:`guess_scale`.
 212         outqueue : Queue or None
 213             If given, will be used to output the data and fitted model
 214             for user verification.
 215         kwargs :
 216             Any additional arguments are passed through to `leastsq`.
 217         """
 218         if initial_params == None:
 219             initial_params = self.guess_initial_params(outqueue)
 220         if scale == None:
 221             scale = self.guess_scale(initial_params, outqueue)
 222         if scale != None:
 223             assert min(scale) > 0, scale
 224         if self._rescale == True:
 225             self._param_scale_factors = initial_params
 226             for i,s in enumerate(self._param_scale_factors):
 227                 if s == 0:
 228                     self._param_scale_factors[i] = 1.0
 229             active_params = [p/s for p,s in zip(initial_params,
 230                                                 self._param_scale_factors)]
 231         else:
 232             active_params = initial_params
 233         params,cov,info,mesg,ier = leastsq(
 234             func=self.residual, x0=active_params, full_output=True,
 235             diag=scale, **kwargs)
 236         if self._rescale == True:
 237             active_params = params
 238             if len(initial_params) == 1:  # params is a float
 239                 params = params * self._param_scale_factors[0]
 240             else:
 241                 params = [p*s for p,s in zip(params,
 242                                              self._param_scale_factors)]
 243         else:
 244             active_params = params
 245         if outqueue != None:
 246             outqueue.put({
 247                     'rescaled': self._rescale,
 248                     'initial parameters': initial_params,
 249                     'active parameters': active_params,
 250                     'scale': scale,
 251                     'data scale factor': self._data_scale_factor,
 252                     'active fitted parameters': active_params,
 253                     'fitted parameters': params,
 254                     'covariance matrix': cov,
 255                     'info': info,
 256                     'message': mesg,
 257                     'convergence flag': ier,
 258                     })
 259         return params
 260
 261 # Example ORD code from the old fit.py
 262 #        def dist(px,py,linex,liney):
 263 #            distancesx=scipy.array([(px-x)**2 for x in linex])
 264 #            minindex=numpy.argmin(distancesx)
 265 #            print px, linex[0], linex[-1]
 266 #            return (py-liney[minindex])**2
 267 #
 268 #
 269 #        def f_wlc(params,x,T=T):
 270 #            '''
 271 #            wlc function for ODR fitting
 272 #            '''
 273 #            lambd,pii=params
 274 #            Kb=(1.38065e-23)
 275 #            therm=Kb*T
 276 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 277 #            return y
 278 #
 279 #        def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
 280 #            '''
 281 #            wlc function for ODR fitting
 282 #            '''
 283 #            lambd=params
 284 #            pii=1/pl_value
 285 #            Kb=(1.38065e-23)
 286 #            therm=Kb*T
 287 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 288 #            return y
 289 #
 290 #        #make the ODR fit
 291 #        realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
 292 #        if pl_value:
 293 #            model=scipy.odr.Model(f_wlc_plfix)
 294 #            o = scipy.odr.ODR(realdata, model, p0_plfix)
 295 #        else:
 296 #            model=scipy.odr.Model(f_wlc)
 297 #            o = scipy.odr.ODR(realdata, model, p0)
 298 #
 299 #        o.set_job(fit_type=2)
 300 #        out=o.run()
 301 #        fit_out=[(1/i) for i in out.beta]
 302 #
 303 #        #Calculate fit errors from output standard deviations.
 304 #        #We must propagate the error because we fit the *inverse* parameters!
 305 #        #The error = (error of the inverse)*(value**2)
 306 #        fit_errors=[]
 307 #        for sd,value in zip(out.sd_beta, fit_out):
 308 #            err_real=sd*(value**2)
 309 #            fit_errors.append(err_real)
 310