hooke/util/fit.py

   1 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
   2 #
   3 # This file is part of Hooke.
   4 #
   5 # Hooke is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Lesser General Public License as
   7 # published by the Free Software Foundation, either version 3 of the
   8 # License, or (at your option) any later version.
   9 #
  10 # Hooke is distributed in the hope that it will be useful, but WITHOUT
  11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
  13 # Public License for more details.
  14 #
  15 # You should have received a copy of the GNU Lesser General Public
  16 # License along with Hooke.  If not, see
  17 # <http://www.gnu.org/licenses/>.
  18
  19 """Provide :class:`ModelFitter` to make arbitrary model fitting easy.
  20 """
  21
  22 from numpy import arange, ndarray
  23 from scipy.optimize import leastsq
  24 import scipy.optimize
  25
  26
  27 class PoorFit (ValueError):
  28     pass
  29
  30 class ModelFitter (object):
  31     """A convenient wrapper around :func:`scipy.optimize.leastsq`.
  32
  33     TODO: look into :mod:`scipy.odr` as an alternative fitting
  34     algorithm (minimizing the sum of squares of orthogonal distances,
  35     vs. minimizing y distances).
  36
  37     Parameters
  38     ----------
  39     d_data : array_like
  40         Deflection data to be analyzed for the contact position.
  41     info :
  42         Store any extra information useful inside your overridden
  43         methods.
  44     rescale : boolean
  45         Rescale parameters so the scale of each is 1.0.  Also rescale
  46         the data so data.std() == 1.0.
  47
  48     Examples
  49     --------
  50
  51     >>> from pprint import pprint
  52     >>> from Queue import Queue
  53     >>> import numpy
  54
  55     You'll want to subclass `ModelFitter`, overriding at least
  56     `.model` and potentially the parameter and scale guessing
  57     methods.
  58
  59     >>> class LinearModel (ModelFitter):
  60     ...     '''Simple linear model.
  61     ...
  62     ...     Levenberg-Marquardt is not how you want to solve this problem
  63     ...     for real systems, but it's a simple test case.
  64     ...     '''
  65     ...     def model(self, params):
  66     ...         '''A linear model.
  67     ...
  68     ...         Notes
  69     ...         -----
  70     ...         .. math:: y = p_0 x + p_1
  71     ...         '''
  72     ...         p = params  # convenient alias
  73     ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
  74     ...         return self._model_data
  75     ...     def guess_initial_params(self, outqueue=None):
  76     ...         return [float(self._data[-1] - self._data[0])/len(self._data),
  77     ...                 self._data[0]]
  78     ...     def guess_scale(self, params, outqueue=None):
  79     ...         slope_scale = params[0]/10.
  80     ...         if slope_scale == 0:  # data is expected to be flat
  81     ...             slope_scale = float(self._data.max()-self._data.min())/len(self._data)
  82     ...             if slope_scale == 0:  # data is completely flat
  83     ...                 slope_scale = 1.
  84     ...         offset_scale = self._data.std()/10.0
  85     ...         if offset_scale == 0:  # data is completely flat
  86     ...             offset_scale = 1.
  87     ...         return [slope_scale, offset_scale]
  88     >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
  89     >>> m = LinearModel(data)
  90     >>> outqueue = Queue()
  91     >>> slope,offset = m.fit(outqueue=outqueue)
  92     >>> info = outqueue.get(block=False)
  93     >>> pprint(info)  # doctest: +ELLIPSIS, +REPORT_UDIFF
  94     {'active fitted parameters': array([  6.999..., -32.889...]),
  95      'active parameters': array([  6.999..., -32.889...]),
  96      'active scale': [0.699..., 202.071...],
  97      'convergence flag': 2,
  98      'covariance matrix': array([[  1.199...e-08,  -5.993...e-06],
  99            [ -5.993...e-06,   3.994...e-03]]),
 100      'data scale factor': 1.0,
 101      'fitted parameters': array([  6.999..., -32.889...]),
 102      'info': {'fjac': array([[...]]),
 103               'fvec': array([...]),
 104               'ipvt': array([1, 2]),
 105               'nfev': 7,
 106               'qtf': array([  2.851...e-07,   1.992...e-06])},
 107      'initial parameters': [6.992..., -33.0],
 108      'message': 'The relative error between two consecutive iterates is at most 0.000...',
 109      'rescaled': False,
 110      'scale': [0.699..., 202.071...]}
 111
 112     We round the outputs to protect the doctest against differences in
 113     machine rounding during computation.  We expect the values to be close
 114     to the input settings (slope 7, offset -33).
 115
 116     >>> print '%.3f' % slope
 117     7.000
 118     >>> print '%.3f' % offset
 119     -32.890
 120
 121     The offset is a bit off because, the range is not a multiple of
 122     :math:`2\pi`.
 123
 124     We could also use rescaled parameters:
 125
 126     >>> m = LinearModel(data, rescale=True)
 127     >>> outqueue = Queue()
 128     >>> slope,offset = m.fit(outqueue=outqueue)
 129     >>> print '%.3f' % slope
 130     7.000
 131     >>> print '%.3f' % offset
 132     -32.890
 133     """
 134     def __init__(self, *args, **kwargs):
 135         self.set_data(*args, **kwargs)
 136
 137     def set_data(self, data, info=None, rescale=False):
 138         self._data = data
 139         self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
 140         self.info = info
 141         self._rescale = rescale
 142         if rescale == True:
 143             for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
 144                 if x != 0:
 145                     self._data_scale_factor = x
 146                     break
 147         else:
 148             self._data_scale_factor = 1.0
 149
 150     def model(self, params):
 151         p = params  # convenient alias
 152         self._model_data[:] = arange(len(self._data))
 153         raise NotImplementedError
 154
 155     def guess_initial_params(self, outqueue=None):
 156         return []
 157
 158     def guess_scale(self, params, outqueue=None):
 159         return None
 160
 161     def residual(self, params):
 162         if self._rescale == True:
 163             params = [p*s for p,s in zip(params, self._param_scale_factors)]
 164         residual = self._data - self.model(params)
 165         if self._rescale == True:
 166             residual /= self._data_scale_factor
 167         return residual
 168
 169     def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
 170         """
 171         Parameters
 172         ----------
 173         initial_params : iterable or None
 174             Initial parameter values for residual minimization.  If
 175             `None`, they are estimated from the data using
 176             :meth:`guess_initial_params`.
 177         scale : iterable or None
 178             Parameter length scales for residual minimization.  If
 179             `None`, they are estimated from the data using
 180             :meth:`guess_scale`.
 181         outqueue : Queue or None
 182             If given, will be used to output the data and fitted model
 183             for user verification.
 184         kwargs :
 185             Any additional arguments are passed through to `leastsq`.
 186         """
 187         if initial_params == None:
 188             initial_params = self.guess_initial_params(outqueue)
 189         if scale == None:
 190             scale = self.guess_scale(initial_params, outqueue)
 191         assert min(scale) > 0, scale
 192         if self._rescale == True:
 193             self._param_scale_factors = scale
 194             active_scale = [1.0 for s in scale]
 195             active_params = [p/s for p,s in zip(initial_params,
 196                                                 self._param_scale_factors)]
 197         else:
 198             active_scale = scale
 199             active_params = initial_params
 200         params,cov,info,mesg,ier = leastsq(
 201             func=self.residual, x0=active_params, full_output=True,
 202             diag=active_scale, **kwargs)
 203         if self._rescale == True:
 204             active_params = params
 205             if len(initial_params) == 1:  # params is a float
 206                 params = params * self._param_scale_factors[0]
 207             else:
 208                 params = [p*s for p,s in zip(params,
 209                                              self._param_scale_factors)]
 210         else:
 211             active_params = params
 212         if outqueue != None:
 213             outqueue.put({
 214                     'rescaled': self._rescale,
 215                     'initial parameters': initial_params,
 216                     'active parameters': active_params,
 217                     'scale': scale,
 218                     'active scale': active_scale,
 219                     'data scale factor': self._data_scale_factor,
 220                     'active fitted parameters': active_params,
 221                     'fitted parameters': params,
 222                     'covariance matrix': cov,
 223                     'info': info,
 224                     'message': mesg,
 225                     'convergence flag': ier,
 226                     })
 227         return params
 228
 229 # Example ORD code from the old fit.py
 230 #        def dist(px,py,linex,liney):
 231 #            distancesx=scipy.array([(px-x)**2 for x in linex])
 232 #            minindex=numpy.argmin(distancesx)
 233 #            print px, linex[0], linex[-1]
 234 #            return (py-liney[minindex])**2
 235 #
 236 #
 237 #        def f_wlc(params,x,T=T):
 238 #            '''
 239 #            wlc function for ODR fitting
 240 #            '''
 241 #            lambd,pii=params
 242 #            Kb=(1.38065e-23)
 243 #            therm=Kb*T
 244 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 245 #            return y
 246 #
 247 #        def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
 248 #            '''
 249 #            wlc function for ODR fitting
 250 #            '''
 251 #            lambd=params
 252 #            pii=1/pl_value
 253 #            Kb=(1.38065e-23)
 254 #            therm=Kb*T
 255 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 256 #            return y
 257 #
 258 #        #make the ODR fit
 259 #        realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
 260 #        if pl_value:
 261 #            model=scipy.odr.Model(f_wlc_plfix)
 262 #            o = scipy.odr.ODR(realdata, model, p0_plfix)
 263 #        else:
 264 #            model=scipy.odr.Model(f_wlc)
 265 #            o = scipy.odr.ODR(realdata, model, p0)
 266 #
 267 #        o.set_job(fit_type=2)
 268 #        out=o.run()
 269 #        fit_out=[(1/i) for i in out.beta]
 270 #
 271 #        #Calculate fit errors from output standard deviations.
 272 #        #We must propagate the error because we fit the *inverse* parameters!
 273 #        #The error = (error of the inverse)*(value**2)
 274 #        fit_errors=[]
 275 #        for sd,value in zip(out.sd_beta, fit_out):
 276 #            err_real=sd*(value**2)
 277 #            fit_errors.append(err_real)
 278