hooke/util/fit.py

   1 # Copyright (C) 2010-2012 W. Trevor King <wking@drexel.edu>
   2 #
   3 # This file is part of Hooke.
   4 #
   5 # Hooke is free software: you can redistribute it and/or modify it under the
   6 # terms of the GNU Lesser General Public License as published by the Free
   7 # Software Foundation, either version 3 of the License, or (at your option) any
   8 # later version.
   9 #
  10 # Hooke is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
  13 # details.
  14 #
  15 # You should have received a copy of the GNU Lesser General Public License
  16 # along with Hooke.  If not, see <http://www.gnu.org/licenses/>.
  17
  18 """Provide :class:`ModelFitter` to make arbitrary model fitting easy.
  19 """
  20
  21 from numpy import arange, ndarray
  22 from scipy import __version__ as _scipy_version
  23 from scipy.optimize import leastsq
  24
  25 _strings = _scipy_version.split('.')
  26 # Don't convert third string to an integer in case of (for example) '0.7.2rc3'
  27 _SCIPY_VERSION = (int(_strings[0]), int(_strings[1]), _strings[2])
  28 del _strings
  29
  30
  31 class PoorFit (ValueError):
  32     pass
  33
  34 class ModelFitter (object):
  35     """A convenient wrapper around :func:`scipy.optimize.leastsq`.
  36
  37     TODO: look into :mod:`scipy.odr` as an alternative fitting
  38     algorithm (minimizing the sum of squares of orthogonal distances,
  39     vs. minimizing y distances).
  40
  41     Parameters
  42     ----------
  43     d_data : array_like
  44         Deflection data to be analyzed for the contact position.
  45     info :
  46         Store any extra information useful inside your overridden
  47         methods.
  48     rescale : boolean
  49         Rescale parameters so the guess for each is 1.0.  Also rescale
  50         the data so data.std() == 1.0.
  51
  52     Examples
  53     --------
  54
  55     >>> from pprint import pprint
  56     >>> from Queue import Queue
  57     >>> import numpy
  58
  59     You'll want to subclass `ModelFitter`, overriding at least
  60     `.model` and potentially the parameter and scale guessing
  61     methods.
  62
  63     >>> class LinearModel (ModelFitter):
  64     ...     '''Simple linear model.
  65     ...
  66     ...     Levenberg-Marquardt is not how you want to solve this problem
  67     ...     for real systems, but it's a simple test case.
  68     ...     '''
  69     ...     def model(self, params):
  70     ...         '''A linear model.
  71     ...
  72     ...         Notes
  73     ...         -----
  74     ...         .. math:: y = p_0 x + p_1
  75     ...         '''
  76     ...         p = params  # convenient alias
  77     ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
  78     ...         return self._model_data
  79     ...     def guess_initial_params(self, outqueue=None):
  80     ...         return [float(self._data[-1] - self._data[0])/len(self._data),
  81     ...                 self._data[0]]
  82     ...     def guess_scale(self, params, outqueue=None):
  83     ...         slope_scale = 0.1
  84     ...         if params[1] == 0:
  85     ...             offset_scale = 1
  86     ...         else:
  87     ...             offset_scale = 0.1*self._data.std()/abs(params[1])
  88     ...             if offset_scale == 0:  # data is completely flat
  89     ...                 offset_scale = 1.
  90     ...         return [slope_scale, offset_scale]
  91     >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
  92     >>> m = LinearModel(data)
  93     >>> outqueue = Queue()
  94     >>> slope,offset = m.fit(outqueue=outqueue)
  95     >>> info = outqueue.get(block=False)
  96     >>> pprint(info)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
  97     {'active fitted parameters': array([  6.999..., -32.889...]),
  98      'active parameters': array([  6.999..., -32.889...]),
  99      'convergence flag': ...,
 100      'covariance matrix': array([[  1.199...e-08,  -5.993...e-06],
 101            [ -5.993...e-06,   3.994...e-03]]),
 102      'data scale factor': 1.0,
 103      'fitted parameters': array([  6.999..., -32.889...]),
 104      'info': {'fjac': array([[...]]),
 105               'fvec': array([...]),
 106               'ipvt': array([1, 2]),
 107               'nfev': 7,
 108               'qtf': array([...])},
 109      'initial parameters': [6.992..., -33.0],
 110      'message': '...relative error between two consecutive iterates is at most 0.000...',
 111      'rescaled': False,
 112      'scale': [0.100..., 6.123...]}
 113
 114     We round the outputs to protect the doctest against differences in
 115     machine rounding during computation.  We expect the values to be close
 116     to the input settings (slope 7, offset -33).
 117
 118     >>> print '%.3f' % slope
 119     7.000
 120     >>> print '%.3f' % offset
 121     -32.890
 122
 123     The offset is a bit off because, the range is not a multiple of
 124     :math:`2\pi`.
 125
 126     We could also use rescaled parameters:
 127
 128     >>> m = LinearModel(data, rescale=True)
 129     >>> outqueue = Queue()
 130     >>> slope,offset = m.fit(outqueue=outqueue)
 131     >>> print '%.3f' % slope
 132     7.000
 133     >>> print '%.3f' % offset
 134     -32.890
 135
 136     Test single-parameter models:
 137
 138     >>> class SingleParameterModel (LinearModel):
 139     ...     '''Simple linear model.
 140     ...     '''
 141     ...     def model(self, params):
 142     ...         return super(SingleParameterModel, self).model([params[0], 0.])
 143     ...     def guess_initial_params(self, outqueue=None):
 144     ...         return super(SingleParameterModel, self
 145     ...             ).guess_initial_params(outqueue)[:1]
 146     ...     def guess_scale(self, params, outqueue=None):
 147     ...         return super(SingleParameterModel, self
 148     ...             ).guess_scale([params[0], 0.], outqueue)[:1]
 149     >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000)
 150     >>> m = SingleParameterModel(data)
 151     >>> slope, = m.fit(outqueue=outqueue)
 152     >>> print '%.3f' % slope
 153     7.000
 154     """
 155     def __init__(self, *args, **kwargs):
 156         self.set_data(*args, **kwargs)
 157
 158     def set_data(self, data, info=None, rescale=False):
 159         self._data = data
 160         self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
 161         self.info = info
 162         self._rescale = rescale
 163         if rescale == True:
 164             for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
 165                 if x != 0:
 166                     self._data_scale_factor = x
 167                     break
 168         else:
 169             self._data_scale_factor = 1.0
 170
 171     def model(self, params):
 172         p = params  # convenient alias
 173         self._model_data[:] = arange(len(self._data))
 174         raise NotImplementedError
 175
 176     def guess_initial_params(self, outqueue=None):
 177         return []
 178
 179     def guess_scale(self, params, outqueue=None):
 180         """Guess the problem length scale in each parameter dimension.
 181
 182         Notes
 183         -----
 184         From the :func:`scipy.optimize.leastsq` documentation, `diag`
 185         (which we refer to as `scale`, sets `factor * || diag * x||`
 186         as the initial step.  If `x == 0`, then `factor` is used
 187         instead (from `lmdif.f`_)::
 188
 189                       do 70 j = 1, n
 190                         wa3(j) = diag(j)*x(j)
 191                70       continue
 192                       xnorm = enorm(n,wa3)
 193                       delta = factor*xnorm
 194                       if (delta .eq. zero) delta = factor
 195
 196         For most situations then, you don't need to do anything fancy.
 197         The default scaling (if you don't set a scale) is::
 198
 199             c        on the first iteration and if mode is 1, scale according
 200             c        to the norms of the columns of the initial jacobian.
 201
 202         (i.e. `diag(j) = acnorm(j)`, where `acnorm(j) is the norm of the `j`th column
 203         of the initial Jacobian).
 204
 205         .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
 206         """
 207         return None
 208
 209     def residual(self, params):
 210         if self._rescale == True:
 211             params = [p*s for p,s in zip(params, self._param_scale_factors)]
 212         residual = self._data - self.model(params)
 213         if False:  # fit debugging
 214             if not hasattr(self, '_i_'):
 215                 self._i_ = 0
 216             self._data.tofile('data.%d' % self._i_, sep='\n')
 217             self.model(params).tofile('model.%d' % self._i_, sep='\n')
 218             self._i_ += 1
 219         if self._rescale == True:
 220             residual /= self._data_scale_factor
 221         return residual
 222
 223     def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
 224         """
 225         Parameters
 226         ----------
 227         initial_params : iterable or None
 228             Initial parameter values for residual minimization.  If
 229             `None`, they are estimated from the data using
 230             :meth:`guess_initial_params`.
 231         scale : iterable or None
 232             Parameter length scales for residual minimization.  If
 233             `None`, they are estimated from the data using
 234             :meth:`guess_scale`.
 235         outqueue : Queue or None
 236             If given, will be used to output the data and fitted model
 237             for user verification.
 238         kwargs :
 239             Any additional arguments are passed through to `leastsq`.
 240         """
 241         if initial_params == None:
 242             initial_params = self.guess_initial_params(outqueue)
 243         if scale == None:
 244             scale = self.guess_scale(initial_params, outqueue)
 245         if scale != None:
 246             assert min(scale) > 0, scale
 247         if self._rescale == True:
 248             self._param_scale_factors = initial_params
 249             for i,s in enumerate(self._param_scale_factors):
 250                 if s == 0:
 251                     self._param_scale_factors[i] = 1.0
 252             active_params = [p/s for p,s in zip(initial_params,
 253                                                 self._param_scale_factors)]
 254         else:
 255             active_params = initial_params
 256         params,cov,info,mesg,ier = leastsq(
 257             func=self.residual, x0=active_params, full_output=True,
 258             diag=scale, **kwargs)
 259         if len(initial_params) == 1 and _SCIPY_VERSION < (0, 8, '0'):
 260             # params is a float for scipy < 0.8.0.  Convert to list.
 261             params = [params]
 262         if self._rescale == True:
 263             active_params = params
 264             params = [p*s for p,s in zip(params,
 265                                          self._param_scale_factors)]
 266         else:
 267             active_params = params
 268         if outqueue != None:
 269             outqueue.put({
 270                     'rescaled': self._rescale,
 271                     'initial parameters': initial_params,
 272                     'active parameters': active_params,
 273                     'scale': scale,
 274                     'data scale factor': self._data_scale_factor,
 275                     'active fitted parameters': active_params,
 276                     'fitted parameters': params,
 277                     'covariance matrix': cov,
 278                     'info': info,
 279                     'message': mesg,
 280                     'convergence flag': ier,
 281                     })
 282         return params
 283
 284 # Example ORD code from the old fit.py
 285 #        def dist(px,py,linex,liney):
 286 #            distancesx=scipy.array([(px-x)**2 for x in linex])
 287 #            minindex=numpy.argmin(distancesx)
 288 #            print px, linex[0], linex[-1]
 289 #            return (py-liney[minindex])**2
 290 #
 291 #
 292 #        def f_wlc(params,x,T=T):
 293 #            '''
 294 #            wlc function for ODR fitting
 295 #            '''
 296 #            lambd,pii=params
 297 #            Kb=(1.38065e-23)
 298 #            therm=Kb*T
 299 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 300 #            return y
 301 #
 302 #        def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
 303 #            '''
 304 #            wlc function for ODR fitting
 305 #            '''
 306 #            lambd=params
 307 #            pii=1/pl_value
 308 #            Kb=(1.38065e-23)
 309 #            therm=Kb*T
 310 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 311 #            return y
 312 #
 313 #        #make the ODR fit
 314 #        realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
 315 #        if pl_value:
 316 #            model=scipy.odr.Model(f_wlc_plfix)
 317 #            o = scipy.odr.ODR(realdata, model, p0_plfix)
 318 #        else:
 319 #            model=scipy.odr.Model(f_wlc)
 320 #            o = scipy.odr.ODR(realdata, model, p0)
 321 #
 322 #        o.set_job(fit_type=2)
 323 #        out=o.run()
 324 #        fit_out=[(1/i) for i in out.beta]
 325 #
 326 #        #Calculate fit errors from output standard deviations.
 327 #        #We must propagate the error because we fit the *inverse* parameters!
 328 #        #The error = (error of the inverse)*(value**2)
 329 #        fit_errors=[]
 330 #        for sd,value in zip(out.sd_beta, fit_out):
 331 #            err_real=sd*(value**2)
 332 #            fit_errors.append(err_real)
 333