# Copyright (C) 2010-2011 W. Trevor King <wking@drexel.edu>
#
# This file is part of Hooke.
#
# Hooke is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Hooke is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
# Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with Hooke.  If not, see
# <http://www.gnu.org/licenses/>.
"""Provide :class:`ModelFitter` to make arbitrary model fitting easy."""
22 from numpy import arange, ndarray
23 from scipy import __version__ as _scipy_version
24 from scipy.optimize import leastsq
26 _strings = _scipy_version.split('.')
27 # Don't convert third string to an integer in case of (for example) '0.7.2rc3'
28 _SCIPY_VERSION = (int(_strings[0]), int(_strings[1]), _strings[2])
class PoorFit (ValueError):
    """Raised when a fit is judged unacceptably poor."""
    pass
class ModelFitter (object):
    """A convenient wrapper around :func:`scipy.optimize.leastsq`.

    TODO: look into :mod:`scipy.odr` as an alternative fitting
    algorithm (minimizing the sum of squares of orthogonal distances,
    vs. minimizing y distances).

    Parameters
    ----------
    d_data : array_like
        Deflection data to be analyzed for the contact position.
    info :
        Store any extra information useful inside your overridden
        methods.
    rescale : boolean
        Rescale parameters so the guess for each is 1.0.  Also rescale
        the data so data.std() == 1.0.

    Examples
    --------

    >>> from pprint import pprint
    >>> from Queue import Queue
    >>> import numpy

    You'll want to subclass `ModelFitter`, overriding at least
    `.model` and potentially the parameter and scale guessing
    methods.

    >>> class LinearModel (ModelFitter):
    ...     '''Simple linear model.
    ...
    ...     Levenberg-Marquardt is not how you want to solve this problem
    ...     for real systems, but it's a simple test case.
    ...     '''
    ...     def model(self, params):
    ...         '''A linear model.
    ...
    ...         Notes
    ...         -----
    ...         .. math:: y = p_0 x + p_1
    ...         '''
    ...         p = params  # convenient alias
    ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
    ...         return self._model_data
    ...     def guess_initial_params(self, outqueue=None):
    ...         return [float(self._data[-1] - self._data[0])/len(self._data),
    ...                 self._data[0]]
    ...     def guess_scale(self, params, outqueue=None):
    ...         slope_scale = 0.1
    ...         if params[1] == 0:
    ...             offset_scale = 1
    ...         else:
    ...             offset_scale = 0.1*self._data.std()/abs(params[1])
    ...         if offset_scale == 0:  # data is completely flat
    ...             offset_scale = 1.
    ...         return [slope_scale, offset_scale]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
    >>> m = LinearModel(data)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> info = outqueue.get(block=False)
    >>> pprint(info)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
    {'active fitted parameters': array([  6.999..., -32.889...]),
     'active parameters': array([  6.999..., -32.889...]),
     'convergence flag': ...,
     'covariance matrix': array([[  1.199...e-08,  -5.993...e-06],
                                 [ -5.993...e-06,   3.994...e-03]]),
     'data scale factor': 1.0,
     'fitted parameters': array([  6.999..., -32.889...]),
     'info': {'fjac': array([[...]]),
              'fvec': array([...]),
              'ipvt': array([1, 2]),
              'nfev': ...,
              'qtf': array([...])},
     'initial parameters': [6.992..., -33.0],
     'message': '...relative error between two consecutive iterates is at most 0.000...',
     'rescaled': False,
     'scale': [0.100..., 6.123...]}

    We round the outputs to protect the doctest against differences in
    machine rounding during computation.  We expect the values to be close
    to the input settings (slope 7, offset -33).

    >>> print '%.3f' % slope
    7.000
    >>> print '%.3f' % offset
    -32.890

    The offset is a bit off because, the range is not a multiple of
    :math:`2\pi`.

    We could also use rescaled parameters:

    >>> m = LinearModel(data, rescale=True)
    >>> outqueue = Queue()
    >>> slope,offset = m.fit(outqueue=outqueue)
    >>> print '%.3f' % slope
    7.000
    >>> print '%.3f' % offset
    -32.890

    Test single-parameter models:

    >>> class SingleParameterModel (LinearModel):
    ...     '''Simple linear model.
    ...     '''
    ...     def model(self, params):
    ...         return super(SingleParameterModel, self).model([params[0], 0.])
    ...     def guess_initial_params(self, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_initial_params(outqueue)[:1]
    ...     def guess_scale(self, params, outqueue=None):
    ...         return super(SingleParameterModel, self
    ...                      ).guess_scale([params[0], 0.], outqueue)[:1]
    >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000)
    >>> m = SingleParameterModel(data)
    >>> slope, = m.fit(outqueue=outqueue)
    >>> print '%.3f' % slope
    7.000
    """
156 def __init__(self, *args, **kwargs):
157 self.set_data(*args, **kwargs)
159 def set_data(self, data, info=None, rescale=False):
161 self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
163 self._rescale = rescale
165 for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
167 self._data_scale_factor = x
170 self._data_scale_factor = 1.0
172 def model(self, params):
173 p = params # convenient alias
174 self._model_data[:] = arange(len(self._data))
175 raise NotImplementedError
177 def guess_initial_params(self, outqueue=None):
180 def guess_scale(self, params, outqueue=None):
181 """Guess the problem length scale in each parameter dimension.
185 From the :func:`scipy.optimize.leastsq` documentation, `diag`
186 (which we refer to as `scale`, sets `factor * || diag * x||`
187 as the initial step. If `x == 0`, then `factor` is used
188 instead (from `lmdif.f`_)::
191 wa3(j) = diag(j)*x(j)
195 if (delta .eq. zero) delta = factor
197 For most situations then, you don't need to do anything fancy.
198 The default scaling (if you don't set a scale) is::
200 c on the first iteration and if mode is 1, scale according
201 c to the norms of the columns of the initial jacobian.
203 (i.e. `diag(j) = acnorm(j)`, where `acnorm(j) is the norm of the `j`th column
204 of the initial Jacobian).
206 .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
210 def residual(self, params):
211 if self._rescale == True:
212 params = [p*s for p,s in zip(params, self._param_scale_factors)]
213 residual = self._data - self.model(params)
214 if False: # fit debugging
215 if not hasattr(self, '_i_'):
217 self._data.tofile('data.%d' % self._i_, sep='\n')
218 self.model(params).tofile('model.%d' % self._i_, sep='\n')
220 if self._rescale == True:
221 residual /= self._data_scale_factor
224 def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
228 initial_params : iterable or None
229 Initial parameter values for residual minimization. If
230 `None`, they are estimated from the data using
231 :meth:`guess_initial_params`.
232 scale : iterable or None
233 Parameter length scales for residual minimization. If
234 `None`, they are estimated from the data using
236 outqueue : Queue or None
237 If given, will be used to output the data and fitted model
238 for user verification.
240 Any additional arguments are passed through to `leastsq`.
242 if initial_params == None:
243 initial_params = self.guess_initial_params(outqueue)
245 scale = self.guess_scale(initial_params, outqueue)
247 assert min(scale) > 0, scale
248 if self._rescale == True:
249 self._param_scale_factors = initial_params
250 for i,s in enumerate(self._param_scale_factors):
252 self._param_scale_factors[i] = 1.0
253 active_params = [p/s for p,s in zip(initial_params,
254 self._param_scale_factors)]
256 active_params = initial_params
257 params,cov,info,mesg,ier = leastsq(
258 func=self.residual, x0=active_params, full_output=True,
259 diag=scale, **kwargs)
260 if len(initial_params) == 1 and _SCIPY_VERSION < (0, 8, '0'):
261 # params is a float for scipy < 0.8.0. Convert to list.
263 if self._rescale == True:
264 active_params = params
265 params = [p*s for p,s in zip(params,
266 self._param_scale_factors)]
268 active_params = params
271 'rescaled': self._rescale,
272 'initial parameters': initial_params,
273 'active parameters': active_params,
275 'data scale factor': self._data_scale_factor,
276 'active fitted parameters': active_params,
277 'fitted parameters': params,
278 'covariance matrix': cov,
281 'convergence flag': ier,
# Example ODR code from the old fit.py
# def dist(px,py,linex,liney):
#     distancesx=scipy.array([(px-x)**2 for x in linex])
#     minindex=numpy.argmin(distancesx)
#     print px, linex[0], linex[-1]
#     return (py-liney[minindex])**2
#
# def f_wlc(params,x,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#
# def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
#     '''
#     wlc function for ODR fitting
#     '''
#     y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
#
# realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
# if pl_value:
#     model=scipy.odr.Model(f_wlc_plfix)
#     o = scipy.odr.ODR(realdata, model, p0_plfix)
# else:
#     model=scipy.odr.Model(f_wlc)
#     o = scipy.odr.ODR(realdata, model, p0)
#
# o.set_job(fit_type=2)
# out=o.run()
# fit_out=[(1/i) for i in out.beta]
#
# #Calculate fit errors from output standard deviations.
# #We must propagate the error because we fit the *inverse* parameters!
# #The error = (error of the inverse)*(value**2)
# fit_errors=[]
# for sd,value in zip(out.sd_beta, fit_out):
#     err_real=sd*(value**2)
#     fit_errors.append(err_real)