pypid/fit.py

   1 # Copyright (C) 2011-2012 W. Trevor King <wking@tremily.us>
   2 #
   3 # This file is part of pypid.
   4 #
   5 # pypid is free software: you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation, either version 3 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # pypid is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # pypid.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 """Provide :class:`ModelFitter` to make arbitrary model fitting easy.
  18 """
  19
  20 from numpy import arange, ndarray
  21 from scipy import __version__ as _scipy_version
  22 from scipy.optimize import leastsq
  23
  24 _strings = _scipy_version.split('.')
  25 # Don't convert third string to an integer in case of (for example) '0.7.2rc3'
  26 _SCIPY_VERSION = (int(_strings[0]), int(_strings[1]), _strings[2])
  27 del _strings
  28
  29
  30 class PoorFit (ValueError):
  31     pass
  32
  33 class ModelFitter (object):
  34     """A convenient wrapper around :func:`scipy.optimize.leastsq`.
  35
  36     TODO: look into :mod:`scipy.odr` as an alternative fitting
  37     algorithm (minimizing the sum of squares of orthogonal distances,
  38     vs. minimizing y distances).
  39
  40     Parameters
  41     ----------
  42     d_data : array_like
  43         Deflection data to be analyzed for the contact position.
  44     info :
  45         Store any extra information useful inside your overridden
  46         methods.
  47     rescale : boolean
  48         Rescale parameters so the guess for each is 1.0.  Also rescale
  49         the data so data.std() == 1.0.
  50
  51     Examples
  52     --------
  53
  54     >>> from pprint import pprint
  55     >>> from Queue import Queue
  56     >>> import numpy
  57
  58     You'll want to subclass `ModelFitter`, overriding at least
  59     `.model` and potentially the parameter and scale guessing
  60     methods.
  61
  62     >>> class LinearModel (ModelFitter):
  63     ...     '''Simple linear model.
  64     ...
  65     ...     Levenberg-Marquardt is not how you want to solve this problem
  66     ...     for real systems, but it's a simple test case.
  67     ...     '''
  68     ...     def model(self, params):
  69     ...         '''A linear model.
  70     ...
  71     ...         Notes
  72     ...         -----
  73     ...         .. math:: y = p_0 x + p_1
  74     ...         '''
  75     ...         p = params  # convenient alias
  76     ...         self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
  77     ...         return self._model_data
  78     ...     def guess_initial_params(self, outqueue=None):
  79     ...         return [float(self._data[-1] - self._data[0])/len(self._data),
  80     ...                 self._data[0]]
  81     ...     def guess_scale(self, params, outqueue=None):
  82     ...         slope_scale = 0.1
  83     ...         if params[1] == 0:
  84     ...             offset_scale = 1
  85     ...         else:
  86     ...             offset_scale = 0.1*self._data.std()/abs(params[1])
  87     ...             if offset_scale == 0:  # data is completely flat
  88     ...                 offset_scale = 1.
  89     ...         return [slope_scale, offset_scale]
  90     >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
  91     >>> m = LinearModel(data)
  92     >>> outqueue = Queue()
  93     >>> slope,offset = m.fit(outqueue=outqueue)
  94     >>> info = outqueue.get(block=False)
  95     >>> pprint(info)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
  96     {'active fitted parameters': array([  6.999..., -32.889...]),
  97      'active parameters': array([  6.999..., -32.889...]),
  98      'convergence flag': ...,
  99      'covariance matrix': array([[  1.199...e-08,  -5.993...e-06],
 100            [ -5.993...e-06,   3.994...e-03]]),
 101      'data scale factor': 1.0,
 102      'fitted parameters': array([  6.999..., -32.889...]),
 103      'info': {'fjac': array([[...]]),
 104               'fvec': array([...]),
 105               'ipvt': array([1, 2]),
 106               'nfev': 7,
 107               'qtf': array([...])},
 108      'initial parameters': [6.992..., -33.0],
 109      'message': '...relative error between two consecutive iterates is at most 0.000...',
 110      'rescaled': False,
 111      'scale': [0.100..., 6.123...]}
 112
 113     We round the outputs to protect the doctest against differences in
 114     machine rounding during computation.  We expect the values to be close
 115     to the input settings (slope 7, offset -33).
 116
 117     >>> print '%.3f' % slope
 118     7.000
 119     >>> print '%.3f' % offset
 120     -32.890
 121
 122     The offset is a bit off because, the range is not a multiple of
 123     :math:`2\pi`.
 124
 125     We could also use rescaled parameters:
 126
 127     >>> m = LinearModel(data, rescale=True)
 128     >>> outqueue = Queue()
 129     >>> slope,offset = m.fit(outqueue=outqueue)
 130     >>> print '%.3f' % slope
 131     7.000
 132     >>> print '%.3f' % offset
 133     -32.890
 134
 135     Test single-parameter models:
 136
 137     >>> class SingleParameterModel (LinearModel):
 138     ...     '''Simple linear model.
 139     ...     '''
 140     ...     def model(self, params):
 141     ...         return super(SingleParameterModel, self).model([params[0], 0.])
 142     ...     def guess_initial_params(self, outqueue=None):
 143     ...         return super(SingleParameterModel, self
 144     ...             ).guess_initial_params(outqueue)[:1]
 145     ...     def guess_scale(self, params, outqueue=None):
 146     ...         return super(SingleParameterModel, self
 147     ...             ).guess_scale([params[0], 0.], outqueue)[:1]
 148     >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000)
 149     >>> m = SingleParameterModel(data)
 150     >>> slope, = m.fit(outqueue=outqueue)
 151     >>> print '%.3f' % slope
 152     7.000
 153     """
 154     def __init__(self, *args, **kwargs):
 155         self.set_data(*args, **kwargs)
 156
 157     def set_data(self, data, info=None, rescale=False):
 158         self._data = data
 159         self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
 160         self.info = info
 161         self._rescale = rescale
 162         if rescale == True:
 163             for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
 164                 if x != 0:
 165                     self._data_scale_factor = x
 166                     break
 167         else:
 168             self._data_scale_factor = 1.0
 169
 170     def model(self, params):
 171         p = params  # convenient alias
 172         self._model_data[:] = arange(len(self._data))
 173         raise NotImplementedError
 174
 175     def guess_initial_params(self, outqueue=None):
 176         return []
 177
 178     def guess_scale(self, params, outqueue=None):
 179         """Guess the problem length scale in each parameter dimension.
 180
 181         Notes
 182         -----
 183         From the :func:`scipy.optimize.leastsq` documentation, `diag`
 184         (which we refer to as `scale`, sets `factor * || diag * x||`
 185         as the initial step.  If `x == 0`, then `factor` is used
 186         instead (from `lmdif.f`_)::
 187
 188                       do 70 j = 1, n
 189                         wa3(j) = diag(j)*x(j)
 190                70       continue
 191                       xnorm = enorm(n,wa3)
 192                       delta = factor*xnorm
 193                       if (delta .eq. zero) delta = factor
 194
 195         For most situations then, you don't need to do anything fancy.
 196         The default scaling (if you don't set a scale) is::
 197
 198             c        on the first iteration and if mode is 1, scale according
 199             c        to the norms of the columns of the initial jacobian.
 200
 201         (i.e. `diag(j) = acnorm(j)`, where `acnorm(j) is the norm of the `j`th column
 202         of the initial Jacobian).
 203
 204         .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
 205         """
 206         return None
 207
 208     def residual(self, params):
 209         if self._rescale == True:
 210             params = [p*s for p,s in zip(params, self._param_scale_factors)]
 211         residual = self._data - self.model(params)
 212         if False:  # fit debugging
 213             if not hasattr(self, '_i_'):
 214                 self._i_ = 0
 215             self._data.tofile('data.%d' % self._i_, sep='\n')
 216             self.model(params).tofile('model.%d' % self._i_, sep='\n')
 217             self._i_ += 1
 218         if self._rescale == True:
 219             residual /= self._data_scale_factor
 220         return residual
 221
 222     def fit(self, initial_params=None, scale=None, outqueue=None, **kwargs):
 223         """
 224         Parameters
 225         ----------
 226         initial_params : iterable or None
 227             Initial parameter values for residual minimization.  If
 228             `None`, they are estimated from the data using
 229             :meth:`guess_initial_params`.
 230         scale : iterable or None
 231             Parameter length scales for residual minimization.  If
 232             `None`, they are estimated from the data using
 233             :meth:`guess_scale`.
 234         outqueue : Queue or None
 235             If given, will be used to output the data and fitted model
 236             for user verification.
 237         kwargs :
 238             Any additional arguments are passed through to `leastsq`.
 239         """
 240         if initial_params == None:
 241             initial_params = self.guess_initial_params(outqueue)
 242         if scale == None:
 243             scale = self.guess_scale(initial_params, outqueue)
 244         if scale != None:
 245             assert min(scale) > 0, scale
 246         if self._rescale == True:
 247             self._param_scale_factors = initial_params
 248             for i,s in enumerate(self._param_scale_factors):
 249                 if s == 0:
 250                     self._param_scale_factors[i] = 1.0
 251             active_params = [p/s for p,s in zip(initial_params,
 252                                                 self._param_scale_factors)]
 253         else:
 254             active_params = initial_params
 255         params,cov,info,mesg,ier = leastsq(
 256             func=self.residual, x0=active_params, full_output=True,
 257             diag=scale, **kwargs)
 258         if len(initial_params) == 1 and _SCIPY_VERSION < (0, 8, '0'):
 259             # params is a float for scipy < 0.8.0.  Convert to list.
 260             params = [params]
 261         if self._rescale == True:
 262             active_params = params
 263             params = [p*s for p,s in zip(params,
 264                                          self._param_scale_factors)]
 265         else:
 266             active_params = params
 267         if outqueue != None:
 268             outqueue.put({
 269                     'rescaled': self._rescale,
 270                     'initial parameters': initial_params,
 271                     'active parameters': active_params,
 272                     'scale': scale,
 273                     'data scale factor': self._data_scale_factor,
 274                     'active fitted parameters': active_params,
 275                     'fitted parameters': params,
 276                     'covariance matrix': cov,
 277                     'info': info,
 278                     'message': mesg,
 279                     'convergence flag': ier,
 280                     })
 281         return params
 282
 283 # Example ORD code from the old fit.py
 284 #        def dist(px,py,linex,liney):
 285 #            distancesx=scipy.array([(px-x)**2 for x in linex])
 286 #            minindex=numpy.argmin(distancesx)
 287 #            print px, linex[0], linex[-1]
 288 #            return (py-liney[minindex])**2
 289 #
 290 #
 291 #        def f_wlc(params,x,T=T):
 292 #            '''
 293 #            wlc function for ODR fitting
 294 #            '''
 295 #            lambd,pii=params
 296 #            Kb=(1.38065e-23)
 297 #            therm=Kb*T
 298 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 299 #            return y
 300 #
 301 #        def f_wlc_plfix(params,x,pl_value=pl_value,T=T):
 302 #            '''
 303 #            wlc function for ODR fitting
 304 #            '''
 305 #            lambd=params
 306 #            pii=1/pl_value
 307 #            Kb=(1.38065e-23)
 308 #            therm=Kb*T
 309 #            y=(therm*pii/4.0) * (((1-(x*lambd))**-2) - 1 + (4*x*lambd))
 310 #            return y
 311 #
 312 #        #make the ODR fit
 313 #        realdata=scipy.odr.RealData(xchunk_corr_up,ychunk_corr_up)
 314 #        if pl_value:
 315 #            model=scipy.odr.Model(f_wlc_plfix)
 316 #            o = scipy.odr.ODR(realdata, model, p0_plfix)
 317 #        else:
 318 #            model=scipy.odr.Model(f_wlc)
 319 #            o = scipy.odr.ODR(realdata, model, p0)
 320 #
 321 #        o.set_job(fit_type=2)
 322 #        out=o.run()
 323 #        fit_out=[(1/i) for i in out.beta]
 324 #
 325 #        #Calculate fit errors from output standard deviations.
 326 #        #We must propagate the error because we fit the *inverse* parameters!
 327 #        #The error = (error of the inverse)*(value**2)
 328 #        fit_errors=[]
 329 #        for sd,value in zip(out.sd_beta, fit_out):
 330 #            err_real=sd*(value**2)
 331 #            fit_errors.append(err_real)
 332