--- /dev/null
+# Copyright (C) 2008-2010 W. Trevor King <wking@drexel.edu>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# The author may be contacted at <wking@drexel.edu> on the Internet, or
+# write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
+# Philadelphia PA 19104, USA.
+
+"""Provide :class:`ModelFitter` to make arbitrary model fitting easy.
+"""
+
+from copy import deepcopy
+
+from numpy import arange, array, ndarray, exp, pi, sqrt
+from scipy.optimize import leastsq
+
+
+class ModelFitter (object):
+ """A convenient wrapper around :func:`scipy.optimize.leastsq`.
+
+ For minimizing fit error: returns the normalized rms difference
+ between a dataset and a function `y(X)`. In order to use this
+ class, you must sublcass it and overwrite `ModelFitter.model()`.
+ It is recommended that you also overwrite
+ `ModelFitter.guess_initial_params()`, and then not pass
+ initializing params to `ModelFitter.model()`. This helps ensure
+ good automation for unsupervised testing. Developing good
+ heuristics for `ModelFitter.guess_initial_params()` is usually the
+ hardest part writing a theoretical histogram test.
+
+ For bonus points, you can override `ModelFitter.print_model()` and
+ `ModelFitter.print_params()`, to give your users an easy to
+ understand idea of what's going on. It's better to be verbose and
+ clear. Remember, this is the testing code. It needs to be solid.
+ Save your wonderful, sneaky hacks for the main code ;).
+
+ TODO: look into :mod:`scipy.odr` as an alternative fitting
+ algorithm (minimizing the sum of squares of orthogonal distances,
+ vs. minimizing y distances).
+
+ Parameters
+ ----------
+ d_data : array_like
+ Deflection data to be analyzed for the contact position.
+ info :
+ Store any extra information useful inside your overridden
+ methods.
+ rescale : boolean
+ Rescale parameters so the guess for each is 1.0. Also rescale
+ the data so data.std() == 1.0.
+
+ Examples
+ --------
+ >>> from pprint import pprint
+ >>> import numpy
+ >>> class LinearModel (ModelFitter):
+ ... '''Simple linear model.
+ ...
+ ... Levenberg-Marquardt is not how you want to solve this problem
+ ... for real systems, but it's a simple test case.
+ ... '''
+ ... def model(self, params):
+ ... '''A linear model.
+ ...
+ ... Notes
+ ... -----
+ ... .. math:: y = p_0 x + p_1
+ ... '''
+ ... p = params # convenient alias
+ ... self._model_data[:] = p[0]*arange(len(self._data)) + p[1]
+ ... return self._model_data
+ ... def guess_initial_params(self):
+ ... return [float(self._data[-1] - self._data[0])/len(self._data),
+ ... self._data[0]]
+ ... def guess_scale(self, params):
+ ... slope_scale = 0.1
+ ... if params[1] == 0:
+ ... offset_scale = 1
+ ... else:
+ ... offset_scale = 0.1*self._data.std()/abs(params[1])
+ ... if offset_scale == 0: # data is completely flat
+ ... offset_scale = 1.
+ ... return [slope_scale, offset_scale]
+ ... def model_string(self):
+ ... return 'y = A x + B'
+ ... def param_string(self, params):
+ ... pstrings = []
+ ... for name,param in zip(['A', 'B'], params):
+ ... pstrings.append('%s=%.3f' % (name, param))
+ ... return ', '.join(pstrings)
+ >>> data = 20*numpy.sin(arange(1000)) + 7.*arange(1000) - 33.0
+ >>> m = LinearModel(data)
+ >>> slope,offset = m.fit()
+ >>> pprint(m.fit_info)
+ ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +REPORT_UDIFF
+ {'active fitted parameters': array([ 6.999..., -32.889...]),
+ 'active parameters': array([ 6.999..., -32.889...]),
+ 'convergence flag': ...,
+ 'covariance matrix': array([[ 1.199...e-08, -5.993...e-06],
+ [ -5.993...e-06, 3.994...e-03]]),
+ 'data scale factor': 1.0,
+ 'fitted parameters': array([ 6.999..., -32.889...]),
+ 'info': {'fjac': array([[...]]),
+ 'fvec': array([...]),
+ 'ipvt': array([1, 2]),
+ 'nfev': 7,
+ 'qtf': array([...])},
+ 'initial parameters': [6.992..., -33.0],
+ 'message': '...relative error between two consecutive iterates is at most 0.000...',
+ 'rescaled': False,
+ 'scale': [0.100..., 6.123...]}
+
+ We round the outputs to protect the doctest against differences in
+ machine rounding during computation. We expect the values to be close
+ to the input settings (slope 7, offset -33).
+
+ >>> print '%.3f' % slope
+ 7.000
+ >>> print '%.3f' % offset
+ -32.890
+
+ >>> print m.model_string()
+ y = A x + B
+ >>> print m.param_string([slope,offset])
+ A=7.000, B=-32.890
+ >>> print ModelFitter.param_string(m, [slope,offset]) # doctest: +ELLIPSIS
+ param_0=6.9..., param_1=-32.8...
+
+ The offset is a bit off because, the range is not a multiple of
+ :math:`2\pi`.
+
+ We could also use rescaled parameters:
+
+ >>> m = LinearModel(data, rescale=True)
+ >>> slope,offset = m.fit()
+ >>> print '%.3f' % slope
+ 7.000
+ >>> print '%.3f' % offset
+ -32.890
+ """
+ def __init__(self, *args, **kwargs):
+ self.set_data(*args, **kwargs)
+
+ def set_data(self, data, info=None, rescale=False):
+ self._data = data
+ self._model_data = ndarray(shape=data.shape, dtype=data.dtype)
+ self.info = info
+ self._rescale = rescale
+ if rescale == True:
+ for x in [data.std(), data.max()-data.min(), abs(data.max()), 1.0]:
+ if x != 0:
+ self._data_scale_factor = x
+ break
+ else:
+ self._data_scale_factor = 1.0
+
+ def model(self, params):
+ p = params # convenient alias
+ self._model_data[:] = arange(len(self._data))
+ raise NotImplementedError
+
+ def model_string(self):
+ """OVERRIDE: Give the user some idea of what model we're using.
+ """
+ return 'model string not defined'
+
+ def param_string(self, params):
+ """OVERRIDE: Give the user nicely named format for a parameter vector.
+ """
+ pstrings = []
+ for i,p in enumerate(params):
+ pstrings.append('param_%d=%g' % (i,p))
+ return ', '.join(pstrings)
+
+ def guess_initial_params(self):
+ """OVERRIDE: Provide some intelligent heuristic to pick a
+ starting point for minimizing your model"""
+ raise NotImplementedError
+
+ def guess_scale(self, params):
+ """Guess the problem length scale in each parameter dimension.
+
+ Notes
+ -----
+ From the :func:`scipy.optimize.leastsq` documentation, `diag`
+ (which we refer to as `scale`, sets `factor * || diag * x||`
+ as the initial step. If `x == 0`, then `factor` is used
+ instead (from `lmdif.f`_)::
+
+ do 70 j = 1, n
+ wa3(j) = diag(j)*x(j)
+ 70 continue
+ xnorm = enorm(n,wa3)
+ delta = factor*xnorm
+ if (delta .eq. zero) delta = factor
+
+ For most situations then, you don't need to do anything fancy.
+ The default scaling (if you don't set a scale) is::
+
+ c on the first iteration and if mode is 1, scale according
+ c to the norms of the columns of the initial jacobian.
+
+ (i.e. `diag(j) = acnorm(j)`, where `acnorm(j) is the norm of the `j`th column
+ of the initial Jacobian).
+
+ .. _lmdif.f: http://www.netlib.org/minpack/lmdif.f
+ """
+ return None
+
+ def residual(self, params):
+ if self._rescale == True:
+ params = [p*s for p,s in zip(params, self._param_scale_factors)]
+ residual = self._data - self.model(params)
+ if False: # fit debugging
+ if not hasattr(self, '_i_'):
+ self._i_ = 0
+ self._data.tofile('data.%d' % self._i_, sep='\n')
+ self._model_data.tofile('model.%d' % self._i_, sep='\n')
+ self._i_ += 1
+ if self._rescale == True:
+ residual /= self._data_scale_factor
+ return residual
+
+ def fit(self, initial_params=None, scale=None, **kwargs):
+ """
+ Parameters
+ ----------
+ initial_params : iterable or None
+ Initial parameter values for residual minimization. If
+ `None`, they are estimated from the data using
+ :meth:`guess_initial_params`.
+ scale : iterable or None
+ Parameter length scales for residual minimization. If
+ `None`, they are estimated from the data using
+ :meth:`guess_scale`.
+ kwargs :
+ Any additional arguments are passed through to `leastsq`.
+ """
+ if initial_params == None:
+ initial_params = self.guess_initial_params()
+ if scale == None:
+ scale = self.guess_scale(initial_params)
+ if scale != None:
+ assert min(scale) > 0, scale
+ if self._rescale == True:
+ self._param_scale_factors = initial_params
+ for i,s in enumerate(self._param_scale_factors):
+ if s == 0:
+ self._param_scale_factors[i] = 1.0
+ active_params = [p/s for p,s in zip(initial_params,
+ self._param_scale_factors)]
+ else:
+ active_params = initial_params
+ params,cov,info,mesg,ier = leastsq(
+ func=self.residual, x0=active_params, full_output=True,
+ diag=scale, **kwargs)
+ if self._rescale == True:
+ active_params = params
+ if len(initial_params) == 1: # params is a float
+ params = params * self._param_scale_factors[0]
+ else:
+ params = [p*s for p,s in zip(params,
+ self._param_scale_factors)]
+ else:
+ active_params = params
+ self.fit_info = {
+ 'rescaled': self._rescale,
+ 'initial parameters': initial_params,
+ 'active parameters': active_params,
+ 'scale': scale,
+ 'data scale factor': self._data_scale_factor,
+ 'active fitted parameters': active_params,
+ 'fitted parameters': params,
+ 'covariance matrix': cov,
+ 'info': info,
+ 'message': mesg,
+ 'convergence flag': ier,
+ }
+ return params
+
+
+class HistogramModelFitter (ModelFitter):
+ """Adapt ModelFitter for easy Histogram() comparison.
+
+ TODO: look into :func:`scipy.optimize.fmin_powell` as way to
+ minimize a single residul number. Useful for fitting `Histograms`
+ with their assorted residal methods.
+
+ Examples
+ --------
+ >>> from random import gauss
+ >>> from numpy import array
+ >>> from .histogram import Histogram
+
+ >>> class GaussianModel (HistogramModelFitter):
+ ... '''Simple gaussian model.
+ ... '''
+ ... def model(self, params):
+ ... '''A gaussian model.
+ ...
+ ... Notes
+ ... -----
+ ... .. math:: y \\propto e^{-\\frac{(x-p_0)^2}{2 p_1^2}}
+ ... '''
+ ... p = params # convenient alias
+ ... p[1] = abs(p[1]) # cannot have negative std. dev.
+ ... self._model_data.counts = self.info['N']/(p[1]*sqrt(2*pi)) * (
+ ... exp(-((self._model_data.bin_centers - p[0])/p[1])**2 / 2))
+ ... return self._model_data
+ ... def guess_initial_params(self):
+ ... return [self._data.mean, self._data.std_dev]
+ ... def guess_scale(self, params):
+ ... return [self._data.std_dev, self._data.std_dev]
+ ... def model_string(self):
+ ... return 'p(x) = A exp((x-mu)^2 / (2 sigma^2))'
+ ... def param_string(self, params):
+ ... pstrings = []
+ ... for name,param in zip(['mu', 'sigma'], params):
+ ... pstrings.append('%s=%.3f' % (name, param))
+ ... return ', '.join(pstrings)
+
+ >>> mu = 1.45
+ >>> sigma = 3.14
+ >>> data = array([gauss(mu,sigma) for i in range(int(1e4))])
+ >>> h = Histogram()
+ >>> h.from_data(data, h.calculate_bin_edges(data, 1))
+ >>> m = GaussianModel(h)
+ >>> params = m.fit()
+ >>> print m.model_string()
+ p(x) = A exp((x-mu)^2 / (2 sigma^2))
+ >>> print m.param_string(params) # doctest: +ELLIPSIS
+ mu=1.4..., sigma=3.1...
+ """
+ def set_data(self, data, info=None, rescale=False):
+ self._data = data # Histogram() instance
+ self._model_data = deepcopy(data)
+ self._model_data.bin_edges = array(self._model_data.bin_edges)
+ self._model_data.counts = array(self._model_data.counts)
+ if info == None:
+ info = {}
+ self.info = info
+ self._rescale = rescale
+ self._data_scale_factor = 1.0 # bo need to rescale normalized hists.
+ self.info['N'] = self._data.total
+ # assume constant binwidth
+ self.info['binwidth'] = (
+ self._data.bin_edges[1] - self._data.bin_edges[0])
+ self._model_data.bin_centers = (
+ self._model_data.bin_edges[:-1] + self.info['binwidth']/2)
+
+ def residual(self, params):
+ if self._rescale == True:
+ params = [p*s for p,s in zip(params, self._param_scale_factors)]
+ residual = self._data.counts - self.model(params).counts
+ if True: # fit debugging
+ if not hasattr(self, '_i_'):
+ self._i_ = 0
+ self._data.to_stream(open('hist-data.%d' % self._i_, 'w'))
+ self._model_data.headings = [str(x) for x in params]
+ self._model_data.to_stream(open('hist-model.%d' % self._i_, 'w'))
+ self._i_ += 1
+ if self._rescale == True:
+ residual /= self._data_scale_factor
+ return residual
+++ /dev/null
-# -*- coding: utf-8 -*-
-
-"""Fit histogram data coming in on stdin to a user-defined model. You
-should write a python script sublcassing fit-hist.Model."""
-
-import scipy.optimize, scipy
-import sys
-
-kB=1.3806503e-23 # m²·kg/s²·K
-
-class Model (object) :
-
- """For minimizing fit error: returns the normalized rms difference
- between a dataset and a function y(X). In order to use this
- class, you must sublcass it and overwrite Model.model(x,params).
- It is recommended that you also overwrite
- Model.guess_initial_params(), and then not pass initializing
- params to Model.model(). This helps ensure good automation for
- unsupervised testing. Developing good heuristics for
- Model.guessInitialParams() is usually the hardest part writing a
- theoretical histogram test.
-
- For bonus points, you can override Model.printModel() and
- Model.printParams(), to give your users an easy to understand idea
- of what's going on. It's better to be verbose and clear.
- Remember, this is the testing branch. It needs to be solid. Save
- your wonderful, sneaky hacks for the main code ;).
-
- """
- def __init__(self, numParams, data) :
- """Initialize the Model instance with a set of data, for
- example: data = [(0,4),(1,3),(2,3),(4,0)]"""
- self.data = data # an iterable filled with (x,y) pairs
- # N (# unfolding events) = the sum of the y values (# hits per bin)
- self.N = sum(zip(*data)[1]) # zip(* takes data to [[x0,x1,..],[y..]]
- # assumes constant binwidth
- self.binwidth = self.data[1][0] - self.data[0][0]
- self.numParams = numParams
- def nrms(self, params) :
- """Returns the normalized root mean squagred between the data
- and a model for a given set of parameters"""
- s = 0.0
- for x,y in self.data :
- yResidual = y-self.model(x, params)
- s += yResidual**2
- s /= float(len(self.data))
- return s
- def fit(self, x0=None) :
- """Minimize error given an initial guess vector x0, the heart of
- the class."""
- if x0 == None :
- x0 = self.guessInitialParams()
- xopt = scipy.optimize.fmin_powell(self.nrms, x0)
- return xopt
- def printModel(self) :
- "OVERRIDE: Give the user some idea of what model we're fitting with"
- print "model string not defined"
- def printParams(self) :
- "OVERRIDE: Give the user nicely named format for a parameter vector"
- for i,p in zip(range(len(params)), params) :
- print "param_%d\t=\t%g\n" % (i,p)
- def model(self, x, params) :
- "OVERRIDE: Implement your model"
- assert len(params) == self.numParams
- raise Exception, "model method not defined"
- return 1.0
- def guessInitialParams(self) :
- """OVERRIDE: Provide some intelligent heuristic to pick a
- starting point for minimizing your model"""
- return [1.0] * self.numParams
-
-def readData(fileObj) :
- """Read histogram data from a whitespace-seperated, two-column
- ASCII text-file into a list of tuples"""
- data = []
- for line in fileObj :
- if line[0] == "#" : continue
- data.append([float(x) for x in line.split()])
- return data
-
-def run(numParams, modelClass) :
- import sys
- data = readData(sys.stdin)
- mod = modelClass(numParams, data)
- popt = mod.fit()
- mod.printModel()
- mod.printParams(popt)
- print "N:\t%g" % mod.N
- print "bin:\t%g" % mod.binwidth