hooke/plugin/polymer_fit.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # Copyright (C) 2010-2012 W. Trevor King <wking@tremily.us>
   4 #
   5 # This file is part of Hooke.
   6 #
   7 # Hooke is free software: you can redistribute it and/or modify it under the
   8 # terms of the GNU Lesser General Public License as published by the Free
   9 # Software Foundation, either version 3 of the License, or (at your option) any
  10 # later version.
  11 #
  12 # Hooke is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  14 # A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
  15 # details.
  16 #
  17 # You should have received a copy of the GNU Lesser General Public License
  18 # along with Hooke.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 """The ``polymer_fit`` module proviews :class:`PolymerFitPlugin` and
  21 serveral associated :class:`hooke.command.Command`\s for Fitting
  22 velocity-clamp data to various polymer models (WLC, FJC, etc.).
  23 """
  24
  25 import copy
  26 from Queue import Queue
  27 import logging
  28 import StringIO
  29 import sys
  30
  31 import numpy
  32 from scipy.optimize import newton
  33
  34 from ..command import Command, Argument, Success, Failure
  35 from ..config import Setting
  36 from ..curve import Data
  37 from ..util.callback import is_iterable
  38 from ..util.fit import PoorFit, ModelFitter
  39 from ..util.peak import Peak
  40 from ..util.quickhull import qhull, points_inside_hull
  41 from ..util.si import join_data_label, split_data_label
  42 from . import Plugin, argument_to_setting
  43 from .curve import ColumnAccessCommand, ColumnAddingCommand
  44
  45
  46 kB = 1.3806503e-23  # Joules/Kelvin
  47
  48
  49 def coth(z):
  50     """Hyperbolic cotangent.
  51
  52     Examples
  53     --------
  54     >>> coth(1.19967874)  # doctest: +ELLIPSIS
  55     1.199678...
  56
  57     Notes
  58     -----
  59     From `MathWorld`_
  60
  61     .. math::
  62       \coth(z) \equiv \frac{\exp(z) + \exp(-z)}{\exp(z) - \exp(-z)}
  63          = \frac{1}{\tanh(z)}
  64
  65     .. _MathWorld:
  66       http://mathworld.wolfram.com/HyperbolicCotangent.html
  67     """
  68     return 1.0/numpy.tanh(z)
  69
  70 def arccoth(z):
  71     """Inverse hyperbolic cotangent.
  72
  73     Examples
  74     --------
  75     >>> arccoth(1.19967874)  # doctest: +ELLIPSIS
  76     1.199678...
  77     >>> arccoth(coth(numpy.pi))  # doctest: +ELLIPSIS
  78     3.1415...
  79
  80     Notes
  81     -----
  82     Inverting from the :func:`definition <coth>`.
  83
  84     .. math::
  85       z \equiv \atanh\left( \frac{1}{\coth(z)} \right)
  86     """
  87     return numpy.arctanh(1.0/z)
  88
  89 def Langevin(z):
  90     """Langevin function.
  91
  92     Examples
  93     --------
  94     >>> Langevin(numpy.pi)  # doctest: +ELLIPSIS
  95     0.685...
  96
  97     Notes
  98     -----
  99     From `Wikipedia`_ or Hatfield [#]_
 100
 101     .. math::
 102       L(z) \equiv \coth(z) - \frac{1}{z}
 103
 104     .. _Wikipedia:
 105       http://en.wikipedia.org/wiki/Brillouin_and_Langevin_functions#Langevin_Function
 106
 107     .. [#]: J.W. Hatfield and S.R. Quake
 108       "Dynamic Properties of an Extended Polymer in Solution."
 109       Phys. Rev. Lett. 1999.
 110       doi: `10.1103/PhysRevLett.82.3548 <http://dx.doi.org/10.1103/PhysRevLett.82.3548>`
 111     """
 112     return coth(z) - 1.0/z
 113
 114 def inverse_Langevin(z, extreme=1.0 - 1e-8):
 115     """Inverse Langevin function.
 116
 117     Parameters
 118     ----------
 119     z : float or array_like
 120         object whose inverse Langevin will be returned
 121     extreme : float
 122         value (close to one) for which we assume the inverse is +/-
 123         infinity.  This avoids problems with Newton-Raphson
 124         convergence in regions with low slope.
 125
 126     Examples
 127     --------
 128     >>> inverse_Langevin(Langevin(numpy.pi))  # doctest: +ELLIPSIS
 129     3.14159...
 130     >>> inverse_Langevin(Langevin(numpy.array([1,2,3])))  # doctest: +ELLIPSIS
 131     array([ 1.,  2.,  3.])
 132
 133     Notes
 134     -----
 135     We approximate the inverse Langevin via Newton's method
 136     (:func:`scipy.optimize.newton`).  For the starting point, we take
 137     the first three terms from the Taylor series (from `Wikipedia`_).
 138
 139     .. math::
 140       L^{-1}(z) = 3z + \frac{9}{5}z^3 + \frac{297}{175}z^5 + \dots
 141
 142     .. _Wikipedia:
 143       http://en.wikipedia.org/wiki/Brillouin_and_Langevin_functions#Langevin_Function
 144     """
 145     if is_iterable(z):
 146         ret = numpy.ndarray(shape=z.shape, dtype=z.dtype)
 147         for i,z_i in enumerate(z):
 148             ret[i] = inverse_Langevin(z_i)
 149         return ret
 150     if z > extreme:
 151         return numpy.inf
 152     elif z < -extreme:
 153         return -numpy.inf
 154     # Catch stdout since sometimes the newton function prints exit
 155     # messages to stdout, e.g. "Tolerance of %s reached".
 156     stdout = sys.stdout
 157     tmp_stdout = StringIO.StringIO()
 158     sys.stdout = tmp_stdout
 159     ret = newton(func=lambda x: Langevin(x)-z,
 160                   x0=3*z + 9./5.*z**3 + 297./175.*z**5,)
 161     sys.stdout = stdout
 162     output = tmp_stdout.getvalue()
 163     return ret
 164
 165 def FJC_fn(x_data, T, L, a):
 166     """The freely jointed chain model.
 167
 168     Parameters
 169     ----------
 170     x_data : array_like
 171         x values for which the WLC tension should be calculated.
 172     T : float
 173         temperature in Kelvin.
 174     L : float
 175         contour length in meters.
 176     a : float
 177         Kuhn length in meters.
 178
 179     Returns
 180     -------
 181     f_data : array
 182         `F(x_data)`.
 183
 184     Examples
 185     --------
 186     >>> FJC_fn(numpy.array([1e-9, 5e-9, 10e-9], dtype=numpy.float),
 187     ...        T=300, L=15e-9, a=2.5e-10) # doctest: +ELLIPSIS
 188     array([  3.322...-12,   1.78...e-11,   4.889...e-11])
 189
 190     Notes
 191     -----
 192     The freely jointed chain model is
 193
 194     .. math::
 195       F(x) = \frac{k_B T}{a} L^{-1}\left( \frac{x}{L} \right)
 196
 197     where :math:`L^{-1}` is the :func:`inverse_Langevin`, :math:`a` is
 198     the Kuhn length, and :math:`L` is the contour length [#]_.  For
 199     the inverse formulation (:math:`x(F)`), see Ray and Akhremitchev [#]_.
 200
 201
 202     .. [#]: J.W. Hatfield and S.R. Quake
 203       "Dynamic Properties of an Extended Polymer in Solution."
 204       Phys. Rev. Lett. 1999.
 205       doi: `10.1103/PhysRevLett.82.3548 <http://dx.doi.org/10.1103/PhysRevLett.82.3548>`
 206
 207     .. [#] C. Ray and B.B. Akhremitchev.
 208       `"Fitting Force Curves by the Extended Freely Jointed Chain Model" <http://www.chem.duke.edu/~boris/research/force_spectroscopy/fit_efjc.pdf>`
 209     """
 210     return kB*T / a * inverse_Langevin(x_data/L)
 211
 212 class FJC (ModelFitter):
 213     """Fit the data to a freely jointed chain.
 214
 215     Examples
 216     --------
 217     Generate some example data
 218
 219     >>> T = 300  # Kelvin
 220     >>> L = 35e-9  # meters
 221     >>> a = 2.5e-10  # meters
 222     >>> x_data = numpy.linspace(10e-9, 30e-9, num=20)
 223     >>> d_data = FJC_fn(x_data, T=T, L=L, a=a)
 224
 225     Fit the example data with a two-parameter fit (`L` and `a`).
 226
 227     >>> info = {
 228     ...     'temperature (K)': T,
 229     ...     'x data (m)': x_data,
 230     ...     }
 231     >>> model = FJC(d_data, info=info, rescale=True)
 232     >>> outqueue = Queue()
 233     >>> L,a = model.fit(outqueue=outqueue)
 234     >>> fit_info = outqueue.get(block=False)
 235     >>> print(L)
 236     3.5e-08
 237     >>> print(a)
 238     2.5e-10
 239
 240     Fit the example data with a one-parameter fit (`L`).  We introduce
 241     some error in our fixed Kuhn length for fun.
 242
 243     >>> info['Kuhn length (m)'] = 2*a
 244     >>> model = FJC(d_data, info=info, rescale=True)
 245     >>> L, = model.fit(outqueue=outqueue)
 246     >>> fit_info = outqueue.get(block=False)
 247     >>> print(L)  # doctest: +ELLIPSIS
 248     3.199...e-08
 249     """
 250     def Lp(self, L):
 251         """To avoid invalid values of `L`, we reparametrize with `Lp`.
 252
 253         Parameters
 254         ----------
 255         L : float
 256             contour length in meters
 257
 258         Returns
 259         -------
 260         Lp : float
 261             rescaled version of `L`.
 262
 263         Examples
 264         --------
 265         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 266         >>> model = FJC(data=x_data, info={'x data (m)':x_data})
 267         >>> model.Lp(20e-9)  # doctest: +ELLIPSIS
 268         -inf
 269         >>> model.Lp(19e-9)  # doctest: +ELLIPSIS
 270         nan
 271         >>> model.Lp(21e-9)  # doctest: +ELLIPSIS
 272         -2.99...
 273         >>> model.Lp(100e-9)  # doctest: +ELLIPSIS
 274         1.386...
 275
 276         Notes
 277         -----
 278         The rescaling is designed to limit `L` to values strictly
 279         greater than the maximum `x` data value, so we use
 280
 281         .. math::
 282             L = [\exp(L_p))+1] * x_\text{max}
 283
 284         which has the inverse
 285
 286         .. math::
 287             L_p = \ln(L/x_\text{max}-1)
 288
 289         This will obviously effect the interpretation of the fit's covariance matrix.
 290         """
 291         x_max = self.info['x data (m)'].max()
 292         return numpy.log(L/x_max - 1)
 293
 294     def L(self, Lp):
 295         """Inverse of :meth:`Lp`.
 296
 297         Parameters
 298         ----------
 299         Lp : float
 300             rescaled version of `L`
 301
 302         Returns
 303         -------
 304         L : float
 305             contour length in meters
 306
 307         Examples
 308         --------
 309         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 310         >>> model = FJC(data=x_data, info={'x data (m)':x_data})
 311         >>> model.L(model.Lp(21e-9))  # doctest: +ELLIPSIS
 312         2.100...e-08
 313         >>> model.L(model.Lp(100e-9))  # doctest: +ELLIPSIS
 314         9.999...e-08
 315         """
 316         x_max = self.info['x data (m)'].max()
 317         return (numpy.exp(Lp)+1)*x_max
 318
 319     def model(self, params):
 320         """Extract the relavant arguments and use :func:`FJC_fn`.
 321
 322         Parameters
 323         ----------
 324         params : list of floats
 325             `[Lp, a]`, where the presence of `a` is optional.
 326         """
 327         # setup convenient aliases
 328         T = self.info['temperature (K)']
 329         x_data = self.info['x data (m)']
 330         L = self.L(params[0])
 331         if len(params) > 1:
 332             a = abs(params[1])
 333         else:
 334             a = self.info['Kuhn length (m)']
 335         # compute model data
 336         self._model_data[:] = FJC_fn(x_data, T, L, a)
 337         return self._model_data
 338
 339     def fit(self, *args, **kwargs):
 340         params = super(FJC, self).fit(*args, **kwargs)
 341         params[0] = self.L(params[0])  # convert Lp -> L
 342         if len(params) > 1:
 343             params[1] = abs(params[1])  # take the absolute value of `a`
 344         return params
 345
 346     def guess_initial_params(self, outqueue=None):
 347         """Guess initial fitting parameters.
 348
 349         Returns
 350         -------
 351         Lp : float
 352             A guess at the reparameterized contour length in meters.
 353         a : float (optional)
 354             A guess at the Kuhn length in meters.  If `.info` has a
 355             setting for `'Kuhn length (m)'`, `a` is not returned.
 356         """
 357         x_max = self.info['x data (m)'].max()
 358         a_given = 'Kuhn length (m)' in self.info
 359         if a_given:
 360             a = self.info['Kuhn length (m)']
 361         else:
 362             a = x_max / 10.0
 363         L = 1.5 * x_max
 364         Lp = self.Lp(L)
 365         if a_given:
 366             return [Lp]
 367         return [Lp, a]
 368
 369
 370 def inverse_FJC_PEG_fn(F_data, T=300, N=1, k=150., Lp=3.58e-10, Lh=2.8e-10, dG=3., a=7e-10):
 371     """Inverse poly(ethylene-glycol) adjusted extended FJC model.
 372
 373     Examples
 374     --------
 375     >>> kwargs = {'T':300.0, 'N':1, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10,
 376     ...           'dG':3.0, 'a':7e-10}
 377     >>> inverse_FJC_PEG_fn(F_data=200e-12, **kwargs)  # doctest: +ELLIPSIS
 378     3.487...e-10
 379
 380     Notes
 381     -----
 382     The function is that proposed by F. Oesterhelt, et al. [#]_.
 383
 384     .. math::
 385       x(F) = N_\text{S} \cdot \left[
 386         \left(
 387             \frac{L_\text{planar}}{
 388                   \exp\left(-\Delta G/k_B T\right) + 1}
 389             + \frac{L_\text{helical}}{
 390                   \exp\left(+\Delta G/k_B T\right) + 1}
 391           \right) \cdot \left(
 392             \coth\left(\frac{Fa}{k_B T}\right)
 393             - \frac{k_B T}{Fa}
 394           \right)
 395         + \frac{F}{K_\text{S}}
 396
 397     where :math:`N_\text{S}` is the number of segments,
 398     :math:`K_\text{S}` is the segment elasticicty,
 399     :math:`L_\text{planar}` is the ttt contour length,
 400     :math:`L_\text{helical}` is the ttg contour length,
 401     :math:`\Delta G` is the Gibbs free energy difference
 402      :math:`G_\text{planar}-G_\text{helical}`,
 403     :math:`a` is the Kuhn length,
 404     and :math:`F` is the chain tension.
 405
 406     .. [#]: F. Oesterhelt, M. Rief, and H.E. Gaub.
 407       "Single molecule force spectroscopy by AFM indicates helical
 408       structure of poly(ethylene-glycol) in water."
 409       New Journal of Physics. 1999.
 410       doi: `10.1088/1367-2630/1/1/006 <http://dx.doi.org/10.1088/1367-2630/1/1/006>`
 411       (section 4.2)
 412     """
 413     kBT = kB * T
 414     g = (dG - F_data*(Lp-Lh)) / kBT
 415     z = F_data*a/kBT
 416     return N * ((Lp/(numpy.exp(-g)+1) + Lh/(numpy.exp(+g)+1)) * (coth(z)-1./z)
 417                 + F_data/k)
 418
 419 def FJC_PEG_fn(x_data, T, N, k, Lp, Lh, dG, a):
 420     """Poly(ethylene-glycol) adjusted extended FJC model.
 421
 422     Parameters
 423     ----------
 424     z : float or array_like
 425         object whose inverse Langevin will be returned
 426     extreme : float
 427         value (close to one) for which we assume the inverse is +/-
 428         infinity.  This avoids problems with Newton-Raphson
 429         convergence in regions with low slope.
 430
 431     Examples
 432     --------
 433     >>> kwargs = {'T':300.0, 'N':1, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10,
 434     ...           'dG':3.0, 'a':7e-10}
 435     >>> FJC_PEG_fn(inverse_FJC_PEG_fn(1e-11, **kwargs), **kwargs)  # doctest: +ELLIPSIS
 436     9.999...e-12
 437     >>> FJC_PEG_fn(inverse_FJC_PEG_fn(3.4e-10, **kwargs), **kwargs)  # doctest: +ELLIPSIS
 438     3.400...e-10
 439     >>> FJC_PEG_fn(numpy.array([1e-10,2e-10,3e-10]), **kwargs)  # doctest: +ELLIPSIS
 440     array([  5.207...e-12,   1.257...e-11,   3.636...e-11])
 441     >>> kwargs['N'] = 123
 442     >>> FJC_PEG_fn(numpy.array([1e-8,2e-8,3e-8]), **kwargs)  # doctest: +ELLIPSIS
 443     array([  4.160...e-12,   9.302...e-12,   1.830...e-11])
 444
 445     Notes
 446     -----
 447     We approximate the PEG adjusted eFJC via Newton's method
 448     (:func:`scipy.optimize.newton`).  For the starting point, we use
 449     the standard FJC function with an averaged contour length.
 450     """
 451     kwargs = {'T':T, 'N':N, 'k':k, 'Lp':Lp, 'Lh':Lh, 'dG':dG, 'a':a}
 452     if is_iterable(x_data):
 453         ret = numpy.ndarray(shape=x_data.shape, dtype=x_data.dtype)
 454         for i,x in enumerate(x_data):
 455             ret[i] = FJC_PEG_fn(x, **kwargs)
 456         return ret
 457     if x_data == 0:
 458         return 0
 459     # Approximate with the simple FJC_fn()
 460     guess = numpy.inf
 461     L = N*max(Lp, Lh)
 462     while guess == numpy.inf:
 463         guess = FJC_fn(x_data, T=T, L=L, a=a)
 464         L *= 2.0
 465
 466     fn = lambda f: inverse_FJC_PEG_fn(guess*abs(f), **kwargs) - x_data
 467     ret = guess*abs(newton(func=fn, x0=1.0))
 468     return ret
 469
 470 class FJC_PEG (ModelFitter):
 471     """Fit the data to an poly(ethylene-glycol) adjusted extended FJC.
 472
 473     Examples
 474     --------
 475     Generate some example data
 476
 477     >>> kwargs = {'T':300.0, 'N':123, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10,
 478     ...           'dG':3.0, 'a':7e-10}
 479     >>> x_data = numpy.linspace(10e-9, 30e-9, num=20)
 480     >>> d_data = FJC_PEG_fn(x_data, **kwargs)
 481
 482     Fit the example data with a two-parameter fit (`N` and `a`).
 483
 484     >>> info = {
 485     ...     'temperature (K)': kwargs['T'],
 486     ...     'x data (m)': x_data,
 487     ...     'section elasticity (N/m)': kwargs['k'],
 488     ...     'planar section length (m)': kwargs['Lp'],
 489     ...     'helical section length (m)': kwargs['Lh'],
 490     ...     'Gibbs free energy difference (Gp - Gh) (kBT)': kwargs['dG'],
 491     ...     }
 492     >>> model = FJC_PEG(d_data, info=info, rescale=True)
 493     >>> outqueue = Queue()
 494     >>> N,a = model.fit(outqueue=outqueue)
 495     >>> fit_info = outqueue.get(block=False)
 496     >>> print(N)
 497     123.0
 498     >>> print(a)
 499     7e-10
 500
 501     Fit the example data with a one-parameter fit (`N`).  We introduce
 502     some error in our fixed Kuhn length for fun.
 503
 504     >>> info['Kuhn length (m)'] = 2*kwargs['a']
 505     >>> model = FJC_PEG(d_data, info=info, rescale=True)
 506     >>> N, = model.fit(outqueue=outqueue)
 507     >>> fit_info = outqueue.get(block=False)
 508     >>> print(N)  # doctest: +ELLIPSIS
 509     96.931...
 510     """
 511     def Lr(self, L):
 512         """To avoid invalid values of `L`, we reparametrize with `Lr`.
 513
 514         Parameters
 515         ----------
 516         L : float
 517             contour length in meters
 518
 519         Returns
 520         -------
 521         Lr : float
 522             rescaled version of `L`.
 523
 524         Examples
 525         --------
 526         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 527         >>> model = FJC_PEG(data=x_data, info={'x data (m)':x_data})
 528         >>> model.Lr(20e-9)  # doctest: +ELLIPSIS
 529         0.0
 530         >>> model.Lr(19e-9)  # doctest: +ELLIPSIS
 531         -0.0512...
 532         >>> model.Lr(21e-9)  # doctest: +ELLIPSIS
 533         0.0487...
 534         >>> model.Lr(100e-9)  # doctest: +ELLIPSIS
 535         1.609...
 536
 537         Notes
 538         -----
 539         The rescaling is designed to limit `L` to values strictly
 540         greater than zero, so we use
 541
 542         .. math::
 543             L = \exp(L_p) * x_\text{max}
 544
 545         which has the inverse
 546
 547         .. math::
 548             L_p = \ln(L/x_\text{max})
 549
 550         This will obviously effect the interpretation of the fit's covariance matrix.
 551         """
 552         x_max = self.info['x data (m)'].max()
 553         return numpy.log(L/x_max)
 554
 555     def L(self, Lr):
 556         """Inverse of :meth:`Lr`.
 557
 558         Parameters
 559         ----------
 560         Lr : float
 561             rescaled version of `L`
 562
 563         Returns
 564         -------
 565         L : float
 566             contour length in meters
 567
 568         Examples
 569         --------
 570         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 571         >>> model = FJC_PEG(data=x_data, info={'x data (m)':x_data})
 572         >>> model.L(model.Lr(21e-9))  # doctest: +ELLIPSIS
 573         2.100...e-08
 574         >>> model.L(model.Lr(100e-9))  # doctest: +ELLIPSIS
 575         9.999...e-08
 576         """
 577         x_max = self.info['x data (m)'].max()
 578         return numpy.exp(Lr)*x_max
 579
 580     def _kwargs(self):
 581         return {
 582             'T': self.info['temperature (K)'],
 583             'k': self.info['section elasticity (N/m)'],
 584             'Lp': self.info['planar section length (m)'],
 585             'Lh': self.info['helical section length (m)'],
 586             'dG': self.info['Gibbs free energy difference (Gp - Gh) (kBT)'],
 587             }
 588
 589     def model(self, params):
 590         """Extract the relavant arguments and use :func:`FJC_PEG_fn`.
 591
 592         Parameters
 593         ----------
 594         params : list of floats
 595             `[N, a]`, where the presence of `a` is optional.
 596         """
 597         # setup convenient aliases
 598         T = self.info['temperature (K)']
 599         x_data = self.info['x data (m)']
 600         N = self.L(params[0])
 601         if len(params) > 1:
 602             a = abs(params[1])
 603         else:
 604             a = self.info['Kuhn length (m)']
 605         kwargs = self._kwargs()
 606         # compute model data
 607         self._model_data[:] = FJC_PEG_fn(x_data, N=N, a=a, **kwargs)
 608         return self._model_data
 609
 610     def fit(self, *args, **kwargs):
 611         params = super(FJC_PEG, self).fit(*args, **kwargs)
 612         params[0] = self.L(params[0])  # convert Nr -> N
 613         if len(params) > 1:
 614             params[1] = abs(params[1])  # take the absolute value of `a`
 615         return params
 616
 617     def guess_initial_params(self, outqueue=None):
 618         """Guess initial fitting parameters.
 619
 620         Returns
 621         -------
 622         N : float
 623             A guess at the section count.
 624         a : float (optional)
 625             A guess at the Kuhn length in meters.  If `.info` has a
 626             setting for `'Kuhn length (m)'`, `a` is not returned.
 627         """
 628         x_max = self.info['x data (m)'].max()
 629         a_given = 'Kuhn length (m)' in self.info
 630         if a_given:
 631             a = self.info['Kuhn length (m)']
 632         else:
 633             a = x_max / 10.0
 634         f_max = self._data.max()
 635         kwargs = self._kwargs()
 636         kwargs['a'] = a
 637         x_section = inverse_FJC_PEG_fn(F_data=f_max, **kwargs)
 638         N = x_max / x_section;
 639         Nr = self.Lr(N)
 640         if a_given:
 641             return [Nr]
 642         return [Nr, a]
 643
 644
 645 def WLC_fn(x_data, T, L, p):
 646     """The worm like chain interpolation model.
 647
 648     Parameters
 649     ----------
 650     x_data : array_like
 651         x values for which the WLC tension should be calculated.
 652     T : float
 653         temperature in Kelvin.
 654     L : float
 655         contour length in meters.
 656     p : float
 657         persistence length in meters.
 658
 659     Returns
 660     -------
 661     f_data : array
 662         `F(x_data)`.
 663
 664     Examples
 665     --------
 666     >>> WLC_fn(numpy.array([1e-9, 5e-9, 10e-9], dtype=numpy.float),
 667     ...        T=300, L=15e-9, p=2.5e-10) # doctest: +ELLIPSIS
 668     array([  1.717...e-12,   1.070...e-11,   4.418...e-11])
 669
 670     Notes
 671     -----
 672     The function is the simple polynomial worm like chain
 673     interpolation forumula proposed by C. Bustamante, et al. [#]_.
 674
 675     .. math::
 676       F(x) = \frac{k_B T}{p} \left[
 677         \frac{1}{4}\left( \frac{1}{(1-x/L)^2} - 1 \right)
 678         + \frac{x}{L}
 679                              \right]
 680
 681     .. [#] C. Bustamante, J.F. Marko, E.D. Siggia, and S.Smith.
 682     "Entropic elasticity of lambda-phage DNA."
 683     Science. 1994.
 684     doi: `10.1126/science.8079175 <http://dx.doi.org/10.1126/science.8079175>`
 685     """
 686     a = kB * T / p
 687     scaled_data = x_data / L
 688     return a * (0.25*((1-scaled_data)**-2 - 1) + scaled_data)
 689
 690 class WLC (ModelFitter):
 691     """Fit the data to a worm like chain.
 692
 693     Examples
 694     --------
 695     Generate some example data
 696
 697     >>> T = 300  # Kelvin
 698     >>> L = 35e-9  # meters
 699     >>> p = 2.5e-10  # meters
 700     >>> x_data = numpy.linspace(10e-9, 30e-9, num=20)
 701     >>> d_data = WLC_fn(x_data, T=T, L=L, p=p)
 702
 703     Fit the example data with a two-parameter fit (`L` and `p`).
 704
 705     >>> info = {
 706     ...     'temperature (K)': T,
 707     ...     'x data (m)': x_data,
 708     ...     }
 709     >>> model = WLC(d_data, info=info, rescale=True)
 710     >>> outqueue = Queue()
 711     >>> L,p = model.fit(outqueue=outqueue)
 712     >>> fit_info = outqueue.get(block=False)
 713     >>> print(L)
 714     3.5e-08
 715     >>> print(p)
 716     2.5e-10
 717
 718     Fit the example data with a one-parameter fit (`L`).  We introduce
 719     some error in our fixed persistence length for fun.
 720
 721     >>> info['persistence length (m)'] = 2*p
 722     >>> model = WLC(d_data, info=info, rescale=True)
 723     >>> L, = model.fit(outqueue=outqueue)
 724     >>> fit_info = outqueue.get(block=False)
 725     >>> print(L)  # doctest: +ELLIPSIS
 726     3.318...e-08
 727     """
 728     def Lp(self, L):
 729         """To avoid invalid values of `L`, we reparametrize with `Lp`.
 730
 731         Parameters
 732         ----------
 733         L : float
 734             contour length in meters
 735
 736         Returns
 737         -------
 738         Lp : float
 739             rescaled version of `L`.
 740
 741         Examples
 742         --------
 743         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 744         >>> model = WLC(data=x_data, info={'x data (m)':x_data})
 745         >>> model.Lp(20e-9)  # doctest: +ELLIPSIS
 746         -inf
 747         >>> model.Lp(19e-9)  # doctest: +ELLIPSIS
 748         nan
 749         >>> model.Lp(21e-9)  # doctest: +ELLIPSIS
 750         -2.99...
 751         >>> model.Lp(100e-9)  # doctest: +ELLIPSIS
 752         1.386...
 753
 754         Notes
 755         -----
 756         The rescaling is designed to limit `L` to values strictly
 757         greater than the maximum `x` data value, so we use
 758
 759         .. math::
 760             L = [\exp(L_p))+1] * x_\text{max}
 761
 762         which has the inverse
 763
 764         .. math::
 765             L_p = \ln(L/x_\text{max}-1)
 766
 767         This will obviously effect the interpretation of the fit's covariance matrix.
 768         """
 769         x_max = self.info['x data (m)'].max()
 770         return numpy.log(L/x_max - 1)
 771
 772     def L(self, Lp):
 773         """Inverse of :meth:`Lp`.
 774
 775         Parameters
 776         ----------
 777         Lp : float
 778             rescaled version of `L`
 779
 780         Returns
 781         -------
 782         L : float
 783             contour length in meters
 784
 785         Examples
 786         --------
 787         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 788         >>> model = WLC(data=x_data, info={'x data (m)':x_data})
 789         >>> model.L(model.Lp(21e-9))  # doctest: +ELLIPSIS
 790         2.100...e-08
 791         >>> model.L(model.Lp(100e-9))  # doctest: +ELLIPSIS
 792         9.999...e-08
 793         """
 794         x_max = self.info['x data (m)'].max()
 795         return (numpy.exp(Lp)+1)*x_max
 796
 797     def model(self, params):
 798         """Extract the relavant arguments and use :func:`WLC_fn`.
 799
 800         Parameters
 801         ----------
 802         params : list of floats
 803             `[Lp, p]`, where the presence of `p` is optional.
 804         """
 805         # setup convenient aliases
 806         T = self.info['temperature (K)']
 807         x_data = self.info['x data (m)']
 808         L = self.L(params[0])
 809         if len(params) > 1:
 810             p = abs(params[1])
 811         else:
 812             p = self.info['persistence length (m)']
 813         # compute model data
 814         self._model_data[:] = WLC_fn(x_data, T, L, p)
 815         return self._model_data
 816
 817     def fit(self, *args, **kwargs):
 818         params = super(WLC, self).fit(*args, **kwargs)
 819         params[0] = self.L(params[0])  # convert Lp -> L
 820         if len(params) > 1:
 821             params[1] = abs(params[1])  # take the absolute value of `p`
 822         return params
 823
 824     def guess_initial_params(self, outqueue=None):
 825         """Guess initial fitting parameters.
 826
 827         Returns
 828         -------
 829         Lp : float
 830             A guess at the reparameterized contour length in meters.
 831         p : float (optional)
 832             A guess at the persistence length in meters.  If `.info`
 833             has a setting for `'persistence length (m)'`, `p` is not
 834             returned.
 835         """
 836         x_max = self.info['x data (m)'].max()
 837         p_given = 'persistence length (m)' in self.info
 838         if p_given:
 839             p = self.info['persistence length (m)']
 840         else:
 841             p = x_max / 10.0
 842         L = 1.5 * x_max
 843         Lp = self.Lp(L)
 844         if p_given:
 845             return [Lp]
 846         return [Lp, p]
 847
 848
 849 class PolymerFitPlugin (Plugin):
 850     """Polymer model (WLC, FJC, etc.) fitting.
 851     """
 852     def __init__(self):
 853         super(PolymerFitPlugin, self).__init__(name='polymer_fit')
 854         self._arguments = [  # For Command initialization
 855             Argument('polymer model', default='WLC', help="""
 856 Select the default polymer model for 'polymer fit'.  See the
 857 documentation for descriptions of available algorithms.
 858 """.strip()),
 859             Argument('FJC Kuhn length', type='float', default=4e-10,
 860                     help='Kuhn length in meters'),
 861             Argument('FJC_PEG Kuhn length', type='float', default=4e-10,
 862                     help='Kuhn length in meters'),
 863             Argument('FJC_PEG elasticity', type='float', default=150.0,
 864                     help='Elasticity of a PEG segment in Newtons per meter.'),
 865             Argument('FJC_PEG delta G', type='float', default=3.0, help="""
 866 Gibbs free energy difference between trans-trans-trans (ttt) and
 867 trans-trans-gauche (ttg) PEG states in units of kBT.
 868 """.strip()),
 869             Argument('FJC_PEG L_helical', type='float', default=2.8e-10,
 870                     help='Contour length of PEG in the ttg state.'),
 871             Argument('FJC_PEG L_planar', type='float', default=3.58e-10,
 872                     help='Contour length of PEG in the ttt state.'),
 873             Argument('WLC persistence length', type='float', default=4e-10,
 874                     help='Persistence length in meters'),
 875             ]
 876         self._settings = [
 877             Setting(section=self.setting_section, help=self.__doc__)]
 878         for argument in self._arguments:
 879             self._settings.append(argument_to_setting(
 880                     self.setting_section, argument))
 881             argument.default = None # if argument isn't given, use the config.
 882         self._input_columns = [
 883             ('distance column', 'cantilever adjusted extension (m)',
 884              """
 885 Name of the column to use as the surface position input.
 886 """.strip()),
 887             ('deflection column', 'deflection (N)',
 888              """
 889 Name of the column to use as the deflection input.
 890 """.strip()),
 891             ]
 892         self._commands = [
 893             PolymerFitCommand(self), PolymerFitPeaksCommand(self),
 894             TranslateFlatPeaksCommand(self),
 895             ]
 896
 897     def dependencies(self):
 898         return ['vclamp']
 899
 900     def default_settings(self):
 901         return self._settings
 902
 903
 904 class PolymerFitCommand (ColumnAddingCommand):
 905     """Polymer model (WLC, FJC, etc.) fitting.
 906
 907     Fits an entropic elasticity function to a given chunk of the
 908     curve.  Fit quality compares the residual with the thermal noise
 909     (a good fit should be 1 or less).
 910
 911     Because the models are complicated and varied, you should
 912     configure the command by setting configuration options instead of
 913     using command arguments.  TODO: argument_to_setting()
 914     """
 915     def __init__(self, plugin):
 916         super(PolymerFitCommand, self).__init__(
 917             name='polymer fit',
 918             columns=plugin._input_columns,
 919             new_columns=[
 920                 ('output tension column', 'polymer tension',
 921                  """
 922 Name of the column (without units) to use as the polymer tension output.
 923 """.strip()),
 924                 ],
 925             arguments=[
 926                 Argument(name='fit parameters info name', type='string',
 927                          default='polymer fit',
 928                          help="""
 929 Name (without units) for storing the fit parameters in the `.info` dictionary.
 930 """.strip()),
 931                 Argument(name='bounds', type='point', optional=False, count=2,
 932                          help="""
 933 Indicies of points bounding the selected data.
 934 """.strip()),
 935                 Argument(name='peak extraction method', default='convex hull',
 936                          help=(
 937                         'Select the method used to extract the peak '
 938                         'deflection from the fitted model.  `convex hull`, '
 939                         '`peak data`, or `peak model`.')),
 940                 ] + plugin._arguments,
 941             help=self.__doc__, plugin=plugin)
 942
 943     def _run(self, hooke, inqueue, outqueue, params):
 944         self._add_to_command_stack(params)
 945         log = logging.getLogger('hooke')
 946         params = self._setup_params(hooke, params)
 947         data = self._block(hooke, params)
 948         model = params['polymer model']
 949         dist_data = self._get_column(
 950             hooke=hooke, params=params, column_name='distance column')
 951         def_data = self._get_column(
 952             hooke=hooke, params=params, column_name='deflection column')
 953         start,stop = params['bounds']
 954         tension_data,ps = self.fit_polymer_model(
 955             params, dist_data, def_data, start, stop, outqueue)
 956         if params['peak extraction method'] == 'convex hull':
 957             peak_def,hull = self._convex_hull_deflection(
 958                 distance_data=dist_data, deflection_data=def_data,
 959                 deflection_model=tension_data, start=start, stop=stop,
 960                 outqueue=outqueue)
 961         elif params['peak extraction method'] == 'peak data':
 962             peak_def = numpy.nanmax(def_data[start:stop])
 963         elif params['peak extraction method'] == 'peak model':
 964             peak_def = numpy.nanmax(tension_data[start:stop])
 965         else:
 966             raise ValueError(params['peak extraction method'])
 967         log.debug('{}: {}'.format(params['peak extraction method'], peak_def))
 968         ps['peak deflection'] = peak_def
 969         ps['peak extraction method'] = params['peak extraction method']
 970         ps['model'] = model
 971         data.info[params['fit parameters info name']] = ps
 972         self._set_column(hooke=hooke, params=params,
 973                          column_name='output tension column',
 974                          values=tension_data)
 975
 976     def _setup_params(self, hooke, params):
 977         for key,value in params.items():
 978             if value == None:  # Use configured default value.
 979                 params[key] = self.plugin.config[key]
 980         name,def_unit = split_data_label(params['deflection column'])
 981         params['output tension column'] = join_data_label(
 982             params['output tension column'], def_unit)
 983         return params
 984
 985     def fit_polymer_model(self, params, dist_data, def_data, start, stop,
 986                           outqueue=None):
 987         """Railyard for the `fit_*_model` family.
 988
 989         Uses the `polymer model` configuration setting to call the
 990         appropriate backend algorithm.
 991         """
 992         fn = getattr(self, 'fit_%s_model'
 993                      % params['polymer model'].replace('-','_'))
 994         return fn(params, dist_data, def_data, start, stop, outqueue)
 995
 996     def fit_FJC_model(self, params, dist_data, def_data, start, stop,
 997                       outqueue=None):
 998         """Fit the data with :class:`FJC`.
 999         """
1000         info = {
1001             'temperature (K)': self.plugin.config['temperature'],
1002             'x data (m)': dist_data[start:stop],
1003             }
1004         if True:  # TODO: optionally free persistence length
1005             info['Kuhn length (m)'] = (
1006                 params['FJC Kuhn length'])
1007         model = FJC(def_data[start:stop], info=info, rescale=True)
1008         queue = Queue()
1009         params = model.fit(outqueue=queue)
1010         if True:  # TODO: if Kuhn length fixed
1011             a = info['Kuhn length (m)']
1012         else:
1013             a = params[1]
1014         L = params[0]
1015         T = info['temperature (K)']
1016         fit_info = queue.get(block=False)
1017         f_data = numpy.ones(dist_data.shape, dtype=dist_data.dtype) * numpy.nan
1018         f_data[start:stop] = FJC_fn(dist_data[start:stop], T=T, L=L, a=a)
1019         return [f_data, fit_info]
1020
1021     def fit_FJC_PEG_model(self, params, dist_data, def_data, start, stop,
1022                           outqueue=None):
1023         """Fit the data with :class:`FJC_PEG`.
1024         """
1025         info = {
1026             'temperature (K)': self.plugin.config['temperature'],
1027             'x data (m)': dist_data[start:stop],
1028             # TODO: more info
1029             }
1030         if True:  # TODO: optionally free persistence length
1031             info['Kuhn length (m)'] = (
1032                 params['FJC Kuhn length'])
1033         model = FJC_PEG(def_data[start:stop], info=info, rescale=True)
1034         queue = Queue()
1035         params = model.fit(outqueue=queue)
1036         if True:  # TODO: if Kuhn length fixed
1037             a = info['Kuhn length (m)']
1038         else:
1039             a = params[1]
1040         N = params[0]
1041         T = info['temperature (K)']
1042         fit_info = queue.get(block=False)
1043         f_data = numpy.ones(dist_data.shape, dtype=dist_data.dtype) * numpy.nan
1044         f_data[start:stop] = FJC_PEG_fn(dist_data[start:stop], **kwargs)
1045         return [f_data, fit_info]
1046
1047     def fit_WLC_model(self, params, dist_data, def_data, start, stop,
1048                       outqueue=None):
1049         """Fit the data with :class:`WLC`.
1050         """
1051         info = {
1052             'temperature (K)': self.plugin.config['temperature'],
1053             'x data (m)': dist_data[start:stop],
1054             }
1055         if True:  # TODO: optionally free persistence length
1056             info['persistence length (m)'] = (
1057                 params['WLC persistence length'])
1058         model = WLC(def_data[start:stop], info=info, rescale=True)
1059         queue = Queue()
1060         params = model.fit(outqueue=queue)
1061         if True:  # TODO: if persistence length fixed
1062             p = info['persistence length (m)']
1063         else:
1064             p = params[1]
1065             info['persistence length (m)'] = p
1066         L = params[0]
1067         info['contour length (m)'] = L
1068         T = info['temperature (K)']
1069         fit_info = queue.get(block=False)
1070         info['fit'] = fit_info
1071         info.pop('x data (m)')
1072         f_data = numpy.ones(dist_data.shape, dtype=dist_data.dtype) * numpy.nan
1073         f_data[start:stop] = WLC_fn(dist_data[start:stop], T=T, L=L, p=p)
1074         return [f_data, info]
1075
1076     def _convex_hull_deflection(self, distance_data, deflection_data,
1077                                 deflection_model, start, stop, outqueue=None):
1078         """Return the last model deflection point inside the data hull.
1079
1080         If the `stop` point is a bit past it's ideal location, the
1081         rapid rise of some polymer models can lead to artificially
1082         high peak deflections if the largest distance value is plugged
1083         into the polymer model directly.  As a more robust estimation
1084         of the peak deflection, we calculate the convex hull
1085         surrounding the distance-deflection data and return the last
1086         model deflection point that is inside that hull.
1087         """
1088         distance_data = distance_data[start:stop]
1089         deflection_data = deflection_data[start:stop]
1090         deflection_model = deflection_model[start:stop]
1091         unfolds = []
1092         hulls = []
1093         data = numpy.array(
1094             [[x,y] for x,y in zip(distance_data, deflection_data)])
1095         model = numpy.array(
1096             [[x,y] for x,y in zip(distance_data, deflection_model)])
1097         hull = qhull(data)
1098         inside = points_inside_hull(hull, model)
1099         # distance data is not necessarily monatonic
1100         index_inside = [j for j,i in enumerate(inside) if i is True]
1101         distance_inside = [(distance_data[i],i) for i in index_inside]
1102         if distance_inside:
1103             peak_dist,peak_index = max(distance_inside)
1104             unfold = deflection_model[peak_index]
1105         else:  # all model points are outside the hull?!
1106             unfold = None
1107         return (unfold, hull)
1108
1109
1110 class PolymerFitPeaksCommand (ColumnAccessCommand):
1111     """Polymer model (WLC, FJC, etc.) fitting.
1112
1113     Use :class:`PolymerFitCommand` to fit the each peak in a list of
1114     previously determined peaks.
1115     """
1116     def __init__(self, plugin):
1117         super(PolymerFitPeaksCommand, self).__init__(
1118             name='polymer fit peaks',
1119             columns=plugin._input_columns,
1120             arguments=[
1121                 Argument(name='peak info name', type='string',
1122                          default='polymer peaks',
1123                          help="""
1124 Name for the list of peaks in the `.info` dictionary.
1125 """.strip()),
1126                 Argument(name='peak index', type='int', count=-1, default=None,
1127                          help="""
1128 Index of the selected peak in the list of peaks.  Use `None` to fit all peaks.
1129 """.strip()),
1130                 ] + plugin._arguments,
1131             help=self.__doc__, plugin=plugin)
1132
1133     def _run(self, hooke, inqueue, outqueue, params):
1134         self._add_to_command_stack(params)
1135         data = self._block(hooke, params)
1136         fit_command = hooke.command_by_name['polymer fit']
1137         inq = Queue()
1138         outq = Queue()
1139         curve = params['curve']
1140         params['curve'] = None
1141         p = copy.deepcopy(params)
1142         p['curve'] = params['curve']
1143         del(p['peak info name'])
1144         del(p['peak index'])
1145         for i,peak in enumerate(data.info[params['peak info name']]):
1146             if params['peak index'] == None or i in params['peak index']:
1147                 p['bounds'] = [peak.index, peak.post_index()]
1148                 p['output tension column'] = peak.name
1149                 p['fit parameters info name'] = peak.name
1150                 p['stack'] = False
1151                 fit_command.run(hooke, inq, outq, **p)
1152             ret = outq.get()
1153             if not isinstance(ret, Success):
1154                 raise ret
1155
1156
1157 class TranslateFlatPeaksCommand (ColumnAccessCommand):
1158     """Translate flat filter peaks into polymer peaks for fitting.
1159
1160     Use :class:`~hooke.plugin.flatfilt.FlatPeaksCommand` creates a
1161     list of peaks for regions with large derivatives.  For velocity
1162     clamp measurements, these regions are usually the rebound phase
1163     after a protein domain unfolds, the cantilever detaches, etc.
1164     Because these features occur after the polymer loading phase, we
1165     need to shift the selected regions back to align them with the
1166     polymer loading regions.
1167     """
1168     def __init__(self, plugin):
1169         super(TranslateFlatPeaksCommand, self).__init__(
1170             name='flat peaks to polymer peaks',
1171             columns=plugin._input_columns,
1172             arguments=[
1173                 Argument(name='input peak info name', type='string',
1174                          default='flat filter peaks',
1175                          help="""
1176 Name for the input peaks in the `.info` dictionary.
1177 """.strip()),
1178                 Argument(name='output peak info name', type='string',
1179                          default='polymer peaks',
1180                          help="""
1181 Name for the ouptput peaks in the `.info` dictionary.
1182 """.strip()),
1183                 Argument(name='end offset', type='int', default=-1,
1184                          help="""
1185 Number of points between the end of a new peak and the start of the old.
1186 """.strip()),
1187                 Argument(name='start fraction', type='float', default=0.2,
1188                          help="""
1189 Place the start of the new peak at `start_fraction` from the end of
1190 the previous old peak to the end of the new peak.  Because the first
1191 new peak will have no previous old peak, it uses a distance of zero
1192 instead.
1193 """.strip()),
1194             ] + plugin._arguments,
1195             help=self.__doc__, plugin=plugin)
1196
1197     def _run(self, hooke, inqueue, outqueue, params):
1198         self._add_to_command_stack(params)
1199         data = self._block(hooke, params)
1200         dist_data = self._get_column(
1201             hooke=hooke, params=params, column_name='distance column')
1202         def_data = self._get_column(
1203             hooke=hooke, params=params, column_name='deflection column')
1204         previous_old_stop = numpy.absolute(dist_data).argmin()
1205         new = []
1206         try:
1207             peaks = data.info[params['input peak info name']]
1208         except KeyError, e:
1209             raise Failure('No value for %s in %s.info (%s): %s'
1210                           % (params['input peak info name'], data.info['name'],
1211                              sorted(data.info.keys()), e))
1212         for i,peak in enumerate(peaks):
1213             next_old_start = peak.index
1214             stop = next_old_start + params['end offset']
1215             dist_start = (
1216                 dist_data[previous_old_stop]
1217                 + params['start fraction']*(
1218                     dist_data[stop] - dist_data[previous_old_stop]))
1219             start = numpy.absolute(dist_data - dist_start).argmin()
1220             p = Peak('polymer peak %d' % i,
1221                      index=start,
1222                      values=def_data[start:stop])
1223             new.append(p)
1224             previous_old_stop = peak.post_index()
1225         data.info[params['output peak info name']] = new
1226
1227
1228 # TODO:
1229 # def dist2fit(self):
1230 #     '''Calculates the average distance from data to fit, scaled by
1231 #     the standard deviation of the free cantilever area (thermal
1232 #     noise).
1233 #     '''