hooke/plugin/polymer_fit.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # Copyright (C) 2010-2012 W. Trevor King <wking@drexel.edu>
   4 #
   5 # This file is part of Hooke.
   6 #
   7 # Hooke is free software: you can redistribute it and/or modify it under the
   8 # terms of the GNU Lesser General Public License as published by the Free
   9 # Software Foundation, either version 3 of the License, or (at your option) any
  10 # later version.
  11 #
  12 # Hooke is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  14 # A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
  15 # details.
  16 #
  17 # You should have received a copy of the GNU Lesser General Public License
  18 # along with Hooke.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 """The ``polymer_fit`` module proviews :class:`PolymerFitPlugin` and
  21 serveral associated :class:`hooke.command.Command`\s for Fitting
  22 velocity-clamp data to various polymer models (WLC, FJC, etc.).
  23 """
  24
  25 import copy
  26 from Queue import Queue
  27 import logging
  28 import StringIO
  29 import sys
  30
  31 import numpy
  32 from scipy.optimize import newton
  33
  34 from ..command import Command, Argument, Success, Failure
  35 from ..config import Setting
  36 from ..curve import Data
  37 from ..util.callback import is_iterable
  38 from ..util.fit import PoorFit, ModelFitter
  39 from ..util.peak import Peak
  40 from ..util.quickhull import qhull, points_inside_hull
  41 from ..util.si import join_data_label, split_data_label
  42 from . import Plugin, argument_to_setting
  43 from .curve import ColumnAccessCommand, ColumnAddingCommand
  44
  45
  46 kB = 1.3806503e-23  # Joules/Kelvin
  47
  48
  49 def coth(z):
  50     """Hyperbolic cotangent.
  51
  52     Examples
  53     --------
  54     >>> coth(1.19967874)  # doctest: +ELLIPSIS
  55     1.199678...
  56
  57     Notes
  58     -----
  59     From `MathWorld`_
  60
  61     .. math::
  62       \coth(z) \equiv \frac{\exp(z) + \exp(-z)}{\exp(z) - \exp(-z)}
  63          = \frac{1}{\tanh(z)}
  64
  65     .. _MathWorld:
  66       http://mathworld.wolfram.com/HyperbolicCotangent.html
  67     """
  68     return 1.0/numpy.tanh(z)
  69
  70 def arccoth(z):
  71     """Inverse hyperbolic cotangent.
  72
  73     Examples
  74     --------
  75     >>> arccoth(1.19967874)  # doctest: +ELLIPSIS
  76     1.199678...
  77     >>> arccoth(coth(numpy.pi))  # doctest: +ELLIPSIS
  78     3.1415...
  79
  80     Notes
  81     -----
  82     Inverting from the :func:`definition <coth>`.
  83
  84     .. math::
  85       z \equiv \atanh\left( \frac{1}{\coth(z)} \right)
  86     """
  87     return numpy.arctanh(1.0/z)
  88
  89 def Langevin(z):
  90     """Langevin function.
  91
  92     Examples
  93     --------
  94     >>> Langevin(numpy.pi)  # doctest: +ELLIPSIS
  95     0.685...
  96
  97     Notes
  98     -----
  99     From `Wikipedia`_ or Hatfield [#]_
 100
 101     .. math::
 102       L(z) \equiv \coth(z) - \frac{1}{z}
 103
 104     .. _Wikipedia:
 105       http://en.wikipedia.org/wiki/Brillouin_and_Langevin_functions#Langevin_Function
 106
 107     .. [#]: J.W. Hatfield and S.R. Quake
 108       "Dynamic Properties of an Extended Polymer in Solution."
 109       Phys. Rev. Lett. 1999.
 110       doi: `10.1103/PhysRevLett.82.3548 <http://dx.doi.org/10.1103/PhysRevLett.82.3548>`
 111     """
 112     return coth(z) - 1.0/z
 113
 114 def inverse_Langevin(z, extreme=1.0 - 1e-8):
 115     """Inverse Langevin function.
 116
 117     Parameters
 118     ----------
 119     z : float or array_like
 120         object whose inverse Langevin will be returned
 121     extreme : float
 122         value (close to one) for which we assume the inverse is +/-
 123         infinity.  This avoids problems with Newton-Raphson
 124         convergence in regions with low slope.
 125
 126     Examples
 127     --------
 128     >>> inverse_Langevin(Langevin(numpy.pi))  # doctest: +ELLIPSIS
 129     3.14159...
 130     >>> inverse_Langevin(Langevin(numpy.array([1,2,3])))  # doctest: +ELLIPSIS
 131     array([ 1.,  2.,  3.])
 132
 133     Notes
 134     -----
 135     We approximate the inverse Langevin via Newton's method
 136     (:func:`scipy.optimize.newton`).  For the starting point, we take
 137     the first three terms from the Taylor series (from `Wikipedia`_).
 138
 139     .. math::
 140       L^{-1}(z) = 3z + \frac{9}{5}z^3 + \frac{297}{175}z^5 + \dots
 141
 142     .. _Wikipedia:
 143       http://en.wikipedia.org/wiki/Brillouin_and_Langevin_functions#Langevin_Function
 144     """
 145     if is_iterable(z):
 146         ret = numpy.ndarray(shape=z.shape, dtype=z.dtype)
 147         for i,z_i in enumerate(z):
 148             ret[i] = inverse_Langevin(z_i)
 149         return ret
 150     if z > extreme:
 151         return numpy.inf
 152     elif z < -extreme:
 153         return -numpy.inf
 154     # Catch stdout since sometimes the newton function prints exit
 155     # messages to stdout, e.g. "Tolerance of %s reached".
 156     stdout = sys.stdout
 157     tmp_stdout = StringIO.StringIO()
 158     sys.stdout = tmp_stdout
 159     ret = newton(func=lambda x: Langevin(x)-z,
 160                   x0=3*z + 9./5.*z**3 + 297./175.*z**5,)
 161     sys.stdout = stdout
 162     output = tmp_stdout.getvalue()
 163     return ret
 164
 165 def FJC_fn(x_data, T, L, a):
 166     """The freely jointed chain model.
 167
 168     Parameters
 169     ----------
 170     x_data : array_like
 171         x values for which the WLC tension should be calculated.
 172     T : float
 173         temperature in Kelvin.
 174     L : float
 175         contour length in meters.
 176     a : float
 177         Kuhn length in meters.
 178
 179     Returns
 180     -------
 181     f_data : array
 182         `F(x_data)`.
 183
 184     Examples
 185     --------
 186     >>> FJC_fn(numpy.array([1e-9, 5e-9, 10e-9], dtype=numpy.float),
 187     ...        T=300, L=15e-9, a=2.5e-10) # doctest: +ELLIPSIS
 188     array([  3.322...-12,   1.78...e-11,   4.889...e-11])
 189
 190     Notes
 191     -----
 192     The freely jointed chain model is
 193
 194     .. math::
 195       F(x) = \frac{k_B T}{a} L^{-1}\left( \frac{x}{L} \right)
 196
 197     where :math:`L^{-1}` is the :func:`inverse_Langevin`, :math:`a` is
 198     the Kuhn length, and :math:`L` is the contour length [#]_.  For
 199     the inverse formulation (:math:`x(F)`), see Ray and Akhremitchev [#]_.
 200
 201
 202     .. [#]: J.W. Hatfield and S.R. Quake
 203       "Dynamic Properties of an Extended Polymer in Solution."
 204       Phys. Rev. Lett. 1999.
 205       doi: `10.1103/PhysRevLett.82.3548 <http://dx.doi.org/10.1103/PhysRevLett.82.3548>`
 206
 207     .. [#] C. Ray and B.B. Akhremitchev.
 208       `"Fitting Force Curves by the Extended Freely Jointed Chain Model" <http://www.chem.duke.edu/~boris/research/force_spectroscopy/fit_efjc.pdf>`
 209     """
 210     return kB*T / a * inverse_Langevin(x_data/L)
 211
 212 class FJC (ModelFitter):
 213     """Fit the data to a freely jointed chain.
 214
 215     Examples
 216     --------
 217     Generate some example data
 218
 219     >>> T = 300  # Kelvin
 220     >>> L = 35e-9  # meters
 221     >>> a = 2.5e-10  # meters
 222     >>> x_data = numpy.linspace(10e-9, 30e-9, num=20)
 223     >>> d_data = FJC_fn(x_data, T=T, L=L, a=a)
 224
 225     Fit the example data with a two-parameter fit (`L` and `a`).
 226
 227     >>> info = {
 228     ...     'temperature (K)': T,
 229     ...     'x data (m)': x_data,
 230     ...     }
 231     >>> model = FJC(d_data, info=info, rescale=True)
 232     >>> outqueue = Queue()
 233     >>> L,a = model.fit(outqueue=outqueue)
 234     >>> fit_info = outqueue.get(block=False)
 235     >>> print L
 236     3.5e-08
 237     >>> print a
 238     2.5e-10
 239
 240     Fit the example data with a one-parameter fit (`L`).  We introduce
 241     some error in our fixed Kuhn length for fun.
 242
 243     >>> info['Kuhn length (m)'] = 2*a
 244     >>> model = FJC(d_data, info=info, rescale=True)
 245     >>> L, = model.fit(outqueue=outqueue)
 246     >>> fit_info = outqueue.get(block=False)
 247     >>> print L  # doctest: +ELLIPSIS
 248     3.199...e-08
 249     """
 250     def Lp(self, L):
 251         """To avoid invalid values of `L`, we reparametrize with `Lp`.
 252
 253         Parameters
 254         ----------
 255         L : float
 256             contour length in meters
 257
 258         Returns
 259         -------
 260         Lp : float
 261             rescaled version of `L`.
 262
 263         Examples
 264         --------
 265         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 266         >>> model = FJC(data=x_data, info={'x data (m)':x_data})
 267         >>> model.Lp(20e-9)  # doctest: +ELLIPSIS
 268         -inf
 269         >>> model.Lp(19e-9)  # doctest: +ELLIPSIS
 270         nan
 271         >>> model.Lp(21e-9)  # doctest: +ELLIPSIS
 272         -2.99...
 273         >>> model.Lp(100e-9)  # doctest: +ELLIPSIS
 274         1.386...
 275
 276         Notes
 277         -----
 278         The rescaling is designed to limit `L` to values strictly
 279         greater than the maximum `x` data value, so we use
 280
 281         .. math::
 282             L = [\exp(L_p))+1] * x_\text{max}
 283
 284         which has the inverse
 285
 286         .. math::
 287             L_p = \ln(L/x_\text{max}-1)
 288
 289         This will obviously effect the interpretation of the fit's covariance matrix.
 290         """
 291         x_max = self.info['x data (m)'].max()
 292         return numpy.log(L/x_max - 1)
 293
 294     def L(self, Lp):
 295         """Inverse of :meth:`Lp`.
 296
 297         Parameters
 298         ----------
 299         Lp : float
 300             rescaled version of `L`
 301
 302         Returns
 303         -------
 304         L : float
 305             contour length in meters
 306
 307         Examples
 308         --------
 309         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 310         >>> model = FJC(data=x_data, info={'x data (m)':x_data})
 311         >>> model.L(model.Lp(21e-9))  # doctest: +ELLIPSIS
 312         2.100...e-08
 313         >>> model.L(model.Lp(100e-9))  # doctest: +ELLIPSIS
 314         9.999...e-08
 315         """
 316         x_max = self.info['x data (m)'].max()
 317         return (numpy.exp(Lp)+1)*x_max
 318
 319     def model(self, params):
 320         """Extract the relavant arguments and use :func:`FJC_fn`.
 321
 322         Parameters
 323         ----------
 324         params : list of floats
 325             `[Lp, a]`, where the presence of `a` is optional.
 326         """
 327         # setup convenient aliases
 328         T = self.info['temperature (K)']
 329         x_data = self.info['x data (m)']
 330         L = self.L(params[0])
 331         if len(params) > 1:
 332             a = abs(params[1])
 333         else:
 334             a = self.info['Kuhn length (m)']
 335         # compute model data
 336         self._model_data[:] = FJC_fn(x_data, T, L, a)
 337         return self._model_data
 338
 339     def fit(self, *args, **kwargs):
 340         params = super(FJC, self).fit(*args, **kwargs)
 341         params[0] = self.L(params[0])  # convert Lp -> L
 342         if len(params) > 1:
 343             params[1] = abs(params[1])  # take the absolute value of `a`
 344         return params
 345
 346     def guess_initial_params(self, outqueue=None):
 347         """Guess initial fitting parameters.
 348
 349         Returns
 350         -------
 351         Lp : float
 352             A guess at the reparameterized contour length in meters.
 353         a : float (optional)
 354             A guess at the Kuhn length in meters.  If `.info` has a
 355             setting for `'Kuhn length (m)'`, `a` is not returned.
 356         """
 357         x_max = self.info['x data (m)'].max()
 358         a_given = 'Kuhn length (m)' in self.info
 359         if a_given:
 360             a = self.info['Kuhn length (m)']
 361         else:
 362             a = x_max / 10.0
 363         L = 1.5 * x_max
 364         Lp = self.Lp(L)
 365         if a_given:
 366             return [Lp]
 367         return [Lp, a]
 368
 369
 370 def inverse_FJC_PEG_fn(F_data, T=300, N=1, k=150., Lp=3.58e-10, Lh=2.8e-10, dG=3., a=7e-10):
 371     """Inverse poly(ethylene-glycol) adjusted extended FJC model.
 372
 373     Examples
 374     --------
 375     >>> kwargs = {'T':300.0, 'N':1, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10,
 376     ...           'dG':3.0, 'a':7e-10}
 377     >>> inverse_FJC_PEG_fn(F_data=200e-12, **kwargs)  # doctest: +ELLIPSIS
 378     3.487...e-10
 379
 380     Notes
 381     -----
 382     The function is that proposed by F. Oesterhelt, et al. [#]_.
 383
 384     .. math::
 385       x(F) = N_\text{S} \cdot \left[
 386         \left(
 387             \frac{L_\text{planar}}{
 388                   \exp\left(-\Delta G/k_B T\right) + 1}
 389             + \frac{L_\text{helical}}{
 390                   \exp\left(+\Delta G/k_B T\right) + 1}
 391           \right) \cdot \left(
 392             \coth\left(\frac{Fa}{k_B T}\right)
 393             - \frac{k_B T}{Fa}
 394           \right)
 395         + \frac{F}{K_\text{S}}
 396
 397     where :math:`N_\text{S}` is the number of segments,
 398     :math:`K_\text{S}` is the segment elasticicty,
 399     :math:`L_\text{planar}` is the ttt contour length,
 400     :math:`L_\text{helical}` is the ttg contour length,
 401     :math:`\Delta G` is the Gibbs free energy difference
 402      :math:`G_\text{planar}-G_\text{helical}`,
 403     :math:`a` is the Kuhn length,
 404     and :math:`F` is the chain tension.
 405
 406     .. [#]: F. Oesterhelt, M. Rief, and H.E. Gaub.
 407       "Single molecule force spectroscopy by AFM indicates helical
 408       structure of poly(ethylene-glycol) in water."
 409       New Journal of Physics. 1999.
 410       doi: `10.1088/1367-2630/1/1/006 <http://dx.doi.org/10.1088/1367-2630/1/1/006>`
 411       (section 4.2)
 412     """
 413     kBT = kB * T
 414     g = (dG - F_data*(Lp-Lh)) / kBT
 415     z = F_data*a/kBT
 416     return N * ((Lp/(numpy.exp(-g)+1) + Lh/(numpy.exp(+g)+1)) * (coth(z)-1./z)
 417                 + F_data/k)
 418
 419 def FJC_PEG_fn(x_data, T, N, k, Lp, Lh, dG, a):
 420     """Poly(ethylene-glycol) adjusted extended FJC model.
 421
 422     Parameters
 423     ----------
 424     z : float or array_like
 425         object whose inverse Langevin will be returned
 426     extreme : float
 427         value (close to one) for which we assume the inverse is +/-
 428         infinity.  This avoids problems with Newton-Raphson
 429         convergence in regions with low slope.
 430
 431     Examples
 432     --------
 433     >>> kwargs = {'T':300.0, 'N':1, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10,
 434     ...           'dG':3.0, 'a':7e-10}
 435     >>> FJC_PEG_fn(inverse_FJC_PEG_fn(1e-11, **kwargs), **kwargs)  # doctest: +ELLIPSIS
 436     9.999...e-12
 437     >>> FJC_PEG_fn(inverse_FJC_PEG_fn(3.4e-10, **kwargs), **kwargs)  # doctest: +ELLIPSIS
 438     3.400...e-10
 439     >>> FJC_PEG_fn(numpy.array([1e-10,2e-10,3e-10]), **kwargs)  # doctest: +ELLIPSIS
 440     array([  5.207...e-12,   1.257...e-11,   3.636...e-11])
 441     >>> kwargs['N'] = 123
 442     >>> FJC_PEG_fn(numpy.array([1e-8,2e-8,3e-8]), **kwargs)  # doctest: +ELLIPSIS
 443     array([  4.160...e-12,   9.302...e-12,   1.830...e-11])
 444
 445     Notes
 446     -----
 447     We approximate the PEG adjusted eFJC via Newton's method
 448     (:func:`scipy.optimize.newton`).  For the starting point, we use
 449     the standard FJC function with an averaged contour length.
 450     """
 451     kwargs = {'T':T, 'N':N, 'k':k, 'Lp':Lp, 'Lh':Lh, 'dG':dG, 'a':a}
 452     if is_iterable(x_data):
 453         ret = numpy.ndarray(shape=x_data.shape, dtype=x_data.dtype)
 454         for i,x in enumerate(x_data):
 455             ret[i] = FJC_PEG_fn(x, **kwargs)
 456         return ret
 457     if x_data == 0:
 458         return 0
 459     # Approximate with the simple FJC_fn()
 460     guess = numpy.inf
 461     L = N*max(Lp, Lh)
 462     while guess == numpy.inf:
 463         guess = FJC_fn(x_data, T=T, L=L, a=a)
 464         L *= 2.0
 465
 466     fn = lambda f: inverse_FJC_PEG_fn(guess*abs(f), **kwargs) - x_data
 467     ret = guess*abs(newton(func=fn, x0=1.0))
 468     return ret
 469
 470 class FJC_PEG (ModelFitter):
 471     """Fit the data to an poly(ethylene-glycol) adjusted extended FJC.
 472
 473     Examples
 474     --------
 475     Generate some example data
 476
 477     >>> kwargs = {'T':300.0, 'N':123, 'k':150.0, 'Lp':3.58e-10, 'Lh':2.8e-10,
 478     ...           'dG':3.0, 'a':7e-10}
 479     >>> x_data = numpy.linspace(10e-9, 30e-9, num=20)
 480     >>> d_data = FJC_PEG_fn(x_data, **kwargs)
 481
 482     Fit the example data with a two-parameter fit (`N` and `a`).
 483
 484     >>> info = {
 485     ...     'temperature (K)': kwargs['T'],
 486     ...     'x data (m)': x_data,
 487     ...     'section elasticity (N/m)': kwargs['k'],
 488     ...     'planar section length (m)': kwargs['Lp'],
 489     ...     'helical section length (m)': kwargs['Lh'],
 490     ...     'Gibbs free energy difference (Gp - Gh) (kBT)': kwargs['dG'],
 491     ...     }
 492     >>> model = FJC_PEG(d_data, info=info, rescale=True)
 493     >>> outqueue = Queue()
 494     >>> N,a = model.fit(outqueue=outqueue)
 495     >>> fit_info = outqueue.get(block=False)
 496     >>> print N
 497     123.0
 498     >>> print a
 499     7e-10
 500
 501     Fit the example data with a one-parameter fit (`N`).  We introduce
 502     some error in our fixed Kuhn length for fun.
 503
 504     >>> info['Kuhn length (m)'] = 2*kwargs['a']
 505     >>> model = FJC_PEG(d_data, info=info, rescale=True)
 506     >>> N, = model.fit(outqueue=outqueue)
 507     >>> fit_info = outqueue.get(block=False)
 508     >>> print N  # doctest: +ELLIPSIS
 509     96.931...
 510     """
 511     def Lr(self, L):
 512         """To avoid invalid values of `L`, we reparametrize with `Lr`.
 513
 514         Parameters
 515         ----------
 516         L : float
 517             contour length in meters
 518
 519         Returns
 520         -------
 521         Lr : float
 522             rescaled version of `L`.
 523
 524         Examples
 525         --------
 526         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 527         >>> model = FJC_PEG(data=x_data, info={'x data (m)':x_data})
 528         >>> model.Lr(20e-9)  # doctest: +ELLIPSIS
 529         0.0
 530         >>> model.Lr(19e-9)  # doctest: +ELLIPSIS
 531         -0.0512...
 532         >>> model.Lr(21e-9)  # doctest: +ELLIPSIS
 533         0.0487...
 534         >>> model.Lr(100e-9)  # doctest: +ELLIPSIS
 535         1.609...
 536
 537         Notes
 538         -----
 539         The rescaling is designed to limit `L` to values strictly
 540         greater than zero, so we use
 541
 542         .. math::
 543             L = \exp(L_p) * x_\text{max}
 544
 545         which has the inverse
 546
 547         .. math::
 548             L_p = \ln(L/x_\text{max})
 549
 550         This will obviously effect the interpretation of the fit's covariance matrix.
 551         """
 552         x_max = self.info['x data (m)'].max()
 553         return numpy.log(L/x_max)
 554
 555     def L(self, Lr):
 556         """Inverse of :meth:`Lr`.
 557
 558         Parameters
 559         ----------
 560         Lr : float
 561             rescaled version of `L`
 562
 563         Returns
 564         -------
 565         L : float
 566             contour length in meters
 567
 568         Examples
 569         --------
 570         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 571         >>> model = FJC_PEG(data=x_data, info={'x data (m)':x_data})
 572         >>> model.L(model.Lr(21e-9))  # doctest: +ELLIPSIS
 573         2.100...e-08
 574         >>> model.L(model.Lr(100e-9))  # doctest: +ELLIPSIS
 575         9.999...e-08
 576         """
 577         x_max = self.info['x data (m)'].max()
 578         return numpy.exp(Lr)*x_max
 579
 580     def _kwargs(self):
 581         return {
 582             'T': self.info['temperature (K)'],
 583             'k': self.info['section elasticity (N/m)'],
 584             'Lp': self.info['planar section length (m)'],
 585             'Lh': self.info['helical section length (m)'],
 586             'dG': self.info['Gibbs free energy difference (Gp - Gh) (kBT)'],
 587             }
 588
 589     def model(self, params):
 590         """Extract the relavant arguments and use :func:`FJC_PEG_fn`.
 591
 592         Parameters
 593         ----------
 594         params : list of floats
 595             `[N, a]`, where the presence of `a` is optional.
 596         """
 597         # setup convenient aliases
 598         T = self.info['temperature (K)']
 599         x_data = self.info['x data (m)']
 600         N = self.L(params[0])
 601         if len(params) > 1:
 602             a = abs(params[1])
 603         else:
 604             a = self.info['Kuhn length (m)']
 605         kwargs = self._kwargs()
 606         # compute model data
 607         self._model_data[:] = FJC_PEG_fn(x_data, N=N, a=a, **kwargs)
 608         return self._model_data
 609
 610     def fit(self, *args, **kwargs):
 611         params = super(FJC_PEG, self).fit(*args, **kwargs)
 612         params[0] = self.L(params[0])  # convert Nr -> N
 613         if len(params) > 1:
 614             params[1] = abs(params[1])  # take the absolute value of `a`
 615         return params
 616
 617     def guess_initial_params(self, outqueue=None):
 618         """Guess initial fitting parameters.
 619
 620         Returns
 621         -------
 622         N : float
 623             A guess at the section count.
 624         a : float (optional)
 625             A guess at the Kuhn length in meters.  If `.info` has a
 626             setting for `'Kuhn length (m)'`, `a` is not returned.
 627         """
 628         x_max = self.info['x data (m)'].max()
 629         a_given = 'Kuhn length (m)' in self.info
 630         if a_given:
 631             a = self.info['Kuhn length (m)']
 632         else:
 633             a = x_max / 10.0
 634         f_max = self._data.max()
 635         kwargs = self._kwargs()
 636         kwargs['a'] = a
 637         x_section = inverse_FJC_PEG_fn(F_data=f_max, **kwargs)
 638         N = x_max / x_section;
 639         Nr = self.Lr(N)
 640         if a_given:
 641             return [Nr]
 642         return [Nr, a]
 643
 644
 645 def WLC_fn(x_data, T, L, p):
 646     """The worm like chain interpolation model.
 647
 648     Parameters
 649     ----------
 650     x_data : array_like
 651         x values for which the WLC tension should be calculated.
 652     T : float
 653         temperature in Kelvin.
 654     L : float
 655         contour length in meters.
 656     p : float
 657         persistence length in meters.
 658
 659     Returns
 660     -------
 661     f_data : array
 662         `F(x_data)`.
 663
 664     Examples
 665     --------
 666     >>> WLC_fn(numpy.array([1e-9, 5e-9, 10e-9], dtype=numpy.float),
 667     ...        T=300, L=15e-9, p=2.5e-10) # doctest: +ELLIPSIS
 668     array([  1.717...e-12,   1.070...e-11,   4.418...e-11])
 669
 670     Notes
 671     -----
 672     The function is the simple polynomial worm like chain
 673     interpolation forumula proposed by C. Bustamante, et al. [#]_.
 674
 675     .. math::
 676       F(x) = \frac{k_B T}{p} \left[
 677         \frac{1}{4}\left( \frac{1}{(1-x/L)^2} - 1 \right)
 678         + \frac{x}{L}
 679                              \right]
 680
 681     .. [#] C. Bustamante, J.F. Marko, E.D. Siggia, and S.Smith.
 682     "Entropic elasticity of lambda-phage DNA."
 683     Science. 1994.
 684     doi: `10.1126/science.8079175 <http://dx.doi.org/10.1126/science.8079175>`
 685     """
 686     a = kB * T / p
 687     scaled_data = x_data / L
 688     return a * (0.25*((1-scaled_data)**-2 - 1) + scaled_data)
 689
 690 class WLC (ModelFitter):
 691     """Fit the data to a worm like chain.
 692
 693     Examples
 694     --------
 695     Generate some example data
 696
 697     >>> T = 300  # Kelvin
 698     >>> L = 35e-9  # meters
 699     >>> p = 2.5e-10  # meters
 700     >>> x_data = numpy.linspace(10e-9, 30e-9, num=20)
 701     >>> d_data = WLC_fn(x_data, T=T, L=L, p=p)
 702
 703     Fit the example data with a two-parameter fit (`L` and `p`).
 704
 705     >>> info = {
 706     ...     'temperature (K)': T,
 707     ...     'x data (m)': x_data,
 708     ...     }
 709     >>> model = WLC(d_data, info=info, rescale=True)
 710     >>> outqueue = Queue()
 711     >>> L,p = model.fit(outqueue=outqueue)
 712     >>> fit_info = outqueue.get(block=False)
 713     >>> print L
 714     3.5e-08
 715     >>> print p
 716     2.5e-10
 717
 718     Fit the example data with a one-parameter fit (`L`).  We introduce
 719     some error in our fixed persistence length for fun.
 720
 721     >>> info['persistence length (m)'] = 2*p
 722     >>> model = WLC(d_data, info=info, rescale=True)
 723     >>> L, = model.fit(outqueue=outqueue)
 724     >>> fit_info = outqueue.get(block=False)
 725     >>> print L  # doctest: +ELLIPSIS
 726     3.318...e-08
 727     """
 728     def Lp(self, L):
 729         """To avoid invalid values of `L`, we reparametrize with `Lp`.
 730
 731         Parameters
 732         ----------
 733         L : float
 734             contour length in meters
 735
 736         Returns
 737         -------
 738         Lp : float
 739             rescaled version of `L`.
 740
 741         Examples
 742         --------
 743         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 744         >>> model = WLC(data=x_data, info={'x data (m)':x_data})
 745         >>> model.Lp(20e-9)  # doctest: +ELLIPSIS
 746         -inf
 747         >>> model.Lp(19e-9)  # doctest: +ELLIPSIS
 748         nan
 749         >>> model.Lp(21e-9)  # doctest: +ELLIPSIS
 750         -2.99...
 751         >>> model.Lp(100e-9)  # doctest: +ELLIPSIS
 752         1.386...
 753
 754         Notes
 755         -----
 756         The rescaling is designed to limit `L` to values strictly
 757         greater than the maximum `x` data value, so we use
 758
 759         .. math::
 760             L = [\exp(L_p))+1] * x_\text{max}
 761
 762         which has the inverse
 763
 764         .. math::
 765             L_p = \ln(L/x_\text{max}-1)
 766
 767         This will obviously effect the interpretation of the fit's covariance matrix.
 768         """
 769         x_max = self.info['x data (m)'].max()
 770         return numpy.log(L/x_max - 1)
 771
 772     def L(self, Lp):
 773         """Inverse of :meth:`Lp`.
 774
 775         Parameters
 776         ----------
 777         Lp : float
 778             rescaled version of `L`
 779
 780         Returns
 781         -------
 782         L : float
 783             contour length in meters
 784
 785         Examples
 786         --------
 787         >>> x_data = numpy.linspace(1e-9, 20e-9, num=10)
 788         >>> model = WLC(data=x_data, info={'x data (m)':x_data})
 789         >>> model.L(model.Lp(21e-9))  # doctest: +ELLIPSIS
 790         2.100...e-08
 791         >>> model.L(model.Lp(100e-9))  # doctest: +ELLIPSIS
 792         9.999...e-08
 793         """
 794         x_max = self.info['x data (m)'].max()
 795         return (numpy.exp(Lp)+1)*x_max
 796
 797     def model(self, params):
 798         """Extract the relavant arguments and use :func:`WLC_fn`.
 799
 800         Parameters
 801         ----------
 802         params : list of floats
 803             `[Lp, p]`, where the presence of `p` is optional.
 804         """
 805         # setup convenient aliases
 806         T = self.info['temperature (K)']
 807         x_data = self.info['x data (m)']
 808         L = self.L(params[0])
 809         if len(params) > 1:
 810             p = abs(params[1])
 811         else:
 812             p = self.info['persistence length (m)']
 813         # compute model data
 814         self._model_data[:] = WLC_fn(x_data, T, L, p)
 815         return self._model_data
 816
 817     def fit(self, *args, **kwargs):
 818         params = super(WLC, self).fit(*args, **kwargs)
 819         params[0] = self.L(params[0])  # convert Lp -> L
 820         if len(params) > 1:
 821             params[1] = abs(params[1])  # take the absolute value of `p`
 822         return params
 823
 824     def guess_initial_params(self, outqueue=None):
 825         """Guess initial fitting parameters.
 826
 827         Returns
 828         -------
 829         Lp : float
 830             A guess at the reparameterized contour length in meters.
 831         p : float (optional)
 832             A guess at the persistence length in meters.  If `.info`
 833             has a setting for `'persistence length (m)'`, `p` is not
 834             returned.
 835         """
 836         x_max = self.info['x data (m)'].max()
 837         p_given = 'persistence length (m)' in self.info
 838         if p_given:
 839             p = self.info['persistence length (m)']
 840         else:
 841             p = x_max / 10.0
 842         L = 1.5 * x_max
 843         Lp = self.Lp(L)
 844         if p_given:
 845             return [Lp]
 846         return [Lp, p]
 847
 848
 849 class PolymerFitPlugin (Plugin):
 850     """Polymer model (WLC, FJC, etc.) fitting.
 851     """
 852     def __init__(self):
 853         super(PolymerFitPlugin, self).__init__(name='polymer_fit')
 854         self._arguments = [  # For Command initialization
 855             Argument('polymer model', default='WLC', help="""
 856 Select the default polymer model for 'polymer fit'.  See the
 857 documentation for descriptions of available algorithms.
 858 """.strip()),
 859             Argument('FJC Kuhn length', type='float', default=4e-10,
 860                     help='Kuhn length in meters'),
 861             Argument('FJC_PEG Kuhn length', type='float', default=4e-10,
 862                     help='Kuhn length in meters'),
 863             Argument('FJC_PEG elasticity', type='float', default=150.0,
 864                     help='Elasticity of a PEG segment in Newtons per meter.'),
 865             Argument('FJC_PEG delta G', type='float', default=3.0, help="""
 866 Gibbs free energy difference between trans-trans-trans (ttt) and
 867 trans-trans-gauche (ttg) PEG states in units of kBT.
 868 """.strip()),
 869             Argument('FJC_PEG L_helical', type='float', default=2.8e-10,
 870                     help='Contour length of PEG in the ttg state.'),
 871             Argument('FJC_PEG L_planar', type='float', default=3.58e-10,
 872                     help='Contour length of PEG in the ttt state.'),
 873             Argument('WLC persistence length', type='float', default=4e-10,
 874                     help='Persistence length in meters'),
 875             ]
 876         self._settings = [
 877             Setting(section=self.setting_section, help=self.__doc__)]
 878         for argument in self._arguments:
 879             self._settings.append(argument_to_setting(
 880                     self.setting_section, argument))
 881             argument.default = None # if argument isn't given, use the config.
 882         self._input_columns = [
 883             ('distance column', 'cantilever adjusted extension (m)',
 884              """
 885 Name of the column to use as the surface position input.
 886 """.strip()),
 887             ('deflection column', 'deflection (N)',
 888              """
 889 Name of the column to use as the deflection input.
 890 """.strip()),
 891             ]
 892         self._commands = [
 893             PolymerFitCommand(self), PolymerFitPeaksCommand(self),
 894             TranslateFlatPeaksCommand(self),
 895             ]
 896
 897     def dependencies(self):
 898         return ['vclamp']
 899
 900     def default_settings(self):
 901         return self._settings
 902
 903
 904 class PolymerFitCommand (ColumnAddingCommand):
 905     """Polymer model (WLC, FJC, etc.) fitting.
 906
 907     Fits an entropic elasticity function to a given chunk of the
 908     curve.  Fit quality compares the residual with the thermal noise
 909     (a good fit should be 1 or less).
 910
 911     Because the models are complicated and varied, you should
 912     configure the command by setting configuration options instead of
 913     using command arguments.  TODO: argument_to_setting()
 914     """
 915     def __init__(self, plugin):
 916         super(PolymerFitCommand, self).__init__(
 917             name='polymer fit',
 918             columns=plugin._input_columns,
 919             new_columns=[
 920                 ('output tension column', 'polymer tension',
 921                  """
 922 Name of the column (without units) to use as the polymer tension output.
 923 """.strip()),
 924                 ],
 925             arguments=[
 926                 Argument(name='fit parameters info name', type='string',
 927                          default='polymer fit',
 928                          help="""
 929 Name (without units) for storing the fit parameters in the `.info` dictionary.
 930 """.strip()),
 931                 Argument(name='bounds', type='point', optional=False, count=2,
 932                          help="""
 933 Indicies of points bounding the selected data.
 934 """.strip()),
 935                 Argument(name='peak extraction method', default='convex hull',
 936                          help=(
 937                         'Select the method used to extract the peak '
 938                         'deflection from the fitted model.  `convex hull`, '
 939                         '`peak data`, or `peak model`.')),
 940                 ] + plugin._arguments,
 941             help=self.__doc__, plugin=plugin)
 942
 943     def _run(self, hooke, inqueue, outqueue, params):
 944         log = logging.getLogger('hooke')
 945         params = self._setup_params(hooke, params)
 946         data = self._block(hooke, params)
 947         model = params['polymer model']
 948         dist_data = self._get_column(
 949             hooke=hooke, params=params, column_name='distance column')
 950         def_data = self._get_column(
 951             hooke=hooke, params=params, column_name='deflection column')
 952         start,stop = params['bounds']
 953         tension_data,ps = self.fit_polymer_model(
 954             params, dist_data, def_data, start, stop, outqueue)
 955         if params['peak extraction method'] == 'convex hull':
 956             peak_def,hull = self._convex_hull_deflection(
 957                 distance_data=dist_data, deflection_data=def_data,
 958                 deflection_model=tension_data, start=start, stop=stop,
 959                 outqueue=outqueue)
 960         elif params['peak extraction method'] == 'peak data':
 961             peak_def = numpy.nanmax(def_data[start:stop])
 962         elif params['peak extraction method'] == 'peak model':
 963             peak_def = numpy.nanmax(tension_data[start:stop])
 964         else:
 965             raise ValueError(params['peak extraction method'])
 966         log.debug('{}: {}'.format(params['peak extraction method'], peak_def))
 967         ps['peak deflection'] = peak_def
 968         ps['peak extraction method'] = params['peak extraction method']
 969         ps['model'] = model
 970         data.info[params['fit parameters info name']] = ps
 971         self._set_column(hooke=hooke, params=params,
 972                          column_name='output tension column',
 973                          values=tension_data)
 974
 975     def _setup_params(self, hooke, params):
 976         for key,value in params.items():
 977             if value == None:  # Use configured default value.
 978                 params[key] = self.plugin.config[key]
 979         name,def_unit = split_data_label(params['deflection column'])
 980         params['output tension column'] = join_data_label(
 981             params['output tension column'], def_unit)
 982         return params
 983
 984     def fit_polymer_model(self, params, dist_data, def_data, start, stop,
 985                           outqueue=None):
 986         """Railyard for the `fit_*_model` family.
 987
 988         Uses the `polymer model` configuration setting to call the
 989         appropriate backend algorithm.
 990         """
 991         fn = getattr(self, 'fit_%s_model'
 992                      % params['polymer model'].replace('-','_'))
 993         return fn(params, dist_data, def_data, start, stop, outqueue)
 994
 995     def fit_FJC_model(self, params, dist_data, def_data, start, stop,
 996                       outqueue=None):
 997         """Fit the data with :class:`FJC`.
 998         """
 999         info = {
1000             'temperature (K)': self.plugin.config['temperature'],
1001             'x data (m)': dist_data[start:stop],
1002             }
1003         if True:  # TODO: optionally free persistence length
1004             info['Kuhn length (m)'] = (
1005                 params['FJC Kuhn length'])
1006         model = FJC(def_data[start:stop], info=info, rescale=True)
1007         queue = Queue()
1008         params = model.fit(outqueue=queue)
1009         if True:  # TODO: if Kuhn length fixed
1010             a = info['Kuhn length (m)']
1011         else:
1012             a = params[1]
1013         L = params[0]
1014         T = info['temperature (K)']
1015         fit_info = queue.get(block=False)
1016         f_data = numpy.ones(dist_data.shape, dtype=dist_data.dtype) * numpy.nan
1017         f_data[start:stop] = FJC_fn(dist_data[start:stop], T=T, L=L, a=a)
1018         return [f_data, fit_info]
1019
1020     def fit_FJC_PEG_model(self, params, dist_data, def_data, start, stop,
1021                           outqueue=None):
1022         """Fit the data with :class:`FJC_PEG`.
1023         """
1024         info = {
1025             'temperature (K)': self.plugin.config['temperature'],
1026             'x data (m)': dist_data[start:stop],
1027             # TODO: more info
1028             }
1029         if True:  # TODO: optionally free persistence length
1030             info['Kuhn length (m)'] = (
1031                 params['FJC Kuhn length'])
1032         model = FJC_PEG(def_data[start:stop], info=info, rescale=True)
1033         queue = Queue()
1034         params = model.fit(outqueue=queue)
1035         if True:  # TODO: if Kuhn length fixed
1036             a = info['Kuhn length (m)']
1037         else:
1038             a = params[1]
1039         N = params[0]
1040         T = info['temperature (K)']
1041         fit_info = queue.get(block=False)
1042         f_data = numpy.ones(dist_data.shape, dtype=dist_data.dtype) * numpy.nan
1043         f_data[start:stop] = FJC_PEG_fn(dist_data[start:stop], **kwargs)
1044         return [f_data, fit_info]
1045
1046     def fit_WLC_model(self, params, dist_data, def_data, start, stop,
1047                       outqueue=None):
1048         """Fit the data with :class:`WLC`.
1049         """
1050         info = {
1051             'temperature (K)': self.plugin.config['temperature'],
1052             'x data (m)': dist_data[start:stop],
1053             }
1054         if True:  # TODO: optionally free persistence length
1055             info['persistence length (m)'] = (
1056                 params['WLC persistence length'])
1057         model = WLC(def_data[start:stop], info=info, rescale=True)
1058         queue = Queue()
1059         params = model.fit(outqueue=queue)
1060         if True:  # TODO: if persistence length fixed
1061             p = info['persistence length (m)']
1062         else:
1063             p = params[1]
1064             info['persistence length (m)'] = p
1065         L = params[0]
1066         info['contour length (m)'] = L
1067         T = info['temperature (K)']
1068         fit_info = queue.get(block=False)
1069         info['fit'] = fit_info
1070         info.pop('x data (m)')
1071         f_data = numpy.ones(dist_data.shape, dtype=dist_data.dtype) * numpy.nan
1072         f_data[start:stop] = WLC_fn(dist_data[start:stop], T=T, L=L, p=p)
1073         return [f_data, info]
1074
1075     def _convex_hull_deflection(self, distance_data, deflection_data,
1076                                 deflection_model, start, stop, outqueue=None):
1077         """Return the last model deflection point inside the data hull.
1078
1079         If the `stop` point is a bit past it's ideal location, the
1080         rapid rise of some polymer models can lead to artificially
1081         high peak deflections if the largest distance value is plugged
1082         into the polymer model directly.  As a more robust estimation
1083         of the peak deflection, we calculate the convex hull
1084         surrounding the distance-deflection data and return the last
1085         model deflection point that is inside that hull.
1086         """
1087         distance_data = distance_data[start:stop]
1088         deflection_data = deflection_data[start:stop]
1089         deflection_model = deflection_model[start:stop]
1090         unfolds = []
1091         hulls = []
1092         data = numpy.array(
1093             [[x,y] for x,y in zip(distance_data, deflection_data)])
1094         model = numpy.array(
1095             [[x,y] for x,y in zip(distance_data, deflection_model)])
1096         hull = qhull(data)
1097         inside = points_inside_hull(hull, model)
1098         # distance data is not necessarily monatonic
1099         index_inside = [j for j,i in enumerate(inside) if i is True]
1100         distance_inside = [(distance_data[i],i) for i in index_inside]
1101         if distance_inside:
1102             peak_dist,peak_index = max(distance_inside)
1103             unfold = deflection_model[peak_index]
1104         else:  # all model points are outside the hull?!
1105             unfold = None
1106         return (unfold, hull)
1107
1108
1109 class PolymerFitPeaksCommand (ColumnAccessCommand):
1110     """Polymer model (WLC, FJC, etc.) fitting.
1111
1112     Use :class:`PolymerFitCommand` to fit the each peak in a list of
1113     previously determined peaks.
1114     """
1115     def __init__(self, plugin):
1116         super(PolymerFitPeaksCommand, self).__init__(
1117             name='polymer fit peaks',
1118             columns=plugin._input_columns,
1119             arguments=[
1120                 Argument(name='peak info name', type='string',
1121                          default='polymer peaks',
1122                          help="""
1123 Name for the list of peaks in the `.info` dictionary.
1124 """.strip()),
1125                 Argument(name='peak index', type='int', count=-1, default=None,
1126                          help="""
1127 Index of the selected peak in the list of peaks.  Use `None` to fit all peaks.
1128 """.strip()),
1129                 ] + plugin._arguments,
1130             help=self.__doc__, plugin=plugin)
1131
1132     def _run(self, hooke, inqueue, outqueue, params):
1133         data = self._block(hooke, params)
1134         fit_command = hooke.command_by_name['polymer fit']
1135         inq = Queue()
1136         outq = Queue()
1137         curve = params['curve']
1138         params['curve'] = None
1139         p = copy.deepcopy(params)
1140         p['curve'] = params['curve']
1141         del(p['peak info name'])
1142         del(p['peak index'])
1143         for i,peak in enumerate(data.info[params['peak info name']]):
1144             if params['peak index'] == None or i in params['peak index']:
1145                 p['bounds'] = [peak.index, peak.post_index()]
1146                 p['output tension column'] = peak.name
1147                 p['fit parameters info name'] = peak.name
1148                 fit_command.run(hooke, inq, outq, **p)
1149             ret = outq.get()
1150             if not isinstance(ret, Success):
1151                 raise ret
1152
1153
1154 class TranslateFlatPeaksCommand (ColumnAccessCommand):
1155     """Translate flat filter peaks into polymer peaks for fitting.
1156
1157     Use :class:`~hooke.plugin.flatfilt.FlatPeaksCommand` creates a
1158     list of peaks for regions with large derivatives.  For velocity
1159     clamp measurements, these regions are usually the rebound phase
1160     after a protein domain unfolds, the cantilever detaches, etc.
1161     Because these features occur after the polymer loading phase, we
1162     need to shift the selected regions back to align them with the
1163     polymer loading regions.
1164     """
1165     def __init__(self, plugin):
1166         super(TranslateFlatPeaksCommand, self).__init__(
1167             name='flat peaks to polymer peaks',
1168             columns=plugin._input_columns,
1169             arguments=[
1170                 Argument(name='input peak info name', type='string',
1171                          default='flat filter peaks',
1172                          help="""
1173 Name for the input peaks in the `.info` dictionary.
1174 """.strip()),
1175                 Argument(name='output peak info name', type='string',
1176                          default='polymer peaks',
1177                          help="""
1178 Name for the ouptput peaks in the `.info` dictionary.
1179 """.strip()),
1180                 Argument(name='end offset', type='int', default=-1,
1181                          help="""
1182 Number of points between the end of a new peak and the start of the old.
1183 """.strip()),
1184                 Argument(name='start fraction', type='float', default=0.2,
1185                          help="""
1186 Place the start of the new peak at `start_fraction` from the end of
1187 the previous old peak to the end of the new peak.  Because the first
1188 new peak will have no previous old peak, it uses a distance of zero
1189 instead.
1190 """.strip()),
1191             ] + plugin._arguments,
1192             help=self.__doc__, plugin=plugin)
1193
1194     def _run(self, hooke, inqueue, outqueue, params):
1195         data = self._block(hooke, params)
1196         dist_data = self._get_column(
1197             hooke=hooke, params=params, column_name='distance column')
1198         def_data = self._get_column(
1199             hooke=hooke, params=params, column_name='deflection column')
1200         previous_old_stop = numpy.absolute(dist_data).argmin()
1201         new = []
1202         try:
1203             peaks = data.info[params['input peak info name']]
1204         except KeyError, e:
1205             raise Failure('No value for %s in %s.info (%s): %s'
1206                           % (params['input peak info name'], data.info['name'],
1207                              sorted(data.info.keys()), e))
1208         for i,peak in enumerate(peaks):
1209             next_old_start = peak.index
1210             stop = next_old_start + params['end offset']
1211             dist_start = (
1212                 dist_data[previous_old_stop]
1213                 + params['start fraction']*(
1214                     dist_data[stop] - dist_data[previous_old_stop]))
1215             start = numpy.absolute(dist_data - dist_start).argmin()
1216             p = Peak('polymer peak %d' % i,
1217                      index=start,
1218                      values=def_data[start:stop])
1219             new.append(p)
1220             previous_old_stop = peak.post_index()
1221         data.info[params['output peak info name']] = new
1222
1223
1224 # TODO:
1225 # def dist2fit(self):
1226 #     '''Calculates the average distance from data to fit, scaled by
1227 #     the standard deviation of the free cantilever area (thermal
1228 #     noise).
1229 #     '''