% sawsim - simulating single molecule mechanical unfolding.
% Copyright (C) 2008-2010 William Trevor King
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see .
%
% The author may be contacted at on the Internet, or
% write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
% Philadelphia PA 19104, USA.
%%%%%%%%%%%%%%%%%%%%%%%%% Start LaTeX boilerplate %%%%%%%%%%%%%%%%%%%%%%%%%%%
% we have our own preamble,
% use `noweave -delay` to not print noweb's automatic one
% -*- mode: Noweb; noweb-code-mode: c-mode -*-
\documentclass[letterpaper, 11pt]{article}
\usepackage{noweb}
\pagestyle{noweb}
\noweboptions{smallcode}
\usepackage{url} % needed for natbib for underscores in urls?
\usepackage[breaklinks,colorlinks]{hyperref} % for internal links, \phantomsection
% breaklinks breaks long links
% colorlinks colors link text instead of boxing it
\usepackage{amsmath} % for \text{}, \xrightarrow[sub]{super}
\usepackage{amsthm} % for theorem and proof environments
\newtheorem{theorem}{Theorem}
\usepackage{subfig} % Support side-by-side figures
\usepackage{pgf} % Fancy graphics
\usepackage{tikz} % A nice, inline PGF frontend
\usetikzlibrary{automata} % Graph-theory library
\usepackage{doc} % BibTeX
\usepackage[super,sort&compress]{natbib} % fancy citation extensions
% super selects citations in superscript mode
% sort&compress automatically sorts and compresses compound citations (\cite{a,b,...})
\usepackage[mathletters]{ucs} % use math fonts to allow Greek UTF-8 in the text
\usepackage[utf8x]{inputenc} % allow UTF-8 characters
%\bibliographystyle{ieeetr} % pick the bibliography style, short and sweet
%\bibliographystyle{plain} % pick the bibliography style, includes dates
\bibliographystyle{plainnat}
\defcitealias{sw:noweb}{{\tt noweb}}
\defcitealias{galassi05}{GSL}
\defcitealias{sw:check}{{\tt check}}
\defcitealias{sw:python}{Python}
\topmargin -1.0in
\headheight 0.5in
\headsep 0.5in
\textheight 9.5in % leave a bit of extra room for page numbers (at top of page)
\oddsidemargin -0.5in
\textwidth 7.5in
\setlength{\parindent}{0pt}
\setlength{\parskip}{5pt}
% For some reason, the \maketitle wants to go on it's own page
% so we'll just hardcode the following in our first page.
%\title{Sawsim: a sawtooth protein unfolding simulator}
%\author{W.~Trevor King}
%\date{\today}
\newcommand{\prog}{[[sawsim]]}
\newcommand{\lang}{[[C]]}
\newcommand{\p}[3]{\left#1 #2 \right#3} % e.g. \p({complicated expr})
\newcommand{\Th}{\ensuremath{\sup{\text{th}}}} % ordinal th
\newcommand{\U}[1]{\ensuremath{\text{ #1}}} % units: $1\U{m/s}$
% single spaced lists, from Alvin Alexander
% http://www.devdaily.com/blog/post/latex/control-line-spacing-in-itemize-enumerate-tags/
\newenvironment{packed_item}{
\begin{itemize}
\setlength{\itemsep}{1pt}
\setlength{\parskip}{0pt}
\setlength{\parsep}{0pt}
}{\end{itemize}}
\begin{document}
\nwfilename{sawsim.nw}
\nwbegindocs{0}
%\maketitle
\begin{centering}
Sawsim: a sawtooth protein unfolding simulator \\
W.~Trevor King \\
\today \\
\end{centering}
\bigskip
\tableofcontents
%%%%%%%%%%%%%%%%%%%%%%%%%%%% End LaTeX boilerplate %%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction}
The unfolding of multi-globular protein strings is a stochastic
process. In the \prog\ program, we use Monte Carlo methods to
simulate this unfolding through an explicit time-stepping approach.
We develop a program in \lang\ to simulate probability of unfolding
under a constant extension velocity of a chain of globular domains.
In order to extract physical meaning from many mechanical unfolding
simulations, it is necessary to compare the experimental results
against the results of simulations using different models and
parameters. The model and parameters that best match the experimental
data provide the ‘best’ model for that experiment.
Previous mechanical unfolding studies have rolled their own
simulations for modelling their experiment, and there has been little
exchange and standardization of these simulations between groups.
The \prog\ program attempts to fill this gap, providing a flexible,
extensible, \emph{community} program, to make it easier for groups
to share the burden and increase the transparency of data analysis.
‘Sawsim’ is blend of ‘sawtooth simulation’.
\subsection{Related work}
Sawim has been published\citep{king10}! It is, as far as I know, the
only open-source Monte Carlo force spectroscopy simulator, but similar
closed source simulators have been around since the seminal paper by
\citet{rief97a}. In chronological order, major contributions have
come from
\begin{packed_item}
\item \citet{rief97a} \citeyear{rief97a}:
\item \citet{rief97b} \citeyear{rief97b}:
\item \citet{kellermayer97} \citeyear{kellermayer97}:
\item \citet{rief98} \citeyear{rief98}:
first paper to focus mainly on the simulation
\item \citet{oberhauser98} \citeyear{oberhauser98}:
\item \citet{carrion-vazquez99a} \citeyear{carrion-vazquez99a}:
\item \citet{best02} \citeyear{best02}:
pointed out large fit valley
\item \citet{zinober02} \citeyear{zinober02}:
introduced the scaffold effect
\item \citet{jollymore09} \citeyear{jollymore09}:
\item \citet{king10} \citeyear{king10}:
introduced sawsim
\end{packed_item}
\subsection{About this document}
This document was written in \citetalias{sw:noweb} with code blocks in
\lang\ and comment blocks in \LaTeX.
\subsection{System Requirements and Dependencies}
\subsection{Change Log}
Version 0.0 used only the unfolded domain WLC to determine the tension
and had a fixed timestep $dt$.
Version 0.1 added adaptive timesteps, adjusting so that the unfolding
probability for a given domain was constant.
Version 0.2 added Kramers' model unfolding rate calculations, and a
switch to select Bell's or Kramers' model. This induced a major
rewrite, introducing the tension group abstraction, and a split to
multiple \lang\ source files. Also added a unit-test suites for
sanity checking, and switched to SI units throughout.
Version 0.3 added integrated Kramers' model unfolding rate
calculations. Also replaced some of my hand-coded routines with GNU
Scientific Library \citepalias{galassi05} calls.
Version 0.4 added Python evaluation for the saddle-point Kramers'
model unfolding rate calculations, so that the model functions could
be controlled from the command line. Also added interpolating lookup
tables to accelerate slow unfolding rate model evaluation, and command
line control of tension groups.
Version 0.5 added the stiffness environmental parameter and it's
associated unfolding models.
Version 0.6 generalized from two state unfolding to arbitrary
$N$-state rate reactions. This allows simulations of
unfolding-refolding, metastable intermediates, etc., but required
reorganizing the command line interface.
Version 0.7 added tension model inverses, which seems to reduce
computation time by about a factor of 2. I was expecting more, but
I'll take what I can get.
Version 0.8 fixed a minor bug in unfolding probability for
multi-domain groups. The probability of at least one domain unfolding
had been calculated as $P_N=NP_1$, rather than $P_N=1-(1-P_1)^N$.
However, for small $P$ the two are equivalent.
Version 0.9 added the [[-P]] option to sawsim, setting the target
probability for the per-state transition $P_N$, not the per-domain
transisiton $P_1$.
Version 0.10 added the [[-F]] option to sawsim, setting a limit on the
force change $dF$ in a single timestep for continuous pulling
simulations. It also added the piston tension model (Section
\ref{sec.piston_tension}), and adjusted the stiffness calculations to
handle domains with undefined stiffness.
<>=
#define VERSION "0.10"
@
\subsection{License}
<>=
sawsim - simulating single molecule mechanical unfolding.
Copyright (C) 2008-2010, William Trevor King
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
The author may be contacted at on the Internet, or
write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
Philadelphia PA 19104, USA.
@
<>=
/*
<>
*/
@
\section{Main}
\label{sec.main}
The unfolding system is stored as a chain of \emph{domains} (Figure
\ref{fig.domain_chain}). Domains can be folded, globular protein
domains, unfolded protein linkers, AFM cantilevers, or other
stretchable system components. The system is modeled as graph of
possible domain states with transitions between them (Figure
\ref{fig.domain_states}).
\begin{figure}
\begin{center}
\subfloat[][]{\label{fig.domain_chain}
\begin{tikzpicture}[->,node distance=2cm,font=\footnotesize]
\tikzstyle{every state}=[fill,draw=red!50,very thick,fill=red!20]
\node[state] (A) {domain 1};
\node[state] (B) [below of=A] {domain 2};
\node[state] (C) [below of=B] {.~.~.};
\node[state] (D) [below of=C] {domain $N$};
\node (S) [below of=D] {Surface};
\node (E) [above of=A] {};
\path[-] (A) edge (B)
(B) edge node [right] {Tension} (C)
(C) edge (D)
(D) edge (S);
\path[->,green] (A) edge node [right,black] {Extension} (E);
\end{tikzpicture}
}
\qquad
\subfloat[][]{\label{fig.domain_states}
\begin{tikzpicture}[->,node distance=2.5cm,shorten <=1pt,shorten >=1pt,font=\footnotesize]
\tikzstyle{every state}=[fill,draw=blue!50,very thick,fill=blue!20]
\node[state] (A) {cantilever};
\node[state] (C) [below of=A] {transition};
\node[state] (B) [left of=C] {folded};
\node[state] (D) [right of=C] {unfolded};
\path (B) edge [bend left] node [above] {$k_1$} (C)
(C) edge [bend left] node [below] {$k_1'$} (B)
edge [bend left] node [above] {$k_2$} (D)
(D) edge [bend left] node [below] {$k_2'$} (C);
\end{tikzpicture}
}
\caption{\subref{fig.domain_chain} Extending a chain of domains. One end
of the chain is fixed, while the other is extended at a constant
speed. The domains are coupled with rigid linkers, so the domains
themselves must stretch to accomodate the extension.
\subref{fig.domain_states} Each domain exists in a discrete state. At
each timestep, it may transition into another state following a
user-defined state matrix such as this one, showing a metastable
transition state and an explicit ``cantilever'' domain.}
\end{center}
\end{figure}
Each domain contributes to the total tension, which depends on the
chain extension. The particular model for the tension as a function
of extension is handled generally with function pointers. So far the
following models have been implemented:
\begin{packed_item}
\item Null (Appendix \ref{sec.null_tension}),
\item Constant (Appendix \ref{sec.const_tension}),
\item Hookean spring (Appendix \ref{sec.hooke}),
\item Worm-like chain (WLC, Appendix \ref{sec.wlc}), and
\item Freely-jointed chain (FJC, Appendix \ref{sec.fjc}).
\item Piston (Appendix \ref{sec.piston_tension}),
\end{packed_item}
The tension model and parameters used for a particular domain depend
on the domain's current state. The transition rates between states
are also handled generally with function pointers, with
implementations for
\begin{packed_item}
\item Null (Appendix \ref{sec.null_k}),
\item Bell's model (Appendix \ref{sec.bell}),
\item Spring constant corrected Bell model (Appendix \ref{sec.kbell}),
\item Kramers' integral model (Appendix \ref{sec.kramers_integ}), and
\item Kramers' saddle point model (Appendix \ref{sec.kramers}).
\end{packed_item}
The unfolding of the chain domains is modeled in two stages. First
the tension of the chain is calculated. Then the tension (assumed to
be constant over the length of the chain, see Section
\ref{sec.timescales}), is applied to each domain, and used to
calculate the probability of that domain changing state in the next
timestep $dt$. Then the time is stepped forward, and the process is
repeated. The simulation is complete when a pre-set time limit
[[t_max]] is reached or a pre-selected state [[stop_state]] is empty.
<>=
int main(int argc, char **argv)
{
<>
<>
/* variables initialized in get_options() */
list_t *state_list=NULL, *transition_list=NULL;
environment_t env={0};
double P, dF, t_max, dt_max, v, x_step;
state_t *stop_state=NULL;
/* variables used in the simulation loop */
double t=0, x=0, dt, F; /* time, distance, timestep, force */
int transition=1; /* boolean marking a transition for tension and timestep optimization */
get_options(argc, argv, &P, &dF, &t_max, &dt_max, &v, &x_step,
&stop_state, &env, NUM_TENSION_MODELS, tension_models,
NUM_K_MODELS, k_models, &state_list, &transition_list);
setup();
if (flags & FLAG_OUTPUT_FULL_CURVE) printf("#Distance (m)\tForce (N)\tStiffness (N/m)\n");
if (flags & FLAG_OUTPUT_TRANSITION_FORCES) printf("#Force (N)\tInitial state\tFinal state\n");
while ((t_max > 0 && t <= t_max) || (stop_state != NULL && stop_state->num_domains > 0)) {
F = find_tension(state_list, &env, x, transition, 0);
if (x_step == 0)
dt = determine_dt(state_list, transition_list, &env, F, x, dF, P, dt_max, v, transition);
else
dt = determine_dt(state_list, transition_list, &env, F, x, dF, P, dt_max, 0, transition);
transition=random_transitions(transition_list, F, dt, &env);
if (flags & FLAG_OUTPUT_FULL_CURVE) printf("%g\t%g\t%g\n", x, F, env.stiffness);
t += dt;
if (x_step == 0) {
x += v*dt;
} else {
if (dt == dt_max) { /* step completed */
x += x_step;
dt_max = x_step / v;
} else { /* still working on this step */
dt_max -= dt;
}
}
}
destroy_state_list(state_list);
destroy_transition_list(transition_list);
free_accels();
return 0;
}
@ The meat of the simulation is bundled into the three functions
[[find_tension]], [[determine_dt]], and [[random_transitions]].
[[find_tension]] is discussed in Section \ref{sec.find_tension},
[[determine_dt]] in Section \ref{sec.adaptive_dt}, and
[[random_transitions]] in Section \ref{sec.transition_rate}.
The stretched distance is extended in one of two modes depending on
[[xstep]]. If [[xstep == 0]], then the pulling is continuous, at
least within the limits of the inherent discretization of a time
stepped approach. At any rate, the timestep is limited by the maximum
allowed timestep [[dt_max]] and the maximum allowed unfolding
probability in a given step [[P]]. In the second mode [[xstep > 0]],
and the end to end distance increases in discrete steps of that
length. The time between steps is chosen to maintain an average
unfolding velocity [[v]]. This approach is less work to simulate than
smooth pulling and also closer to the reality of many experiments, but
it is practically impossible to treat analytically. With both pulling
styles implemented, the effects of the discretization can be easily
calculated, bridging the gap between theory and experiment.
Environmental parameters important in determining reaction rates and
tensions (e.g.\ temperature, pH) are stored in a single structure to
facilitate extension to more complicated models in the future.
<>=
typedef struct environment_struct {
double T;
double stiffness;
} environment_t;
@
<>=
#ifndef GLOBAL_H
#define GLOBAL_H
#define DOUBLE_PRECISION 1e-12
<>
<>
<>
<>
#endif /* GLOBAL_H */
@
Where the [[GLOBAL_H]] defines are protecting [[global.h]] from being
included multiple times.
\section{Simulation functions}
<>=
<>
<>
<>
<>
<>
@
\subsection{Tension}
\label{sec.find_tension}
Because the stretched system may be made up of several parts (folded
domains, unfolded domains, spring-like cantilever, \ldots), we will
assign the domains to states with tension models and groups. The
models are used to determine the tension of all the domain in a given
state following some model (Hookean spring, WLC, \ldots). The domains
are assumed to be commutative, so ordering is ignored. The
interactions between the groups are assumed to be linear, but the
interactions between domains of the same group need not be. Each
model has a tension handler function, which gives the tension $F$
needed to stretch a given group of domains a distance $x$.
Groups represent collections of states which the model should treat as
a single entity. To understand the purpose of groups, consider a
system with two unfolded domain states (e.g.\ for two different
proteins) that were both modeled as WLCs. If both unfolded states had
the same persistence length, it would be wise to assign them to the
same group. This would reduce both the computational cost of
balancing the tension and the error associated with the inter-group
interaction linearity. Note that group numbers only matter
\emph{within} model classes, since grouping domains with seperate
models doesn't make sense. Within designated groups, the tension
parameters for each domain are still checked for compatibility, so if
you accidentally assign incompatible domains to the same group you
should get an appropriate error message.
<>=
#define NUM_TENSION_MODELS 6
@
<>=
extern one_dim_fn_t *tension_handlers[];
@
<>=
one_dim_fn_t *tension_handlers[] = {
NULL,
&const_tension_handler,
&hooke_handler,
&wlc_handler,
&fjc_handler,
};
@
<>=
<>
@
<>=
typedef struct tension_handler_data_struct {
list_t *group_tension_params; /* one item for each domain in the group */
environment_t *env;
void *persist;
} tension_handler_data_t;
@
After sorting the chain into separate groups $G_i$, with tension
handlers $F_i(G_i; x_i)$, we need to balance the extension $x_i$ of
each group to get a consistent tension $F_i(x_i) = F_j(x_j) \;\;
\forall i,j$. Note that there may be several states within each
group. [[find_tension]] is basically a wrapper to feed proper input
into the [[tension_balance]] function. [[transition]] should be set
to 0 if there were no transitions since the previous call to
[[find_tension]] to support some optimizations that assume a small
increase in tension between steps (only true if no transition has
occured). While we're messing with the tension handlers and if
[[const_env==0]], we also grab a measure of the current linker
stiffness for the environmental variable, which is needed by some of
the unfolding rate models (e.g.\ the linker-stiffness corrected Bell
model, Appendix \ref{sec.kbell}).
<>=
double find_tension(list_t *state_list, environment_t *env, double x,
int transition, int const_env)
{ // TODO: !cont_env -> step_call, only alter env or statics if step_call==1
static int num_active_groups=0;
static one_dim_fn_t **per_group_handlers = NULL;
static one_dim_fn_t **per_group_inverse_handlers = NULL;
static void **per_group_data = NULL; /* array of pointers to tension_handler_data_t */
static double last_x;
tension_handler_data_t *tdata;
double F, *pStiffness;
int i;
#ifdef DEBUG
fprintf(stderr, "%s:\tfinding tension at distance %g%s\n", __FUNCTION__, x, transition ? " after a transition" : "");
#endif
if (transition != 0 || num_active_groups == 0) { /* setup statics */
/* get new statics, freeing the old ones if they aren't NULL */
get_active_groups(state_list, &num_active_groups, &per_group_handlers, &per_group_inverse_handlers, &per_group_data);
/* fill in the group handlers and params */
#ifdef DEBUG
fprintf(stderr, "%s:\t%d groups, tension handler array %p, inverse tension handler array %p, handler data array %p, (lead data element %p)\n", __FUNCTION__, num_active_groups, per_group_handlers, per_group_inverse_handlers, per_group_data, per_group_data[0]);
#endif
for (i=0; igroup_tension_params);
list_print(stderr, tdata->group_tension_params, " parameters");
#endif
tdata->env = env;
/* tdata->persist continues unchanged */
}
last_x = -1.0;
} /* else continue with the current statics */
if (const_env == 1)
pStiffness = NULL;
else
pStiffness = &env->stiffness;
F = tension_balance(num_active_groups, per_group_handlers, per_group_inverse_handlers, (void **)per_group_data, last_x, x, pStiffness);
last_x = x;
return F;
}
@ For the moment, we will restrict our group boundaries to a single
dimension, so $\sum_i x_i = x$, our total extension (it is this
restriction that causes the groups to interact linearly). We'll also
restrict our extensions to all be positive. With these restrictions,
the problem of balancing the tensions reduces to solving a system of
$N+1$ possibly non-linear equations with $N$ unknowns, where $N$ is
the number of active groups. In general this can be fairly
complicated, but without much loss of practicality we can restrict
ourselves to strictly monotonically increasing, non-negative tension
functions $F_i(x)$, with $F_i(0)=0$, which makes the situation much
simpler. For example, it guarantees the existence of a unique, real
solution for finite forces. See Appendix \ref{sec.tension_balance}
for the tension balancing implementation.
Each group must define a parameter structure if the tension model is
parametrized,
a function to create the parameter structure,
a function to destroy the parameter structure, and
a tension function of type [[one_dim_fn_t]] that receives a [[tension_handler_data_t *]] as its data argument.
The tension-specific parameter structures are contained in the domain
group field. For optimization, a group may also define an inverse
tension function as an optimization. See the Section
\ref{sec.model_selection} for a discussion on the command line
framework and inverse function discussion. See the worm-like chain
implementation for an example (Section \ref{sec.wlc}).
The major limitation of our approach is that all of our tension models
are Markovian (memory-less), which is not really a problem for the
chains (see \citet{evans99} for WLC thermalization rate calculations).
\subsection{Transition rate}
\label{sec.transition_rate}
Each state transition is modeled as unimolecular, first order reaction
$$
\text{State 1} \xrightarrow{k} \text{State 2}
$$
With the rate constant $k$ defined by
$$
\frac{d[\text{State 2}]}{dt} = -\frac{d[\text{State 1}]}{dt} = k [\text{State 1}].
$$
So the probability of a given domain transitioning in a short time
$dt$ is given by
$$
P_1 = k \cdot dt.
$$
For a group of $N$ identical domains, and therefore identical
unfolding probabilities $P_1$, the probability of $n$ domains
unfolding in a given timestep is given by the binomial distribution
$$
P(n) = \frac{N!}{n!(N-n)!}P_1^n(1-P_1)^{(N-n)}.
$$
The probability that \emph{none} of these domains unfold is then
$$
P(0) = (1-P_1)^N,
$$
so the probability that \emph{at least one} domain unfolds is
$$
P_N = 1-(1-P_1)^N.
$$
Note that for small $P$,
$$
p(n>0) = 1-(1-NP_1+\mathcal{O}(P_1^2)) = NP_1 - \mathcal{O}(P^2)
\approx NP_1.
$$
This conversion from $P_1$ to $P_N$ is handled by the [[pN]] macro
<>=
/* find multi-domain transition rate from the single domain rate */
#define PN(P,N) (1.0-pow(1.0-(P), (N)))
@
We can also define a macro to find the approprate $dt$ to achieve a
target $P_N$ with a given $k$ and $N$.
\begin{align}
P_N &= 1-(1-P_1)^N \\
1-P_1 &= (1-P_N)^{1/N} \\
P_1 &= 1-(1-P_N)^{1/N}
\end{align}
<>=
#define P1(PN,N) (1.0-pow(1.0-(PN), 1.0/(N)))
@
We take some time to discuss the meaning of $p(n>1)$
(i.e. multi-unfolding timesteps) in Section \ref{sec.timescales}.
<>=
int domain_transitions(double F, double dt, environment_t *env, transition_t *transition)
{ /* returns 1 or 0, F in N, dt in s, pointer to env. data, pointer to transition structure */
double k;
k = accel_k(transition->k, F, env, transition->k_params);
//(*transition->k)(F, env, domain->k_params);
//printf("k = %g,\tdt = %g,\tk dt = %g\n", k, dt, k*dt);
return happens(PN(k*dt, N_INIT(transition)));
}
@ [[happens]] is a random decision making function defined in Appendix \ref{sec.utils}.
<>=
int random_transitions(list_t *transition_list, double tension,
double dt, environment_t *env)
{ /* returns 1 if there was a transition and 0 otherwise */
while (transition_list != NULL) {
if (T(transition_list)->initial_state->num_domains > 0
&& domain_transitions(tension, dt, env, T(transition_list))) {
if (flags & FLAG_OUTPUT_TRANSITION_FORCES)
fprintf(stdout, "%g\t%s\t%s\n", tension, T(transition_list)->initial_state->name, T(transition_list)->final_state->name);
T(transition_list)->initial_state->num_domains--;
T(transition_list)->final_state->num_domains++;
return 1; /* our one domain has transitioned, stop looking for others */
}
transition_list = transition_list->next;
}
return 0;
}
@
\subsection{Timescales}
\label{sec.timescales}
The simulation assumes that chain equilibration is instantaneous
relative to the simulation timestep $dt$, so that tension is uniform
along the chain. The quality of this assumption depends on your
particular chain. For example, a damped spring thermalizes on a
timescale of order $\tau = 1/\gamma$ where the damping ratio $\gamma
\equiv \omega_0(-\zeta \pm \sqrt{\zeta^2-1}$, the natural angular
frequency $\omega_0 \equiv \sqrt{k/m}$, $\zeta \equiv b/2\sqrt{km}$,
and $b$ sets the drag $F_b = -bv$. For our cantilevers $\tau$ is
on the order of milliseconds, which is longer than a timestep.
However, the approximation is still reasonable, because there is
rarely unfolding during the cantilever return. You could, of course,
take cantilever, WLC, etc.\ response times into effect, but that
would significantly complicate a simulation that seems to work
well enough without bothering :p. For WLC and FJC relaxation times,
see the Appendix of \citet{evans99} and \citet{kroy07}.
Besides assuming our timestep is much greater than equilibration
times, we also force it to be short enough so that only one domain may
unfold in a given timestep. For an unfolding timescale of $dt_u =
1/k$, we adapt our timesteps to keep $dt \ll dt_u$, so the probability
of two domains unfolding in the same timestep is small (see
[[determine_dt]] in Section \ref{sec.adaptive_dt} and [[P]] in
[[main()]] in Section \ref{sec.main} set by [[-P]] in
[[get_options()]] in Section \ref{sec.get_options}). This approach
breaks down as the adaptive timestep scheme approaches the transition
timestep $dt_t$, but $dt_t \sim 1\U{$\mu$s}$ for Ig27-like proteins
\citep{klimov00}, so this shouldn't be a problem. To reassure
yourself, you can ask the simulator to print the smallest timestep
that was used with TODO.
Even if the likelyhood of two domains unfolding in the same timestep
is small, if you run long enough simulations it will eventually occur.
In this case, we assume that $dt_t \ll dt$, so even if two domains
would unfold if we stuck to our unfolding rate $k$ for an entire
timestep $dt$, in ``reality'' only one of those domains unfolds.
Because we do not know when in the timestep the unfolding took place,
we move on to the next timestep without checking for further
unfoldings. This ``unchecked timestep portion'' should not contribute
significant errors to the calculation, because if $dt$ was small
enough that unfolding was unlikely at the pre-unfolding force, it
should be even more unlikely at the post-unfolding force, especially
over only a fraction of the timestep. The only dangerous cases for
the current implementation are those where unfolding intermediates are
much less stable than their precursor states, in which case an
unfolding event during the remaining timestep may actually be likely.
A simple workaround for such cases is to pick the value for [[P]] to
be small enough that the $dt \ll dt_u$ assumption survives even under
a transition populating the unstable intermediate.
\subsection{Adaptive timesteps}
\label{sec.adaptive_dt}
We'd like to pick $dt$ so the probability of changing state
(e.g.\ unfolding) in the next timestep is small. If we don't adapt our
timestep, we also risk breaking our approximation that $F(x' \in [x,
x+v \cdot dt]) = F(x)$ or that only one domain changes state in a
given timestep. The simulation would have been accurate for
sufficiently small fixed timesteps, but adaptive timesteps will allow
us to move through low tension regions in fewer steps, leading to a
more efficient simulation.
Consider the two state folded $\rightarrow$ unfolded transition.
Because $F(x)$ is monotonically increasing between unfoldings,
excessively large timesteps will lead to erroneously small unfolding
rates (an thus, higher average unfolding force).
The actual adaptive timestep implementation is not particularly
interesting, since we are only required to reduce $dt$ to somewhere
below a set threshold, so I've removed it to Appendix
\ref{sec.adaptive_dt_implementation}.
\section{Models}
TODO: model intro.
The models provide the physics of the simulation, but the simulation
allows interchangeable models, and we are currently focusing on the
simulation itself, so we remove the actual model implementation to the
appendices.
The tension models are defined in Appendix \ref{sec.tension_models},
and unfolding models are defined in Appendix \ref{sec.k_models}.
<>=
#define k_B 1.3806503e-23 /* J/K */
@
\section{Command line interface}
<>=
<>
<>
<>
<>
@
\subsection{Model selection}
\label{sec.model_selection}
The main difficulty with the command line interface in \prog\ is
developing an intuitive method for accessing the possibly large number
of available models. We'll treat this generally by defining an array
of available models, containing their important parameters.
<>=
<>
@
<>=
typedef void *create_data_func_t(char **param_strings);
typedef void destroy_data_func_t(void *param_struct);
@
<>=
<>
<>
@
In order to choose models, we need a method of assembling a reaction
graph such as that in Figure \ref{fig.domain_states} and population
the graph with a set of domains. First we declare the domain states
following
\begin{center}
[[-s ,[,] -S [-N ,, -K ]] \\
\end{center}
e.g.
\begin{center}
[[-k folded,unfolded,bell -K 3.3e-4,0.25e-9]]
\end{center}
This creates a reaction going from the [[folded]] state to the
[[unfolded]] state, with the rate constant given by the Bell model
(Appendix \ref{sec.bell}). The unfolding parameters are then set to
[[3.3e-4,0.25e-9]], which in the case of the Bell model are the
unforced rate and transition state distance respectively.
\subsubsection{Tension}
To access the various tension models from the command line, we define
a structure that contains all the neccessary information about the
model.
<>=
typedef struct tension_model_getopt_struct {
one_dim_fn_t *handler; /* fn implementing the model on a group */
one_dim_fn_t *inverse_handler; /* fn implementing inverse on group */
char *name; /* name string identifying the model */
char *description; /* a brief description of the model */
int num_params; /* number of extra params for the model */
char **param_descriptions; /* descriptions of extra params */
char *params; /* default values for the extra params */
create_data_func_t *creator; /* param-string -> model-data-struct fn */
destroy_data_func_t *destructor; /* fn to free the model data structure */
} tension_model_getopt_t;
@ Set [[inverse_handler]] to [[NULL]] if you wish to invert using
bisection. Obviously, this will be slower than computing an
analytical inverse and more prone to rounding errors, but it may be
easier if a closed-form inverse does not exist.
<>=
tension_model_getopt_t tension_models[NUM_TENSION_MODELS] = {
<>,
<>,
<>,
<>,
<>,
<>
};
@
\subsubsection{Unfolding rate}
<>=
#define NUM_K_MODELS 6
typedef struct k_model_getopt_struct {
char *name;
char *description;
k_func_t *k;
int num_params;
char **param_descriptions;
char *params;
create_data_func_t *creator;
destroy_data_func_t *destructor;
} k_model_getopt_t;
@
<>=
k_model_getopt_t k_models[NUM_K_MODELS] = {
<>,
<>,
<>,
<>,
<>,
<>
};
@
\subsection{help}
<>=
void help(char *prog_name, double P, double dF,
double t_max, double dt_max, double v, double x_step,
state_t *stop_state, environment_t *env,
int n_tension_models, tension_model_getopt_t *tension_models,
int n_k_models, k_model_getopt_t *k_models,
int k_model, list_t *state_list)
{
state_t *state=NULL;
int i, j;
if (state_list != NULL)
state = S(tail(state_list));
printf("usage: %s [options]\n", prog_name);
printf("Version %s\n\n", VERSION);
printf("Monte Carlo simulation of a multi-globular domain protein unfolding\n\n");
printf("Simulation options:\n");
printf("");
printf("-q state\tStop simulation when this state empties (- for no limit, currently %s)\n", stop_state ? stop_state->name : "-" );
printf("-t tmax\tMaximum simulated time (-1 for no limit, currently %g s)\n", t_max);
printf("-d dt\tMaximum allowed timestep dt (currently %g s)\n", dt_max);
printf("-P P\tMaximum transition probability for dt (currently %g)\n", P);
printf("-F dF\tMaximum change in force over dt. Only relevant if dx > 0. (currently %g N)\n", dF);
printf("-v v\tPulling velocity v (currently %g nm/s)\n", v);
printf("-x dx\tPulling step size (currently %g m)\n", x_step);
printf("\tWhen dx = 0, the pulling is continuous.\n");
printf("\tWhen dx > 0, the pulling occurs in discrete steps.\n");
printf("Environmental options:\n");
printf("-T T\tTemperature T (currently %g K)\n", env->T);
printf("-C T\tYou can also set the temperature T in Celsius\n");
printf("State creation options:\n");
printf("-s name,model[[,group],params]\tCreate a new state.\n");
printf("Once you've set up the state, you may populate it with domains\n");
printf("-N n\tAdd n domains in the current state (%s)\n", state ? state->name : "-");
printf("Transition creation options:\n");
printf("-k initial_state,final_state,model[,params]\tCreate a new transition\n");
printf("To double check your reaction graph, you can produce graphviz dot output\n");
printf("describing the current states and transitions.\n");
printf("-D\tPrint dot description of states and transitions and exit.\n");
printf("Simulation output mode options:\n");
printf("There are two output modes. In standard mode, only the transition\n");
printf("events are printed. For example:\n");
printf(" #Force (N)\tInitial state\tFinal state\n");
printf(" 123.456e-12\tfolded\tunfolded\n");
printf(" ...\n");
printf("In verbose mode, the entire Force-vs-distance curve is output:\n");
printf(" #Distance (m)\tForce (N)\tStiffness (N/m)\n");
printf(" 0.001\t0.0023\t0.002\n");
printf(" ...\n");
printf("-V\tChange output to verbose mode.\n");
printf("-h\tPrint this help and exit.\n");
printf("\n");
printf("Tension models:\n");
for (i=0; i>=
<>
void get_options(int argc, char **argv, double *pP, double *pDF,
double *pT_max, double *pDt_max, double *pV,
double *pX_step, state_t **pStop_state, environment_t *env,
int n_tension_models, tension_model_getopt_t *tension_models,
int n_k_models, k_model_getopt_t *k_models,
list_t **pState_list, list_t **pTransition_list)
{
char *prog_name = NULL;
char c, options[] = "q:t:d:P:F:v:x:T:C:s:N:k:DVh";
int tension_model=0, k_model=0, initial_state=-1, final_state=-1;
char *state_name=NULL;
int i, n, tension_group=0;
list_t *temp_list=NULL;
int num_strings;
char **strings;
void *model_params; /* for INIT_MODEL() */
int num_param_args; /* for INIT_MODEL() */
char **param_args; /* for INIT_MODEL() */
extern char *optarg;
extern int optind, optopt, opterr;
assert(argc > 0);
#ifdef DEBUG
for (i=0; iT = 300.0; /* K */
*pState_list = NULL;
*pTransition_list = NULL;
while ((c=getopt(argc, argv, options)) != -1) {
switch(c) {
case 'q':
if (STRMATCH(optarg, "-")) {
*pStop_state = NULL;
} else {
temp_list = head(*pState_list);
while (temp_list != NULL) {
if (STRMATCH(S(temp_list)->name, optarg)) {
*pStop_state = S(temp_list);
break;
}
temp_list = temp_list->next;
}
}
break;
case 't': *pT_max = safe_strtod(optarg, "-t"); break;
case 'd': *pDt_max = safe_strtod(optarg, "-d"); break;
case 'P': *pP = safe_strtod(optarg, "-P"); break;
case 'F': *pDF = safe_strtod(optarg, "-F"); break;
case 'v': *pV = safe_strtod(optarg, "-v"); break;
case 'x': *pX_step = safe_strtod(optarg, "-x"); break;
case 'T': env->T = safe_strtod(optarg, "-T"); break;
case 'C': env->T = safe_strtod(optarg, "-C")+273.15; break;
case 's':
parse_list_string(optarg, SEP, '{', '}', &num_strings, &strings);
assert(num_strings >= 2 && num_strings <= 4);
state_name = strings[0];
if (state_index(*pState_list, state_name) != -1) {
fprintf(stderr, "Invalid option '%s', repeated state name '%s'\n", optarg, state_name);
exit(1);
}
tension_model = index_tension_model(n_tension_models, tension_models, strings[1]);
if (num_strings == 4)
tension_group = safe_strtoi(strings[2], optarg);
else
tension_group = 0;
if (num_strings >= 3)
tension_models[tension_model].params = strings[num_strings-1];
#ifdef DEBUG
fprintf(stderr, "%s:\t%s -> state %s, model %s, params %s, group %d\n", __FUNCTION__, optarg, state_name, tension_models[tension_model].name, tension_models[tension_model].params, tension_group);
#endif
INIT_MODEL(state_name, &tension_models[tension_model], tension_models[tension_model].params, model_params);
push(pState_list, create_state(state_name,
tension_models[tension_model].handler,
tension_models[tension_model].inverse_handler,
tension_group,
model_params,
tension_models[tension_model].destructor,
0), 1);
*pState_list = tail(*pState_list);
state_name = NULL;
free_parsed_list(num_strings, strings);
break;
case 'N':
n = safe_strtoi(optarg, "-N");
#ifdef DEBUG
fprintf(stderr, "%s:\tadding %d domains to %s\n", __FUNCTION__, n, S(*pState_list)->name);
#endif
S(*pState_list)->num_domains += n;
break;
case 'k':
parse_list_string(optarg, SEP, '{', '}', &num_strings, &strings);
assert(num_strings >= 3 && num_strings <= 4);
initial_state = state_index(*pState_list, strings[0]);
final_state = state_index(*pState_list, strings[1]);
k_model = index_k_model(n_k_models, k_models, strings[2]);
#ifdef DEBUG
fprintf(stderr, "%s:\t%s -> %s reaction from state %s (%d) -> state %s (%d)\n", __FUNCTION__, optarg, strings[2], strings[0], initial_state, strings[1], final_state);
#endif
assert(initial_state != final_state);
if (num_strings == 4)
k_models[k_model].params = strings[3];
INIT_MODEL(state_name, &k_models[k_model], k_models[k_model].params, model_params);
push(pTransition_list, create_transition(list_element(*pState_list, initial_state),
list_element(*pState_list, final_state),
k_models[k_model].k,
model_params,
k_models[k_model].destructor), 1);
free_parsed_list(num_strings, strings);
break;
case 'D':
print_state_graph(stdout, *pState_list, *pTransition_list);
exit(0);
break;
case 'V':
flags = FLAG_OUTPUT_FULL_CURVE;
break;
case '?':
fprintf(stderr, "unrecognized option '%c'\n", optopt);
/* fall through to default case */
default:
help(prog_name, *pP, *pDF, *pT_max, *pDt_max, *pV, *pX_step,
*pStop_state, env, n_tension_models, tension_models,
n_k_models, k_models, k_model, *pState_list);
exit(1);
}
}
/* check the arguments */
assert(*pState_list != NULL); /* no domains! */
assert(*pP > 0.0); assert(*pP < 1.0);
assert(*pDt_max > 0.0);
assert(*pV > 0.0);
assert(env->T > 0.0);
crosslink_groups(*pState_list, *pTransition_list);
return;
}
@
<>=
int index_tension_model(int n_models, tension_model_getopt_t *models, char *name)
{
int i;
for (i=0; i>=
int index_k_model(int n_models, k_model_getopt_t *models, char *name)
{
int i;
for (i=0; i>=
/* requires int num_param_args and char **param_args in the current scope
* usage:
* INIT_MODEL("folded", folded_model, folded_param_string, folded_params);
* defined as a macro, so it can work on both tension_model_getopt_t and k_model_getopt_t types.
*/
#define INIT_MODEL(role, model, param_string, param_pointer) \
do { \
parse_list_string((param_string), SEP, '{', '}', \
&num_param_args, ¶m_args); \
if (num_param_args != (model)->num_params) { \
fprintf(stderr, \
"%s model %s expected %d params,\n", \
(role), (model)->name, (model)->num_params); \
fprintf(stderr, \
"not the %d params in '%s'\n", \
num_param_args, (param_string)); \
assert(num_param_args == (model)->num_params); \
} \
if ((model)->creator) \
param_pointer = (*(model)->creator)(param_args); \
else \
param_pointer = NULL; \
free_parsed_list(num_param_args, param_args); \
} while (0);
@
First we define some safe string parsing functions. Currently these
abort on any irregularity, but in the future they could print error
messages, etc. [[static]] because the functions are currently
defined in each [[*.c]] file that uses them, but perhaps they should
be moved to [[utils.h/c]] or some such instead\ldots
<>=
static int safe_strtoi(const char *s, const char *description) {
char *endp = NULL;
long int ret;
assert(s != NULL);
ret = strtol(s, &endp, 10);
if (endp[0] != '\0') { //strlen(endp) > 0) {
fprintf(stderr, "Error: unparsable '%s' while parsing '%s' for %s\n",
endp, s, description);
assert(1==0); //strlen(endp) == 0);
} if (endp == s) {
fprintf(stderr, "Error: unparsable empty string '%s' for %s\n",
s, description);
assert(endp != s);
} else if (errno == ERANGE) {
fprintf(stderr, "Error: '%s' out of range for %s\n", s, description);
assert(errno != ERANGE);
}
return (int) ret;
}
static double safe_strtod(const char *s, const char *description) {
char *endp = NULL;
double ret;
assert(s != NULL);
ret = strtod(s, &endp);
if (endp[0] != '\0') { //strlen(endp) > 0) {
fprintf(stderr, "Error: unparsable '%s' while parsing '%s' for %s\n",
endp, s, description);
assert(1==0); //strlen(endp) == 0);
} if (endp == s) {
fprintf(stderr, "Error: unparsable empty string '%s' for %s\n",
s, description);
assert(endp != s);
} else if (errno == ERANGE) {
fprintf(stderr, "Error: '%s' out of range for %s\n", s, description);
assert(errno != ERANGE);
}
return ret;
}
@
\phantomsection
\appendix
\addcontentsline{toc}{section}{Appendicies}
\section{sawsim.c details}
\subsection{Layout}
The general layout of our simulation code is:
<<*>>=
//#define DEBUG
<>
<>
<>
<>
<>
<>
@
We include [[math.h]], so don't forget to link to the libm with `-lm'.
<>=
#include /* assert() */
#include /* malloc(), free(), rand(), strto*() */
#include /* fprintf(), stdout */
#include /* strlen, strtok() */
#include /* exp(), M_PI, sqrt() */
#include /* pid_t (returned by getpid()) */
#include /* getpid() (for seeding rand()), getopt() */
#include /* errno, ERANGE (for safe_strto*()) */
#include "global.h"
#include "list.h"
#include "tension_balance.h"
#include "k_model.h"
#include "tension_model.h"
#include "parse.h"
#include "accel_k.h"
@
<>=
<>
<>
<>
<>
<>
<>
<>
<>
@
<>=
<>
<>
@
<>=
<>
<>
<>
<>
<>
<>
<>
<>
<>
@
<>=
<>
<>
@
<>=
void setup(void)
{
srand(getpid()*time(NULL)); /* seed rand() */
}
@
<>=
/* in octal b/c of prefixed '0' */
#define FLAG_OUTPUT_FULL_CURVE 01
#define FLAG_OUTPUT_TRANSITION_FORCES 02
@
<>=
static unsigned long int flags = 0;
@
\subsection{Utilities}
\label{sec.utils}
<>=
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#define MIN(a,b) ((a)<(b) ? (a) : (b))
@
Note that [[STRMATCH]] chokes if one of the strings is [[NULL]].
<>=
// Check if two strings match, return 1 if they do
static char *temp_string_A;
static char *temp_string_B;
#define STRMATCH(a,b) (temp_string_A=a, temp_string_B=b, \
strlen(temp_string_A) != strlen(temp_string_B) ? 0 : \
!strncmp(temp_string_A,temp_string_B,strlen(temp_string_B)+1) )
/* +1 to also compare the '\0' */
@
We also define a macro for our [[check]] unit testing
<>=
#define CHECK_ERR(max_err, expected, received) \
do { \
fail_unless( (received-expected)/expected < max_err, \
"relative error %g >= %g in %s (Expected %g, received %g)", \
(received-expected)/expected, max_err, #received, \
expected, received); \
fail_unless(-(received-expected)/expected < max_err, \
"relative error %g >= %g in %s (Expected %g, received %g)", \
-(received-expected)/expected, max_err, #received, \
expected, received); \
} while(0)
@
<>=
int happens(double probability)
{
assert(probability >= 0.0); assert(probability <= 1.0);
return (double)rand()/RAND_MAX < probability; /* TODO: replace with GSL rand http://www.gnu.org/software/gsl/manual/html_node/Random-number-generator-algorithms.html x*/
}
@
\subsection{Adaptive timesteps}
\label{sec.adaptive_dt_implementation}
$F(x)$ increases with $x$, possibly exploding (e.g.\ in the worm-like
chain model), so basing the timestep on the unfolding probability at
the current tension is dangerous, and we need to search for a $dt$ for
which $P_N(F(x+v*dt))>=
double search_dt(transition_t *transition,
list_t *state_list,
environment_t *env, double x,
double max_prob, double max_dt, double v)
{ /* Returns a valid timestep dt in seconds for the given transition.
* Takes a list of all states, a pointer env to the environmental data,
* a starting separation x in m, a max_prob between 0 and 1,
* max_dt in s, stretching velocity v in m/s.
*/
double F, k, P, dtCur, dtU, dtUCur, dtL, dt;
/* get upper bound using the current position */
F = find_tension(state_list, env, x, 0, 1); /* BUG. repeated calculation */
//printf("Start with x = %g (F = %g)\n", x, F);
k = accel_k(transition->k, F, env, transition->k_params);
//printf("x %g\tF %g\tk %g\n", x, F, k);
dtU = P1(max_prob, N_INIT(transition)) / k; /* upper bound on dt */
if (dtU > max_dt) {
//printf("overshot max_dt\n");
dtU = max_dt;
}
if (v == 0) /* discrete stepping, so force is temporarily constant */
return dtU;
/* set a lower bound on dt too */
dtL = 0.0;
/* The dt determined above may produce illegitimate forces or ks.
* Reduce the upper bound until we have valid ks. */
dt = dtU;
F = find_tension(state_list, env, x+v*dt, 0, 1);
while (F == HUGE_VAL) { /* reduce step until we hit a valid force */
dtU /= 2.0;
dt = dtU;
F = find_tension(state_list, env, x+v*dt, 0, 1);
}
//printf("Try for dt = %g (F = %g)\n", dt, F);
k = accel_k(transition->k, F, env, transition->k_params);
/* returned k may be -1.0 */
//printf("x %g\tF %g\tdt %g\tv dt %g\tk %g\n", x, F, dt, v*dt, k);
while (k == -1.0) { /* reduce step until we hit a valid k */
dtU /= 2.0;
dt = dtU; /* hopefully, we can use the max dt, see if it works */
F = find_tension(state_list, env, x+v*dt, 0, 1);
//printf("Try for dt = %g (F = %g)\n", dt, F);
k = accel_k(transition->k, F, env, transition->k_params);
//printf("x %g\tF %g\tdt %g\tv dt %g\tk %g\n", x, F, dt, v*dt, k);
}
assert(dtU > 1e-14); /* timestep to valid k too small */
/* safe timestep back from x+dtU */
dtUCur = P1(max_prob, N_INIT(transition)) / k;
if (dtUCur >= dt)
return dt; /* dtU is safe. */
/* dtU wasn't safe, lets see what would be. */
while (dtU > 1.1*dtL) { /* until the range is reasonably small */
dt = (dtU + dtL) / 2.0;
F = find_tension(state_list, env, x+v*dt, 0, 1);
//printf("Try for dt = %g (F = %g) (dt bounds %g, %g)\n", dt, F, dtL, dtU);
k = accel_k(transition->k, F, env, transition->k_params);
dtCur = P1(max_prob, N_INIT(transition)) / k;
//printf("x %g\tF %g\tdt %g\tv dt %g\tk %g\tdtCur = %g\n", x, F, dt, v*dt, k, dtCur);
if (dtCur > dt) /* safe timestep back from x+dt covers dt */
dtL = dt;
else if (dtCur < dt) { /* unsafe timestep back from x+dt, but... */
dtU = dt; /* ... stepping out only dtCur would be safe */
dtUCur = dtCur;
} else
break; /* dtCur = dt */
}
return MAX(dtUCur, dtL);
}
@
Determine $dt$ for an array of potentially different folded domains.
We apply [[search_dt()]] to each populated domain to find the maximum
$dt$ the domain can handle. The final $dt$ is less than the
individual domain $dt$s (to satisfy [[search_dt()]], see above). If
$v>0$ (continuous pulling), $dt$ is also reduced to satisfy
$|F(x+v*dt)-F(x)|>=
<>
double determine_dt(list_t *state_list, list_t *transition_list,
environment_t *env, double F, double x, double max_dF,
double max_prob, double dt_max, double v, int transition)
{ /* Returns the timestep dt in seconds.
* Takes the state and transition lists, a pointer to the environment,
* the force F(x) in N, an extension x in m, max_dF in N,
* max_prob between 0 and 1, dt_max in s, v in m/s, and transition in {0,1}*/
double dt=dt_max, new_dt, F_new;
assert(max_dF > 0.0);
assert(max_prob > 0.0); assert(max_prob < 1.0);
assert(dt_max > 0.0);
assert(state_list != NULL);
assert(transition_list != NULL);
transition_list = head(transition_list);
if (v > 0) {
/* Ensure |dF| < max_dF */
F_new = find_tension(state_list, env, x+v*dt, transition, 1);
while (fabs(F_new - F) > max_dF) {
dt /= 2.0;
F_new = find_tension(state_list, env, x+v*dt, transition, 1);
}
}
/* Ensure all individual domains pass search_dt() */
while (transition_list != NULL) {
if (T(transition_list)->initial_state->num_domains > 0) {
new_dt = search_dt(T(transition_list),state_list,env,x,max_prob,dt,v);
dt = MIN(dt, new_dt);
}
transition_list = transition_list->next;
}
return dt;
}
@
\subsection{State data}
\label{sec.state_data}
The domains exist in one of an array of possible states. Each state
has a tension model which determines it's force/extension curve
(possibly [[null]] for rigid states, see Appendix
\ref{sec.null_tension}).
Groups are, for example, for two domain states with WLC tensions; one
with $p=0.4\U{nm}$, $L=10\U{nm}$ and the other with $p=0.4\U{nm}$,
$L=28\U{nm}$. Since the persistence lengths are the same, you would
like to add the contour lengths of all the domains in \emph{both}
states, and plug that total contour length into a single WLC
evaluation (see Section \ref{sec.find_tension}).
<>=
typedef struct state_struct {
char *name; /* name string */
one_dim_fn_t *tension_handler; /* tension handler */
one_dim_fn_t *inverse_tension_handler; /* inverse tension handler */
int tension_group; /* for grouping similar tension states */
void *tension_params; /* pointer to tension parameters */
destroy_data_func_t *destroy_tension;
int num_domains; /* number of domains currently in state */
/* cached lookups generated from the above data */
int num_out_trans; /* length of out_trans array */
struct transition_struct **out_trans; /* transitions leaving this state */
int num_grouped_states; /* length of grouped states array */
struct state_struct **grouped_states; /* states grouped with this one (doesn't include self) */
} state_t;
/* get the state data for the current list node */
#define S(list) ((state_t *)(list)->d)
@ [[tension_params]] is a pointer to the parameters used by the
handler function when determining the tension. [[destroy_tension]]
points to a function for freeing [[tension_params]]. We it in the
state structure so that [[destroy_state]] and will have an easier job
cleaning up.
[[create_]] and [[destroy_state]] are simple wrappers around
[[malloc]] and [[free]].
<>=
state_t *create_state(char *name,
one_dim_fn_t *tension_handler,
one_dim_fn_t *inverse_tension_handler,
int tension_group,
void *tension_params,
destroy_data_func_t *destroy_tension,
int num_domains)
{
state_t *ret = (state_t *)malloc(sizeof(state_t));
assert(ret != NULL);
/* make a copy of the name, so we aren't dependent on the original */
ret->name = (char *)malloc(sizeof(char)*(strlen(name)+1));
assert(ret->name != NULL);
strcpy(ret->name, name); /* we know ret->name is long enough */
ret->tension_handler = tension_handler;
ret->inverse_tension_handler = inverse_tension_handler;
ret->tension_group = tension_group;
ret->tension_params = tension_params;
ret->destroy_tension = destroy_tension;
ret->num_domains = num_domains;
ret->num_out_trans = 0;
ret->out_trans = NULL;
ret->num_grouped_states = 0;
ret->grouped_states = NULL;
#ifdef DEBUG
fprintf(stderr, "generated state %s (%p) with tension handler %p, inverse handler %p, params %p, group %d.\n", ret->name, ret, ret->tension_handler, ret->inverse_tension_handler, ret->tension_params, ret->tension_group);
#endif
return ret;
}
void destroy_state(state_t *state)
{
if (state) {
#ifdef DEBUG
fprintf(stderr, "freeing state %s (%p)\n", state->name, state);
#endif
if (state->name)
free(state->name);
if (state->destroy_tension)
(*state->destroy_tension)(state->tension_params);
if (state->out_trans)
free(state->out_trans);
if (state->grouped_states)
free(state->grouped_states);
free(state);
}
}
@
We'll be storing the states in a list (see Appendix \ref{sec.lists}),
so we also define a convenience function for cleaning up.
<>=
void destroy_state_list(list_t *state_list)
{
state_list = head(state_list);
while (state_list != NULL)
destroy_state((state_t *) pop(&state_list));
}
@
We can also define a convenient way to get a state index from it's name.
<>=
int state_index(list_t *state_list, char *name)
{
int i=0;
state_list = head(state_list);
while (state_list != NULL) {
if (STRMATCH(S(state_list)->name, name)) return i;
state_list = state_list->next;
i++;
}
return -1;
}
@
\subsection{Transition data}
\label{sec.transition_data}
Once you've created states (Sections \ref{sec.main},
\ref{sec.model_selection}, and \ref{sec.state_data}), you need to
create transitions between them.
<>=
typedef struct transition_struct {
state_t *initial_state;
state_t *final_state;
k_func_t *k; /* transition rate model */
void *k_params; /* pointer to k parameters */
destroy_data_func_t *destroy_k;
} transition_t;
/* get the transition data for the current list node */
#define T(list) ((transition_t *)(list)->d)
/* get the number of initial state population for a transition state */
#define N_INIT(transition) ((transition)->initial_state->num_domains)
<>
<>
@ [[k]] is a pointer to the function determining the transition rate
for a given tension. [[k_params]] and [[destroy_k]] have similar
roles with regards to [[k]] as [[tension_params]] and
[[destroy_tension]] have with regards to [[tension_handler]] (see
Section \ref{sec.state_data}).
[[create_]] and [[destroy_transition]] are simple wrappers around
[[malloc]] and [[free]].
<>=
transition_t *create_transition(state_t *initial_state, state_t *final_state,
k_func_t *k,
void *k_params,
destroy_data_func_t *destroy_k)
{
transition_t *ret = (transition_t *)malloc(sizeof(transition_t));
assert(ret != NULL);
assert(initial_state != NULL);
assert(final_state != NULL);
ret->initial_state = initial_state;
ret->final_state = final_state;
ret->k = k;
ret->k_params = k_params;
ret->destroy_k = destroy_k;
#ifdef DEBUG
fprintf(stderr, "generated transition %p from %s to %s, params %p\n", ret, ret->initial_state->name, ret->final_state->name, ret->k_params);
#endif
return ret;
}
void destroy_transition(transition_t *transition)
{
if (transition) {
#ifdef DEBUG
fprintf(stderr, "freeing transition %p (%s to %s)\n", transition, transition->initial_state->name, transition->final_state->name);
#endif
if (transition->destroy_k)
(*transition->destroy_k)(transition->k_params);
free(transition);
}
}
@
We'll be storing the transitions in a list (see Appendix
\ref{sec.lists}), so we also define a convenience function for
cleaning up.
<>=
void destroy_transition_list(list_t *transition_list)
{
transition_list = head(transition_list);
while (transition_list != NULL)
destroy_transition((transition_t *) pop(&transition_list));
}
@
\subsection{Graphviz output}
\label{sec.graphviz_output}
<>=
void print_state_graph(FILE *file, list_t *state_list, list_t *transition_list)
{
state_list = head(state_list);
transition_list = head(transition_list);
fprintf(file, "digraph states {\n node [shape=record,color=black,fillcolor=white,style=filled];\n\n");
while (1) {
fprintf(file, " n%d [label=\"%s\"]\n", state_index(state_list, S(state_list)->name), S(state_list)->name);
if (state_list->next == NULL) break;
state_list = state_list->next;
}
fprintf(file, "\n");
while (transition_list != NULL) {
fprintf(file, " n%d -> n%d;\n", state_index(state_list, T(transition_list)->initial_state->name), state_index(state_list, T(transition_list)->final_state->name));
transition_list = transition_list->next;
}
fprintf(file, "}\n");
}
@
\subsection{Domain model and group handling}
<>=
<>
<>
@
Scan through a list of states and construct an array of all the
active groups.
<>=
void get_active_groups(list_t *state_list,
int *pNum_active_groups,
one_dim_fn_t ***pPer_group_handlers,
one_dim_fn_t ***pPer_group_inverse_handlers,
void ***pPer_group_data /* pointer to array of pointers to tension_handler_data_t */)
{
void **active_groups=NULL;
one_dim_fn_t *handler, *old_handler, *inverse_handler, *old_inverse_handler;
one_dim_fn_t **per_group_handlers=NULL, **per_group_inverse_handlers=NULL;
void **per_group_data=NULL; /* array of pointers to tension_handler_data_t */
int i, j, num_domains, group, old_group, num_active_groups=0;
list_t *active_states_list=NULL;
tension_handler_data_t *tdata=NULL;
state_t *state;
/* get all the active groups in a list */
state_list = head(state_list);
#ifdef DEBUG
fprintf(stderr, "%s:\t", __FUNCTION__);
list_print(stderr, state_list, "states");
#endif
while (state_list != NULL) {
state = S(state_list);
handler = state->tension_handler;
inverse_handler = state->inverse_tension_handler;
group = state->tension_group;
num_domains = state->num_domains;
if (list_index(active_states_list, state) == -1) {
/* we haven't added this state (or it's associated group) yet */
if (num_domains > 0 && handler != NULL) { /* add the state */
num_active_groups++;
push(&active_states_list, state, 1);
#ifdef DEBUG
fprintf(stderr, "%s:\tactive %s state (%p) with %d domain(s)\n", __FUNCTION__, state->name, state, state->num_domains);
#endif
for (i=0; i < state->num_grouped_states; i++) {
if (state->grouped_states[i]->num_domains > 0) {
/* add this related (and active) state now */
assert(state->grouped_states[i]->tension_handler == handler);
assert(state->grouped_states[i]->tension_group == group);
push(&active_states_list, state->grouped_states[i], 1);
#ifdef DEBUG
fprintf(stderr, "%s:\tactive grouped %s state (%p) with %d domain(s)\n", __FUNCTION__, state->grouped_states[i]->name, state->grouped_states[i], state->grouped_states[i]->num_domains);
#endif
}
}
} /* else empty state or NULL handler */
} /* else state already added */
state_list = state_list->next;
}
#ifdef DEBUG
fprintf(stderr, "%s:\t", __FUNCTION__);
list_print(stderr, active_states_list, "active states");
#endif
assert(num_active_groups <= list_length(active_states_list));
assert(num_active_groups > 0);
/* allocate the arrays we'll be returning */
per_group_handlers = (one_dim_fn_t **)malloc(sizeof(one_dim_fn_t *)*num_active_groups);
assert(per_group_handlers != NULL);
per_group_inverse_handlers = (one_dim_fn_t **)malloc(sizeof(one_dim_fn_t *)*num_active_groups);
assert(per_group_inverse_handlers != NULL);
per_group_data = (void **)malloc(sizeof(void *)*num_active_groups);
assert(per_group_data != NULL);
#ifdef DEBUG
fprintf(stderr, "%s:\tallocating per group data %p:", __FUNCTION__, per_group_data);
#endif
for (i=0; itension_handler;
inverse_handler = state->inverse_tension_handler;
group = state->tension_group;
num_domains = state->num_domains;
if (handler != old_handler || inverse_handler != old_inverse_handler || group != old_group) {
j++; /* move to the next group */
tdata = (tension_handler_data_t *) per_group_data[j];
per_group_handlers[j] = handler;
per_group_inverse_handlers[j] = inverse_handler;
tdata->group_tension_params = NULL;
tdata->env = NULL;
tdata->persist = NULL; /* we might want to grab any appropriate data from the old persist here... */
}
#ifdef DEBUG
fprintf(stderr, "%s:\tadding active state %s to group %d (%p:%p:%d) with %d domain(s)\n", __FUNCTION__, state->name, j, handler, inverse_handler, group, num_domains);
#endif
for (i=0; igroup_tension_params, state->tension_params, 1);
}
#ifdef DEBUG
fprintf(stderr, "%s:\tpushed %d copies of %p onto data %p's param list %p\n", __FUNCTION__, num_domains, state->tension_params, tdata, tdata->group_tension_params);
#endif
old_handler = handler;
old_inverse_handler = inverse_handler;
old_group = group;
}
/* free old groups */
if (*pPer_group_handlers != NULL)
free(*pPer_group_handlers);
if (*pPer_group_inverse_handlers != NULL)
free(*pPer_group_inverse_handlers);
if (*pPer_group_data != NULL) {
for (i=0; i<*pNum_active_groups; i++)
free((*pPer_group_data)[i]);
free(*pPer_group_data);
}
/* set new groups */
#ifdef DEBUG
fprintf(stderr, "%s:\t%d groups, tension handler array %p, inverse tension handler array %p, handler data array %p (lead data element %p)\n", __FUNCTION__, num_active_groups, per_group_handlers, per_group_inverse_handlers, per_group_data, per_group_data[0]);
#endif
*pNum_active_groups = num_active_groups;
*pPer_group_handlers = per_group_handlers;
*pPer_group_inverse_handlers = per_group_inverse_handlers;
*pPer_group_data = per_group_data;
}
@
<>=
void crosslink_groups(list_t *state_groups, list_t *transition_list)
{
list_t *list, *out_trans=NULL, *associates=NULL;
one_dim_fn_t *handler;
int i, n, group;
state_groups = head(state_groups);
while (state_groups != NULL) {
/* find transitions leaving this state */
#ifdef DEBUG
fprintf(stderr, "finding transitions leaving %s\n", S(state_groups)->name);
#endif
list = head(transition_list);
while (list != NULL) {
if (T(list)->initial_state == S(state_groups) && list_index(associates, T(list)->initial_state) != -1) {
push(&out_trans, T(list), 1);
}
list = list->next;
}
n = list_length(out_trans);
S(state_groups)->num_out_trans = n;
S(state_groups)->out_trans = (transition_t **)malloc(sizeof(transition_t *)*n);
assert(S(state_groups)->out_trans != NULL);
for (i=0; iout_trans[i] = (transition_t *)pop(&out_trans);
}
/* find states grouped with this state */
#ifdef DEBUG
fprintf(stderr, "finding states grouped with %s\n", S(state_groups)->name);
#endif
handler = S(state_groups)->tension_handler;
group = S(state_groups)->tension_group;
list = head(state_groups);
while (list != NULL) {
if (S(list)->tension_handler == handler && S(list)->tension_group == group && list != state_groups) {
push(&associates, S(list), 1);
}
list = list->next;
}
n = list_length(associates);
S(state_groups)->num_grouped_states = n;
S(state_groups)->grouped_states = (state_t **)malloc(sizeof(state_t *)*n);
assert(S(state_groups)->grouped_states != NULL);
for (i=0; igrouped_states[i] = (state_t *)pop(&associates);
}
state_groups = state_groups->next;
}
}
@
\section{String parsing}
For handling command line arguments, we need parse delimited strings (e.g.\ [["param1,param2,param3..."]]).
The model handling in getopt is set up to handle a fixed number of arguments for each model,
so models (like [[kramers_integ]]) that have complicated parameters (location of spline knots)
need to take care of parsing those parameters themselves.
We implement this parsing in [[parse.c]], define the interface in [[parse.h]], and the the unit testing in [[check_parse.c]].
<>=
#ifndef PARSE
#define PARSE
<>
<>
<>
#endif /* PARSE */
@
<>=
NW_SAWSIM_MODS += parse
CHECK_BINS += check_parse
check_parse_MODS =
@
<>=
#define SEP ',' /* argument separator character */
@
<>=
extern void parse_list_string(char *string, char sep, char deeper, char shallower,
int *num, char ***string_array);
extern void free_parsed_list(int num, char **string_array);
@
[[parse_list_string]] allocates memory, don't forget to free it
afterward with [[free_parsed_list]]. It does not alter the original.
The string may start off with a [[deeper]] character
(i.e. [["{x,y}"]]), and when it does, brace stripping will set leave
[[x,y]], where the pointer is one character in on the copied string.
However, when we go to free the memory, we need a pointer to the
beginning of the string. In order to accommodate this for a string
with $N$ argument, allocate a pointer array with $N+1$ elements, let
the first $N$ elements point to the separated arguments, and let the
last element point to the start of the copied string regardless of
braces.
<>=
/* TODO, split out into parse.hc */
static int next_delim_index(char *string, char sep, char deeper, char shallower)
{
int i=0, depth = 0;
while (string[i] != '\0' && !(string[i] == sep && depth == 0)) {
if (string[i] == deeper) {depth++;}
else if (string[i] == shallower) {depth--; assert(depth >= 0);}
i++;
}
return i;
}
void parse_list_string(char *string, char sep, char deeper, char shallower,
int *num, char ***string_array)
{
char *str=NULL, **ret=NULL;
int i, j, n;
if (string==NULL || strlen(string) == 0) { /* handle the trivial cases */
*num = 0;
*string_array = NULL;
return;
}
/* make a copy of the string, so we don't change the original */
str = (char *)malloc(sizeof(char)*(strlen(string)+1));
assert(str != NULL);
strcpy(str, string); /* we know str is long enough */
/* count the number of regions, so we can allocate pointers to them */
i=-1; n=0;
do {
n++; i++; /* move on to next argument */
i += next_delim_index(str+i, sep, deeper, shallower);
//fprintf( stderr, "delim at %d (%d) ('%s' -> '%s')\n", i, (int)str[i], str, str+i+1); fflush(stderr);
} while (str[i] != '\0');
ret = (char **)malloc(sizeof(char *)*(n+1));
assert(ret != NULL);
/* replace the separators with '\0' & assign pointers */
ret[n] = str; /* point to the front of the copied string */
j=0;
ret[0] = str;
for(i=1; i>=
<>
<>
#include "parse.h"
<>
@
<>=
#include /* assert() */
#include /* NULL */
#include /* fprintf(), stdout *//*!!*/
#include /* strlen() */
#include "parse.h"
@
\subsection{Parsing unit tests}
Here we check to make sure the various functions work as expected, using \citetalias{sw:check}.
<>=
<>
<>
<>
<>
<>
<>
@
<>=
#include /* EXIT_SUCCESS and EXIT_FAILURE */
#include /* printf() */
#include /* assert() */
#include /* strlen() */
<>
#include "parse.h"
@
<>=
<>
<>
@
<>=
Suite *test_suite (void)
{
Suite *s = suite_create ("parse");
<>
<>
return s;
}
@
<>=
/*
START_TEST(test_next_delim_index)
{
fail_unless(next_delim_index("", ',', '{', '}')==0, NULL);
fail_unless(next_delim_index(",arg,{str,ing},test", ',', '{', '}')==0, NULL);
fail_unless(next_delim_index("arg,{str,ing},test", ',', '{', '}')==3, NULL);
fail_unless(next_delim_index("{str,ing},test", ',', '{', '}')==9, NULL);
fail_unless(next_delim_index("test", ',', '{', '}')==4, NULL);
}
END_TEST
*/
START_TEST(test_parse_list_null)
{
int num_param_args;
char **param_args;
parse_list_string(NULL, SEP, '{', '}',
&num_param_args, ¶m_args);
fail_unless(num_param_args == 0, NULL);
fail_unless(param_args == NULL, NULL);
}
END_TEST
START_TEST(test_parse_list_single_simple)
{
int num_param_args;
char **param_args;
parse_list_string("arg", SEP, '{', '}',
&num_param_args, ¶m_args);
fail_unless(num_param_args == 1, NULL);
fail_unless(STRMATCH(param_args[0],"arg"), NULL);
}
END_TEST
START_TEST(test_parse_list_single_compound)
{
int num_param_args;
char **param_args;
parse_list_string("{x,y,z}", SEP, '{', '}',
&num_param_args, ¶m_args);
fail_unless(num_param_args == 1, NULL);
fail_unless(STRMATCH(param_args[0],"x,y,z"), "got '%s', expected '%s'", param_args[0], "x,y,z");
}
END_TEST
START_TEST(test_parse_list_double_simple)
{
int num_param_args;
char **param_args;
parse_list_string("abc,def", SEP, '{', '}',
&num_param_args, ¶m_args);
fail_unless(num_param_args == 2, NULL);
fail_unless(STRMATCH(param_args[0],"abc"), NULL);
fail_unless(STRMATCH(param_args[1],"def"), NULL);
}
END_TEST
START_TEST(test_parse_list_compound)
{
int num_param_args;
char **param_args;
parse_list_string("abc,{def,ghi}", SEP, '{', '}',
&num_param_args, ¶m_args);
fail_unless(num_param_args == 2, NULL);
fail_unless(STRMATCH(param_args[0],"abc"), NULL);
fail_unless(STRMATCH(param_args[1],"def,ghi"), NULL);
}
END_TEST
@
<>=
TCase *tc_parse_list_string = tcase_create("parse list string");
@
<>=
//tcase_add_test(tc_parse_list_string, test_next_delim_index);
tcase_add_test(tc_parse_list_string, test_parse_list_null);
tcase_add_test(tc_parse_list_string, test_parse_list_single_simple);
tcase_add_test(tc_parse_list_string, test_parse_list_single_compound);
tcase_add_test(tc_parse_list_string, test_parse_list_double_simple);
tcase_add_test(tc_parse_list_string, test_parse_list_compound);
suite_add_tcase(s, tc_parse_list_string);
@
\section{Unit tests}
Here we check to make sure the various functions work as expected, using \citetalias{sw:check}.
<>=
<>
<>
<>
<>
<>
<>
<>
<>
<>
@
<>=
#include
@
<>=
@
<>=
<>
<>
<>
<>
<>
@
<>=
Suite *test_suite (void)
{
Suite *s = suite_create ("sawsim");
<>
<>
<>
<>
<>
<>
<>
<>
/*
tcase_add_checked_fixture(tc_strip_address,
setup_strip_address,
teardown_strip_address);
*/
return s;
}
@
<>=
int main(void)
{
int number_failed;
Suite *s = test_suite();
SRunner *sr = srunner_create(s);
srunner_run_all(sr, CK_ENV);
number_failed = srunner_ntests_failed(sr);
srunner_free(sr);
return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}
@
\subsection{Model tests}
Check the searching with [[linear_k]].
Check overwhelming force treatment with the heavyside-esque step [[?]].
Hmm, I'm not really sure what I was doing here...
<>=
double linear_k(double F, environment_t *env, void *params)
{
double Fnot = *(double *)params;
return Fnot+F;
}
START_TEST(test_determine_dt_linear_k)
{
state_t folded={0};
transition_t unfold={0};
environment_t env = {0};
double F[]={0,1,2,3,4,5,6};
double dt_max=3.0, Fnot=3.0, x=1.0, max_p=0.1, max_dF=0.1, v=1.0;
list_t *state_list=NULL, *transition_list=NULL;
int i;
env.T = 300.0;
folded.tension_handler = &hooke_handler;
folded.num_domains = 1;
unfold.initial_state = &folded;
unfold.k = linear_k;
unfold.k_params = &Fnot;
push(&state_list, &folded, 1);
push(&transition_list, &unfold, 1);
for( i=0; i < sizeof(F)/sizeof(double); i++) {
//fail_unless(determine_dt(state_list, transition_list, &env, x, max_p, max_dF, dt_max, v)==1, NULL);
}
}
END_TEST
@
<>=
TCase *tc_determine_dt = tcase_create("Determine dt");
@
<>=
tcase_add_test(tc_determine_dt, test_determine_dt_linear_k);
suite_add_tcase(s, tc_determine_dt);
@
<>=
@
<>=
@
<>=
@
<>=
@
<>=
@
<>=
@
<>=
@
<>=
@
<>=
@
\section{Balancing group extensions}
\label{sec.tension_balance}
For a given total extension $x$ (of the piezo), the various domain
models (WLC, FJC, Hookean springs, \ldots) and groups extend different
amounts, and we need to tweak the portion that each extends to balance
the tension amongst the active groups. Since the tension balancing is
separable from the bulk of the simulation, we break it out into a
separate module. The interface is defined in [[tension_balance.h]],
the implementation in [[tension_balance.c]], and the unit testing in
[[check_tension_balance.c]]
<>=
#ifndef TENSION_BALANCE_H
#define TENSION_BALANCE_H
<>
<>
#endif /* TENSION_BALANCE_H */
@
<>=
<>
<>
<>
<>
<>
<>
<>
@
<>=
NW_SAWSIM_MODS += tension_balance
CHECK_BINS += check_tension_balance
check_tension_balance_MODS =
@
The entire force balancing problem reduces to a solving two nested
one-dimensional root-finding problems. First we define the one
dimensional tension $F(x_0)$ (where $i=0$ is the index of the first
populated group). $x(x_0)$ is also strictly monotonically increasing,
so we can solve for $x_0$ such that $\sum_i x_i = x$. [[last_x]]
stores the last successful value of $x$, since we expect to be taking
small steps ($x \approx$ [[last_x]]). Setting [[last_x = -1]]
indicates that the groups have changed.
<>=
double tension_balance(int num_tension_groups,
one_dim_fn_t **tension_handlers,
one_dim_fn_t **inverse_tension_handlers,
void **params, /* array of pointers to tension_handler_data_t */
double last_x,
double x,
double *stiffness);
@
<>=
double tension_balance(int num_tension_groups,
one_dim_fn_t **tension_handlers,
one_dim_fn_t **inverse_tension_handlers,
void **params, /* array of pointers to tension_handler_data_t */
double last_x,
double x,
double *stiffness)
{
static x_of_xo_data_t x_xo_data = {0,NULL,NULL,NULL,NULL};
double F, xo_guess, xo, lb, ub;
double min_relx=1e-6, min_rely=1e-6;
int max_steps=200, i;
assert(num_tension_groups > 0);
assert(tension_handlers != NULL);
assert(params != NULL);
assert(num_tension_groups > 0);
if (last_x == -1 || x_xo_data.xi == NULL) { /* new group setup, reset x_xo_data */
/* x_xo_data tension_handler and handler_data arrays freed by find_tension() */
if (x_xo_data.xi != NULL)
free(x_xo_data.xi);
/* construct new x_xo_data */
x_xo_data.n_groups = num_tension_groups;
x_xo_data.tension_handler = tension_handlers;
x_xo_data.inverse_tension_handler = inverse_tension_handlers;
x_xo_data.handler_data = params;
#ifdef DEBUG
fprintf(stderr, "%s:\t%d groups, tension handler array %p, handler data array %p (lead data element %p), x_of_xo_data %p\n", __FUNCTION__, x_xo_data.n_groups, x_xo_data.tension_handler, x_xo_data.handler_data, x_xo_data.handler_data[0], &x_xo_data);
#endif
x_xo_data.xi = (double *) malloc(sizeof(double)*num_tension_groups);
assert(x_xo_data.xi != NULL);
for (i=0; i last_x) {
lb = x_xo_data.xi[0];
ub = x_xo_data.xi[0]+ x-last_x; /* apply all change to x0 */
} else if (x < last_x) {
lb = x_xo_data.xi[0]- (x-last_x); /* apply all change to x0 */
ub = x_xo_data.xi[0];
} else { /* x == last_x */
#ifdef DEBUG
fprintf(stderr, "not moving\n");
#endif
lb = x_xo_data.xi[0];
ub = x_xo_data.xi[0];
}
}
#ifdef DEBUG
fprintf(stderr, "%s:\tsolve x(x0) so x = %g with bounds x0 in [%g, %g]\n",
__FUNCTION__, x, lb, ub);
#endif
xo = oneD_solve(x_of_xo, &x_xo_data, x, lb, ub, min_relx, min_rely, max_steps, NULL);
F = x_of_xo(xo, &x_xo_data); /* HACK, bonus call to setup x_xo_data.xi */
#ifdef DEBUG
if (num_tension_groups > 1) {
fprintf(stderr, "%s:\tbalanced for x = %g with ", __FUNCTION__, x);
for(i=0; i>=
<>
@
<>=
typedef struct x_of_xo_data_struct {
int n_groups;
one_dim_fn_t **tension_handler; /* array of fn pointers */
one_dim_fn_t **inverse_tension_handler; /* inverse fn pointers */
void **handler_data; /* array of pointers to tension_handler_data_t */
double *xi; /* array of group extensions */
} x_of_xo_data_t;
@
<>=
double x_of_xo(double xo, void *pdata)
{
x_of_xo_data_t *data = (x_of_xo_data_t *)pdata;
double F, x=0, xi, xi_guess, lb, ub, slope;
int i;
double min_relx=1e-6, min_rely=1e-6;
int max_steps=200;
assert(data->n_groups > 0);
data->xi[0] = xo;
#ifdef DEBUG
fprintf(stderr, "%s:\tget initial tension at %g with %d groups, handler %p, data %p (x_xo_data %p)\n", __FUNCTION__, xo, data->n_groups, data->tension_handler[0], data->handler_data[0], data);
fflush(stderr);
#endif
F = (*data->tension_handler[0])(xo, data->handler_data[0]);
#ifdef DEBUG
fprintf(stderr, "%s:\tfinding x(x0=%g). F0(x0) = %g\n", __FUNCTION__, xo, F);
fflush(stderr);
#endif
x += xo;
if (data->xi) data->xi[0] = xo;
for (i=1; i < data->n_groups; i++) {
if (data->inverse_tension_handler[i] != NULL) {
xi = (*data->inverse_tension_handler[i])(F, data->handler_data[i]);
} else { /* invert numerically */
if (data->xi[i] == -1) xi_guess = length_scale; /* HACK */
else xi_guess = data->xi[i];
#ifdef DEBUG
fprintf(stderr, "%s:\tsearching for proper x[%d] for F[%d](x[%d]) = %g N (handler %p, data %p)\n", __FUNCTION__, i, i, i, F, data->tension_handler[i], data->handler_data[i]);
#endif
oneD_bracket(data->tension_handler[i], data->handler_data[i], F, xi_guess, &lb, &ub);
xi = oneD_solve(data->tension_handler[i], data->handler_data[i], F, lb, ub, min_relx, min_rely, max_steps, NULL);
}
x += xi;
data->xi[i] = xi;
#ifdef DEBUG
fprintf(stderr, "%s:\tfound F[%d](x[%d]=%g) = %g N\n", __FUNCTION__, i, i, xi, F);
#endif
}
#ifdef DEBUG
fprintf(stderr, "%s:\tfound x(x0=%g) = %g\n", __FUNCTION__, xo, x);
#endif
return x;
}
@
Determine the stiffness of a single tension group by taking a small
step [[dx]] back from the position [[x]] for which we want the
stiffness. The touchy part is determining a good [[dx]] and ensuring
the whole interval is on [[x>=0]].
<>=
double group_stiffness(one_dim_fn_t *tension_handler, void *handler_data, double x, double relx)
{
double dx, xl, F, Fl, stiffness;
assert(x >= 0);
assert(relx > 0 && relx < 1);
if (x == 0 || relx == 0) {
dx = 1e-12; /* HACK, how to get length scale? */
xl = x-dx;
if (xl < 0) {
xl = 0;
x = xl+dx;
}
} else {
dx = x*relx;
xl = x-dx;
}
F = (*tension_handler)(x, handler_data);
Fl = (*tension_handler)(xl, handler_data);
stiffness = (F-Fl)/dx;
assert(stiffness >= 0);
return stiffness;
}
@
Attempt to calculate the chain stiffness by adding group stiffnesses
as springs in series. In the case where a group has undefined
stiffness (e.g. piston tension, Section \ref{sec.piston_tension}),
this algorithm breaks down. In that case, [[tension_balance()]]
(\ref{sec.tension_balance}) should use [[full_chain_stiffness()]]
which uses the full chain's $F(x)$ rather than that of the individual
domains'. We attempt the series approach first because, lacking the
numerical inversion inside [[tension_balance()]], it is both faster
and more accurate than the full-chain derivative.
<>=
double chain_stiffness(x_of_xo_data_t *x_xo_data, double relx)
{
double stiffness, gstiff;
int i;
/* add group stiffnesses in series */
for (i=0; i < x_xo_data->n_groups; i++) {
#ifdef DEBUG
fprintf(stderr, "%s:\tgetting stiffness of active state %d of %d for x[%d]=%g, relx=%g\n", __FUNCTION__, i, x_xo_data->n_groups, i, x_xo_data->xi[i], relx);
fflush(stderr);
#endif
gstiff = group_stiffness(x_xo_data->tension_handler[i], x_xo_data->handler_data[i], x_xo_data->xi[i], relx);
if (gstiff == 0.0);
return 0.0;
stiffness += 1.0/gstiff;
}
stiffness = 1.0 / stiffness;
return stiffness;
}
@
Determine the chain stiffness using only [[tension_balance()]]. This
works around difficulties with tension models that have undefined
stiffness (see discussion for [[chain_stiffness()]] above).
<>=
double full_chain_stiffness(int num_tension_groups,
one_dim_fn_t **tension_handlers,
one_dim_fn_t **inverse_tension_handlers,
void **params, /* array of pointers to tension_handler_data_t */
double last_x,
double x,
double relx,
double F /* already evaluated F(x) */)
{
double dx, xl, Fl, stiffness;
assert(x >= 0);
assert(relx > 0 && relx < 1);
/* Other option for dx that we ignore because of our poor tension_balance()
* resolution (i.e. bad slopes if you zoom in too close):
* if (last_x != -1.0 && last_x != x) // last_x limited
* dx fabs(x-last_x);
* else
* dx = HUGE_VAL;
* if (1==1) { // constant max-value limited
* dx_p = 1e-12;
* dx = MIN(dx, dx_p);
* }
* if (x != 0 && relx != 0) { // relx limited
* dx_p = x*relx;
* dx = MIN(dx, dx_p);
* }
* TODO, determine which of (min_relx, min_rely, max_steps) is actually
* limiting tension_balance.
*/
dx = 1e-12; /* HACK, how to get length scale? */
xl = x-dx;
if (xl >= 0) {
Fl = tension_balance(num_tension_groups, tension_handlers,
inverse_tension_handlers, params,
last_x, xl, NULL);
} else {
xl = x;
Fl = F;
x += dx;
F = tension_balance(num_tension_groups, tension_handlers,
inverse_tension_handlers, params,
last_x, x, NULL);
}
stiffness = (F-Fl)/dx;
assert(stiffness >= 0);
return stiffness;
}
@
Solve $f(x) = y$ to a certain precision in $x$ or $y$ in a limited
number of steps for monotonically increasing $f(x)$. Simple
bisection, so it's robust and converges fairly quickly. You can set
the maximum number of steps to take, but if you have enough processor
power, it's better to limit your precision with the relative limits
[[min_relx]] and [[y]]. These ensure that the width of the bracket is
small on the length scale of it's larger side. Note that \emph{both}
relative limits must be satisfied for the search to stop.
<>=
/* equivalent to gsl_func_t */
typedef double one_dim_fn_t(double x, void *params);
@
<>=
double oneD_solve(one_dim_fn_t *f, void *params, double y, double lb, double ub,
double min_relx, double min_rely, int max_steps,
double *pYx);
@
<>=
/* TODO, replace with GSL one d root finder http://www.gnu.org/software/gsl/manual/html_node/One-dimensional-Root_002dFinding.html */
double oneD_solve(one_dim_fn_t *f, void *params, double y, double lb, double ub,
double min_relx, double min_rely, int max_steps,
double *pYx)
{
double yx, ylb, yub, x;
int i=0;
assert(ub >= lb);
ylb = (*f)(lb, params);
yub = (*f)(ub, params);
/* check some simple cases */
if (ylb == yub) {
if (ylb != y) return HUGE_VAL; /* error! f(x)=y not bracketed */
else return lb; /* any x on the range [lb, ub] would work */
}
if (ylb == y) { x=lb; yx=ylb; goto end; }
if (yub == y) { x=ub; yx=yub; goto end; }
#ifdef DEBUG
fprintf(stderr, "%s:\tstarting bracket: lb %g, x ~, ub %g\tylb %g, y %g, yub %g\n", __FUNCTION__, lb, ub, ylb, y, yub);
#endif
assert(ylb < y); assert(yub > y);
for (i=0; i y) { ub=x; yub=yx; }
else /* yx < y */ { lb=x; ylb=yx; }
}
end:
if (pYx) *pYx = yx;
#ifdef DEBUG
fprintf(stderr, "%s:\tsolved f(%g) = %g ~= %g\n", __FUNCTION__, x, yx, y);
#endif
return x;
}
@
The one dimensional solver needs a bracketed solution, which sometimes we don't have.
Generate bracketing $x$ values through bisection or exponential growth.
<>=
void oneD_bracket(one_dim_fn_t *f, void *params, double y, double xguess, double *lb, double *ub);
@
<>=
void oneD_bracket(one_dim_fn_t *f, void *params, double y, double xguess, double *lb, double *ub)
{
double yx, step, x=xguess;
int i=0;
#ifdef DEBUG
fprintf(stderr, "%s:\tbracketing %g (params %p)\n", __FUNCTION__, y, params);
#endif
yx = (*f)(x, params);
#ifdef DEBUG
fprintf(stderr, "%s:\tbracketing %g, start at f(%g) = %g guessing x = %g (params %p)\n", __FUNCTION__, y, x, yx, xguess, params);
#endif
if (yx > y) {
assert(x > 0.0);
*ub = x;
*lb = 0;
#ifdef DEBUG
fprintf(stderr, "%s:\tbracketted by [0, guess = %g]\n", __FUNCTION__, x);
#endif
} else {
*lb = x;
if (x == 0) x = length_scale/2.0; /* HACK */
while (yx < y) {
x *= 2.0;
yx = (*f)(x, params);
#ifdef DEBUG
fprintf(stderr, "%s:\tincreasing to f(%g) = %g\n", __FUNCTION__, x, yx);
#endif
}
*ub = x;
#ifdef DEBUG
fprintf(stderr, "%s:\tbracketted ouput %g by [guess = %g, %g]\n", __FUNCTION__, y, xguess, x);
#endif
}
//fprintf(stdout, "ub %g, %g lb\n", *ub, *lb);
}
@
\subsection{Balancing implementation}
<