>=
/* Program entry point.
 * Reads the simulation settings with get_options(), installs the tension
 * handlers with setup(), and then steps the extension x forward until
 * num_folded reaches zero.  Always returns 0. */
int main(int argc, char **argv)
{
  <> <>
  list_t *domain_list=NULL; /* variables initialized in get_options() */
  environment_t env={0};
  double P, dt_max, v;
  int num_folded=0;
  double x=0, dt, F; /* variables used in the simulation loop */
  one_dim_func_t *tension_handler[NUM_TENSION_GROUPS] = {0};

  get_options(argc, argv, &P, &dt_max, &v, &env, NUM_TENSION_GROUPS,
              tension_models, NUM_K_MODELS, k_models, &domain_list,
              &num_folded);
  setup(tension_handler);

  /* print a header line for each output mode that is switched on */
  if (flags & FLAG_OUTPUT_FULL_CURVE) printf("#Position (m)\tForce (N)\n");
  if (flags & FLAG_OUTPUT_UNFOLDING_FORCES) printf("#Unfolding Force (N)\n");
  while (num_folded > 0) { /* keep pulling while folded domains remain */
    /* tension at the current extension */
    F = find_tension(tension_handler, domain_list, &env, x);
    /* timestep chosen for this step (bounded above by dt_max) */
    dt = determine_dt(tension_handler, domain_list, &env, x, P, dt_max, v);
    /* may unfold one domain and decrement num_folded */
    random_unfoldings(domain_list, F, dt, &env, &num_folded);
    if (flags & FLAG_OUTPUT_FULL_CURVE) printf("%g\t%g\n", x, F);
    x += v*dt; /* advance the extension at velocity v */
  }
  destroy_domain_list(domain_list);
  free_accels();
  return 0;
}
@ The meat of the simulation is bundled into the three functions [[find_tension]], [[determine_dt]], and [[random_unfoldings]]. [[find_tension]] is discussed in Section \ref{sec.find_tension}, [[determine_dt]] in Section \ref{sec.adaptive_dt}, and [[random_unfoldings]] in Section \ref{sec.unfolding_rate}. Environmental parameters important in determining reaction rates and tensions (e.g. temperature, pH) are stored in a single structure to facilitate extension to more complicated models in the future. <>= typedef struct environment_struct { double T; } environment_t; @ <>= <> <> <> <> @ \section{Simulation functions} <>= <> <> <> <> <> @ \subsection{Tension} \label{sec.find_tension} Because the stretched system may be made up of several parts (folded domains, unfolded domains, spring-like cantilever, \ldots), we will assign the domains to groups. For example, a domain might be in the [[NULL]] group when it's folded and in the worm-like chain group when it is unfolded. 
The domains are assumed to be commutative, so ordering is ignored. The interactions between the groups are assumed to be linear, but the interactions between domains of the same group need not be. This allows for non-linear group models such as the worm-like or freely-jointed chains. Each handler function receives a list of domains matching its group number. <>= enum tension_group_t {TG_NULL=0, TG_CONST, TG_HOOKE, TG_WLC, TG_FJC, }; #define NUM_TENSION_GROUPS 5 @ <>= typedef struct tension_handler_data_struct { list_t *group; environment_t *env; void *persist; } tension_handler_data_t; @ After sorting the chain into separate groups $G_i$, with tension handlers $F_i(G_i; x_i)$, we need to balance the extension $x_i$ of each group to get a consistent tension $F_i(x_i) = F_j(x_j) \;\; \forall i,j$. For the moment, we will restrict our group boundaries to a single dimension, so $\sum_i x_i = x$, our total extension (it is this restriction that causes the groups to interact linearly). We'll also restrict our extensions to all be positive. With these restrictions, the problem of balancing the tensions reduces to solving a system of $N+1$ possibly non-linear equations with $N$ unknowns, where $N$ is the number of active groups. In general this can be fairly complicated, but without much loss of practicality we can restrict ourselves to strictly monotonically increasing, non-negative tension functions $F_i(x)$, with $F_i(0)=0$, which makes the situation much simpler. For example, it guarantees the existence of a unique, real solution for finite forces. 
<>= double find_tension(one_dim_func_t **tension_handler, list_t *domain_list, environment_t *env, double x) { static tension_handler_data_t data[NUM_TENSION_GROUPS] = {0}; static void *pdata[NUM_TENSION_GROUPS] = {0}; static double xi[NUM_TENSION_GROUPS] = {0}, last_x = 0; static int active_groups[NUM_TENSION_GROUPS] = {0}; int i, new_active_groups=0; double F; for (i=0; i>= int domain_unfolds(double F, double dt, environment_t *env, domain_t *domain) { /* returns 1 or 0, F in N, dt in s, pointer to env. data, pointer to a folded domain */ double k; k = accel_k(domain->k, F, env, domain->k_params); //(*domain->k)(F, env, domain->k_params); //printf("k = %g,\tdt = %g,\tk dt = %g\n", k, dt, k*dt); return happens(k*dt); /* dice roll for prob. k*dt event */ } @ [[happens]] is a random decision making function defined in Appendix \ref{app.utils}. Only one domain can unfold in each timestep, because the timescale of a domain unfolding $dt_u$ is assumed to be less than the simulation timestep $dt$, so a domain will completely unfold in a single timestep. We adapt our timesteps to keep the probability of a single domain unfolding low, so the probability of two domains unfolding in the same timestep is negligible. This approach breaks down as the adaptive timestep scheme approaches $dt \sim dt_u$, but $dt_u \sim 1\U{$\mu$s}$ for Ig27-like proteins \citep{klimov00}, so this shouldn't be a problem. To reassure yourself, you can ask the simulator to print the smallest timestep that was used. 
<>=
/* Give each folded domain in the list one chance to unfold during this
 * timestep, stopping at the first one that does.
 *
 *   domain_list         first node to examine
 *   tension             current tension (F in N, as taken by domain_unfolds)
 *   dt                  timestep (s, as taken by domain_unfolds)
 *   env                 environmental data handed on to domain_unfolds
 *   num_folded_domains  decremented when a domain unfolds
 *
 * When FLAG_OUTPUT_UNFOLDING_FORCES is set, the tension at which the
 * domain unfolded is written to stdout. */
void random_unfoldings(list_t *domain_list, double tension, double dt,
                       environment_t *env, int *num_folded_domains)
{
  list_t *node;

  for (node = domain_list; node != NULL; node = node->next) {
    if (D(node)->state != DS_FOLDED)
      continue;                 /* already unfolded, nothing to roll for */
    if (!domain_unfolds(tension, dt, env, D(node)))
      continue;                 /* dice roll says it stays folded */

    if (flags & FLAG_OUTPUT_UNFOLDING_FORCES)
      fprintf(stdout, "%g\n", tension);
    D(node)->state = DS_UNFOLDED;
    *num_folded_domains -= 1;
    return; /* our one domain has unfolded, stop looking for others */
  }
}
@ \subsection{Adaptive timesteps} \label{sec.adaptive_dt} We'd like to pick $dt$ so the probability of unfolding in the next timestep is small. If we don't adapt our timestep, we risk breaking our approximation that $F(x' \in [x, x+v \cdot dt]) = F(x)$ or that only one domain unfolds in a given timestep. Because $F(x)$ is monotonically increasing, excessively large timesteps will lead to erroneously large unfolding forces. The simulation would have been accurate for sufficiently small fixed timesteps, but adaptive timesteps will allow us to move through low tension regions in fewer steps, leading to a more efficient simulation. The actual adaptive timestep implementation is not particularly interesting, since we are only required to reduce $dt$ to somewhere below a set threshold, so I've removed it to Appendix \ref{app.adaptive_dt}. \section{Models} TODO: model intro. The models provide the physics of the simulation, but the simulation allows interchangeable models, and we are currently focusing on the simulation itself, so we remove the actual model implementation to the appendices. The tension models are defined in Appendix \ref{sec.tension_models} and unfolding models are defined in Appendix \ref{sec.k_models}. 
<>= #define k_B 1.3806503e-23 /* J/K */ @ \section{Command line interface} <>= <> <> <> <> <> @ \subsection{Model selection} \label{app.model_selection} The main difficulty with the command line interface in \prog\ is developing an intuitive method for accessing the possibly large number of available models. We'll treat this generally by defining an array of available models, containing their important parameters. <>= <> @ <>= typedef void *create_data_func_t(char **param_strings); typedef void destroy_data_func_t(void *param_struct); @ <>= <> <> @ \subsubsection{Tension} <>= typedef struct tension_model_getopt_struct { enum tension_group_t tg_group; char *name; char *description; int num_params; char **param_descriptions; char *params; create_data_func_t *creator; destroy_data_func_t *destructor; } tension_model_getopt_t; @ <>= tension_model_getopt_t tension_models[NUM_TENSION_GROUPS] = { <>, <>, <>, <>, <> }; @ \subsubsection{Unfolding rate} <>= #define NUM_K_MODELS 5 typedef struct k_model_getopt_struct { char *name; char *description; k_func_t *k; int num_params; char **param_descriptions; char *params; create_data_func_t *creator; destroy_data_func_t *destructor; } k_model_getopt_t; @ <>= k_model_getopt_t k_models[NUM_K_MODELS] = { <>, <>, <>, <>, <> }; @ \subsection{help} <>= void help(char *prog_name, double P, double dt_max, double v, environment_t *env, int n_tension_models, tension_model_getopt_t *tension_models, int folded_tension_model, int unfolded_tension_model, int n_k_models, k_model_getopt_t *k_models, int k_model) { int i, j; printf("usage: %s [options]\n", prog_name); printf("Version %s\n\n", VERSION); printf("Monte Carlo simulation of a multi-globular domain protein unfolding\n\n"); printf("Simulation options:\n"); printf("-P P\tTarget probability for dt (currently %g)\n", P); printf("-t dt\tMaximum allowed timestep dt (currently %g)\n", dt_max); printf("-v v\tPulling velocity v (currently %g nm/s)\n", v); printf("Environmental options:\n"); 
printf("-T T\tTemperature T (currently %g K)\n", env->T); printf("-C T\tYou can also set the temperature T in Celsius\n"); printf("Model options:\n"); printf("The domains exist in either the folded or unfolded state\n"); printf("The following options change the default behavior in each state.\n"); printf("-m model\tFolded tension model (currently %s)\n", tension_models[folded_tension_model].name); printf("-a args\tFolded tension model argument string (currently %s)\n", tension_models[folded_tension_model].params); printf("-M model\tUnfolded tension model (currently %s)\n", tension_models[unfolded_tension_model].name); printf("-A args\tUnfolded tension model argument string (currently %s)\n", tension_models[unfolded_tension_model].params); printf("The following options change the unfolding rate.\n"); printf("-k model\tTransition rate model (currently %s)\n", k_models[k_model].name); printf("-K args\tTransition rate model argument string (currently %s)\n", k_models[k_model].params); printf("Domain creation options:\n"); printf("Once you've set up the appropriate default models, you need to add the domains\n"); printf("-F n\tAdd n folded domains with the current models\n"); printf("-U n\tAdd n unfolded domains with the current models\n"); printf("Output mode options:\n"); printf("There are two output modes. In standard mode, only the unfolding\n"); printf("events are printed. 
For example:\n"); printf(" #Unfolding Force (N)\n"); printf(" 123.456e-12\n"); printf(" ...\n"); printf("In verbose mode, the entire Force-vs-distance curve is output:\n"); printf(" #Position (m)\tForce (N)\n"); printf(" 0.001\t0.0023\n"); printf(" ...\n"); printf("-V\tChange output to verbose mode\n"); printf("-h\tPrint this help and exit\n"); printf("\n"); printf("Tension models:\n"); for (i=0; i>= void get_options(int argc, char **argv, double *pP, double *pDt_max, double *pV, environment_t *env, int n_tension_models, tension_model_getopt_t *tension_models, int n_k_models, k_model_getopt_t *k_models, list_t **pDomain_list, int *pNum_folded) { char *prog_name = NULL; char c, options[] = "P:t:v:T:C:m:a:M:A:k:K:F:U:Vh"; int ftension_model=0, utension_model=0, k_model=0; int i, n; extern char *optarg; extern int optind, optopt, opterr; assert (argc > 0); /* setup defaults */ flags = FLAG_OUTPUT_UNFOLDING_FORCES; prog_name = argv[0]; *pP = 1e-3; /* % pop per s */ *pDt_max = 0.001; /* s */ *pV = 1e-6; /* m/s */ env->T = 300.0; /* K */ while ((c=getopt(argc, argv, options)) != -1) { switch(c) { case 'P': *pP = atof(optarg); break; case 't': *pDt_max = atof(optarg); break; case 'v': *pV = atof(optarg); break; case 'T': env->T = atof(optarg); break; case 'C': env->T = atof(optarg)+273.15; break; case 'm': ftension_model = index_tension_model(n_tension_models, tension_models, optarg); break; case 'a': tension_models[ftension_model].params = optarg; break; case 'M': utension_model = index_tension_model(n_tension_models, tension_models, optarg); break; case 'A': tension_models[utension_model].params = optarg; break; case 'k': k_model = index_k_model(n_k_models, k_models, optarg); break; case 'K': k_models[k_model].params = optarg; break; case 'F': n = atoi(optarg); assert(n > 0); for (i=0; i 0); for (i=0; i 0.0); assert(*pP < 1.0); assert(*pDt_max > 0.0); assert(*pV > 0.0); assert(env->T > 0.0); return; } @ <>= int index_tension_model(int n_models, tension_model_getopt_t 
*models, char *name) { int i; for (i=0; i>= int index_k_model(int n_models, k_model_getopt_t *models, char *name) { int i; for (i=0; i>= /* requires int num_param_args and char **param_args in the current scope * usage: * INIT_MODEL("folded", folded_model, folded_params); * defined as a macro, so it can work on both tension_model_getopt_t and k_model_getopt_t types. */ #define INIT_MODEL(role, model, param_pointer) \ do { \ parse_list_string(model->params, SEP, '{', '}', \ &num_param_args, ¶m_args); \ if (num_param_args != model->num_params) { \ fprintf(stderr, \ "%s model %s expected %d params," \ role, model->name, model->num_params); \ fprintf(stderr, \ "not the %d params in '%s'\n", \ num_param_args, model->params); \ assert(num_param_args == model->num_params); \ } \ if (model->creator) \ param_pointer = (*model->creator)(param_args); \ else \ param_pointer = NULL; \ free_parsed_list(num_param_args, param_args); \ } while (0); @ <>= void *generate_domain(enum domain_state_t state, tension_model_getopt_t *folded_model, tension_model_getopt_t *unfolded_model, k_model_getopt_t *k_model) { void *ftension_params, *utension_params, *k_params; int num_param_args; /* for INIT_MODEL() */ char **param_args; /* for INIT_MODEL() */ #ifdef DEBUG fprintf(stderr, "generating %s ", state==DS_FOLDED ? 
"folded" : "unfolded"); fprintf(stderr, "domain (k: \"%s\", \"%s\", f: \"%s\", \"%s\", u: \"%s\", \"%s\")\n", k_model->name, k_model->params, folded_model->name, folded_model->params, unfolded_model->name, unfolded_model->params); #endif INIT_MODEL("folded", folded_model, ftension_params); INIT_MODEL("unfolded", unfolded_model, utension_params); INIT_MODEL("k", k_model, k_params); return create_domain(state, k_model->k, k_params, k_model->destructor, folded_model->tg_group, ftension_params, folded_model->destructor, unfolded_model->tg_group, utension_params, unfolded_model->destructor); } @ \phantomsection \appendix \addcontentsline{toc}{section}{Appendicies} \section{sawsim.c details} \subsection{Layout} The general layout of our simulation code is: <<*>>= <> <> <> <> <> <

> @ We include [[math.h]], so don't forget to link to the libm with `-lm'. <>= #include /* assert() */ #include /* malloc(), free(), rand() */ #include /* fprintf(), stdout */ #include /* strlen, strtok() */ #include /* exp(), M_PI, sqrt() */ #include /* pid_t (returned by getpid()) */ #include /* getpid() (for seeding rand()), getopt() */ #include "global.h" #include "list.h" #include "tension_balance.h" #include "k_model.h" #include "tension_model.h" #include "parse.h" #include "accel_k.h" @ <>= <> <> <> <> <> <> <> @ <>= <> <> @ <>= <> <> <> <> <> @ <>= <> <> @ <>= void setup(one_dim_func_t **tension_handler) { srand(getpid()*time(NULL)); /* seed rand() */ tension_handler[TG_NULL] = NULL; tension_handler[TG_CONST] = &const_tension_handler; tension_handler[TG_HOOKE] = &hooke_handler; tension_handler[TG_WLC] = &wlc_handler; tension_handler[TG_FJC] = &fjc_handler; } @ <>= /* in octal b/c of prefixed '0' */ #define FLAG_OUTPUT_FULL_CURVE 01 #define FLAG_OUTPUT_UNFOLDING_FORCES 02 @ <>= static unsigned long int flags = 0; @ \subsection{Utilities} \label{app.utils} <>= #define MAX(a,b) ((a)>(b) ? (a) : (b)) #define MIN(a,b) ((a)<(b) ? (a) : (b)) @ Note that [[STRMATCH]] chokes if one of the strings is [[NULL]]. <>= // Check if two strings match, return 1 if they do static char *temp_string_A; static char *temp_string_B; #define STRMATCH(a,b) (temp_string_A=a, temp_string_B=b, \ strlen(temp_string_A) != strlen(temp_string_B) ? 
0 : \ !strncmp(temp_string_A,temp_string_B,strlen(temp_string_B)+1) ) /* +1 to also compare the '\0' */ @ We also define a macro for our [[check]] unit testing <>= #define CHECK_ERR(max_err, expected, received) \ do { \ fail_unless( (received-expected)/expected < max_err, \ "relative error %g >= %g in %s (Expected %g, received %g)", \ (received-expected)/expected, max_err, #received, \ expected, received); \ fail_unless(-(received-expected)/expected < max_err, \ "relative error %g >= %g in %s (Expected %g, received %g)", \ -(received-expected)/expected, max_err, #received, \ expected, received); \ } while(0) @ <>= int happens(double probability) { assert(probability >= 0.0); assert(probability <= 1.0); return (double)rand()/RAND_MAX < probability; /* TODO: replace with GSL rand http://www.gnu.org/software/gsl/manual/html_node/Random-number-generator-algorithms.html x*/ } @ \subsection{Adaptive timesteps} \label{app.adaptive_dt} $F(x)$ increases with $x$, possibly exploding, as in the worm-like chain model, so basing the timestep on the the unfolding probability at the current tension is dangerous, and we need to search for a $dt$ for which $P(F(x+v*dt)) < P_\text{target}$. There are two cases to consider. In the most common, no domains have unfolded since the last step, and we expect the next step to be slightly shorter than the previous one. In the less common, domains did unfold in the last step, and we expect the next step to be considerably longer than the previous one. <>= double search_dt(one_dim_func_t **tension_handler, list_t *domain_list, environment_t *env, double x, double target_prob, double max_dt, double v) { /* Returns the timestep dt in seconds for the current folded domain. * Takes a list of tension handlers, the list of domains, * a pointer env to the environmental data, a starting separation x in m, * a target_prob between 0 and 1, * max_dt in s, stretching velocity v in m/s. 
*/ double F, k, dtCur, dtU, dtUCur, dtL, dt; /* get upper bound using the current position */ F = find_tension(tension_handler, domain_list, env, x); /* BUG. repeated calculation */ //printf("Start with x = %g (F = %g)\n", x, F); k = accel_k(D(domain_list)->k, F, env, D(domain_list)->k_params); //printf("x %g\tF %g\tk %g\n", x, F, k); dtU = target_prob / k; /* P = k dt, dtU is an upper bound on dt */ if (dtU > max_dt) { //printf("overshot max_dt\n"); dtU = max_dt; } /* set a lower bound on dt too */ dtL = 0.0; /* The dt determined above may produce illegitimate forces or ks. * Reduce the upper bound until we have valid ks. */ dt = dtU; F = find_tension(tension_handler, domain_list, env, x+v*dt); while (F == HUGE_VAL) { /* reduce step until we hit a valid force */ dtU /= 2.0; dt = dtU; F = find_tension(tension_handler, domain_list, env, x+v*dt); } //printf("Try for dt = %g (F = %g)\n", dt, F); k = accel_k(D(domain_list)->k, F, env, D(domain_list)->k_params); /* returned k may be -1.0 */ //printf("x %g\tF %g\tdt %g\tv dt %g\tk %g\n", x, F, dt, v*dt, k); while (k == -1.0) { /* reduce step until we hit a valid k */ dtU /= 2.0; dt = dtU; /* hopefully, we can use the max dt, see if it works */ F = find_tension(tension_handler, domain_list, env, x+v*dt); //printf("Try for dt = %g (F = %g)\n", dt, F); k = accel_k(D(domain_list)->k, F, env, D(domain_list)->k_params); //printf("x %g\tF %g\tdt %g\tv dt %g\tk %g\n", x, F, dt, v*dt, k); } assert(dtU > 1e-14); /* timestep to valid k too small */ dtUCur = target_prob / k; /* safe timestep back from x+dtU */ if (dtUCur >= dt) return dt; /* dtU is safe. */ /* dtU wasn't safe, lets see what would be. 
*/ while (dtU > 1.1*dtL) { /* until the range is reasonably small */ dt = (dtU + dtL) / 2.0; F = find_tension(tension_handler, domain_list, env, x+v*dt); //printf("Try for dt = %g (F = %g) (dt bounds %g, %g)\n", dt, F, dtL, dtU); k = accel_k(D(domain_list)->k, F, env, D(domain_list)->k_params); dtCur = target_prob / k; //printf("x %g\tF %g\tdt %g\tv dt %g\tk %g\tdtCur = %g\n", x, F, dt, v*dt, k, dtCur); if (dtCur > dt) /* safe timestep back from x+dt covers dt */ dtL = dt; else if (dtCur < dt) { /* unsafe timestep back from x+dt, but... */ dtU = dt; /* ... stepping out only dtCur would be safe */ dtUCur = dtCur; } else break; /* dtCur = dt */ } return MAX(dtUCur, dtL); } @ To determine $dt$ for an array of potentially different folded domains, we need to find the maximum $dt$ that satisfies $k dt < P$ for all domains. <>= <> double determine_dt(one_dim_func_t **tension_handler, list_t *domain_list, environment_t *env, double x, double target_prob, double dt_max, double v) { /* Returns the timestep dt in seconds. * Takes the list of folded domains, target_prob between 0 and 1, * F in N, and T in K. */ double dt=dt_max, new_dt; assert(target_prob > 0.0); assert(target_prob < 1.0); assert(dt_max > 0.0); /* .5 nm steps = v * dt */ //return 0.5e-9/v; while (domain_list != NULL) { if (D(domain_list)->state == DS_FOLDED) { new_dt = search_dt(tension_handler, domain_list, env, x, target_prob, dt, v); dt = MIN(dt, new_dt); } domain_list = domain_list->next; } return dt; } @ \subsection{Domain data} Currently domains exist in two states, folded and unfolded, and the only allowed transitions are folded $\rightarrow$ unfolded. Of course, it wouldn't be too complicated to extend this to a multi-state system, with an array containing the domain's group for each possible state, and a matrix of transition-rate-calculating functions. However, at this point such generality seems unnecessary. 
<>= enum domain_state_t {DS_FOLDED, DS_UNFOLDED }; typedef struct domain_struct { enum domain_state_t state; enum tension_group_t folded_group; enum tension_group_t unfolded_group; k_func_t *k; /* function returning unfolding rate */ void *folded_params; /* pointer to folded parameters */ void *unfolded_params; /* pointer to unfolded parameters */ void *k_params; /* pointer to k parameters */ destroy_data_func_t *destroy_folded; destroy_data_func_t *destroy_unfolded; destroy_data_func_t *destroy_k; } domain_t; /* get the domain data for the current list node */ #define D(list) ((domain_t *)(list)->d) /* get the tension params for the current list node */ #define D_TP(list) (((domain_t *)(list)->d)->state == DS_FOLDED \ ? ((domain_t *)(list)->d)->folded_params \ : ((domain_t *)(list)->d)->unfolded_params) @ [[k]] is a pointer to the function determining the unfolding rate for a given tension. [[k_params]] is a pointer to the parameters used by the function pointed to by [[k]]. [[folded_params]] and [[unfolded_params]] are pointers to the parameters used by the group-appropriate handler function when determining the tension while the domain is folded or unfolded, respectively. The [[destroy_*]] pointers point to functions for freeing the memory [[*_params]]. We store them with the domain data so that [[destroy_domain]] doesn't have to know which type of domain it's cleaning up after. [[create_domain]] and [[destroy_domain]] are simple wrappers around [[malloc]] and [[free]]. 
<>= domain_t *create_domain(enum domain_state_t state, k_func_t *k, void *k_params, destroy_data_func_t *destroy_k, enum tension_group_t folded_group, void *folded_params, destroy_data_func_t *destroy_folded, enum tension_group_t unfolded_group, void *unfolded_params, destroy_data_func_t *destroy_unfolded) { domain_t *ret = (domain_t *)malloc(sizeof(domain_t)); assert(ret != NULL); if (state == DS_FOLDED) { assert(k != NULL); /* the pointer points somewhere valid */ assert(*k != NULL); /* and there is something useful there */ } ret->state = state; ret->folded_group = folded_group; ret->unfolded_group = unfolded_group; ret->k = k; ret->k_params = k_params; ret->destroy_k = destroy_k; ret->folded_params = folded_params; ret->unfolded_params = unfolded_params; ret->destroy_folded = destroy_folded; ret->destroy_unfolded = destroy_unfolded; return ret; } void destroy_domain(domain_t *domain) { if (domain) { //printf("domain %p & %p\n", *domain, domain); if (domain->destroy_folded) (*domain->destroy_folded)(domain->folded_params); if (domain->destroy_unfolded) (*domain->destroy_unfolded)(domain->unfolded_params); if (domain->destroy_k) (*domain->destroy_k)(domain->k_params); free(domain); } } @ <>= void destroy_domain_list(list_t *domain_list) { domain_list = head(domain_list); while (domain_list != NULL) destroy_domain((domain_t *) pop(&domain_list)); } @ \subsection{Group handling} <>= <> <> @ <>= enum tension_group_t get_group(domain_t *domain) { if (domain->state == DS_FOLDED) return domain->folded_group; else { assert(domain->state == DS_UNFOLDED); return domain->unfolded_group; } } @ <>= list_t *get_group_list(list_t *list, enum tension_group_t group) { list_t *ret = NULL; list = head(list); while (list != NULL) { if (get_group(D(list)) == group) push(&ret, D_TP(list)); /* add a pointer to the appropriate tension parameters to our new list. 
*/ list = list->next; } return ret; } @ Because all the node data in lists returned by [[get_group_list]] is also in the main domain list, you shouldn't destroy any node data popped off when destroying the group lists. It will all get cleaned up when the main domain list is destroyed. \section{String parsing} For handling command line arguments, we need to parse delimited strings (e.g. [["param1,param2,param3..."]]). The model handling in getopt is set up to handle a fixed number of arguments for each model, so models (like [[kramers_integ]]) that have complicated parameters (location of spline knots) need to take care of parsing those parameters themselves. We implement this parsing in [[parse.c]], define the interface in [[parse.h]], and the unit testing in [[check_parse.c]]. <>= <> <> <> @ <>= parse.c : sawsim.nw notangle -Rparse.c $^ > $@ parse.h : sawsim.nw notangle -Rparse.h $^ > $@ check_parse.c : sawsim.nw notangle -Rcheck-parse.c $^ > $@ check_parse : check_parse.c parse.c parse.h gcc -g -o $@ $< parse.c -lcheck clean_parse : rm -f parse.c parse.h check_parse.c check_parse @ <>= #define SEP ',' /* argument separator character */ @ <>= extern void parse_list_string(char *string, char sep, char deeper, char shallower, int *num, char ***string_array); extern void free_parsed_list(int num, char **string_array); @ [[parse_list_string]] allocates memory, don't forget to free it afterward with [[free_parsed_list]]. It does not alter the original. The string may start off with a [[deeper]] character (i.e. [["{x,y}"]]), and when it does, brace stripping will leave [[x,y]], where the pointer is one character in on the copied string. However, when we go to free the memory, we need a pointer to the beginning of the string. 
In order to accommodate this for a string with $N$ argument, allocate a pointer array with $N+1$ elements, let the first $N$ elements point to the separated arguments, and let the last element point to the start of the copied string regardless of braces. <>= /* TODO, split out into parse.hc */ static int next_delim_index(char *string, char sep, char deeper, char shallower) { int i=0, depth = 0; while (string[i] != '\0' && !(string[i] == sep && depth == 0)) { if (string[i] == deeper) {depth++;} else if (string[i] == shallower) {depth--; assert(depth >= 0);} i++; } return i; } void parse_list_string(char *string, char sep, char deeper, char shallower, int *num, char ***string_array) { char *str=NULL, **ret=NULL; int i, j, n; if (string==NULL || strlen(string) == 0) { /* handle the trivial cases */ *num = 0; *string_array = NULL; return; } /* make a copy of the string, so we don't change the original */ str = (char *)malloc(sizeof(char)*(strlen(string)+1)); assert(str != NULL); strcpy(str, string); /* we know str is long enough */ /* count the number of regions, so we can allocate pointers to them */ i=-1; n=0; do { n++; i++; /* move on to next argument */ i += next_delim_index(str+i, sep, deeper, shallower); //fprintf( stderr, "delim at %d (%d) ('%s' -> '%s')\n", i, (int)str[i], str, str+i+1); fflush(stderr); } while (str[i] != '\0'); ret = (char **)malloc(sizeof(char *)*(n+1)); assert(ret != NULL); /* replace the separators with '\0' & assign pointers */ ret[n] = str; /* point to the front of the copied string */ j=0; ret[0] = str; for(i=1; i>= <> <> #include "parse.h" <> @ <>= #include /* assert() */ #include /* NULL */ #include /* fprintf(), stdout *//*!!*/ #include /* strlen() */ #include "parse.h" @ \subsection{Parsing unit tests} Here we check to make sure the various functions work as expected, using \citetalias{sw:check}. <>= <> <> <> <> <

> @ <>= #include /* EXIT_SUCCESS and EXIT_FAILURE, atof() */ #include /* printf() */ #include /* assert() */ #include /* strlen() */ <> #include "parse.h" @ <>= <> <> @ <>= Suite *test_suite (void) { Suite *s = suite_create ("k model"); <> <> return s; } @ <>= /* START_TEST(test_next_delim_index) { fail_unless(next_delim_index("", ',', '{', '}')==0, NULL); fail_unless(next_delim_index(",arg,{str,ing},test", ',', '{', '}')==0, NULL); fail_unless(next_delim_index("arg,{str,ing},test", ',', '{', '}')==3, NULL); fail_unless(next_delim_index("{str,ing},test", ',', '{', '}')==9, NULL); fail_unless(next_delim_index("test", ',', '{', '}')==4, NULL); } END_TEST */ START_TEST(test_parse_list_null) { int num_param_args; char **param_args; parse_list_string(NULL, SEP, '{', '}', &num_param_args, ¶m_args); fail_unless(num_param_args == 0, NULL); fail_unless(param_args == NULL, NULL); } END_TEST START_TEST(test_parse_list_single_simple) { int num_param_args; char **param_args; parse_list_string("arg", SEP, '{', '}', &num_param_args, ¶m_args); fail_unless(num_param_args == 1, NULL); fail_unless(STRMATCH(param_args[0],"arg"), NULL); } END_TEST START_TEST(test_parse_list_single_compound) { int num_param_args; char **param_args; parse_list_string("{x,y,z}", SEP, '{', '}', &num_param_args, ¶m_args); fail_unless(num_param_args == 1, NULL); fail_unless(STRMATCH(param_args[0],"x,y,z"), "got '%s', expected '%s'", param_args[0], "x,y,z"); } END_TEST START_TEST(test_parse_list_double_simple) { int num_param_args; char **param_args; parse_list_string("abc,def", SEP, '{', '}', &num_param_args, ¶m_args); fail_unless(num_param_args == 2, NULL); fail_unless(STRMATCH(param_args[0],"abc"), NULL); fail_unless(STRMATCH(param_args[1],"def"), NULL); } END_TEST @ <>= TCase *tc_parse_list_string = tcase_create("parse list string"); @ <>= //tcase_add_test(tc_parse_list_string, test_next_delim_index); tcase_add_test(tc_parse_list_string, test_parse_list_null); tcase_add_test(tc_parse_list_string, 
test_parse_list_single_simple); tcase_add_test(tc_parse_list_string, test_parse_list_single_compound); tcase_add_test(tc_parse_list_string, test_parse_list_double_simple); suite_add_tcase(s, tc_parse_list_string); @ \section{Unit tests} Here we check to make sure the various functions work as expected, using \citetalias{sw:check}. <>= <> <> <> <> <> <> <> <

> @ <>= #include @ <>= @ <>= <> <> <> <> <> <> @ <>= Suite *test_suite (void) { Suite *s = suite_create ("sawsim"); <> <> <> <> <> <> <> <> <> <> /* tcase_add_checked_fixture(tc_strip_address, setup_strip_address, teardown_strip_address); */ return s; } @ <

>= int main(void) { int number_failed; Suite *s = test_suite(); SRunner *sr = srunner_create(s); srunner_run_all(sr, CK_ENV); number_failed = srunner_ntests_failed(sr); srunner_free(sr); return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; } @ \subsection{F tests} <>= <> @ <>= <> @ <>= <> @ <>= START_TEST(test_wlc_at_zero) { double T=1.0, L=1.0, p=0.1, x=0.0; fail_unless(wlc(x, T, p, L)==0, NULL); } END_TEST START_TEST(test_wlc_at_half) { double T=1.0, L=1.0, p=0.1*k_B, x=0.5; /* prefactor = k_B T / p = k_B 1.0 / (k_B*0.1) = 10.0 J/nm = 10.0e21 pN * nonlinear term = 0.25 (1/(1-x/L)^2-1) = 0.25(1/.25 - 1) * = 0.25 * 3 = 3/4 * linear term = x/L = 0.5 * nonlinear + linear = 0.75 + 0.5 = 1.25 * wlc = 10e21*1.25 = 12.5e21 */ fail_unless(wlc(x, T, p, L)-12.5e21 < 1e16, "wlc(%g, %g, %g, %g) = %g != %g", x, T, p, L, wlc(x, T, p, L), 12.5e21); } END_TEST @ <>= TCase *tc_wlc = tcase_create("WLC"); @ <>= tcase_add_test(tc_wlc, test_wlc_at_zero); tcase_add_test(tc_wlc, test_wlc_at_half); suite_add_tcase(s, tc_wlc); @ \subsection{Model tests} Check the searching with [[linear_k]]. Check overwhelming force treatment with the heavyside-esque step [[?]]. <>= double linear_k(double F, environment_t *env, void *params) { double Fnot = *(double *)params; return Fnot+F; } START_TEST(test_determine_dt_linear_k) { environment_t env; double dt_max=3.0, Fnot=3.0; double F[]={0,1,2,3,4,5,6}; domain_t dom; /* use both parts at once for folded/unfolded */ int i; env.T = 300.0; /* dom->next = dom->prev = NULL; dom->k_func_t = linear_k; dom->folded_params = &Fnot; dom->unfolded_params = !!!!!!!!! 
dom->destroy_folded = dom->destroy_unfolded = NULL; for( i=0; i < sizeof(F)/sizeof(double); i++) { fail_unless(determine_dt(folded, unfolded, targ_p, dt_max, v, x, T)==1, NULL); } */ } END_TEST @ <>= TCase *tc_determine_dt = tcase_create("Determine dt"); @ <>= tcase_add_test(tc_determine_dt, test_determine_dt_linear_k); suite_add_tcase(s, tc_determine_dt); @ <>= @ <>= @ <>= @ <>= @ <>= @ <>= @ <>= @ <>= @ <>= @ \section{Balancing group extensions} \label{app.tension_balance} For a given total extension $x$ (of the piezo), the various domain groups (WLC, FJC, Hookean springs, \ldots) extend different amounts, and we need to tweak the portion that each extends to balance the tension amongst the active groups. Since the tension balancing is separable from the bulk of the simulation, we break it out into a separate module. The interface is defined in [[tension_balance.h]], the implementation in [[tension_balance.c]], and the unit testing in [[check_tension_balance.c]] <>= <> <> @ <>= <> <> <> <> @ <>= tension_balance.c : sawsim.nw notangle -Rtension-balance.c $^ > $@ tension_balance.h : sawsim.nw notangle -Rtension-balance.h $^ > $@ check_tension_balance.c : sawsim.nw notangle -Rcheck-tension-balance.c $^ > $@ check_tension_balance : check_tension_balance.c global.h tension_balance.c tension_balance.h gcc -g -o $@ $< tension_balance.c -lcheck clean_tension : rm -f tension_balance.c tension_balance.h check_tension_balance.c check_tension_balance @ The entire force balancing problem reduces to a solving two nested one-dimensional root-finding problems. First we define the one dimensional tension $F(x_0)$ (where $i=0$ is the index of the first non-empty group). $x(x_0)$ is also strictly monotonically increasing, so we can solve for $x_0$ such that $\sum_i x_i = x$. 
<>= double tension_balance(int num_tension_groups, one_dim_func_t **tension_handler, void **params, int *active, int new_active_groups, double *xi, double last_x, double x); @ <>= double tension_balance(int num_tension_groups, one_dim_func_t **tension_handler, void **params, int *active, int new_active_groups, double *xi, double last_x, double x) { /* xi initialized to x values for groups at last x, * returned as x values for groups at current x */ double F, xo; one_dim_func_t **active_handlers=NULL; void **active_params=NULL; double *active_xi=NULL; int i, active_groups=0; x_of_xo_data_t x_xo_data; double min_dx=1e-10, min_dy=1e-15; int max_steps=100; double lb, ub; assert(num_tension_groups > 0); active_handlers = (one_dim_func_t **)malloc(sizeof(one_dim_func_t *)*num_tension_groups); assert(active_handlers != NULL); active_params = (void **)malloc(sizeof(void *)*num_tension_groups); assert(active_params != NULL); active_xi = (double *)malloc(sizeof(double)*num_tension_groups); assert(active_xi != NULL); for (i=0; i last_x) { lb = active_xi[0]; ub = active_xi[0]+ x-last_x; /* apply all change to x0 */ } else if (x < last_x) { lb = active_xi[0]- (x-last_x); /* apply all change to x0 */ ub = active_xi[0]; } else { /* x == last_x */ printf("not moving\n"); lb= active_xi[0]; ub= active_xi[0]; } } //printf("lb %g,\tub %g\n", lb, ub); xo = oneD_solve(x_of_xo, &x_xo_data, x, lb, ub, min_dx, min_dy, max_steps, NULL); } F = (*active_handlers[0])(xo, active_params[0]); /* go back through and place the active xi data in the complete xi array */ active_groups = 0; for (i=0; i 1 && 0) { printf("balanced for x = %g with ", x); for(i=0; i>= <> @ <>= typedef struct x_of_xo_data_struct { int n_groups; one_dim_func_t **tension_handler; /* array of fn pointers */ void **handler_data; /* array of void* pointers */ double *xi; } x_of_xo_data_t; @ <>= double x_of_xo(double xo, void *pdata) { x_of_xo_data_t *data = (x_of_xo_data_t *)pdata; double F, x=0, xi, lb, ub; int i; double 
min_dx=1e-10, min_dy=1e-14; int max_steps=100; assert(data->n_groups > 0); data->xi[0] = xo; F = (*data->tension_handler[0])(xo, data->handler_data[0]); x += xo; if (data->xi) data->xi[0] = xo; for (i=1; i < data->n_groups; i++) { oneD_bracket(data->tension_handler[i], data->handler_data[i], F, data->xi[i], &lb, &ub); xi = oneD_solve(data->tension_handler[i], data->handler_data[i], F, lb, ub, min_dx, min_dy, max_steps, NULL); data->xi[i] = xi; x += xi; if (data->xi) data->xi[i] = xi; } return x; } @ Solve $f(x) = y$ to a certain precision in $x$ or $y$ in a limited number of steps for monotonically increasing $f(x)$. Simple bisection, so it's robust and converges fairly quickly. <>= /* equivalent to gsl_func_t */ typedef double one_dim_func_t(double x, void *params); @ <>= double oneD_solve(one_dim_func_t *f, void *params, double y, double lb, double ub, double min_dx, double min_dy, int max_steps, double *pYx); @ <>= /* TODO, replace with GSL one d root finder http://www.gnu.org/software/gsl/manual/html_node/One-dimensional-Root_002dFinding.html */ double oneD_solve(one_dim_func_t *f, void *params, double y, double lb, double ub, double min_dx, double min_dy, int max_steps, double *pYx) { double yx, ylb, yub, x; int i=0; assert(ub >= lb); ylb = (*f)(lb, params); yub = (*f)(ub, params); /* check some simple cases */ if (ylb == yub) { if (ylb != y) return HUGE_VAL; /* error! f(x)=y not bounded */ else return lb; /* any x on the range [lb, ub] would work */ } if (ylb == y) { x=lb; yx=ylb; goto end; } if (yub == y) { x=ub; yx=yub; goto end; } //printf("lb %g, x %g, ub %g\tylb %g, y %g, yub %g\n", lb, x, ub, ylb, y, yub); assert(ylb < y); assert(yub > y); for (i=0; i y) { ub=x; yub=yx; } else /* yx < y */ { lb=x; ylb=yx; } } end: if (pYx) *pYx = yx; return x; } @ The one dimensional solver needs a bracketed solution, which sometimes we don't have. Generate bracketing $x$ values through bisection or exponential growth. 
<>= void oneD_bracket(one_dim_func_t *f, void *params, double y, double xguess, double *lb, double *ub); @ <>= void oneD_bracket(one_dim_func_t *f, void *params, double y, double xguess, double *lb, double *ub) { double yx, step, x=xguess; int i=0; yx = (*f)(x, params); //fprintf(stdout, "bracketing %g, start at f(%g) = %g\n", y, x, yx); if (yx > y) { assert(x > 0.0); *ub = x; *lb = 0; } else { *lb = x; if (x == 0) x = 0.5; /* guess a scale of 1.0 */ while (yx < y) { x *= 2.0; yx = (*f)(x, params); //fprintf(stdout, "increasing to f(%g) = %g\n", x, yx); } *ub = x; } //fprintf(stdout, "ub %g, %g lb\n", *ub, *lb); } @ \subsection{Balancing implementation} <>= <> <> #include "tension_balance.h" <> <> @ <>= #include /* assert() */ #include /* NULL */ #include /* HUGE_VAL, macro constant, so don't need to link to math */ #include /* fprintf(), stdout */ #include "global.h" @ \subsection{Balancing unit tests} Here we check to make sure the various functions work as expected, using \citetalias{sw:check}. <>= <> <> <

> @ <>= #include /* EXIT_SUCCESS and EXIT_FAILURE */ #include /* assert() */ <> #include "global.h" #include "tension_balance.h" @ <>= <> <> @ <>= Suite *test_suite (void) { Suite *s = suite_create ("tension balance"); <> <> return s; } @ <>= <> double hooke(void *pK, double x) { assert(x >= 0); return *((double*)pK) * x; } START_TEST(test_single_function) { double k=5, x=3, last_x=2.0, F; one_dim_func_t *handlers[] = {&hooke}; void *data[] = {&k}; double xi[] = {0.0}; int active[] = {1}; int new_active_groups = 1; F = tension_balance(1, handlers, data, active, new_active_groups, xi, last_x, x); fail_unless(F = k*x, NULL); } END_TEST @ We can also test balancing two springs with different spring constants. The analytic solution for a general number of springs is given in Appendix \ref{app.math_hooke}. <>= START_TEST(test_double_hooke) { double k1=5, k2=4, x=3, last_x=2.0, F, Fe, x1e, x2e; one_dim_func_t *handlers[] = {&hooke, &hooke, NULL}; void *data[] = {&k1, &k2, NULL}; double xi[] = {0.0, 0.0, 0.0}; int active[] = {1, 1, 0}; int new_active_groups = 1; F = tension_balance(3, handlers, data, active, new_active_groups, xi, last_x, x); x1e = x*k2/(k1+k2); Fe = k1*x1e; x2e = x1e*k1/k2; //fail_unless(-(F-Fe)/Fe < 1e-6, "relative error %g > 1e-6 (Expected %g, got %g)",-(F-Fe)/Fe, Fe, F); CHECK_ERR(1e-6, x1e, xi[0]); CHECK_ERR(1e-6, x2e, xi[1]); CHECK_ERR(1e-6, Fe, F); } END_TEST @ <>= TCase *tc_tbfunc = tcase_create("tension balance function"); @ <>= tcase_add_test(tc_tbfunc, test_single_function); tcase_add_test(tc_tbfunc, test_double_hooke); suite_add_tcase(s, tc_tbfunc); @ \section{Lists} The globular domains (and cantilever params, etc.) are saved in bi-directional lists. Since the list handling is so general, and separable from the bulk of the simulation, we break it out into a separate module. 
The interface is defined in [[list.h]], the implementation in [[list.c]], and the unit testing in [[check_list.c]] <>= <> <> <> @ <>= <> <> <> <> @ <>= <> <> <> <> <> @ <>= list.c : sawsim.nw notangle -Rlist.c $^ > $@ list.h : sawsim.nw notangle -Rlist.h $^ > $@ check_list.c : sawsim.nw notangle -Rcheck-list.c $^ > $@ check_list : check_list.c global.h list.c list.h gcc -g -o $@ $< list.c -lcheck clean_list : rm -f list.c list.h check_list.c check_list @ <>= typedef struct list_struct { struct list_struct *next; struct list_struct *prev; void *d; /* data */ } list_t; @ [[head]] and [[tail]] return pointers to the head and tail nodes of the list: <>= list_t *head(list_t *list); list_t *tail(list_t *list); @ <>= list_t *head(list_t *list) { if (list == NULL) return list; while (list->prev != NULL) { list = list->prev; } return list; } list_t *tail(list_t *list) { if (list == NULL) return list; while (list->next != NULL) { list = list->next; } return list; } @ <>= int list_length(list_t *list); @ <>= int list_length(list_t *list) { int i; if (list == NULL) return 0; list = head(list); i = 1; while (list->next != NULL) { list = list->next; i += 1; } return i; } @ [[push]] inserts a node after the current position in the list without changing the current position. However, if the list we're pushing onto is [[NULL]], the current position isn't defined so we set it to that of the pushed domain. <>= void push(list_t **pList, void *data); @ <>= void push(list_t **pList, void *data) { list_t *list, *new_node; assert(pList != NULL); list = *pList; new_node = create_node(data); if (list == NULL) *pList = new_node; else { if (list->next != NULL) list->next->prev = new_node; new_node->next = list->next; list->next = new_node; new_node->prev = list; } } @ [[pop]] removes the current domain node, moving the current position to the node after it, unless that node is [[NULL]], in which case move the current position to the node before it. 
<>= void *pop(list_t **pList); @ <>= void *pop(list_t **pList) { list_t *list, *popped; void *data; assert(pList != NULL); list = *pList; assert(list != NULL); /* not an empty list */ popped = list; /* bypass the popped node */ if (list->prev != NULL) list->prev->next = list->next; if (list->next != NULL) { list->next->prev = list->prev; *pList = list->next; } else *pList = list->prev; /* note: list->prev may == NULL. that's ok ;) */ data = popped->d; destroy_node(popped); return data; } @ [[create_]] and [[destroy_node]] are simple wrappers around [[malloc]] and [[free]]. <>= list_t *create_node(void *data) { list_t *ret = (list_t *)malloc(sizeof(list_t)); assert(ret != NULL); ret->prev = NULL; ret->next = NULL; ret->d = data; return ret; } void destroy_node(list_t *node) { if (node) free(node); } @ The user must free the data pointed to by the node on their own. \subsection{List implementation} <>= <> <> #include "list.h" <> @ <>= #include /* assert() */ #include /* malloc(), free(), rand() */ //#include /* fprintf(), stdout */ @ \subsection{List unit tests} Here we check to make sure the various functions work as expected, using \citetalias{sw:check}. <>= <> <> <

> @ <>= #include /* EXIT_SUCCESS and EXIT_FAILURE */ <> #include "list.h" @ <>= <> <> <> @ <>= Suite *test_suite (void) { Suite *s = suite_create ("list"); <> <> <> <> return s; } @ <>= START_TEST(test_push) { list_t *list=NULL; int i, p, e, n=3; for (i=0; id == 0 ); for (i=0; i>= TCase *tc_push = tcase_create("push"); @ <>= tcase_add_test(tc_push, test_push); suite_add_tcase(s, tc_push); @ <>= @ <>= @ <>= @ \section{Function string evaluation} For the saddle-point approximated Kramers' model (Section \ref{sec.kramers}) we need the ability to evaluate user-supplied functions ($E(x)$, $x_{ts}(F)$, \ldots). We want the ability to handle fairly general functions, but don't want to reinvent the wheel by writing our own parser/evaluator. The solution is to run a scripting language as a subprocess accessed via pipes. We use \citetalias{sw:python} here, but it would be a simple matter to replace it with another evaluator if so desired. We spawn the subprocess using the standard [[pipe]], [[fork]], [[exec]] idiom. This is one of the more complicated software ideas in \prog, so we'll go into more detail than we have been. Most of this is also POSIX-specific (as far as I know), so you'll have to do some legwork to port it to non-POSIX systems. We persevere despite the difficulties, because without command-line access to new functions, the saddle-point Kramers' approximation is of very limited utiltiy. If you feel the benefits do \emph{not} outweigh the costs, or you are on a non-POSIX system (e.g. MS Windows without Cygwin), you should probably hardcode your functions in \lang. Then you can either statically or dynamically link to those hardcoded functions. While much less flexible, this approach would be a fairly simple-to-implement fallback. Because this functionality is independent of the rest of the simulation, we'll split its definition out into its own set of files. 
The interface is defined in [[string_eval.h]], the implementation in [[string_eval.c]], and the unit testing in [[check_string_eval.c]]. <>= <> <> <> <> @ <>= string_eval.c : sawsim.nw notangle -Rstring-eval.c $^ > $@ string_eval.h : sawsim.nw notangle -Rstring-eval.h $^ > $@ check_string_eval.c : sawsim.nw notangle -Rcheck-string-eval.c $^ > $@ check_string_eval : check_string_eval.c string_eval.c string_eval.h gcc -g -o $@ $< string_eval.c -lcheck -lgsl -lgslcblas -lm clean_string_eval : rm -f string_eval.c string_eval.h check_string_eval.c check_string_eval @ For an introduction to POSIX process control, see\\ \url{http://www.ibm.com/developerworks/aix/library/au-speakingunix8/} (very simple, but a good intro.), \\ \url{http://www.ibm.com/developerworks/aix/library/au-unixprocess.html} (more detail), and of course, the relevant [[man]] pages. We start our subprocess with [[execvp]], one of the [[exec]] family of functions. [[execvp]] replaces the calling process' program with a new program. The [[p]] in [[execvp]] signifies that the new program will be found by searching the paths listed in the [[PATH]] environment variable (this may be a security hole if someone messes about with [[PATH]] before you run \prog, but if they had the ability to change [[PATH]], the security of \prog\ is the least of your worries). The new program needs command line arguments, just like it would if you were running it from a shell. The [[v]] in [[execvp]] signifies that these command line arguments will be provided as an array of [[NULL]] terminated strings, with the final array entry being a [[NULL]] pointer. Now that we know how [[execvp]] works, we store its arguments in some definitions, to make it easy to change the evaluating subprocess to, say, Ruby, or the user's personal evaluation language. 
<>= #define SUBPROCESS "python" //#define SUBPROCESS_ARGS {SUBPROCESS, "-ic", "import sys;sys.ps1='';sys.ps2=''", (char *)NULL} static char *SUBPROCESS_ARGS[] = {SUBPROCESS, "-ic", "import sys;sys.ps1='';sys.ps2=''", (char *)NULL}; //static char *SUBPROCESS_ARGS[] = {SUBPROCESS, "-ic", "pass", (char *)NULL}; @ The [[i]] option lets Python know that it should run in interactive mode. In its standard mode, python reads all of the supplied instructions, and then acts on them in a single pass. In interactive mode, python acts on every instruction as soon as it is received. The [[c]] option signals a command line instruction for Python to execute on startup, which in our case simply turns off the default prompting ([[ps1]] and [[ps2]]), since that is mostly for human users, and our program doesn't need it. %The [[c]] option signals a command line instruction for Python to execute on startup, which in our case simply [[pass]]es so that the silly Python header information is not printed. We leave the prompts in, because we scan for them to determine when the output has completed. Since the call to [[execvp]] replaces the calling program, we need to split our original program in half using [[fork]]. The parent half of the program can continue on to run our simulation, while the child half [[exec]]s and turns into Python. [[fork]] returns two copies of the same program, executing the original code. In the child process [[fork]] returns 0, and in the parent it returns the process ID of the child. This difference allows us to write separate parent/child code in the section immediately following the [[fork]]. We communicate with the child (Python) process using \emph{pipes}, with one process writing data into one end of the pipe, and the other process reading the data out of the other end. The [[pipe]] function creates an unnamed pipe, and returns the file descriptors for reading and writing into the new pipe. 
We need two pipes, one for the subprocess's [[stdin]] and one for its [[stdout]]. We store the pipe file descriptors in an array of 4 [[int]]s, and use the following definitions for access. <>= #define PIPE_READ 0 /* the end you read from */ #define PIPE_WRITE 1 /* the end you write to */ #define STDIN 0 /* initial index of stdin pair */ #define STDOUT 2 /* initial index of stdout pair */ @ So [[pfd[STDIN+PIPE_READ]]] is the file descriptor you would read from for the [[stdin]] pipe, and similarly for the other combinations. As a finishing touch, we can promote the POSIX file descriptors ([[read]]/[[write]] interface) into the more familiar [[stdio.h]] \emph{streams} ([[fprintf]]/[[fgetc]] interface) using [[fdopen]], which creates a stream from an open file descriptor. <>= extern void string_eval_setup(FILE **pIn, FILE **pOut); @ <>= void string_eval_setup(FILE **pIn, FILE **pOut) { pid_t pid; int pfd[4]; /* file descriptors for stdin and stdout */ int rc; assert(pipe(pfd+STDIN) != -1); /* stdin pair (in, out) */ assert(pipe(pfd+STDOUT) != -1); /* stdout pair (in, out) */ pid = fork(); /* split process into two copies */ if (pid == -1) { /* fork error */ perror("fork error"); exit(1); } else if (pid == 0) { /* child */ close(pfd[STDIN+PIPE_WRITE]); /* close stdin pipe input */ close(pfd[STDOUT+PIPE_READ]); /* close stdout pipe output */ dup2(pfd[STDIN+PIPE_READ],0); /* wire stdin pipe output to stdin (closes original stdin) */ dup2(pfd[STDOUT+PIPE_WRITE],1); /* wire stdout pipe input to stdout (closes original stdout) */ execvp(SUBPROCESS, SUBPROCESS_ARGS); /* run subprocess */ perror("exec error"); /* exec shouldn't return */ _exit(1); } else { /* parent */ close(pfd[STDIN+PIPE_READ]); /* close stdin pipe output */ close(pfd[STDOUT+PIPE_WRITE]); /* close stdout pipe input */ *pIn = fdopen(pfd[STDIN+PIPE_WRITE], "w"); /* 'upgrade' our file descriptors to streams */ if ( *pIn == NULL ) { perror("fdopen (in)"); exit(1); } *pOut = fdopen(pfd[STDOUT+PIPE_READ], "r"); 
if ( *pOut == NULL ) { perror("fdopen (out)"); exit(1); } } } @ To use the evaluating subprocess, we just pipe in our command, and read out the results. For the simple cases we expect here, we restrict ourselves to a single line of returned text. <>= extern void string_eval(FILE *in, FILE *out, char *input, int buflen, char *output); @ <>= void string_eval(FILE *in, FILE *out, char *input, int buflen, char *output) { int rc; rc = fprintf(in, "%s", input); assert(rc == strlen(input)); fflush(in); fflush(out); alarm(1); /* set a one second timeout on the read */ assert( fgets(output, buflen, out) != NULL ); alarm(0); /* cancel the timeout */ //fprintf(stderr, "eval: %s ----> %s", input, output); } @ The [[alarm]] calls set and clear a timeout on the returned output. If the timeout expires, the process would get a [[SIGALRM]], but it doesn't have a [[SIGALRM]] handler, so it gets a [[SIGKILL]] and dies. This protects against invalid input for which a line of output is not printed to [[stdout]]. Other invalid inputs (e.g. those generating multiple lines on [[stdout]] or a single line on [[stdout]] and more in [[stderr]]) are silently ignored. If you are getting strange results, check your python code seperately. TODO, better error handling. Cleanup is fairly straightforward, we just close the connecting streams from the parent side. With the stdin pipe close on the parent side, the reading child will recive the broken pipe signal [[SIGPIPE]], and closes. The parent waits to confirm the child closing, recieves the child's exit status, and cleans up to prevent zombies. As an added touch, we redirect Python's [[stderr]] before closing the pipes, otherwise it prints a blank line when it exits. 
<>= extern void string_eval_teardown(FILE **pIn, FILE **pOut); @ <>= void string_eval_teardown(FILE **pIn, FILE **pOut) { pid_t pid=0; int stat_loc; /* redirect Python's stderr */ fprintf(*pIn, "sys.stderr = open('/dev/null', 'w')\n"); fflush(*pIn); /* close pipes */ assert( fclose(*pIn) == 0 ); *pIn = NULL; assert( fclose(*pOut) == 0 ); *pOut = NULL; /* wait for python to exit */ while (pid <= 0) { pid = wait(&stat_loc); if (pid < 0) { perror("pid"); } } /* if (WIFEXITED(stat_loc)) { printf("child exited with status %d\n", WEXITSTATUS(stat_loc)); } else if (WIFSIGNALED(stat_loc)) { printf("child terminated with signal %d\n", WTERMSIG(stat_loc)); } */ } @ The [[while]] loop around [[wait]] protects [[wait]] from interrupting signals. \subsection{String evaluation implementation} <>= <> <> #include "string_eval.h" <> <> @ <>= #include /* assert() */ #include /* NULL */ #include /* fprintf(), stdout, fdopen() */ #include /* strlen() */ #include /* pid_t */ #include /* pipe(), fork(), execvp(), alarm() */ #include /* wait() */ @ <>= <> <> @ <>= <> <> <> @ \subsection{String evaluation unit tests} Here we check to make sure the various functions work as expected, using \citetalias{sw:check}. <>= <> <> <> <

> @ <>= #include /* EXIT_SUCCESS and EXIT_FAILURE, atof() */ #include /* printf() */ #include /* assert() */ #include /* strlen() */ #include /* SIGKILL */ <> #include "string_eval.h" @ <>= <> <> @ <>= Suite *test_suite (void) { Suite *s = suite_create ("string eval"); <> <> return s; } @ <>= START_TEST(test_setup_teardown) { FILE *in, *out; string_eval_setup(&in, &out); string_eval_teardown(&in, &out); } END_TEST START_TEST(test_invalid_command) { FILE *in, *out; char input[80], output[80]={}; string_eval_setup(&in, &out); sprintf(input, "print ABCDefg\n"); string_eval(in, out, input, 80, output); string_eval_teardown(&in, &out); } END_TEST START_TEST(test_simple_eval) { FILE *in, *out; char input[80], output[80]={}; string_eval_setup(&in, &out); sprintf(input, "print 3+4*5\n"); string_eval(in, out, input, 80, output); fail_unless(STRMATCH(output,"23\n"), NULL); string_eval_teardown(&in, &out); } END_TEST START_TEST(test_multiple_evals) { FILE *in, *out; char input[80], output[80]={}; string_eval_setup(&in, &out); sprintf(input, "print 3+4*5\n"); string_eval(in, out, input, 80, output); fail_unless(STRMATCH(output,"23\n"), NULL); sprintf(input, "print (3**2 + 4**2)**0.5\n"); string_eval(in, out, input, 80, output); fail_unless(STRMATCH(output,"5.0\n"), NULL); string_eval_teardown(&in, &out); } END_TEST START_TEST(test_eval_with_state) { FILE *in, *out; char input[80], output[80]={}; string_eval_setup(&in, &out); sprintf(input, "print 3+4*5\n"); fprintf(in, "A = 3\n"); sprintf(input, "print A*3\n"); string_eval(in, out, input, 80, output); fail_unless(STRMATCH(output,"9\n"), NULL); string_eval_teardown(&in, &out); } END_TEST @ <>= TCase *tc_string_eval = tcase_create("string_eval"); @ <>= tcase_add_test(tc_string_eval, test_setup_teardown); tcase_add_test_raise_signal(tc_string_eval, test_invalid_command, SIGKILL); tcase_add_test(tc_string_eval, test_simple_eval); tcase_add_test(tc_string_eval, test_multiple_evals); tcase_add_test(tc_string_eval, 
test_eval_with_state); suite_add_tcase(s, tc_string_eval); @ \section{Accelerating function evaluation} My first version-0.3 code was running very slowly. With the options suggested in the help ([[-v1e-6 -Mhooke -A.05 -U1 -kbell -K3.3e-4,.25e-9 -mnull -Mwlc -A0.39e-9,28e-9 -F8]]), the program spent almost all of it's time in the functions [[oneD_solve]], [[wlc]], and [[wlc_handler]], making 0.66 million calls to [[oneD_solve]] and [[9.6]] million calls each to the WLC functions. That's only 15 calls per solution, so the search algorithm seems reasonable. The number of evaluation calls could be drastically reduced, however, by implementing an $x(x_0)$ lookup table. <>= double accel_k(k_func_t *k, double F, environment_t *env, void *params); void free_accels(); @ <>= accel_k.c : sawsim.nw notangle -Raccel-k.c $^ > $@ accel_k.h : sawsim.nw notangle -Raccel-k.h $^ > $@ check_accel_k.c : sawsim.nw notangle -Rcheck-accel_k.c $^ > $@ check_accel_k : check_accel_k.c global.h gcc -g -o $@ $< accel_k.c -lcheck -lgsl -lgslcblas -lm clean_accel_k : rm -f accel_k.c accel_k.h check_accel_k.c check_accel_k @ <>= #include /* assert() */ #include /* realloc(), free(), NULL */ #include "global.h" /* environment_t */ #include "k_model.h" /* k_func_t */ #include "interp.h" /* interp_* */ #include "accel_k.h" typedef struct accel_k_struct { interp_table_t *itable; k_func_t *k; environment_t *env; void *params; } accel_k_t; /* keep an array of all the ks we accelerate, so the caller doesn't have to worry */ static int num_accels = 0; static accel_k_t *accels=NULL; /* Wrap k in the standard f(x) acceleration form */ static double k_wrap(double F, void *params) { accel_k_t *p = (accel_k_t *)params; return (*p->k)(F, p->env, p->params); } static int k_tol(double FA, double kA, double FB, double kB) { assert(FB > FA); if (FB-FA > 1e-12) { //printf("unacceptable tol (x %g > 1 (y %g)\n", fabs(xA-xB), fabs(yA-yB)); return 1; /* unnacceptable */ } else { //printf("acceptable tol\n"); return 0; 
/* acceptable */ } } static int add_accel_k(k_func_t *k, environment_t *env, void *params) { int i=num_accels; accels = (accel_k_t *)realloc(accels, sizeof(accel_k_t) * ++num_accels); assert(accels != NULL); accels[i].itable = interp_table_allocate(&k_wrap, &k_tol); accels[i].k = k; accels[i].env = env; accels[i].params = params; return i; } void free_accels() { int i; for (i=0; i>= <> <> <> <> <> <> <> @ <>= tension_model.c : sawsim.nw notangle -Rtension-model.c $^ > $@ tension_model.h : sawsim.nw notangle -Rtension-model.h $^ > $@ check_tension_model.c : sawsim.nw notangle -Rcheck-tension-model.c $^ > $@ check_tension_model : check_tension_model.c global.h tension_model.c tension_model.h gcc -g -o $@ $< tension_model.c -lcheck -lgsl -lgslcblas -lm clean_tension_model : clean_tension_model_utils rm -f tension_model.c tension_model.h check_tension_model.c check_tension_model tension_model_utils.c : sawsim.nw notangle -Rtension-model-utils.c $^ > $@ tension_model_utils : tension_model_utils.c global.h tension_model.c tension_model.h parse.c parse.h \ list.c list.h tension_balance.c tension_balance.h gcc -g -o $@ $< tension_model.c parse.c list.c tension_balance.c -lgsl -lgslcblas -lm tension_model_utils_static : tension_model_utils.c global.h tension_model.c tension_model.h parse.c parse.h \ list.c list.h tension_balance.c tension_balance.h gcc -g -static -o $@ $< tension_model.c parse.c list.c tension_balance.c -lgsl -lgslcblas -lm clean_tension_model_utils : rm -f tension_model_utils.c tension_model_utils @ \subsection{Null} \label{sec.null_tension} For unstretchable domains. <>= {TG_NULL, "null", "an unstretchable domain", 0, NULL, NULL, NULL, NULL} @ \subsection{Constant} \label{sec.const_tension} <>= <> <> @ <>= <> <> <> @ An infinitely stretchable domain providing a constant tension. 
<>= extern double const_tension_handler(double x, void *pdata); @ <>= double const_tension_handler(double x, void *pdata) { tension_handler_data_t *data = (tension_handler_data_t *)pdata; list_t *list = data->group; double F; assert (x >= 0.0); list = head(list); assert(list != NULL); /* empty active group?! */ F = ((const_tension_param_t *)list->d)->F; while (list != NULL) { assert(((const_tension_param_t *)list->d)->F == F); list = list->next; } return F; } @ <>= typedef struct const_tension_param_struct { double F; /* tension (force) in N */ } const_tension_param_t; @ <>= extern void *string_create_const_tension_param_t(char **param_strings); extern void destroy_const_tension_param_t(void *p); @ <>= const_tension_param_t *create_const_tension_param_t(double F) { const_tension_param_t *ret = (const_tension_param_t *) malloc(sizeof(const_tension_param_t)); assert(ret != NULL); ret->F = F; return ret; } void *string_create_const_tension_param_t(char **param_strings) { return create_const_tension_param_t(atof(param_strings[0])); } void destroy_const_tension_param_t(void *p) { if (p) free(p); } @ <>= extern int num_const_tension_params; extern char *const_tension_param_descriptions[]; extern char const_tension_param_string[]; @ <>= int num_const_tension_params = 1; char *const_tension_param_descriptions[] = {"tension F, N"}; char const_tension_param_string[] = "0"; @ <>= {TG_CONST, "const", "an infinitely stretchable domain with constant tension", 1, const_tension_param_descriptions, const_tension_param_string, &string_create_const_tension_param_t, &destroy_const_tension_param_t} @ \subsection{Hooke} \label{sec.hooke} <>= <> <> @ <>= <> <> <> @ The tension of a single spring is given by $F=kx$ for some spring constant $k$. The behavior of a series of springs $k_i$ in series is given by $$ F = \p({\sum_i \frac{1}{k_i}})^{-1} x $$ For a simple proof, see Appendix \ref{app.math_hooke}. 
<>=
extern double hooke_handler(double x, void *pdata);
@

<>=
/* Tension of a group of Hookean springs in series at extension x.
 * pdata is a tension_handler_data_t whose group list holds
 * hooke_param_t entries; the group must not be empty and every spring
 * constant must be positive.  The effective spring constant is the
 * inverse of the summed inverse spring constants.
 */
double hooke_handler(double x, void *pdata)
{
  tension_handler_data_t *data = (tension_handler_data_t *)pdata;
  list_t *node;
  double inverse_k = 0.0; /* running sum of 1/k_i */

  assert (x >= 0.0);
  node = head(data->group);
  assert(node != NULL); /* empty active group?! */
  for (; node != NULL; node = node->next) {
    assert( ((hooke_param_t *)node->d)->k > 0 );
    inverse_k += 1.0/ ((hooke_param_t *)node->d)->k;
  }
  return (1.0 / inverse_k) * x;
}

@

<>=
typedef struct hooke_param_struct {
  double k; /* spring constant in N/m */
} hooke_param_t;
@

<>=
extern void *string_create_hooke_param_t(char **param_strings);
extern void destroy_hooke_param_t(void *p);
@

<>=
/* Allocate a parameter block holding the spring constant k. */
hooke_param_t *create_hooke_param_t(double k)
{
  hooke_param_t *param = (hooke_param_t *) malloc(sizeof(*param));
  assert(param != NULL);
  param->k = k;
  return param;
}

/* Build the parameter block from its string form; param_strings[0] is
 * the spring constant. */
void *string_create_hooke_param_t(char **param_strings)
{
  double k = atof(param_strings[0]);
  return create_hooke_param_t(k);
}

/* Release a parameter block; NULL is accepted. */
void destroy_hooke_param_t(void *p)
{
  free(p);
}
@

<>=
extern int num_hooke_params;
extern char *hooke_param_descriptions[];
extern char hooke_param_string[];
@

<>=
int num_hooke_params = 1;
char *hooke_param_descriptions[] = {"spring constant k, N/m"};
char hooke_param_string[]="0.05";
@

<