From: W. Trevor King Date: Sat, 23 Oct 2010 11:42:02 +0000 (-0400) Subject: Move sawsim running into a new pysawsim.sawsim.SawsimRunner class. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c2eb6307c6d34dd1ef3ad32301c094fff928424c;p=sawsim.git Move sawsim running into a new pysawsim.sawsim.SawsimRunner class. --- diff --git a/bin/vel_dep_scan.py b/bin/sawsim_hist_scan.py similarity index 59% rename from bin/vel_dep_scan.py rename to bin/sawsim_hist_scan.py index 7864380..e86c788 100755 --- a/bin/vel_dep_scan.py +++ b/bin/sawsim_hist_scan.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -from pysawsim import velocity_dependant_scan as vds +from pysawsim import parameter_scan as ps from pysawsim.manager.mpi import MPI_worker_death MPI_worker_death() -vds.main() +ps.main() diff --git a/pysawsim/histogram.py b/pysawsim/histogram.py index 32a12cf..ae8378a 100644 --- a/pysawsim/histogram.py +++ b/pysawsim/histogram.py @@ -22,6 +22,8 @@ import numpy +from . import log + class Histogram (object): """A histogram with a flexible comparison method, `residual()`. @@ -112,7 +114,11 @@ class Histogram (object): line = line[len('#'):] self.headings = [x.strip() for x in line.split('\t')] continue # ignore blank lines and comments - bin_edge,count = line.split() + try: + bin_edge,count = line.split() + except ValueError: + log().error('Unable to parse histogram line: "%s"' % line) + raise self.bin_edges.append(float(bin_edge)) self.counts.append(float(count)) bin_width = self.bin_edges[1] - self.bin_edges[0] @@ -140,13 +146,13 @@ class Histogram (object): >>> h.counts = [10, 40, 5] >>> h.to_stream(sys.stdout) ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF - # Force (N)\tUnfolding events + #Force (N)\tUnfolding events 1.5e-10\t10 2e-10\t40 2.5e-10\t5 """ if self.headings != None: - stream.write('# %s\n' % '\t'.join(self.headings)) + stream.write('#%s\n' % '\t'.join(self.headings)) for bin,count in zip(self.bin_edges, self.counts): stream.write('%g\t%g\n' % (bin, count)) diff --git a/pysawsim/velocity_dependant_scan.py b/pysawsim/parameter_scan.py similarity index 58% rename from pysawsim/velocity_dependant_scan.py rename to pysawsim/parameter_scan.py index 59c9cf4..60c60f9 100644 --- a/pysawsim/velocity_dependant_scan.py +++ b/pysawsim/parameter_scan.py @@ -17,31 +17,84 @@ # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St., # Philadelphia PA 19104, USA. -import os # HACK, for getpid() +"""Experiment vs. simulation comparison and scanning. +""" + +from os import getpid # for rss() import os.path import pickle -import shutil -import sys +from StringIO import StringIO import matplotlib matplotlib.use('Agg') # select backend that doesn't require X Windows import numpy import pylab -from scipy.optimize import leastsq from . import log from .histogram import Histogram from .manager import MANAGERS, get_manager -from . import sawsim_histogram +from .sawsim_histogram import sawsim_histogram +from .sawsim import SawsimRunner FIGURE = pylab.figure() # avoid memory problems. """`pylab` keeps internal references to all created figures, so share a single instance. """ +EXAMPLE_HISTOGRAM_FILE_CONTENTS = """# Velocity histograms +# Other general comments... + +#HISTOGRAM: -v 6e-7 +#Force (N)\tUnfolding events +1.8e-10\t1 +1.9e-10\t0 +2e-10\t0 +2.1e-10\t0 +2.2e-10\t0 +2.3e-10\t0 +2.4e-10\t1 +2.5e-10\t0 +2.6e-10\t2 +2.7e-10\t0 +2.8e-10\t1 +2.9e-10\t6 +3e-10\t2 +3.1e-10\t3 + +#HISTOGRAM: -v 8e-7 +#Force (N)\tUnfolding events +2.4e-10\t1 +2.5e-10\t0 +2.6e-10\t4 +2.7e-10\t2 +2.8e-10\t2 +2.9e-10\t3 +3e-10\t2 +3.1e-10\t1 +3.2e-10\t1 + +#HISTOGRAM: -v 1e-6 +#Force (N)\tUnfolding events +2e-10\t1 +2.1e-10\t0 +2.2e-10\t1 +2.3e-10\t1 +2.4e-10\t1 +2.5e-10\t0 +2.6e-10\t2 +2.7e-10\t1 +2.8e-10\t4 +2.9e-10\t2 +3e-10\t2 +3.1e-10\t0 +3.2e-10\t1 +""" + MEM_DEBUG = False + + def rss(): """ For debugging memory usage. @@ -49,57 +102,101 @@ def rss(): resident set size, the non-swapped physical memory that a task has used (in kilo-bytes). """ - call = "ps -o rss= -p %d" % os.getpid() + call = "ps -o rss= -p %d" % getpid() status,stdout,stderr = invoke(call) return int(stdout) class HistogramMatcher (object): - """Compare experimental velocity dependent histograms to simulated data. + """Compare experimental histograms to simulated data. The main entry points are `fit()` and `plot()`. + + The input `histogram_stream` should contain a series of + experimental histograms with '#HISTOGRAM: ` lines starting + each histogram. `` should list the `sawsim` parameters + that are unique to that experiment. + + >>> from .manager.thread import ThreadManager + >>> velocity_stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS) + >>> param_format_string = ( + ... '-s cantilever,hooke,0.05 -N1 ' + ... '-s folded,null -N8 ' + ... '-s "unfolded,wlc,{0.39e-9,28e-9}" ' + ... '-k "folded,unfolded,bell,{%g,%g}" -q folded') + >>> m = ThreadManager() + >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m) + >>> hm = HistogramMatcher(velocity_stream, param_format_string, sr, N=3) + >>> hm.plot([[1e-5,1e-3,3],[0.1e-9,1e-9,3]], logx=True, logy=False) + >>> m.teardown() """ - def __init__(self, velocity_stream, param_format_string, N=400, - manager=None, residual_type='jensen-shannon', plot=True, - use_cache=False, clean_cache=False): - self.experiment_histograms = self.read_force_histograms(velocity_stream) + def __init__(self, histogram_stream, param_format_string, + sawsim_runner, N=400, residual_type='jensen-shannon', + plot=True): + self.experiment_histograms = self._read_force_histograms( + histogram_stream) self.param_format_string = param_format_string - self.residual_type = residual_type + self.sawsim_runner = sawsim_runner self.N = N - self._manager = manager - self.plot = plot - self.sawsim_histogram = sawsim_histogram.SawsimHistogram( - use_cache=use_cache, clean_cach=clean_cache) + self.residual_type = residual_type + self._plot = plot - def read_force_histograms(self, stream): + def _read_force_histograms(self, stream): """ - v_file format: + File format: # comment and blank lines ignored ... - e.g. - - 5e-7 histA - 1e-6 histB + >>> import sys + >>> stream = StringIO(EXAMPLE_HISTOGRAM_FILE_CONTENTS) + >>> hm = HistogramMatcher(StringIO(), None, None, None) + >>> histograms = hm._read_force_histograms(stream) + >>> sorted(histograms.iterkeys()) + ['-v 1e-6', '-v 6e-7', '-v 8e-7'] + >>> histograms['-v 1e-6'].to_stream(sys.stdout) + ... # doctest: +NORMALIZE_WHITESPACE, +REPORT_UDIFF + #Force (N)\tUnfolding events + 2e-10\t1 + 2.1e-10\t0 + 2.2e-10\t1 + 2.3e-10\t1 + 2.4e-10\t1 + 2.5e-10\t0 + 2.6e-10\t2 + 2.7e-10\t1 + 2.8e-10\t4 + 2.9e-10\t2 + 3e-10\t2 + 3.1e-10\t0 + 3.2e-10\t1 """ - histograms = {} - v_file_dir = os.path.dirname(v_file) - for line in strem.readlines(): + token = '#HISTOGRAM:' + hist_blocks = {None: []} + params = None + for line in stream.readlines(): line = line.strip() - if len(line) == 0 or line[0] == "#": - continue # ignore blank lines and comments - v,h_file = line.split() + if line.startswith(token): + params = line[len(token):].strip() + assert params not in hist_blocks, params + hist_blocks[params] = [] + else: + hist_blocks[params].append(line) + + histograms = {} + for params,block in hist_blocks.iteritems(): + if params == None: + continue h = Histogram() - h.from_stream(file(os.path.join(v_file_dir, h_file), 'r')) - histograms[v] = h + h.from_stream(StringIO('\n'.join(block))) + histograms[params] = h return histograms - def param_string(self, params, velocity): + def param_string(self, params, hist_params): """Generate a string of options to pass to `sawsim`. """ - return '%s -v %g' % ( - self.param_format_string % tuple(params), velocity) + return '%s %s' % ( + self.param_format_string % tuple(params), hist_params) def get_all_unfolding_data(self, dirname, velocity_string): datafile = os.path.join(dirname, "data_" + velocity_string) @@ -117,32 +214,23 @@ class HistogramMatcher (object): def _residual(self, params): residual = 0 - for velocity,experiment_hist in self.experiment_histograms.iteritems(): - sawsim_hist = self.sawsim_histogram( - param_string=self.param_string(params, velocity), N=self.N, - bin_edges=experiment_hist.bin_edges, manager=self._manager) - r = experiment_histogram.residual( - sawsim_hist, type=self.residual_type) + for hist_params,experiment_hist in self.experiment_histograms.iteritems(): + sawsim_hist = sawsim_histogram( + sawsim_runner=self.sawsim_runner, + param_string=self.param_string(params, hist_params), + N=self.N, bin_edges=experiment_hist.bin_edges) + r = experiment_hist.residual(sawsim_hist, type=self.residual_type) residual += r - if self.plot == True: - title = ", ".join(["%g" % p for p in (params+[velocity])]) - filename = "residual-%s-%g.png" % (title.replace(', ','-'), r) + if self._plot == True: + title = ", ".join(["%g" % p for p in params]+[hist_params]) + filename = "residual-%s-%g.png" % ( + title.replace(', ', '_').replace(' ', '_'), r) self._plot_residual_comparison( experiment_hist, sawsim_hist, residual=r, title=title, filename=filename) log().debug('residual: %g' % residual) return residual - def fit(self, initial_params): - p,cov,info,mesg,ier = leastsq(self._residual, initial_params, - full_output=True, maxfev=1000) - _log = log() - _log.info('Fitted params: %s' % p) - _log.info('Covariance mx: %s' % cov) - _log.info('Info: %s' % info) - _log.info('Mesg: %s' % mesg) - return p - def plot(self, param_ranges, logx=False, logy=False, contour=False): xranges = param_ranges[0] yranges = param_ranges[1] @@ -159,20 +247,21 @@ class HistogramMatcher (object): X, Y = pylab.meshgrid(x,y) C = numpy.zeros((len(y)-1, len(x)-1)) for i,xi in enumerate(x[:-1]): - for j,yi in enumerate(y[:-1]): - print i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1) - params = (xi,yi) + for j,yj in enumerate(y[:-1]): + log().info('point %d %d (%d of %d)' + % (i, j, i*(len(y)-1) + j, (len(x)-1)*(len(y)-1))) + params = (xi,yj) r = self._residual(params) C[j,i] = numpy.log(r) # better resolution in valleys if MEM_DEBUG == True: log().debug('RSS: %d KB' % rss()) C = numpy.nan_to_num(C) # NaN -> 0 - fid = file("fit_force_histograms-XYC.pkl", "wb") + fid = file("histogram_matcher-XYC.pkl", "wb") pickle.dump([X,Y,C], fid) fid.close() # read in with # import pickle - # [X,Y,C] = pickle.load(file("fit_force_histograms-XYC.pkl", "rb")) + # [X,Y,C] = pickle.load(file("histogram_matcher-XYC.pkl", "rb")) # ... FIGURE.clear() axes = FIGURE.add_subplot(111) @@ -189,7 +278,7 @@ class HistogramMatcher (object): FIGURE.colorbar(p) FIGURE.savefig("figure.png") - def _plot_residual_comparison(self, expeiment_hist, theory_hist, + def _plot_residual_comparison(self, experiment_hist, theory_hist, residual, title, filename): FIGURE.clear() p = pylab.plot(experiment_hist.bin_edges[:-1], @@ -217,15 +306,33 @@ def parse_param_ranges_string(string): return ranges -def main(): - import optparse +def main(argv=None): + """ + >>> import tempfile + >>> f = tempfile.NamedTemporaryFile() + >>> f.write(EXAMPLE_HISTOGRAM_FILE_CONTENTS) + >>> f.flush() + >>> main(['-s', 'bin/sawsim', + ... '-r', '[1e-5,1e-3,3],[0.1e-9,1e-9,3]', + ... '-N', '2', + ... f.name]) + >>> f.close() + """ + from optparse import OptionParser + import sys + + if argv == None: + argv = sys.argv[1:] + + sr = SawsimRunner() + usage = "%prog [options] velocity_file" - parser = optparse.OptionParser(usage) - parser.add_option("-s","--sawsim", dest="sawsim", - metavar="PATH", - help="Set sawsim binary (%default).", - default=sawsim_histogram.SAWSIM) + parser = OptionParser(usage) + for option in sr.optparse_options: + if option.dest == 'param_string': + continue + parser.add_option(option) parser.add_option("-f","--param-format", dest="param_format", metavar="FORMAT", help="Convert params to sawsim options (%default).", @@ -238,25 +345,6 @@ def main(): metavar="PARAMS", help="Param range for plotting (%default).", default='[1e-5,1e-3,20],[0.1e-9,1e-9,20]') - parser.add_option("-N", "--number-of-runs", dest="N", - metavar="INT", type='int', - help="Number of sawsim runs at each point in parameter space (%default).", - default=400) - parser.add_option("-m", "--manager", dest="manager", - metavar="STRING", - help="Job manager name (one of %s) (%%default)." - % (', '.join(MANAGERS)), - default=MANAGERS[0]) - parser.add_option("-C","--use-cache", dest="use_cache", - help="Use cached simulations if they exist (vs. running new simulations) (%default)", - default=False, action="store_true") - parser.add_option("--clean-cache", dest="clean_cache", - help="Remove previously cached simulations if they exist (%default)", - default=False, action="store_true") - parser.add_option("-d","--cache-dir", dest="cache_dir", - metavar="STRING", - help="Cache directory for sawsim unfolding forces (%default).", - default=sawsim_histogram.CACHE_DIR) parser.add_option("-R","--residual", dest="residual", metavar="STRING", help="Residual type (from 'jensen-shannon', 'chi-squared', 'mean', 'std-dev'; default: %default).", @@ -273,22 +361,21 @@ def main(): parser.add_option("-c","--contour-plot", dest="contour_plot", help="Select contour plot (vs. the default pseudocolor plot).", default=False, action="store_true") - options,args = parser.parse_args() + + options,args = parser.parse_args(argv) + initial_params = [float(p) for p in options.initial_params.split(",")] param_ranges = parse_param_ranges_string(options.param_range) velocity_file = args[0] + sr_call_params = sr.initialize_from_options(options) - sawsim_histogram.SAWSIM = options.sawsim - sawsim_histogram.CACHE_DIR = options.cache_dir - manager = get_manager(options.manager)() try: hm = HistogramMatcher( file(velocity_file, 'r'), param_format_string=options.param_format, - N=options.N, manager=manager, residual_type=options.residual, - plot=options.plot_residuals, use_cache=options.use_cache, - clean_cache=options.clean_cache) + sawsim_runner=sr, residual_type=options.residual, + plot=options.plot_residuals, **sr_call_params) #hm.fit(initial_params) hm.plot(param_ranges, logx=options.logx, logy=options.logy, contour=options.contour_plot) finally: - manager.teardown() + sr.teardown() diff --git a/pysawsim/sawsim.py b/pysawsim/sawsim.py index a58deab..1500a93 100644 --- a/pysawsim/sawsim.py +++ b/pysawsim/sawsim.py @@ -18,149 +18,291 @@ # Philadelphia PA 19104, USA. -"""`sawsim` output parsing utilities. - -* `Event` instances represent domain state transitions. -* `parse()` parses the output of a typical `sawsim` run. +"""`Seminar` for running `sawsim` and parsing the results. """ try: from collections import namedtuple except ImportError: # work around Python < 2.6 - from operator import itemgetter as _itemgetter - from keyword import iskeyword as _iskeyword - import sys as _sys - def namedtuple(typename, field_names, verbose=False): - """Returns a new subclass of tuple with named fields. - - Copied from Python 2.6's collections.py. - - >>> Point = namedtuple('Point', 'x y') - >>> Point.__doc__ # docstring for the new class - 'Point(x, y)' - >>> p = Point(11, y=22) # instantiate with positional args or keywords - >>> p[0] + p[1] # indexable like a plain tuple - 33 - >>> x, y = p # unpack like a regular tuple - >>> x, y - (11, 22) - >>> p.x + p.y # fields also accessable by name - 33 - >>> d = p._asdict() # convert to a dictionary - >>> d['x'] - 11 - >>> Point(**d) # convert from a dictionary - Point(x=11, y=22) - >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields - Point(x=100, y=22) + from ._collections import namedtuple +import hashlib +from optparse import Option +import os +import os.path +import shutil +from uuid import uuid4 - """ +from .manager import MANAGERS, get_manager, InvokeJob - # Parse and validate the field names. Validation serves two purposes, - # generating informative error messages and preventing template injection attacks. - if isinstance(field_names, basestring): - field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas - field_names = tuple(map(str, field_names)) - for name in (typename,) + field_names: - if not all(c.isalnum() or c=='_' for c in name): - raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name) - if _iskeyword(name): - raise ValueError('Type names and field names cannot be a keyword: %r' % name) - if name[0].isdigit(): - raise ValueError('Type names and field names cannot start with a number: %r' % name) - seen_names = set() - for name in field_names: - if name.startswith('_'): - raise ValueError('Field names cannot start with an underscore: %r' % name) - if name in seen_names: - raise ValueError('Encountered duplicate field name: %r' % name) - seen_names.add(name) - - # Create and fill-in the class template - numfields = len(field_names) - argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes - reprtxt = ', '.join('%s=%%r' % name for name in field_names) - dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names)) - template = '''class %(typename)s(tuple): - '%(typename)s(%(argtxt)s)' \n - __slots__ = () \n - _fields = %(field_names)r \n - def __new__(_cls, %(argtxt)s): - return _tuple.__new__(_cls, (%(argtxt)s)) \n - @classmethod - def _make(cls, iterable, new=tuple.__new__, len=len): - 'Make a new %(typename)s object from a sequence or iterable' - result = new(cls, iterable) - if len(result) != %(numfields)d: - raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result)) - return result \n - def __repr__(self): - return '%(typename)s(%(reprtxt)s)' %% self \n - def _asdict(t): - 'Return a new dict which maps field names to their values' - return {%(dicttxt)s} \n - def _replace(_self, **kwds): - 'Return a new %(typename)s object replacing specified fields with new values' - result = _self._make(map(kwds.pop, %(field_names)r, _self)) - if kwds: - raise ValueError('Got unexpected field names: %%r' %% kwds.keys()) - return result \n - def __getnewargs__(self): - return tuple(self) \n\n''' % locals() - for i, name in enumerate(field_names): - template += ' %s = _property(_itemgetter(%d))\n' % (name, i) - if verbose: - print template - - # Execute the template string in a temporary namespace and - # support tracing utilities by setting a value for frame.f_globals['__name__'] - namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename, - _property=property, _tuple=tuple) - try: - exec template in namespace - except SyntaxError, e: - raise SyntaxError(e.message + ':\n' + template) - result = namespace[typename] - - # For pickling to work, the __module__ variable needs to be set to the frame - # where the named tuple is created. Bypass this step in enviroments where - # sys._getframe is not defined (Jython for example). - if hasattr(_sys, '_getframe'): - result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__') - - return result +SAWSIM = 'sawsim' # os.path.expand(os.path.join('~', 'bin', 'sawsim')) +CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim_cache')) +DEFAULT_PARAM_STRING = ( + '-s cantilever,hooke,0.05 -N1 ' + '-s folded,null -N8 ' + "-s 'unfolded,wlc,{0.39e-9,28e-9}' " + "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' " + '-q folded -v 1e-6') + +# `Event` instances represent domain state transitions. Event = namedtuple( typename='Event', field_names=['force', 'initial_state', 'final_state']) -def parse(text): - """Parse the output of a `sawsim` run. - - >>> text = '''#Force (N)\\tinitial state\\tFinal state - ... 2.90301e-10\\tfolded\\tunfolded - ... 2.83948e-10\\tfolded\\tunfolded - ... 2.83674e-10\\tfolded\\tunfolded - ... 2.48384e-10\\tfolded\\tunfolded - ... 2.43033e-10\\tfolded\\tunfolded - ... 2.77589e-10\\tfolded\\tunfolded - ... 2.85343e-10\\tfolded\\tunfolded - ... 2.67796e-10\\tfolded\\tunfolded - ... ''' - >>> events = list(parse(text)) - >>> len(events) - 8 - >>> events[0] # doctest: +ELLIPSIS - Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded') +class SawsimRunner (object): + """ + >>> from .manager.thread import ThreadManager + >>> m = ThreadManager() + >>> sr = SawsimRunner(sawsim='bin/sawsim', manager=m) + >>> for run in sr(param_string=DEFAULT_PARAM_STRING, N=2): + ... print 'New run' + ... for i,event in enumerate(run): + ... print i, event # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + New run + 0 Event(force=..., initial_state='folded', final_state='unfolded') + 1 Event(force=..., initial_state='folded', final_state='unfolded') + 2 Event(force=..., initial_state='folded', final_state='unfolded') + 3 Event(force=..., initial_state='folded', final_state='unfolded') + 4 Event(force=..., initial_state='folded', final_state='unfolded') + 5 Event(force=..., initial_state='folded', final_state='unfolded') + 6 Event(force=..., initial_state='folded', final_state='unfolded') + 7 Event(force=..., initial_state='folded', final_state='unfolded') + New run + 0 Event(force=..., initial_state='folded', final_state='unfolded') + 1 Event(force=..., initial_state='folded', final_state='unfolded') + 2 Event(force=..., initial_state='folded', final_state='unfolded') + 3 Event(force=..., initial_state='folded', final_state='unfolded') + 4 Event(force=..., initial_state='folded', final_state='unfolded') + 5 Event(force=..., initial_state='folded', final_state='unfolded') + 6 Event(force=..., initial_state='folded', final_state='unfolded') + 7 Event(force=..., initial_state='folded', final_state='unfolded') + >>> m.teardown() """ - for line in text.splitlines(): - line = line.strip() - if len(line) == 0 or line.startswith('#'): - continue - fields = line.split('\t') - if len(fields) != 3: - raise ValueError(fields) - force,initial_state,final_state = fields - yield Event(float(force), initial_state, final_state) + + optparse_options = [ + Option("-s","--sawsim", dest="sawsim", + metavar="PATH", + help="Set sawsim binary (%default).", + default=SAWSIM), + Option("-p","--params", dest="param_string", + metavar="PARAMS", + help="Initial params for fitting (%default).", + default=DEFAULT_PARAM_STRING), + Option("-N", "--number-of-runs", dest="N", + metavar="INT", type='int', + help="Number of sawsim runs at each point in parameter space (%default).", + default=400), + Option("-m", "--manager", dest="manager", + metavar="STRING", + help="Job manager name (one of %s) (%%default)." + % (', '.join(MANAGERS)), + default=MANAGERS[0]), + Option("-C","--use-cache", dest="use_cache", + help="Use cached simulations if they exist (vs. running new simulations) (%default)", + default=False, action="store_true"), + Option("--clean-cache", dest="clean_cache", + help="Remove previously cached simulations if they exist (%default)", + default=False, action="store_true"), + Option("-d","--cache-dir", dest="cache_dir", + metavar="STRING", + help="Cache directory for sawsim unfolding forces (%default).", + default=CACHE_DIR), + ] + + def __init__(self, sawsim=None, cache_dir=None, + use_cache=False, clean_cache=False, + manager=None): + if sawsim == None: + sawsim = SAWSIM + self._sawsim = sawsim + if cache_dir == None: + cache_dir = CACHE_DIR + self._cache_dir = cache_dir + self._use_cache = use_cache + self._clean_cache = clean_cache + self._manager = manager + self._local_manager = False + self._headline = None + + def initialize_from_options(self, options): + self._sawsim = options.sawsim + self._cache_dir = options.cache_dir + self._use_cache = options.use_cache + self._clean_cache = options.clean_cache + self._manager = get_manager(options.manager)() + self._local_manager = True + call_params = {} + for param in ['param_string', 'N']: + try: + call_params[param] = getattr(options, param) + except AttributeError: + pass + return call_params + + def teardown(self): + if self._local_manager == True: + self._manager.teardown() + + def __call__(self, param_string, N): + """Run `N` simulations and yield `Event` generators for each run. + + Use the `JobManager` instance `manager` for asynchronous job + execution. + + If `_use_cache` is `True`, store an array of unfolding forces + in `cache_dir` for each simulated pull. If the cached forces + are already present for `param_string`, do not redo the + simulation unless `_clean_cache` is `True`. + """ + count = N + if self._use_cache == True: + d = self._param_cache_dir(param_string) + if os.path.exists(d): + if self._clean_cache == True: + shutil.rmtree(d) + self._make_cache(param_string) + else: + for data in self._load_cached_data(param_string): + yield data + count -= 1 + if count == 0: + return + else: + self._make_cache(param_string) + + jobs = {} + for i in range(count): + jobs[i] = self._manager.async_invoke(InvokeJob( + target='%s %s' % (self._sawsim, param_string))) + complete_jobs = self._manager.wait( + [job.id for job in jobs.itervalues()]) + for i,job in jobs.iteritems(): + j = complete_jobs[job.id] + assert j.status == 0, j.data + if self._use_cache == True: + self._cache_run(d, j.data['stdout']) + yield self.parse(j.data['stdout']) + del(jobs) + del(complete_jobs) + + def _param_cache_dir(self, param_string): + """ + >>> s = SawsimRunner() + >>> s._param_cache_dir(DEFAULT_PARAM_STRING) # doctest: +ELLIPSIS + '/.../.sawsim_cache/...' + """ + return os.path.join( + self._cache_dir, hashlib.sha256(param_string).hexdigest()) + + def _make_cache(self, param_string): + cache_dir = self._param_cache_dir(param_string) + os.makedirs(cache_dir) + with open(os.path.join(cache_dir, 'param_string'), 'w') as f: + f.write('# version: %s\n%s\n' % (__version__, param_string)) + + def _load_cached_data(self, param_string): + pcd = self._param_cache_dir(param_string) + for filename in os.listdir(pcd): + if not filename.endswith('.dat'): + continue + with open(os.path.join(pcd, filename), 'r') as f: + yield self.parse(f.read()) + + def _cache_run(self, cache_dir, stdout): + simulation_path = os.path.join(cache_dir, '%s.dat' % uuid4()) + with open(simulation_path, 'w') as f: + f.write(stdout) + + def parse(self, text): + """Parse the output of a `sawsim` run. + + >>> text = '''#Force (N)\\tinitial state\\tFinal state + ... 2.90301e-10\\tfolded\\tunfolded + ... 2.83948e-10\\tfolded\\tunfolded + ... 2.83674e-10\\tfolded\\tunfolded + ... 2.48384e-10\\tfolded\\tunfolded + ... 2.43033e-10\\tfolded\\tunfolded + ... 2.77589e-10\\tfolded\\tunfolded + ... 2.85343e-10\\tfolded\\tunfolded + ... 2.67796e-10\\tfolded\\tunfolded + ... ''' + >>> sr = SawsimRunner() + >>> events = list(sr.parse(text)) + >>> len(events) + 8 + >>> events[0] # doctest: +ELLIPSIS + Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded') + >>> sr._headline + ['Force (N)', 'initial state', 'Final state'] + """ + for line in text.splitlines(): + line = line.strip() + if len(line) == 0: + continue + elif line.startswith('#'): + if self._headline == None: + self._headline = line[len('#'):].split('\t') + continue + fields = line.split('\t') + if len(fields) != 3: + raise ValueError(fields) + force,initial_state,final_state = fields + yield Event(float(force), initial_state, final_state) + + +def main(argv=None): + """ + >>> try: + ... main(['--help']) + ... except SystemExit, e: + ... pass # doctest: +ELLIPSIS, +REPORT_UDIFF + Usage: ... [options] + + Options: + -h, --help show this help message and exit + -s PATH, --sawsim=PATH + Set sawsim binary (sawsim). + ... + >>> print e + 0 + >>> main(['--sawsim', 'bin/sawsim', '-N', '2']) + ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + #Force (N) Initial state Final state + ... folded unfolded + ... folded unfolded + ... folded unfolded + ... folded unfolded + ... + ... folded unfolded + """ + from optparse import OptionParser + import sys + + if argv == None: + argv = sys.argv[1:] + + sr = SawsimRunner() + + usage = "%prog [options]" + parser = OptionParser(usage) + for option in sr.optparse_options: + parser.add_option(option) + + options,args = parser.parse_args(argv) + + try: + sr_call_params = sr.initialize_from_options(options) + + first_run = True + for run in sr(**sr_call_params): + if first_run == True: + first_run = False + run = list(run) # force iterator evaluation + if sr._headline != None: + print '#%s' % '\t'.join(sr._headline) + for event in run: + print '\t'.join([str(x) for x in event]) + finally: + sr.teardown() diff --git a/pysawsim/sawsim_histogram.py b/pysawsim/sawsim_histogram.py index d5eb306..6ca2cb2 100644 --- a/pysawsim/sawsim_histogram.py +++ b/pysawsim/sawsim_histogram.py @@ -19,113 +19,25 @@ from __future__ import with_statement -import hashlib -import os.path -import shutil - import numpy from . import __version__, log from .histogram import Histogram -from .manager import InvokeJob, MANAGERS, get_manager -from .sawsim import parse - - -SAWSIM = 'sawsim' # os.path.expand(os.path.join('~', 'bin', 'sawsim')) -CACHE_DIR = os.path.expanduser(os.path.join('~', '.sawsim_histogram')) -DEFAULT_PARAM_STRING = ( - '-s cantilever,hooke,0.05 -N1 ' - '-s folded,null -N8 ' - "-s 'unfolded,wlc,{0.39e-9,28e-9}' " - "-k 'folded,unfolded,bell,{3.3e-4,0.25e-9}' " - '-q folded -v 1e-6') - - -class SawsimHistogram (object): - def __init__(self, use_cache=False, clean_cache=False): - self._use_cache = use_cache - self._clean_cache = clean_cache - - def _cache_dir(self, param_string): - """ - >>> s = SawsimHistogram() - >>> s._cache_dir(DEFAULT_PARAM_STRING) # doctest: +ELLIPSIS - '/.../.sawsim_histogram/...' - """ - return os.path.join( - CACHE_DIR, hashlib.sha256(param_string).hexdigest()) - - def _make_cache(self, param_string): - cache_dir = self._cache_dir(param_string) - os.makedirs(cache_dir) - with open(os.path.join(cache_dir, 'param_string'), 'w') as f: - f.write('# version: %s\n%s\n' % (__version__, param_string)) +from .manager import MANAGERS, get_manager +from .sawsim import SawsimRunner - def _load_cached_data(self, cache_dir, N): - data = {} - for i in range(N): - simulation_path = os.path.join(cache_dir, '%d.dat' % i) - if os.path.exists(simulation_path): - with open(simulation_path, 'r') as f: - data[i] = parse(f.read()) - else: - break - return data - def _cache_run(self, cache_dir, i, stdout): - simulation_path = os.path.join(cache_dir, '%d.dat' % i) - with open(simulation_path, 'w') as f: - f.write(stdout) - - def __call__(self, param_string=None, N=400, bin_edges=None, manager=None): +def sawsim_histogram(sawsim_runner, param_string, N=400, bin_edges=None): """Run `N` simulations and return a histogram with `bin_edges`. If `bin_edges == None`, return a numpy array of all unfolding forces. - - Use the `JobManager` instance `manager` for asynchronous job - execution. - - If `_use_cache` is `True`, store an array of unfolding forces - in `CACHE_DIR` for each simulated pull. If the cached forces - are already present for `param_string`, do not redo the - simulation unless `_clean_cache` is `True`. """ - data = {} - if self._use_cache == True: - d = self._cache_dir(param_string) - if os.path.exists(d): - if self._clean_cache == True: - shutil.rmtree(d) - self._make_cache(param_string) - else: - data = self._load_cached_data(d, N) - log().debug('using %d cached runs for %s' - % (len(data), param_string)) - else: - self._make_cache(param_string) - - jobs = {} - for i in range(N): - if i in data: - continue - jobs[i] = manager.async_invoke(InvokeJob( - target='%s %s' % (SAWSIM, param_string))) - complete_jobs = manager.wait([job.id for job in jobs.itervalues()]) - for i,job in jobs.iteritems(): - j = complete_jobs[job.id] - assert j.status == 0, j.data - if self._use_cache == True: - self._cache_run(d, i, j.data['stdout']) - data[i] = parse(j.data['stdout']) - del(jobs) - del(complete_jobs) - - # generate histogram events = [] - for d_i in data.values(): - events.extend([e.force for e in d_i - if e.initial_state == 'folded']) + for run in sawsim_runner(param_string=param_string, N=N): + events.extend([event.force for event in run + if (event.initial_state == 'folded' + and event.final_state == 'unfolded')]) events = numpy.array(events) if bin_edges == None: return events @@ -134,59 +46,38 @@ class SawsimHistogram (object): return h -def main(): - import optparse +def main(argv=None): + """ + >>> main(['--sawsim', 'bin/sawsim', '-N', '2']) + ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + #Force (N) Unfolding events + ... + """ + from optparse import OptionParser import sys - global SAWSIM, CACHE_DIR + if argv == None: + argv = sys.argv[1:] - usage = "%prog [options] velocity_file" + sr = SawsimRunner() - parser = optparse.OptionParser(usage) - parser.add_option("-s","--sawsim", dest="sawsim", - metavar="PATH", - help="Set sawsim binary (%default).", - default=SAWSIM) - parser.add_option("-p","--params", dest="params", - metavar="PARAMS", - help="Initial params for fitting (%default).", - default=DEFAULT_PARAM_STRING) - parser.add_option("-N", "--number-of-runs", dest="N", - metavar="INT", type='int', - help="Number of sawsim runs at each point in parameter space (%default).", - default=400) + usage = "%prog [options] velocity_file" + parser = OptionParser(usage) + for option in sr.optparse_options: + parser.add_option(option) parser.add_option("-w", "--bin-width", dest="bin_width", metavar="FLOAT", type='float', help="Histogram bin width in newtons (%default).", - default=None) - parser.add_option("-m", "--manager", dest="manager", - metavar="STRING", - help="Job manager name (one of %s) (%%default)." - % (', '.join(MANAGERS)), - default=MANAGERS[0]) - parser.add_option("-C","--use-cache", dest="use_cache", - help="Use cached simulations if they exist (vs. running new simulations) (%default)", - default=False, action="store_true") - parser.add_option("--clean-cache", dest="clean_cache", - help="Remove previously cached simulations if they exist (%default)", - default=False, action="store_true") - parser.add_option("-d","--cache-dir", dest="cache_dir", - metavar="STRING", - help="Cache directory for sawsim unfolding forces (%default).", - default=CACHE_DIR) - options,args = parser.parse_args() - - SAWSIM = options.sawsim - CACHE_DIR = options.cache_dir + default=10e-12) - sh = SawsimHistogram(use_cache=options.use_cache, - clean_cache=options.clean_cache) + options,args = parser.parse_args(argv) - manager = get_manager(options.manager)() + sr_call_params = sr.initialize_from_options(options) try: - events = sh(param_string=options.params, N=options.N, manager=manager) + events = sawsim_histogram( + sawsim_runner=sr, bin_edges=None, **sr_call_params) finally: - manager.teardown() + sr.teardown() if options.bin_width == None: sys.stdout.write('# Unfolding force (N)\n')