1 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 # The author may be contacted at <wking@drexel.edu> on the Internet, or
17 # write to Trevor King, Drudge's University, Physics Dept., 3141 Chestnut St.,
18 # Philadelphia PA 19104, USA.
"""`sawsim` output parsing utilities.

* `Event` instances represent domain state transitions.
* `parse()` parses the output of a typical `sawsim` run.
"""
try:
    from collections import namedtuple
except ImportError:  # work around Python < 2.6
    from operator import itemgetter as _itemgetter
    from keyword import iskeyword as _iskeyword
    import sys as _sys

    def namedtuple(typename, field_names, verbose=False):
        """Return a new subclass of tuple with named fields.

        Copied from Python 2.6's collections.py, with the Python-2-only
        statements (`exec ... in ...`, `except X, e`, `print x`)
        replaced by spellings that also compile on Python 3, so that
        this never-executed-on-modern-Python fallback does not make
        the whole module a syntax error.

        Arguments:

        * `typename`: name of the generated class.
        * `field_names`: a sequence of field names, or a single string
          with the names separated by whitespace and/or commas.
        * `verbose`: if true, write the generated class source to
          stdout before executing it.

        Raises `ValueError` if a name contains characters other than
        alphanumerics and underscores, is a keyword, or starts with a
        digit, or if a field name starts with an underscore or is
        duplicated.

        >>> Point = namedtuple('Point', 'x y')
        >>> Point.__doc__                   # docstring for the new class
        'Point(x, y)'
        >>> p = Point(11, y=22)             # instantiate with positional args or keywords
        >>> p[0] + p[1]                     # indexable like a plain tuple
        33
        >>> x, y = p                        # unpack like a regular tuple
        >>> x, y
        (11, 22)
        >>> p.x + p.y                       # fields also accessible by name
        33
        >>> d = p._asdict()                 # convert to a dictionary
        >>> d['x']
        11
        >>> Point(**d)                      # convert from a dictionary
        Point(x=11, y=22)
        >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
        Point(x=100, y=22)

        """

        # Parse and validate the field names.  Validation serves two purposes,
        # generating informative error messages and preventing template injection attacks.
        if isinstance(field_names, basestring):
            field_names = field_names.replace(',', ' ').split()  # names separated by whitespace and/or commas
        field_names = tuple(map(str, field_names))
        for name in (typename,) + field_names:
            if not all(c.isalnum() or c == '_' for c in name):
                raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
            if _iskeyword(name):
                raise ValueError('Type names and field names cannot be a keyword: %r' % name)
            if name[0].isdigit():
                raise ValueError('Type names and field names cannot start with a number: %r' % name)
        seen_names = set()
        for name in field_names:
            if name.startswith('_'):
                raise ValueError('Field names cannot start with an underscore: %r' % name)
            if name in seen_names:
                raise ValueError('Encountered duplicate field name: %r' % name)
            seen_names.add(name)

        # Create and fill-in the class template
        numfields = len(field_names)
        argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
        reprtxt = ', '.join('%s=%%r' % name for name in field_names)
        dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names))
        template = '''class %(typename)s(tuple):
        '%(typename)s(%(argtxt)s)' \n
        __slots__ = () \n
        _fields = %(field_names)r \n
        def __new__(_cls, %(argtxt)s):
            return _tuple.__new__(_cls, (%(argtxt)s)) \n
        @classmethod
        def _make(cls, iterable, new=tuple.__new__, len=len):
            'Make a new %(typename)s object from a sequence or iterable'
            result = new(cls, iterable)
            if len(result) != %(numfields)d:
                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
            return result \n
        def __repr__(self):
            return '%(typename)s(%(reprtxt)s)' %% self \n
        def _asdict(t):
            'Return a new dict which maps field names to their values'
            return {%(dicttxt)s} \n
        def _replace(_self, **kwds):
            'Return a new %(typename)s object replacing specified fields with new values'
            result = _self._make(map(kwds.pop, %(field_names)r, _self))
            if kwds:
                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
            return result \n
        def __getnewargs__(self):
            return tuple(self) \n\n''' % locals()
        # One read-only property per field; the 8-space indent must match
        # the class body indentation used in the template above.
        for i, name in enumerate(field_names):
            template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
        if verbose:
            # stdout.write() instead of a print statement, so the module
            # compiles under both Python 2 and Python 3.
            _sys.stdout.write(template + '\n')

        # Execute the template string in a temporary namespace and
        # support tracing utilities by setting a value for frame.f_globals['__name__']
        namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
                         _property=property, _tuple=tuple)
        try:
            # Tuple form of exec: a statement on Python 2, a function
            # call on Python 3, same meaning on both.
            exec(template, namespace)
        except SyntaxError:
            # `except SyntaxError as e` is unavailable before Python 2.6 and
            # `except SyntaxError, e` is invalid on Python 3, so fetch the
            # active exception explicitly.
            e = _sys.exc_info()[1]
            raise SyntaxError(str(e) + ':\n' + template)
        result = namespace[typename]

        # For pickling to work, the __module__ variable needs to be set to the frame
        # where the named tuple is created.  Bypass this step in environments where
        # sys._getframe is not defined (Jython for example).
        if hasattr(_sys, '_getframe'):
            result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')

        return result
# One `sawsim` transition: the force at which a domain switched from
# `initial_state` to `final_state`.
Event = namedtuple(
    typename='Event',
    field_names=['force', 'initial_state', 'final_state'])


def parse(text):
    """Parse the output of a `sawsim` run.

    `text` is the complete output as a single string.  Blank lines and
    lines starting with `#` are skipped.  Every other line must hold
    exactly three tab-separated fields (force, initial state, final
    state) and is yielded as an `Event` with the force converted to
    `float`.

    This is a generator, so errors surface while iterating: a line
    without exactly three fields raises `ValueError` carrying the list
    of fields found, and a force that is not a valid float raises the
    `ValueError` from `float()`.

    >>> text = '''#Force (N)\\tinitial state\\tFinal state
    ... 2.90301e-10\\tfolded\\tunfolded
    ... 2.83948e-10\\tfolded\\tunfolded
    ... 2.83674e-10\\tfolded\\tunfolded
    ... 2.48384e-10\\tfolded\\tunfolded
    ... 2.43033e-10\\tfolded\\tunfolded
    ... 2.77589e-10\\tfolded\\tunfolded
    ... 2.85343e-10\\tfolded\\tunfolded
    ... 2.67796e-10\\tfolded\\tunfolded
    ... '''
    >>> events = list(parse(text))
    >>> len(events)
    8
    >>> events[0]  # doctest: +ELLIPSIS
    Event(force=2.9030...e-10, initial_state='folded', final_state='unfolded')
    """
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue  # nothing to parse on blank or `#` lines
        fields = line.split('\t')
        if len(fields) != 3:
            raise ValueError(fields)
        force, initial_state, final_state = fields
        yield Event(float(force), initial_state, final_state)