Add information about JPKForceSpec.txt to hooke.driver.jpk.__doc__.
hooke/driver/jpk.py
# Copyright (C) 2008-2010 Massimo Sandal <devicerandom@gmail.com>
#                         W. Trevor King <wking@drexel.edu>
#
# This file is part of Hooke.
#
# Hooke is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Hooke is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
# Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with Hooke.  If not, see
# <http://www.gnu.org/licenses/>.

"""Driver for JPK ForceRobot's velocity clamp data format.

This driver is based on JPK's :file:`JPKForceSpec.txt` version 0.12.
The specification is available free of charge from JPK; email
support@jpk.com to request a copy.
"""

import os.path
import pprint
import zipfile

import numpy

from .. import curve as curve
from .. import experiment as experiment
from ..util.util import Closing as Closing
from ..util.si import join_data_label, split_data_label
from . import Driver as Driver


class JPKDriver (Driver):
    """Handle JPK ForceRobot's data format.
    """
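    # A minimal usage sketch (`curve.jpk-force` is a hypothetical file
    # name; `read` returns a `(segments, info)` tuple):
    #
    #     driver = JPKDriver()
    #     if driver.is_me('curve.jpk-force'):
    #         segments, info = driver.read('curve.jpk-force')
    #         spring_constant = info['spring constant (N/m)']
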
    def __init__(self):
        super(JPKDriver, self).__init__(name='jpk')

    def is_me(self, path):
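        """Return True if `path` looks like a JPK force curve file.

        Zip-style files (JPK file versions since at least 0.5) are
        recognized by a 'header.properties' member containing
        'jpk-data-file'; older single-file curves by header lines
        starting with '# xPosition' and '# yPosition'.
        """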
        if os.path.isdir(path):
            return False
        if zipfile.is_zipfile(path):  # JPK file versions since at least 0.5
            with Closing(zipfile.ZipFile(path, 'r')) as f:
                if 'header.properties' not in f.namelist():
                    return False
                with Closing(f.open('header.properties')) as h:
                    if 'jpk-data-file' in h.read():
                        return True
        else:
            with Closing(open(path, 'r')) as f:
                headlines = []
                for i in range(3):
                    headlines.append(f.readline())
            if headlines[0].startswith('# xPosition') \
                    and headlines[1].startswith('# yPosition'):
                return True
        return False

    def read(self, path, info=None):
        if info is None:
            info = {}
        if zipfile.is_zipfile(path):  # JPK file versions since at least 0.5
            return self._read_zip(path, info)
        else:
            return self._read_old(path, info)

    def _read_zip(self, path, info):
        with Closing(zipfile.ZipFile(path, 'r')) as f:
            f.path = path
            zip_info = self._zip_info(f)
            segments = []
            for i in range(len([p for p in f.namelist()
                                if p.endswith('segment-header.properties')])):
                segments.append(self._zip_segment(f, path, info, zip_info, i))
        if zip_info['file-format-version'] not in ['0.3', '0.4', '0.5']:
            raise NotImplementedError(
                'JPK file version %s not supported (yet).'
                % zip_info['file-format-version'])
        for name in ['approach', 'retract']:
            if len([s for s in segments if s.info['name'] == name]) == 0:
                raise ValueError(
                    'No segment for %s in %s, only %s'
                    % (name, path, [s.info['name'] for s in segments]))
        curve_info = self._zip_translate_params(zip_info,
                                                segments[0].info['raw info'])
        for segment in segments:
            segment.info['spring constant (N/m)'] = \
                curve_info['spring constant (N/m)']
        return (segments, curve_info)

    def _zip_info(self, zipfile):
        with Closing(zipfile.open('header.properties')) as f:
            info = self._parse_params(f.readlines())
            return info

    def _zip_segment(self, zipfile, path, info, zip_info, index):
        # Zip member names always use forward slashes, so build the
        # member path by hand rather than with os.path.join().
        prop_file = zipfile.open(
            'segments/%d/segment-header.properties' % index)
        prop = self._parse_params(prop_file.readlines())
        prop_file.close()
        expected_shape = (int(prop['force-segment-header']['num-points']),)
        channels = []
        for chan in prop['channels']['list']:
            chan_info = prop['channel'][chan]
            channels.append(self._zip_channel(zipfile, index, chan, chan_info))
            if channels[-1].shape != expected_shape:
                raise NotImplementedError(
                    'Channel %d:%s in %s has strange shape %s != %s'
                    % (index, chan, zipfile.path,
                       channels[-1].shape, expected_shape))
        d = curve.Data(
            shape=(len(channels[0]), len(channels)),
            dtype=channels[0].dtype,
            info=self._zip_translate_segment_params(prop))
        for i, chan in enumerate(channels):
            d[:,i] = chan
        return self._zip_scale_segment(d, path, info)

    def _zip_channel(self, zipfile, segment_index, channel_name, chan_info):
        # As in _zip_segment, zip member names always use forward slashes.
        f = zipfile.open('segments/%d/%s' % (
                segment_index, chan_info['data']['file']['name']), 'r')
        assert chan_info['data']['file']['format'] == 'raw', \
            'Non-raw data format:\n%s' % pprint.pformat(chan_info)
        assert chan_info['data']['type'] == 'float-data', \
            'Non-float data format:\n%s' % pprint.pformat(chan_info)
        data = numpy.frombuffer(
            buffer(f.read()),
            dtype=numpy.dtype(numpy.float32).newbyteorder('>'))
        # '>' (big endian) byte order.
        # From version 0.3 of JPKForceSpec.txt in the "Binary data" section:
        #    All forms of raw data are stored in chronological order
        #    (the order in which they were collected), and the
        #    individual values are stored in network byte order
        #    (big-endian). The data type used to store the data is
        #    specified by the "channel.*.data.type" property, and is
        #    either short (2 bytes per value), integer (4 bytes), or
        #    float (4 bytes, IEEE format).
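        # For example (a hedged illustration, not from the spec): the
        # big-endian bytes '\x3f\x80\x00\x00' are the IEEE 754
        # single-precision encoding of 1.0, so
        #     numpy.frombuffer(buffer('\x3f\x80\x00\x00'),
        #                      dtype=numpy.dtype(numpy.float32).newbyteorder('>'))
        # returns array([ 1.], dtype=float32).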
        f.close()
        return data

    def _zip_translate_params(self, params, chan_info):
        info = {
            'raw info': params,
            'filetype': self.name,
            #'time':self._time_from_TODO(raw_info[]),
            }
        # TODO: distinguish between force clamp and velocity clamp
        # experiments.  Note that the JPK file format is flexible
        # enough to support mixed experiments (i.e. both force clamp
        # and velocity clamp segments in a single experiment), but I
        # have no idea what sort of analysis such experiments would
        # require ;).
        info['experiment'] = experiment.VelocityClamp()
        vdef_conversions = chan_info[
            'channel']['vDeflection']['conversion-set']['conversion']
        force_unit = vdef_conversions['force']['scaling']['unit']['unit']
        assert force_unit == 'N', force_unit
        force_base = vdef_conversions['force']['base-calibration-slot']
        assert force_base == 'distance', force_base
        dist_unit = vdef_conversions['distance']['scaling']['unit']['unit']
        assert dist_unit == 'm', dist_unit
        force_mult = float(
            vdef_conversions['force']['scaling']['multiplier'])
        info['spring constant (N/m)'] = force_mult
        return info

    def _zip_translate_segment_params(self, params):
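        """Translate a segment's header properties into segment `info`.

        Segment names are normalized as handled below, e.g.
        'extend-spm' -> 'approach', 'retract-spm' -> 'retract', and
        'pause-at-end-spm' -> 'pause-at-end'.
        """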
        info = {
            'raw info': params,
            'columns': list(params['channels']['list']),
            'name': params['force-segment-header']['name']['name'],
            }
        if info['name'] in ['extend-spm', 'retract-spm', 'pause-at-end-spm']:
            info['name'] = info['name'][:-len('-spm')]
            if info['name'] == 'extend':
                info['name'] = 'approach'
        else:
            raise NotImplementedError(
                'Unrecognized segment type %s' % info['name'])
        return info

    def _zip_scale_segment(self, segment, path, info):
        data = curve.Data(
            shape=segment.shape,
            dtype=segment.dtype,
            info={})
        data[:,:] = segment
        segment.info['raw data'] = data

        # raw column indices
        channels = segment.info['raw info']['channels']['list']
        for i, channel in enumerate(channels):
            conversion = None
            if channel == 'vDeflection':
                conversion = 'distance'
            segment = self._zip_scale_channel(
                segment, channel, conversion=conversion, path=path, info=info)
            name, unit = split_data_label(segment.info['columns'][i])
            if name == 'vDeflection':
                assert unit == 'm', segment.info['columns'][i]
                segment.info['columns'][i] = join_data_label('deflection', 'm')
                # Invert because deflection voltage increases as the
                # tip moves away from the surface, but it makes more
                # sense to me to have it increase as it moves toward
                # the surface (positive tension on the protein chain).
                segment[:,i] *= -1
            elif name == 'height':
                assert unit == 'm', segment.info['columns'][i]
                segment.info['columns'][i] = join_data_label('z piezo', 'm')
        return segment

    def _zip_scale_channel(self, segment, channel_name, conversion=None,
                           path=None, info=None):
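        """Scale one channel column of `segment` into physical units.

        A sketch of the logic below: if the requested conversion is
        stacked on another conversion, the base conversion is applied
        first (recursively), and each linear 'offsetmultiplier' step
        then applies::

          scaled = raw * multiplier + offset

        For example, requesting 'force' for vDeflection would chain
        volts -> distance (m) -> force (N); this driver requests
        'distance' and records the spring constant separately.
        """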
        channel = segment.info['raw info']['channels']['list'].index(
            channel_name)
        conversion_set = segment.info[
            'raw info']['channel'][channel_name]['conversion-set']
        if conversion is None:
            conversion = conversion_set['conversions']['default']
        if conversion == conversion_set['conversions']['base']:
            # Our conversion is the base data.
            if conversion != 'volts':
                raise NotImplementedError(
                    'unknown units for base channel: %s' % conversion)
            segment.info['columns'][channel] = join_data_label(
                channel_name, 'V')
            return segment
        conversion_info = conversion_set['conversion'][conversion]
        if conversion_info['base-calibration-slot'] \
                != conversion_set['conversions']['base']:
            # Our conversion is stacked on a previous conversion.  Do
            # the previous conversion first.
            segment = self._zip_scale_channel(
                segment, channel_name,
                conversion_info['base-calibration-slot'],
                path=path, info=info)
        if conversion_info['type'] == 'file':
            # Michael Haggerty at JPK points out that the conversion
            # information stored in the external file is reproduced in
            # the force curve file.  So there is no need to actually
            # read `conversion_info['file']`.  In fact, the data there
            # may have changed with later calibrations, while the
            # information stored directly in conversion_info retains
            # the calibration information as it was when the experiment
            # was performed.
            pass  # Fall through to 'simple' conversion processing.
        else:
            assert conversion_info['type'] == 'simple', conversion_info['type']
        assert conversion_info['scaling']['type'] == 'linear', \
            conversion_info['scaling']['type']
        assert conversion_info['scaling']['style'] == 'offsetmultiplier', \
            conversion_info['scaling']['style']
        multiplier = float(conversion_info['scaling']['multiplier'])
        offset = float(conversion_info['scaling']['offset'])
        unit = conversion_info['scaling']['unit']['unit']
        segment[:,channel] = segment[:,channel] * multiplier + offset
        segment.info['columns'][channel] = join_data_label(channel_name, unit)
        return segment

    def _parse_params(self, lines):
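        """Parse the lines of a JPK ``.properties`` file into nested dicts.

        A sketch of the expected behavior (the value on the second
        line is a hypothetical example)::

          channels.list=height vDeflection
          channel.height.data.file.name=channels/height.dat

        parses to::

          {'channels': {'list': ['height', 'vDeflection']},
           'channel': {'height': {'data': {'file':
               {'name': 'channels/height.dat'}}}}}
        """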
        info = {}
        for line in lines:
            line = line.strip()
            if line.startswith('#'):
                continue
            else:
                # e.g.: force-segment-header.type=xy-position-segment-header
                fields = line.split('=', 1)
                assert len(fields) == 2, line
                setting = fields[0].split('.')
                sub_info = info  # drill down, e.g. info['force-s..']['type']
                for s in setting[:-1]:
                    if s not in sub_info:
                        sub_info[s] = {}
                    sub_info = sub_info[s]
                if setting[-1] == 'list':  # split a space-delimited list
                    sub_info[setting[-1]] = fields[1].split(' ')
                else:
                    sub_info[setting[-1]] = fields[1]
        return info

    def _read_old(self, path, info):
        raise NotImplementedError(
            "Early JPK files (pre-zip) are not supported by Hooke.  Please "
            "use JPK's `out2jpk-force` script to convert your old files "
            "to a more recent format before loading them with Hooke.")