X-Git-Url: http://git.tremily.us/?p=hooke.git;a=blobdiff_plain;f=hooke%2Fdriver%2Fjpk.py;h=ba38f6a485d5b3c764318be8e21d45399c7c00c9;hp=28887789203b315a84eb8503b19cc85978332a8b;hb=91cf002670de3d4668b84d53fad05be90ba7d585;hpb=ba4daa53462d81c9302a91d959612d534efc6be7 diff --git a/hooke/driver/jpk.py b/hooke/driver/jpk.py index 2888778..ba38f6a 100644 --- a/hooke/driver/jpk.py +++ b/hooke/driver/jpk.py @@ -1,137 +1,352 @@ -import string -from .. import curve as lhc - -class DataChunk(list): - #Dummy class to provide ext and ret methods to the data list. - - def ext(self): - halflen=(len(self)/2) - return self[0:halflen] - - def ret(self): - halflen=(len(self)/2) - return self[halflen:] - -class jpkDriver(lhc.Driver): - - def __init__(self, filename): - self.filename=filename #self.filename can always be useful, and should be defined - self.filedata = open(filename,'r') #We open the file - self.filelines=self.filedata.readlines() - self.filedata.close() - '''These are two strings that can be used by Hooke commands/plugins to understand what they are looking at. They have no other - meaning. They have to be somehow defined however - commands often look for those variables. - - self.filetype should contain the name of the exact filetype defined by the driver (so that filetype-specific commands can know - if they're dealing with the correct filetype) - self.experiment should contain instead the type of data involved (for example, various drivers can be used for force-clamp experiments, - but hooke commands could like to know if we're looking at force clamp data, regardless of their origin, and not other - kinds of data) - - Of course, all other variables you like can be defined in the class. - ''' - self.filetype = 'jpk' - self.experiment = 'smfs' - - - - def __del__(self): - self.filedata.close() - - def is_me(self): - ''' - we define our magic heuristic for jpk files - ''' - myfile=file(self.filename) - headerlines=myfile.readlines()[0:3] - myfile.close() - if headerlines[0][0:11]=='# xPosition' and headerlines[1][0:11]=='# yPosition': - return True - else: +# Copyright (C) 2008-2010 Massimo Sandal +# W. Trevor King +# +# This file is part of Hooke. +# +# Hooke is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# Hooke is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General +# Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with Hooke. If not, see +# . + +"""Driver for JPK ForceRobot's velocity clamp data format. + +This driver is based on JPK's :file:`JPKForceSpec.txt` version 0.12. +The specs are freely available from JPK, just email support@jpk.com. +""" + +import os.path +import pprint +import zipfile + +import numpy + +from .. import curve as curve +from ..util.util import Closing as Closing +from ..util.si import join_data_label, split_data_label +from . import Driver as Driver + + +class JPKDriver (Driver): + """Handle JPK ForceRobot's data format. + """ + def __init__(self): + super(JPKDriver, self).__init__(name='jpk') + + def is_me(self, path): + if os.path.isdir(path): return False - - def close_all(self): - self.filedata.close() - - def _read_data_segment(self): - #routine that actually reads the data - - height_ms=[] - height_m=[] - height=[] - v_deflection=[] - h_deflection=[] - - self.springconstant=0 #if we don't meet any spring constant, use deflection... - - for line in self.filelines: - #we meet the segment defining the order of data columns - - if line[0:9]=='# columns': - splitline=line.split()[2:] - height_ms_index=splitline.index('smoothedStrainGaugeHeight') - height_m_index=splitline.index('strainGaugeHeight') - height_index=splitline.index('height') - v_deflection_index=splitline.index('vDeflection') - #h_deflection=splitline.index('hDeflection') - - if line[0:16]=='# springConstant': - self.springconstant=float(line.split()[2]) - - if line[0] != '#' and len(line.split())>1: - dataline=line.split() - height_ms.append(float(dataline[height_ms_index])) - height_m.append(float(dataline[height_m_index])) - height.append(float(dataline[height_index])) - v_deflection.append(float(dataline[v_deflection_index])) - #h_deflection.append(float(dataline[h_deflection_index])) - - if self.springconstant != 0: - force=[item*self.springconstant for item in v_deflection] - else: #we have measured no spring constant :( - force=v_deflection - - height_ms=DataChunk([item*-1 for item in height_ms]) - height_m=DataChunk([item*-1 for item in height_m]) - height=DataChunk([item*-1 for item in height]) - deflection=DataChunk(v_deflection) - force=DataChunk(force) - - return height_ms,height_m,height,deflection,force - - def deflection(self): - height_ms,height_m,height,deflection,force=self._read_data_segment() - deflection_ext=deflection.ext() - deflection_ret=deflection.ret() - deflection_ret.reverse() - return deflection_ext,deflection_ret - - def default_plots(self): - - height_ms,height_m,height,deflection,force=self._read_data_segment() - - height_ms_ext=height_ms.ext() - height_ms_ret=height_ms.ret() - force_ext=force.ext() - force_ret=force.ret() - #reverse the return data, to make it coherent with hooke standard - height_ms_ret.reverse() - force_ret.reverse() - - main_plot=lhc.PlotObject() - main_plot.add_set(height_ms_ext,force_ext) - main_plot.add_set(height_ms_ret,force_ret) - - - - if self.springconstant != 0: - main_plot.units=['meters','force'] + if zipfile.is_zipfile(path): # JPK file versions since at least 0.5 + with Closing(zipfile.ZipFile(path, 'r')) as f: + if 'header.properties' not in f.namelist(): + return False + with Closing(f.open('header.properties')) as h: + if 'jpk-data-file' in h.read(): + return True else: - main_plot.units=['meters','meters'] - - main_plot.normalize_vectors() - - main_plot.destination=0 - main_plot.title=self.filename - - return [main_plot] + with Closing(open(path, 'r')) as f: + headlines = [] + for i in range(3): + headlines.append(f.readline()) + if headlines[0].startswith('# xPosition') \ + and headlines[1].startswith('# yPosition'): + return True + return False + + def read(self, path, info=None): + if info == None: + info = {} + if zipfile.is_zipfile(path): # JPK file versions since at least 0.5 + return self._read_zip(path, info) + else: + return self._read_old(path, info) + + def _read_zip(self, path, info): + with Closing(zipfile.ZipFile(path, 'r')) as f: + f.path = path + zip_info = self._zip_info(f) + version = zip_info['file-format-version'] + segments = [] + for i in range(len([p for p in f.namelist() + if p.endswith('segment-header.properties')])): + segments.append(self._zip_segment( + f, path, info, zip_info, i, version)) + if version not in ['0.%d' % i for i in range(12)]: + raise NotImplementedError( + 'JPK file version %s not supported (yet).' % version) + curve_info = self._zip_translate_params( + zip_info, segments[0].info['raw info'], version) + for segment in segments: # HACK, should use curve-level spring constant + segment.info['spring constant (N/m)'] = \ + curve_info['spring constant (N/m)'] + return (segments, curve_info) + + def _zip_info(self, zipfile): + with Closing(zipfile.open('header.properties')) as f: + info = self._parse_params(f.readlines()) + return info + + def _zip_segment(self, zipfile, path, info, zip_info, index, version): + prop_file = zipfile.open(os.path.join( + 'segments', str(index), 'segment-header.properties')) + prop = self._parse_params(prop_file.readlines()) + prop_file.close() + expected_shape = (int(prop['force-segment-header']['num-points']),) + channels = [] + if 'list' not in prop['channels']: + prop['channels'] = {'list': prop['channels'].split()} + for chan in prop['channels']['list']: + chan_info = prop['channel'][chan] + channels.append(self._zip_channel( + zipfile, index, chan, chan_info)) + if channels[-1].shape != expected_shape: + raise NotImplementedError( + 'Channel %d:%s in %s has strange shape %s != %s' + % (index, chan, zipfile.path, + channels[-1].shape, expected_shape)) + if len(channels) > 0: + shape = (len(channels[0]), len(channels)) + dtype = channels[0].dtype + else: # no channels for this data block + shape = (0,0) + dtype = numpy.float32 + d = curve.Data( + shape=shape, + dtype=dtype, + info=self._zip_translate_segment_params(prop)) + for i,chan in enumerate(channels): + d[:,i] = chan + return self._zip_scale_segment(d, path, info, version) + + def _zip_channel(self, zipfile, segment_index, channel_name, chan_info): + if chan_info['data']['type'] in ['constant-data', 'raster-data']: + return self._zip_calculate_channel(chan_info) + with Closing(zipfile.open(os.path.join( + 'segments', str(segment_index), + chan_info['data']['file']['name']), 'r')) as f: + assert chan_info['data']['file']['format'] == 'raw', \ + 'Non-raw data format:\n%s' % pprint.pformat(chan_info) + dtype = self._zip_channel_dtype(chan_info) + data = numpy.frombuffer( + buffer(f.read()), + dtype=dtype,) + return data + + def _zip_calculate_channel(self, chan_info): + type_ = chan_info['data']['type'] + n = int(chan_info['data']['num-points']) + if type_ == 'constant-data': + return float(chan_info['data']['value'])*numpy.ones( + shape=(n,), + dtype=numpy.float32) + elif type_ == 'raster-data': + start = float(chan_info['data']['start']) + step = float(chan_info['data']['step']) + return numpy.arange( + start=start, + stop=start + step*(n-0.5), + step=step, + dtype=numpy.float32) + else: + raise ValueError('Unrecognized data format "%s"' % type_) + + def _zip_channel_dtype(self, chan_info): + type_ = chan_info['data']['type'] + if type_ in ['float-data', 'float']: + dtype = numpy.dtype(numpy.float32) + elif type_ in ['integer-data', 'memory-integer-data']: + encoder = chan_info['data']['encoder']['type'] + if encoder in ['signedinteger', 'signedinteger-limited']: + dtype = numpy.dtype(numpy.int32) + elif encoder in ['unsignedinteger', 'unsignedinteger-limited']: + dtype = numpy.dtype(numpy.uint32) + else: + raise ValueError('Unrecognized encoder type "%s" for "%s" data' + % (encoder, type_)) + elif type_ in ['short-data', 'short', 'memory-short-data']: + encoder = chan_info['data']['encoder']['type'] + if encoder in ['signedshort', 'signedshort-limited']: + dtype = numpy.dtype(numpy.int16) + elif encoder in ['unsignedshort', 'unsignedshort-limited']: + dtype = numpy.dtype(numpy.uint16) + else: + raise ValueError('Unrecognized encoder type "%s" for "%s" data' + % (encoder, type_)) + else: + raise ValueError('Unrecognized data format "%s"' % type_) + byte_order = '>' + # '>' (big endian) byte order. + # From version 0.3 of JPKForceSpec.txt in the "Binary data" section: + # All forms of raw data are stored in chronological order + # (the order in which they were collected), and the + # individual values are stored in network byte order + # (big-endian). The data type used to store the data is + # specified by the "channel.*.data.type" property, and is + # either short (2 bytes per value), integer (4 bytes), or + # float (4 bytes, IEEE format). + return dtype.newbyteorder(byte_order) + + def _zip_translate_params(self, params, chan_info, version): + info = { + 'raw info':params, + #'time':self._time_from_TODO(raw_info[]), + } + force_unit = self._zip_segment_conversion_unit( + chan_info['channel']['vDeflection']['conversion-set']['conversion']['force'], + version) + assert force_unit == 'N', force_unit + force_base = chan_info['channel']['vDeflection']['conversion-set']['conversion']['force']['base-calibration-slot'] + assert force_base == 'distance', force_base + dist_unit = self._zip_segment_conversion_unit( + chan_info['channel']['vDeflection']['conversion-set']['conversion']['distance'], + version) + assert dist_unit == 'm', dist_unit + distance_base = chan_info['channel']['vDeflection']['conversion-set']['conversion']['distance']['base-calibration-slot'] + assert distance_base == 'volts', distance_base + # Assume volts unit is V, but it is not specified in the JPK + # file format. + force_mult = float( + chan_info['channel']['vDeflection']['conversion-set']['conversion']['force']['scaling']['multiplier']) + sens_mult = float( + chan_info['channel']['vDeflection']['conversion-set']['conversion']['distance']['scaling']['multiplier']) + info['spring constant (N/m)'] = force_mult + info['z piezo sensitivity (m/V)'] = sens_mult + return info + + def _zip_translate_segment_params(self, params): + info = { + 'raw info': params, + 'columns': list(params['channels']['list']), + 'name': self._zip_segment_name(params), + } + return info + + def _zip_segment_name(self, params): + name = params['force-segment-header']['name']['name'] + if name.endswith('-spm'): + name = name[:-len('-spm')] + if name == 'extend': + name = 'approach' + elif name.startswith('pause-at-'): + name = 'pause' + return name + + def _zip_scale_segment(self, segment, path, info, version): + data = curve.Data( + shape=segment.shape, + dtype=segment.dtype, + info={}) + data[:,:] = segment + segment.info['raw data'] = data + + # raw column indices + channels = segment.info['raw info']['channels']['list'] + for i,channel in enumerate(channels): + conversion = None + if channel == 'vDeflection': + conversion = 'distance' + segment = self._zip_scale_channel( + segment, channel, conversion=conversion, + path=path, info=info, version=version) + name,unit = split_data_label(segment.info['columns'][i]) + if name == 'vDeflection': + assert unit == 'm', segment.info['columns'][i] + segment.info['columns'][i] = join_data_label('deflection', 'm') + # Invert because deflection voltage increases as the + # tip moves away from the surface, but it makes more + # sense to me to have it increase as it moves toward + # the surface (positive tension on the protein chain). + segment[:,i] *= -1 + elif name == 'height': + assert unit == 'm', segment.info['columns'][i] + segment.info['columns'][i] = join_data_label('z piezo', 'm') + return segment + + def _zip_scale_channel(self, segment, channel_name, + conversion=None, path=None, info={}, version=None): + channel = segment.info['raw info']['channels']['list'].index( + channel_name) + conversion_set = segment.info['raw info']['channel'][channel_name]['conversion-set'] + if conversion == None: + conversion = conversion_set['conversions']['default'] + if conversion == conversion_set['conversions']['base']: + # Our conversion is the base data. + if conversion != 'volts': + raise NotImplementedError( + 'unknown units for base channel: %s' % conversion) + segment.info['columns'][channel] = join_data_label( + channel_name, 'V') + return segment + conversion_info = conversion_set['conversion'][conversion] + if conversion_info['base-calibration-slot'] \ + != conversion_set['conversions']['base']: + # Our conversion is stacked on a previous conversion. Do + # the previous conversion first. + segment = self._zip_scale_channel( + segment, channel_name, + conversion_info['base-calibration-slot'], + path=path, info=info, version=version) + if conversion_info['type'] == 'file': + # Michael Haggerty at JPK points out that the conversion + # information stored in the external file is reproduced in + # the force curve file. So there is no need to actually + # read `conversion_info['file']`. In fact, the data there + # may have changed with future calibrations, while the + # information stored directly in conversion_info retains + # the calibration information as it was when the experiment + # was performed. + pass # Fall through to 'simple' conversion processing. + else: + assert conversion_info['type'] == 'simple', conversion_info['type'] + assert conversion_info['scaling']['type'] == 'linear', \ + conversion_info['scaling']['type'] + assert conversion_info['scaling']['style'] == 'offsetmultiplier', \ + conversion_info['scaling']['style'] + multiplier = float(conversion_info['scaling']['multiplier']) + offset = float(conversion_info['scaling']['offset']) + unit = self._zip_segment_conversion_unit(conversion_info, version) + segment[:,channel] = segment[:,channel] * multiplier + offset + segment.info['columns'][channel] = join_data_label(channel_name, unit) + return segment + + def _zip_segment_conversion_unit(self, conversion_info, version): + if version in ['0.%d' % i for i in range(3)]: + return conversion_info['scaling']['unit'] + else: + return conversion_info['scaling']['unit']['unit'] + + def _parse_params(self, lines): + info = {} + for line in lines: + line = line.strip() + if line.startswith('#'): + continue + else: + # e.g.: force-segment-header.type=xy-position-segment-header + fields = line.split('=', 1) + assert len(fields) == 2, line + setting = fields[0].split('.') + sub_info = info # drill down, e.g. info['force-s..']['type'] + for s in setting[:-1]: + if s not in sub_info: + sub_info[s] = {} + sub_info = sub_info[s] + if setting[-1] == 'list': # split a space-delimited list + sub_info[setting[-1]] = fields[1].split(' ') + else: + sub_info[setting[-1]] = fields[1] + return info + + def _read_old(self, path, info): + raise NotImplementedError( + "Early JPK files (pre-zip) are not supported by Hooke. Please " + "use JPK's `out2jpk-force` script to convert your old files " + "to a more recent format before loading them with Hooke.")