Updated JPK driver for JPK's version 0.5 format and new driver architecture

author W. Trevor King <wking@drexel.edu>

Mon, 14 Jun 2010 13:56:38 +0000 (09:56 -0400)

committer W. Trevor King <wking@drexel.edu>

Mon, 14 Jun 2010 13:56:38 +0000 (09:56 -0400)
author W. Trevor King <wking@drexel.edu>
Mon, 14 Jun 2010 13:56:38 +0000 (09:56 -0400)
committer W. Trevor King <wking@drexel.edu>
Mon, 14 Jun 2010 13:56:38 +0000 (09:56 -0400)
diff --git a/hooke/driver/__init__.py b/hooke/driver/__init__.py

index b1ba57f6312918847cb5c1e4712e62ccfb0c3c59..15c05c1d7e038dafbba6e829194055858bb9db23 100644 (file)
--- a/hooke/driver/__init__.py
+++ b/hooke/driver/__init__.py
@@ -32,7 +32,7 @@ DRIVER_MODULES = [
  #    ('csvdriver', True),
  #    ('hdf5', True),
  #    ('hemingclamp', True),
-#    ('jpk', True),
+    ('jpk', True),
  #    ('mcs', True),
  #    ('mfp1dexport', True),
      ('mfp3d', True),
diff --git a/hooke/driver/jpk.py b/hooke/driver/jpk.py

index 6f519effbef0d6a1ecdcdbaadecdf8a9d1c83d43..9cc82ddb0abf4d7927dead499d9f0e5c3dc9cd59 100644 (file)
--- a/hooke/driver/jpk.py
+++ b/hooke/driver/jpk.py
@@ -17,140 +17,244 @@
  # License along with Hooke.  If not, see
  # <http://www.gnu.org/licenses/>.
  
-import string
-from .. import curve as lhc
-
-class DataChunk(list):
-    #Dummy class to provide ext and ret methods to the data list.
-
-    def ext(self):
-        halflen=(len(self)/2)
-        return self[0:halflen]
-
-    def ret(self):
-        halflen=(len(self)/2)
-        return self[halflen:]
-
-class jpkDriver(lhc.Driver):
-
-    def __init__(self, filename):
-        self.filename=filename #self.filename can always be useful, and should be defined
-        self.filedata = open(filename,'r') #We open the file
-        self.filelines=self.filedata.readlines()
-        self.filedata.close()
-        '''These are two strings that can be used by Hooke commands/plugins to understand what they are looking at. They have no other
-        meaning. They have to be somehow defined however - commands often look for those variables.
-
-        self.filetype should contain the name of the exact filetype defined by the driver (so that filetype-specific commands can know
-                      if they're dealing with the correct filetype)
-        self.experiment should contain instead the type of data involved (for example, various drivers can be used for force-clamp experiments,
-                      but hooke commands could like to know if we're looking at force clamp data, regardless of their origin, and not other
-                      kinds of data)
-
-        Of course, all other variables you like can be defined in the class.
-        '''
-        self.filetype = 'jpk'
-        self.experiment = 'smfs'
-
-
-
-    def __del__(self):
-        self.filedata.close()
-
-    def is_me(self):
-        '''
-        we define our magic heuristic for jpk files
-        '''
-        myfile=file(self.filename)
-        headerlines=myfile.readlines()[0:3]
-        myfile.close()
-        if headerlines[0][0:11]=='# xPosition' and headerlines[1][0:11]=='# yPosition':
-            return True
+"""Driver for JPK ForceRobot's velocity clamp data format.
+"""
+
+import logging
+import os.path
+import pprint
+import zipfile
+
+import numpy
+
+from .. import curve as curve
+from .. import experiment as experiment
+from . import Driver as Driver
+
+
+class JPKDriver (Driver):
+    """Handle JPK ForceRobot's data format.
+    """
+    def __init__(self):
+        super(JPKDriver, self).__init__(name='jpk')
+
+    def is_me(self, path):
+        if zipfile.is_zipfile(path):  # JPK file versions since at least 0.5
+            f = h = None
+            try:
+                f = zipfile.ZipFile(path, 'r')
+                if 'header.properties' not in f.namelist():
+                    return False
+                h = f.open('header.properties')
+                if 'jpk-data-file' in h.read():
+                    return True
+            finally:
+                if h != None:
+                    h.close()
+                if f != None:
+                    f.close()
          else:
-            return False
-
-    def close_all(self):
-        self.filedata.close()
-
-    def _read_data_segment(self):
-        #routine that actually reads the data
-
-        height_ms=[]
-        height_m=[]
-        height=[]
-        v_deflection=[]
-        h_deflection=[]
-
-        self.springconstant=0 #if we don't meet any spring constant, use deflection...
-
-        for line in self.filelines:
-            #we meet the segment defining the order of data columns
-
-            if line[0:9]=='# columns':
-                splitline=line.split()[2:]
-                height_ms_index=splitline.index('smoothedStrainGaugeHeight')
-                height_m_index=splitline.index('strainGaugeHeight')
-                height_index=splitline.index('height')
-                v_deflection_index=splitline.index('vDeflection')
-                #h_deflection=splitline.index('hDeflection')
-
-            if line[0:16]=='# springConstant':
-                self.springconstant=float(line.split()[2])
-
-            if line[0] != '#' and len(line.split())>1:
-                dataline=line.split()
-                height_ms.append(float(dataline[height_ms_index]))
-                height_m.append(float(dataline[height_m_index]))
-                height.append(float(dataline[height_index]))
-                v_deflection.append(float(dataline[v_deflection_index]))
-                #h_deflection.append(float(dataline[h_deflection_index]))
-
-        if self.springconstant != 0:
-            force=[item*self.springconstant for item in v_deflection]
-        else: #we have measured no spring constant :(
-            force=v_deflection
-
-        height_ms=DataChunk([item*-1 for item in height_ms])
-        height_m=DataChunk([item*-1 for item in height_m])
-        height=DataChunk([item*-1 for item in height])
-        deflection=DataChunk(v_deflection)
-        force=DataChunk(force)
-
-        return height_ms,height_m,height,deflection,force
-
-    def deflection(self):
-        height_ms,height_m,height,deflection,force=self._read_data_segment()
-        deflection_ext=deflection.ext()
-        deflection_ret=deflection.ret()
-        deflection_ret.reverse()
-        return deflection_ext,deflection_ret
-
-    def default_plots(self):
-
-        height_ms,height_m,height,deflection,force=self._read_data_segment()
-
-        height_ms_ext=height_ms.ext()
-        height_ms_ret=height_ms.ret()
-        force_ext=force.ext()
-        force_ret=force.ret()
-        #reverse the return data, to make it coherent with hooke standard
-        height_ms_ret.reverse()
-        force_ret.reverse()
-
-        main_plot=lhc.PlotObject()
-        main_plot.add_set(height_ms_ext,force_ext)
-        main_plot.add_set(height_ms_ret,force_ret)
-
-
-
-        if self.springconstant != 0:
-            main_plot.units=['meters','force']
+            f = None
+            try:
+                f = open(path, 'r')
+                headlines = []
+                for i in range(3):
+                    headlines.append(f.readline())
+                if headlines[0].startswith('# xPosition') \
+                        and headlines[1].startswith('# yPosition'):
+                    return True
+            finally:
+                if f != None:
+                    f.close()
+        return False
+
+    def read(self, path):
+        if zipfile.is_zipfile(path):  # JPK file versions since at least 0.5
+            return self._read_zip(path)
          else:
-            main_plot.units=['meters','meters']
-
-        main_plot.normalize_vectors()
-
-        main_plot.destination=0
-        main_plot.title=self.filename
-
-        return [main_plot]
+            return self._read_old(path)
+
+    def _read_zip(self, path):
+        """Return `([approach, retract], info)` read from the zip at `path`."""
+        # Open before `try`: if opening fails there is nothing to close.
+        f = zipfile.ZipFile(path, 'r')
+        try:
+            f.path = path  # used in `_zip_segment` error messages
+            info = self._zip_info(f)
+            approach = self._zip_segment(f, info, 0)
+            retract = self._zip_segment(f, info, 1)
+            assert approach.info['name'] == 'approach', approach.info['name']
+            assert retract.info['name'] == 'retract', retract.info['name']
+            return ([approach, retract],
+                    self._zip_translate_params(info, retract.info['raw info']))
+        finally:
+            f.close()
+
+    def _zip_info(self, zipfile):
+        """Return the parsed `header.properties` of an open zip archive.
+        """
+        # NOTE: the `zipfile` argument shadows the `zipfile` module here.
+        h = zipfile.open('header.properties')
+        try:
+            return self._parse_params(h.readlines())
+        finally:
+            h.close()
+
+    def _zip_segment(self, zipfile, info, index):
+        """Return segment `index` of an open archive as scaled `curve.Data`."""
+        name = 'segments/%s/segment-header.properties' % index  # zip uses '/'
+        prop_file = zipfile.open(name)
+        prop = self._parse_params(prop_file.readlines())
+        prop_file.close()
+        expected_shape = (int(prop['force-segment-header']['num-points']),)
+        channels = []
+        for chan in prop['channels']['list']:
+            chan_info = prop['channel'][chan]
+            channels.append(self._zip_channel(zipfile, index, chan, chan_info))
+            if channels[-1].shape != expected_shape:
+                raise NotImplementedError(
+                    'Channel %d:%s in %s has strange shape %s != %s'
+                    % (index, chan, zipfile.path,
+                       channels[-1].shape, expected_shape))
+        d = curve.Data(
+            shape=(len(channels[0]), len(channels)), dtype=channels[0].dtype,
+            info=self._zip_translate_segment_params(prop))
+        for i,chan in enumerate(channels):
+            d[:,i] = chan
+        return self._zip_scale_segment(d)
+
+    def _zip_channel(self, zipfile, segment_index, channel_name, chan_info):
+        """Return one channel's samples as a big-endian float32 array."""
+        assert chan_info['data']['file']['format'] == 'raw', \
+            'Non-raw data format:\n%s' % pprint.pformat(chan_info)
+        assert chan_info['data']['type'] == 'float-data', \
+            'Non-float data format:\n%s' % pprint.pformat(chan_info)
+        # Zip member names always use '/', so os.path.join is wrong here.
+        name = chan_info['data']['file']['name']
+        f = zipfile.open('segments/%s/%s' % (segment_index, name), 'r')
+        try:
+            # Is JPK data always big endian?  I can't find a config
+            # setting.  The ForceRobot brochure
+            #   http://www.jpk.com/forcerobot300-1.download.6d694150f14773dc76bc0c3a8a6dd0e8.pdf
+            # lists a PowerPC chip on page 4, under Control
+            # electronics, and PPCs are usually big endian.
+            #   http://en.wikipedia.org/wiki/PowerPC#Endian_modes
+            return numpy.frombuffer(
+                f.read(), dtype=numpy.dtype(numpy.float32).newbyteorder('>'))
+        finally:
+            f.close()
+
+    def _zip_translate_params(self, params, chan_info):
+        """Return `params` wrapped together with the spring constant (N/m)."""
+        conv = chan_info['channel']['vDeflection']['conversion-set']['conversion']
+        force = conv['force']
+        force_unit = force['scaling']['unit']['unit']
+        assert force_unit == 'N', force_unit
+        force_base = force['base-calibration-slot']
+        assert force_base == 'distance', force_base
+        dist_unit = conv['distance']['scaling']['unit']['unit']
+        assert dist_unit == 'm', dist_unit
+        return {
+            'raw info': params,
+            #'time':self._time_from_TODO(raw_info[]),
+            'spring constant (N/m)': float(force['scaling']['multiplier']),
+            }
+
+    def _zip_translate_segment_params(self, params):
+        """Return info for one segment from its parsed header `params`.
+
+        JPK's 'extend-spm'/'retract-spm' names become 'approach'/'retract'.
+        """
+        columns = list(params['channels']['list'])  # copy of the channel list
+        name = params['force-segment-header']['name']['name']
+        if name == 'extend-spm':
+            name = 'approach'
+        elif name == 'retract-spm':
+            name = 'retract'
+        else:
+            raise NotImplementedError('Unrecognized segment type %s' % name)
+        return {'raw info': params, 'columns': columns, 'name': name}
+
+    def _zip_scale_segment(self, segment):
+        """Scale the height and vDeflection columns of `segment` to meters.
+
+        An unscaled copy of the data is stored in `segment.info['raw data']`.
+        """
+        raw = curve.Data(shape=segment.shape, dtype=segment.dtype, info={})
+        raw[:,:] = segment
+        segment.info['raw data'] = raw
+
+        # Column indices, looked up in the raw channel list.
+        names = segment.info['raw info']['channels']['list']
+        z_col = names.index('height')
+        d_col = names.index('vDeflection')
+
+        for col, conversion in ((z_col, 'calibrated'), (d_col, 'distance')):
+            segment = self._zip_scale_channel(segment, col, conversion)
+
+        # `columns` started as a copy of the channel list, so the raw
+        # column indices also address the scaled columns renamed below.
+        columns = segment.info['columns']
+        assert columns[z_col] == 'height (m)', columns[z_col]
+        assert columns[d_col] == 'vDeflection (m)', columns[d_col]
+
+        columns[z_col] = 'z piezo (m)'
+        columns[d_col] = 'deflection (m)'
+        return segment
+
+    def _zip_scale_channel(self, segment, channel, conversion):
+        """Apply `conversion`, and any it is stacked on, to one column."""
+        raw_info = segment.info['raw info']
+        channel_name = raw_info['channels']['list'][channel]
+        conversion_set = raw_info['channel'][channel_name]['conversion-set']
+        conversion_info = conversion_set['conversion'][conversion]
+        base = conversion_info['base-calibration-slot']
+        if base != conversion_set['conversions']['base']:
+            # Our conversion is stacked on a previous conversion.  Do
+            # the previous conversion first.
+            segment = self._zip_scale_channel(segment, channel, base)
+        if conversion_info['type'] == 'file':
+            if os.path.exists(conversion_info['file']):
+                raise NotImplementedError(
+                    'No calibration files were available for testing')
+            # NOTE(review): the linear scaling below still runs after this.
+            logging.getLogger('hooke').warning(
+                'Skipping %s -> %s calibration for %s channel.  '
+                'Calibration file %s not found',
+                base, conversion, channel_name, conversion_info['file'])
+        else:
+            assert conversion_info['type'] == 'simple', conversion_info['type']
+        scaling = conversion_info['scaling']
+        assert scaling['type'] == 'linear', scaling['type']
+        assert scaling['style'] == 'offsetmultiplier', scaling['style']
+        multiplier = float(scaling['multiplier'])
+        offset = float(scaling['offset'])
+        unit = scaling['unit']['unit']
+        segment[:,channel] = segment[:,channel] * multiplier + offset
+        segment.info['columns'][channel] = '%s (%s)' % (channel_name, unit)
+        return segment
+
+    def _parse_params(self, lines):
+        """Parse `key=value` property lines into nested dicts.
+
+        `a.b=c` sets info['a']['b'] = 'c'; blank and `#` lines are skipped.
+        """
+        info = {}
+        for line in lines:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue  # a blank line would fail the assertion below
+            fields = line.split('=', 1)
+            assert len(fields) == 2, line
+            setting = fields[0].split('.')
+            sub_info = info  # drill down, e.g. info['force-s..']['type']
+            for s in setting[:-1]:
+                sub_info = sub_info.setdefault(s, {})
+            if setting[-1] == 'list':  # split a space-delimited list
+                sub_info[setting[-1]] = fields[1].split(' ')
+            else:
+                sub_info[setting[-1]] = fields[1]
+        return info
+
+    def _read_old(self, path):
+        raise NotImplementedError('No old-style JPK files were available for testing, please send us yours: %s' % path)
diff --git a/test/jpk_driver.py b/test/jpk_driver.py

new file mode 100644 (file)

index 0000000..d3b80b6
--- /dev/null
+++ b/test/jpk_driver.py
@@ -0,0 +1,40 @@
+# Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
+#
+# This file is part of Hooke.
+#
+# Hooke is free software: you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation, either
+# version 3 of the License, or (at your option) any later version.
+#
+# Hooke is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with Hooke.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+"""
+>>> import os.path
+>>> from hooke.hooke import Hooke, HookeRunner
+>>> h = Hooke()
+>>> r = HookeRunner()
+>>> playlist = os.path.join('test', 'data', 'vclamp_jpk', 'playlist')
+>>> h = r.run_lines(h, ['load_playlist ' + playlist]) # doctest: +ELLIPSIS
+<FilePlaylist playlist.hkp>
+Success
+<BLANKLINE>
+>>> h = r.run_lines(h, ['curve_info']) # doctest: +ELLIPSIS, +REPORT_UDIFF
+name: 2009.04.23-15.15.47.jpk
+path: test/data/vclamp_jpk/2009.04.23-15.15.47.jpk
+experiment: None
+driver: <hooke.driver.jpk.JPKDriver object at 0x...>
+filetype: None
+note: 
+blocks: 2
+block sizes: [(4096, 6), (4096, 4)]
+Success
+<BLANKLINE>
+"""
author	W. Trevor King <wking@drexel.edu>
	Mon, 14 Jun 2010 13:56:38 +0000 (09:56 -0400)
committer	W. Trevor King <wking@drexel.edu>
	Mon, 14 Jun 2010 13:56:38 +0000 (09:56 -0400)
hooke/driver/__init__.py		patch \| blob \| history
hooke/driver/jpk.py		patch \| blob \| history
test/jpk_driver.py	[new file with mode: 0644]	patch \| blob