Convert all channels to their default calibration in the JPK driver.

[hooke.git] / hooke / driver / jpk.py
diff --git a/hooke/driver/jpk.py b/hooke/driver/jpk.py

index 9cc82ddb0abf4d7927dead499d9f0e5c3dc9cd59..9ba649cf87f287a72145e586104bb797d60de0dd 100644 (file)
--- a/hooke/driver/jpk.py
+++ b/hooke/driver/jpk.py
@@ -3,15 +3,15 @@
  #
  # This file is part of Hooke.
  #
-# Hooke is free software: you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation, either
-# version 3 of the License, or (at your option) any later version.
+# Hooke is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
  #
-# Hooke is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
+# Hooke is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
+# Public License for more details.
  #
  # You should have received a copy of the GNU Lesser General Public
  # License along with Hooke.  If not, see
@@ -20,7 +20,6 @@
  """Driver for JPK ForceRobot's velocity clamp data format.
  """
  
-import logging
  import os.path
  import pprint
  import zipfile
@@ -29,6 +28,8 @@ import numpy
  
  from .. import curve as curve
  from .. import experiment as experiment
+from ..util.util import Closing as Closing
+from ..util.si import join_data_label, split_data_label
  from . import Driver as Driver
  
  
@@ -39,68 +40,63 @@ class JPKDriver (Driver):
          super(JPKDriver, self).__init__(name='jpk')
  
      def is_me(self, path):
+        if os.path.isdir(path):
+            return False
          if zipfile.is_zipfile(path):  # JPK file versions since at least 0.5
-            f = h = None
-            try:
-                f = zipfile.ZipFile(path, 'r')
+            with Closing(zipfile.ZipFile(path, 'r')) as f:
                  if 'header.properties' not in f.namelist():
                      return False
-                h = f.open('header.properties')
-                if 'jpk-data-file' in h.read():
-                    return True
-            finally:
-                if h != None:
-                    h.close()
-                if f != None:
-                    f.close()
+                with Closing(f.open('header.properties')) as h:
+                    if 'jpk-data-file' in h.read():
+                        return True
          else:
-            f = None
-            try:
-                f = open(path, 'r')
+            with Closing(open(path, 'r')) as f:
                  headlines = []
                  for i in range(3):
                      headlines.append(f.readline())
-                if headlines[0].startswith('# xPosition') \
-                        and headlines[1].startswith('# yPosition'):
-                    return True
-            finally:
-                if f != None:
-                    f.close()
+            if headlines[0].startswith('# xPosition') \
+                    and headlines[1].startswith('# yPosition'):
+                return True
          return False
  
-    def read(self, path):
+    def read(self, path, info=None):
+        if info == None:
+            info = {}
          if zipfile.is_zipfile(path):  # JPK file versions since at least 0.5
-            return self._read_zip(path)
+            return self._read_zip(path, info)
          else:
-            return self._read_old(path)
+            return self._read_old(path, info)
  
-    def _read_zip(self, path):
-        f = None
-        try:
-            f = zipfile.ZipFile(path, 'r')
+    def _read_zip(self, path, info):
+        with Closing(zipfile.ZipFile(path, 'r')) as f:
              f.path = path
-            info = self._zip_info(f)
-            approach = self._zip_segment(f, info, 0)
-            retract = self._zip_segment(f, info, 1)
-            assert approach.info['name'] == 'approach', approach.info['name']
-            assert retract.info['name'] == 'retract', retract.info['name']
-            return ([approach, retract],
-                    self._zip_translate_params(info, retract.info['raw info']))
-        finally:
-            if f != None:
-                f.close()
+            zip_info = self._zip_info(f)
+            segments = []
+            for i in range(len([p for p in f.namelist()
+                                if p.endswith('segment-header.properties')])):
+                segments.append(self._zip_segment(f, path, info, zip_info, i))
+        if zip_info['file-format-version'] not in ['0.5']:
+            raise NotImplementedError(
+                'JPK file version %s not supported (yet).'
+                % zip_info['file-format-version'])
+        for name in ['approach', 'retract']:
+            if len([s for s in segments if s.info['name'] == name]) == 0:
+                raise ValueError(
+                    'No segment for %s in %s, only %s'
+                    % (name, path, [s.info['name'] for s in segments]))
+        curve_info = self._zip_translate_params(zip_info,
+                                                segments[0].info['raw info'])
+        for segment in segments:
+            segment.info['spring constant (N/m)'] = \
+                curve_info['spring constant (N/m)']
+        return (segments, curve_info)
  
      def _zip_info(self, zipfile):
-        h = None
-        try:
-            h = zipfile.open('header.properties')
-            info = self._parse_params(h.readlines())
+        with Closing(zipfile.open('header.properties')) as f:
+            info = self._parse_params(f.readlines())
              return info
-        finally:
-            if h != None:
-                h.close()
  
-    def _zip_segment(self, zipfile, info, index):
+    def _zip_segment(self, zipfile, path, info, zip_info, index):
          prop_file = zipfile.open(os.path.join(
                  'segments', str(index), 'segment-header.properties'))
          prop = self._parse_params(prop_file.readlines())
@@ -121,7 +117,7 @@ class JPKDriver (Driver):
              info=self._zip_translate_segment_params(prop))
          for i,chan in enumerate(channels):
              d[:,i] = chan
-        return self._zip_scale_segment(d)
+        return self._zip_scale_segment(d, path, info)
  
      def _zip_channel(self, zipfile, segment_index, channel_name, chan_info):
          f = zipfile.open(os.path.join(
@@ -133,22 +129,32 @@ class JPKDriver (Driver):
              'Non-float data format:\n%s' % pprint.pformat(chan_info)
          data = numpy.frombuffer(
              buffer(f.read()),
-            dtype=numpy.dtype(numpy.float32).newbyteorder('>'),
-            # Is JPK data always big endian?  I can't find a config
-            # setting.  The ForceRobot brochure
-            #   http://www.jpk.com/forcerobot300-1.download.6d694150f14773dc76bc0c3a8a6dd0e8.pdf
-            # lists a PowerPC chip on page 4, under Control
-            # electronics, and PPCs are usually big endian.
-            #   http://en.wikipedia.org/wiki/PowerPC#Endian_modes
-            )
+            dtype=numpy.dtype(numpy.float32).newbyteorder('>'))
+        # '>' (big endian) byte order.
+        # From version 0.3 of JPKForceSpec.txt in the "Binary data" section:
+        #    All forms of raw data are stored in chronological order
+        #    (the order in which they were collected), and the
+        #    individual values are stored in network byte order
+        #    (big-endian). The data type used to store the data is
+        #    specified by the "channel.*.data.type" property, and is
+        #    either short (2 bytes per value), integer (4 bytes), or
+        #    float (4 bytes, IEEE format).
          f.close()
          return data
  
      def _zip_translate_params(self, params, chan_info):
          info = {
              'raw info':params,
+            'filetype':self.name,
              #'time':self._time_from_TODO(raw_info[]),
              }
+        # TODO: distinguish between force clamp and velocity clamp
+        # experiments.  Note that the JPK file format is flexible
+        # enough to support mixed experiments (i.e. both force clamp
+        # and velocity clamp segments in a single experiment), but I
+        # have no idea what sort of analysis such experiments would
+        # require ;).
+        info['experiment'] = experiment.VelocityClamp
          force_unit = chan_info['channel']['vDeflection']['conversion-set']['conversion']['force']['scaling']['unit']['unit']
          assert force_unit == 'N', force_unit
          force_base = chan_info['channel']['vDeflection']['conversion-set']['conversion']['force']['base-calibration-slot']
@@ -166,16 +172,16 @@ class JPKDriver (Driver):
              'columns':list(params['channels']['list']),
              'name':params['force-segment-header']['name']['name'],
              }
-        if info['name'] == 'extend-spm':
-            info['name'] = 'approach'
-        elif info['name'] == 'retract-spm':
-            info['name'] = 'retract'
+        if info['name'] in ['extend-spm', 'retract-spm', 'pause-at-end-spm']:
+            info['name'] = info['name'][:-len('-spm')]
+            if info['name'] == 'extend':
+                info['name'] = 'approach'
          else:
              raise NotImplementedError(
                  'Unrecognized segment type %s' % info['name'])
          return info
  
-    def _zip_scale_segment(self, segment):
+    def _zip_scale_segment(self, segment, path, info):
          data = curve.Data(
              shape=segment.shape,
              dtype=segment.dtype,
@@ -185,42 +191,56 @@ class JPKDriver (Driver):
  
          # raw column indices
          channels = segment.info['raw info']['channels']['list']
-        z_col = channels.index('height')
-        d_col = channels.index('vDeflection')
-        
-        segment = self._zip_scale_channel(segment, z_col, 'calibrated')
-        segment = self._zip_scale_channel(segment, d_col, 'distance')
-
-        assert segment.info['columns'][z_col] == 'height (m)', \
-            segment.info['columns'][z_col]
-        assert segment.info['columns'][d_col] == 'vDeflection (m)', \
-            segment.info['columns'][d_col]
-
-        # scaled column indices same as raw column indices,
-        # because columns is a copy of channels.list
-        segment.info['columns'][z_col] = 'z piezo (m)'
-        segment.info['columns'][d_col] = 'deflection (m)'
+        for i,channel in enumerate(channels):
+            conversion = None
+            if channel == 'vDeflection':
+                conversion = 'distance'
+            segment = self._zip_scale_channel(
+                segment, channel, conversion=conversion, path=path, info=info)
+            name,unit = split_data_label(segment.info['columns'][i])
+            if name == 'vDeflection':
+                assert unit == 'm', segment.info['columns'][i]
+                segment.info['columns'][i] = join_data_label('deflection', 'm')
+            elif name == 'height':
+                assert unit == 'm', segment.info['columns'][i]
+                segment.info['columns'][i] = join_data_label('z piezo', 'm')
          return segment
  
-    def _zip_scale_channel(self, segment, channel, conversion):
-        channel_name = segment.info['raw info']['channels']['list'][channel]
+    def _zip_scale_channel(self, segment, channel_name, conversion=None,
+                           path=None, info={}):
+        channel = segment.info['raw info']['channels']['list'].index(
+            channel_name)
          conversion_set = segment.info['raw info']['channel'][channel_name]['conversion-set']
+        if conversion == None:
+            conversion = conversion_set['conversions']['default']
+        if conversion == conversion_set['conversions']['base']:
+            # Our conversion is the base data.
+            if conversion != 'volts':
+                raise NotImplementedError(
+                    'unknown units for base channel: %s' % conversion)
+            segment.info['columns'][channel] = join_data_label(
+                channel_name, 'V')
+            return segment
          conversion_info = conversion_set['conversion'][conversion]
+        print channel_name, conversion_info.keys()
          if conversion_info['base-calibration-slot'] \
                  != conversion_set['conversions']['base']:
              # Our conversion is stacked on a previous conversion.  Do
              # the previous conversion first.
              segment = self._zip_scale_channel(
-                segment, channel, conversion_info['base-calibration-slot'])
+                segment, channel_name,
+                conversion_info['base-calibration-slot'],
+                path=path, info=info)
          if conversion_info['type'] == 'file':
-            if os.path.exists(conversion_info['file']):
-                raise NotImplementedError('No calibration files were available for testing')
-            else:
-                log = logging.getLogger('hooke')                
-                log.warn(
-                    'Skipping %s -> %s calibration for %s channel.  Calibration file %s not found'
-                    % (conversion_info['base-calibration-slot'],
-                       conversion, channel_name, conversion_info['file']))
+            # Michael Haggerty at JPK points out that the conversion
+            # information stored in the external file is reproduced in
+            # the force curve file.  So there is no need to actually
+            # read `conversion_info['file']`.  In fact, the data there
+            # may have changed with future calibrations, while the
+            # information stored directly in conversion_info retains
+            # the calibration information as it was when the experiment
+            # was performed.
+            pass  # Fall through to 'simple' conversion processing.
          else:
              assert conversion_info['type'] == 'simple', conversion_info['type']
          assert conversion_info['scaling']['type'] == 'linear', \
@@ -231,7 +251,7 @@ class JPKDriver (Driver):
          offset = float(conversion_info['scaling']['offset'])
          unit = conversion_info['scaling']['unit']['unit']
          segment[:,channel] = segment[:,channel] * multiplier + offset
-        segment.info['columns'][channel] = '%s (%s)' % (channel_name, unit)
+        segment.info['columns'][channel] = join_data_label(channel_name, unit)
          return segment
  
      def _parse_params(self, lines):
@@ -256,5 +276,5 @@ class JPKDriver (Driver):
                      sub_info[setting[-1]] = fields[1]
          return info
  
-    def _read_old(self, path):
+    def _read_old(self, path, info):
          raise NotImplementedError('No old-style JPK files were available for testing, please send us yours: %s' % path)