Attach raw info etc. to curve in MFP3D driver (used to be just data blocks).

[hooke.git] / hooke / driver / mfp3d.py
diff --git a/hooke/driver/mfp3d.py b/hooke/driver/mfp3d.py

index 1c5f209b16869c55ad6c2b5365cfdf24eb0f0bf9..ccffd5e8de798f1e7afb1d9fa9db6e5f00ae394b 100644 (file)
--- a/hooke/driver/mfp3d.py
+++ b/hooke/driver/mfp3d.py
@@ -6,15 +6,15 @@
  #
  # This file is part of Hooke.
  #
-# Hooke is free software: you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation, either
-# version 3 of the License, or (at your option) any later version.
+# Hooke is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
  #
-# Hooke is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
+# Hooke is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
+# Public License for more details.
  #
  # You should have received a copy of the GNU Lesser General Public
  # License along with Hooke.  If not, see
@@ -22,7 +22,7 @@
  
  """Driver for MFP-3D files.
  
-This driver reads Igor binary waves.
+This driver reads IGOR binary waves.
  
  AUTHORS:
  Matlab version: Richard Naud August 2008 (http://lcn.epfl.ch/~naud/)
@@ -30,278 +30,132 @@ Python port: A. Seeholzer October 2008
  Hooke submission: Rolf Schmidt, Alberto Gomez-Casado 2009
  """
  
-# DEFINITION:
-# Reads Igor's (Wavemetric) binary wave format, .ibw, files.
-#
-# ALGORITHM:
-# Parsing proper to version 2, 3, or version 5 (see Technical notes TN003.ifn:
-# http://mirror.optus.net.au/pub/wavemetrics/IgorPro/Technical_Notes/) and data
-# type 2 or 4 (non complex, single or double precision vector, real values).
-#
-# VERSION: 0.1
-#
-# COMMENTS:
-# Only tested for version 2 Igor files for now, testing for 3 and 5 remains to be done.
-# More header data could be passed back if wished. For significance of ignored bytes see
-# the technical notes linked above.
-
-import numpy
+import copy
  import os.path
-import struct
+import pprint
  
-from .. import curve as lhc
+import numpy
  
+from .. import curve as curve
+from .. import experiment as experiment
+from . import Driver as Driver
+from .igorbinarywave import loadibw
  
-__version__='0.0.0.20100310'
  
+__version__='0.0.0.20100604'
  
-class DataChunk(list):
-    #Dummy class to provide ext and ret methods to the data list.
-    
-    def ext(self):
-        halflen=(len(self)/2)
-        return self[0:halflen]
-        
-    def ret(self):
-        halflen=(len(self)/2)
-        return self[halflen:]
  
-class mfp3dDriver(lhc.Driver):
+class MFP3DDriver (Driver):
+    """Handle Asylum Research's MFP3D data format.
+    """
+    def __init__(self):
+        super(MFP3DDriver, self).__init__(name='mfp3d')
  
-    #Construction and other special methods
+    def is_me(self, path):
+        """Look for identifying fields in the IBW note.
+        """
+        if os.path.isdir(path):
+            return False
+        if not path.endswith('.ibw'):
+            return False
+        targets = ['Version:', 'XOPVersion:', 'ForceNote:']
+        found = [False]*len(targets)
+        for line in open(path, 'rU'):
+            for i,ft in enumerate(zip(found, targets)):
+                f,t = ft
+                if f == False and line.startswith(t):
+                    found[i] = True
+        if min(found) == True:
+            return True
+        return False
      
-    def __init__(self,filename):
-        '''
-        constructor method
-        '''
-           
-       self.textfile    =file(filename)
-        self.binfile=file(filename,'rb')
-       #unnecesary, but some other part of the program expects these to be open     
-
-        self.forcechunk=0
-        self.distancechunk=1
-       #TODO eliminate the need to set chunk numbers
-        
-        self.filepath=filename
-        self.debug=True
-        
-       self.data = []
-        self.note = []
-        self.retract_velocity = None
-        self.spring_constant = None
-        self.filename = filename
-
-        self.filedata = open(filename,'rU')
-        self.lines = list(self.filedata.readlines())
-        self.filedata.close()
-
-        self.filetype = 'mfp3d'
-        self.experiment = 'smfs'
-             
+    def read(self, path, info=None):
+        data,bin_info,wave_info = loadibw(path)
+        approach,retract,info = self._translate_ibw(data, bin_info, wave_info)
+        info['filetype'] = self.name
+        info['experiment'] = experiment.VelocityClamp
+        return ([approach, retract], info)
       
-    def _get_data_chunk(self,whichchunk):
-
-       data = None
-        f = open(self.filename, 'rb')
-        ####################### ORDERING
-        # machine format for IEEE floating point with big-endian
-        # byte ordering
-        # MacIgor use the Motorola big-endian 'b'
-        # WinIgor use Intel little-endian 'l'
-        # If the first byte in the file is non-zero, then the file is a WinIgor
-        firstbyte = struct.unpack('b', f.read(1))[0]
-        if firstbyte == 0:
-            format = '>'
-        else:
-            format = '<'
-        #######################  CHECK VERSION
-        f.seek(0)
-        version = struct.unpack(format+'h', f.read(2))[0]
-        #######################  READ DATA AND ACCOMPANYING INFO
-        if version == 2 or version == 3:
-            # pre header
-            wfmSize = struct.unpack(format+'i', f.read(4))[0] # The size of the WaveHeader2 data structure plus the wave data plus 16 bytes of padding.
-            noteSize = struct.unpack(format+'i', f.read(4))[0] # The size of the note text.
-            if version==3:
-                formulaSize = struct.unpack(format+'i', f.read(4))[0]
-            pictSize = struct.unpack(format+'i', f.read(4))[0] # Reserved. Write zero. Ignore on read.
-            checksum = struct.unpack(format+'H', f.read(2))[0] # Checksum over this header and the wave header.
-            # wave header
-            dtype = struct.unpack(format+'h', f.read(2))[0]
-            if dtype == 2:
-                dtype = numpy.float32(.0).dtype
-            elif dtype == 4:
-                dtype = numpy.double(.0).dtype
-            else:
-                assert False, "Wave is of type '%i', not supported" % dtype
-            dtype = dtype.newbyteorder(format)
-
-            ignore = f.read(4) # 1 uint32
-            bname = self._flatten(struct.unpack(format+'20c', f.read(20)))
-            ignore = f.read(4) # 2 int16
-            ignore = f.read(4) # 1 uint32
-            dUnits = self._flatten(struct.unpack(format+'4c', f.read(4)))
-            xUnits = self._flatten(struct.unpack(format+'4c', f.read(4)))
-            npnts = struct.unpack(format+'i', f.read(4))[0]
-            amod = struct.unpack(format+'h', f.read(2))[0]
-            dx = struct.unpack(format+'d', f.read(8))[0]
-            x0 = struct.unpack(format+'d', f.read(8))[0]
-            ignore = f.read(4) # 2 int16
-            fsValid = struct.unpack(format+'h', f.read(2))[0]
-            topFullScale = struct.unpack(format+'d', f.read(8))[0]
-            botFullScale = struct.unpack(format+'d', f.read(8))[0]
-            ignore = f.read(16) # 16 int8
-            modDate = struct.unpack(format+'I', f.read(4))[0]
-            ignore = f.read(4) # 1 uint32
-            # Numpy algorithm works a lot faster than struct.unpack
-            data = numpy.fromfile(f, dtype, npnts)
-
-        elif version == 5:
-            # pre header
-            checksum = struct.unpack(format+'H', f.read(2))[0] # Checksum over this header and the wave header.
-            wfmSize = struct.unpack(format+'i', f.read(4))[0] # The size of the WaveHeader2 data structure plus the wave data plus 16 bytes of padding.
-            formulaSize = struct.unpack(format+'i', f.read(4))[0]
-            noteSize = struct.unpack(format+'i', f.read(4))[0] # The size of the note text.
-            dataEUnitsSize = struct.unpack(format+'i', f.read(4))[0]
-            dimEUnitsSize = struct.unpack(format+'4i', f.read(16))
-            dimLabelsSize = struct.unpack(format+'4i', f.read(16))
-            sIndicesSize = struct.unpack(format+'i', f.read(4))[0]
-            optionSize1 = struct.unpack(format+'i', f.read(4))[0]
-            optionSize2 = struct.unpack(format+'i', f.read(4))[0]
-
-            # header
-            ignore = f.read(4)
-            CreationDate =  struct.unpack(format+'I',f.read(4))[0]
-            modData =  struct.unpack(format+'I',f.read(4))[0]
-            npnts =  struct.unpack(format+'i',f.read(4))[0]
-            # wave header
-            dtype = struct.unpack(format+'h',f.read(2))[0]
-            if dtype == 2:
-                dtype = numpy.float32(.0).dtype
-            elif dtype == 4:
-                dtype = numpy.double(.0).dtype
+    def _translate_ibw(self, data, bin_info, wave_info):
+        if bin_info['version'] != 5:
+            raise NotImplementedError('IBW version %d (< 5) not supported'
+                                      % bin_info['version'])
+            # We need version 5 for multidimensional arrays.
+
+        # Parse the note into a dictionary
+        note = {}
+        for line in bin_info['note'].split('\r'):
+            fields = [x.strip() for x in line.split(':', 1)]
+            key = fields[0]
+            if len(fields) == 2:
+                value = fields[1]
              else:
-                assert False, "Wave is of type '%i', not supported" % dtype
-            dtype = dtype.newbyteorder(format)
-
-            ignore = f.read(2) # 1 int16
-            ignore = f.read(6) # 6 schar, SCHAR = SIGNED CHAR?         ignore = fread(fid,6,'schar'); #
-            ignore = f.read(2) # 1 int16
-            bname = self._flatten(struct.unpack(format+'32c',f.read(32)))
-            ignore = f.read(4) # 1 int32
-            ignore = f.read(4) # 1 int32
-            ndims = struct.unpack(format+'4i',f.read(16)) # Number of of items in a dimension -- 0 means no data.
-            sfA = struct.unpack(format+'4d',f.read(32))
-            sfB = struct.unpack(format+'4d',f.read(32))
-            dUnits = self._flatten(struct.unpack(format+'4c',f.read(4)))
-            xUnits = self._flatten(struct.unpack(format+'16c',f.read(16)))
-            fsValid = struct.unpack(format+'h',f.read(2))
-            whpad3 = struct.unpack(format+'h',f.read(2))
-            ignore = f.read(16) # 2 double
-            ignore = f.read(40) # 10 int32
-            ignore = f.read(64) # 16 int32
-            ignore = f.read(6) # 3 int16
-            ignore = f.read(2) # 2 char
-            ignore = f.read(4) # 1 int32
-            ignore = f.read(4) # 2 int16
-            ignore = f.read(4) # 1 int32
-            ignore = f.read(8) # 2 int32
-
-            data = numpy.fromfile(f, dtype, npnts)
-            note_str = f.read(noteSize)
-            note_lines = note_str.split('\r')
-            self.note = {}
-            for line in note_lines:
-               if ':' in line:
-                   key, value = line.split(':', 1)
-                    self.note[key] = value
-            self.retract_velocity = float(self.note['Velocity'])
-            self.spring_constant = float(self.note['SpringConstant'])
+                value = None
+            note[key] = value
+        bin_info['note'] = note
+        if note['VerDate'] not in ['80501.041', '80501.0207']:
+            raise Exception(note['VerDate'])
+            raise NotImplementedError(
+                '%s file version %s not supported (yet!)\n%s'
+                % (self.name, note['VerDate'], pprint.pformat(note)))
+
+        info = {
+            'raw info':{'bin':bin_info,
+                        'wave':wave_info},
+            'time':wave_info['creationDate'],
+            'spring constant (N/m)':note['SpringConstant'],
+            }
+        # MFP3D's native data dimensions match Hooke's (<point>, <column>) layout.
+        approach = self._scale_block(data[:wave_info['npnts']/2,:], info, 'approach')
+        retract = self._scale_block(data[wave_info['npnts']/2:,:], info, 'retract')
+        return (approach, retract, info)
+
+    def _scale_block(self, data, info, name):
+        """Convert the block from its native format to a `numpy.float`
+        array in SI units.
+        """
+        shape = 3
+        # raw column indices
+        columns = info['raw info']['bin']['dimLabels'][1]
+        # Depending on your MFP3D version:
+        #   VerDate 80501.0207: ['Raw', 'Defl', 'LVDT', 'Time']
+        #   VerDate 80501.041:  ['Raw', 'Defl', 'LVDT']
+        if 'Time' in columns:
+            n_col = 3
          else:
-            assert False, "Fileversion is of type '%i', not supported" % dtype
-            data = []
-
-        f.close()
-        if len(data) > 0:
-            #we have 3 columns: deflection, LVDT, raw
-            #TODO detect which is each one
-           count = npnts / 3
-            lvdt = data[:count] 
-            deflection = data[count:2 * count] 
-            #every column contains data for extension and retraction
-            #we assume the same number of points for each
-            #we could possibly extract this info from the note
-            count = npnts / 6
-
-           forcechunk=deflection*self.spring_constant
-           distancechunk=lvdt
-       
-           if  whichchunk==self.forcechunk:
-               return forcechunk
-           if whichchunk==self.distancechunk:
-               return distancechunk
-        else:
-            return None                          
-        
-    def _force(self):
-       #returns force vector
-        Kspring=self.spring_constant
-        return DataChunk([(meter*Kspring) for meter in self._deflection()])
-
-    def _deflection(self):
-        #for internal use (feeds _force)
-        deflect=self.data_chunks[self.forcechunk]/self.spring_constant              
-        return deflect
-
-    def _flatten(self, tup):
-        out = ''
-        for ch in tup:
-            out += ch
-        return out            
-    
-    def _Z(self):   
-        return DataChunk(self.data_chunks[self.distancechunk])
-        
-    def is_me(self):
-        if len(self.lines) < 34:
-            return False
-
-        name, extension = os.path.splitext(self.filename)
-        if extension == '.ibw':
-            for line in self.lines:
-                if line.startswith('ForceNote:'):
-                   self.data_chunks=[self._get_data_chunk(num) for num in [0,1,2]]
-                    return True
-            else:
-                return False
-        else:
-            return False
-    
-    def close_all(self):
-        '''
-        Explicitly closes all files
-        '''
-        self.textfile.close()
-        self.binfile.close()
-    
-    def default_plots(self):
-        '''
-        creates the default PlotObject
-        '''
-        force=self._force()
-        zdomain=self._Z()
-        main_plot=lhc.PlotObject()
-        main_plot.vectors=[[zdomain.ext(), force.ext()],[zdomain.ret(), force.ret()]]
-        main_plot.normalize_vectors()
-        main_plot.units=['meters','newton']
-        main_plot.destination=0
-        main_plot.title=self.filepath
-        
-        
-        return [main_plot]
-
-    def deflection(self):
-        #interface for correct plotmanip and others
-        deflectionchunk=DataChunk(self._deflection())
-        return deflectionchunk.ext(),deflectionchunk.ret()
+            n_col = 2
+        ret = curve.Data(
+            shape=(data.shape[0], n_col),
+            dtype=numpy.float,
+            info=copy.deepcopy(info)
+            )
+        ret.info['name'] = name
+        ret.info['raw data'] = data # store the raw data
+
+        z_rcol = columns.index('LVDT')
+        d_rcol = columns.index('Defl')
+
+        # scaled column indices
+        ret.info['columns'] = ['z piezo (m)', 'deflection (m)']
+        z_scol = ret.info['columns'].index('z piezo (m)')
+        d_scol = ret.info['columns'].index('deflection (m)')
+
+        # Leading '-' because increasing voltage extends the piezo,
+        # moving the tip towards the surface (positive indentation),
+        # but it makes more sense to me to have it increase away from
+        # the surface (positive separation).
+        ret[:,z_scol] = -data[:,z_rcol].astype(ret.dtype)
+
+        # Leading '-' because deflection voltage increases as the tip
+        # moves away from the surface, but it makes more sense to me
+        # to have it increase as it moves toward the surface (positive
+        # tension on the protein chain).
+        ret[:,d_scol] = -data[:,d_rcol]
+
+        if 'Time' in columns:
+            ret.info['columns'].append('time (s)')
+            t_rcol = columns.index('Time')
+            t_scol = ret.info['columns'].index('time (s)')
+            ret[:,t_scol] = data[:,t_rcol]
+
+        return ret