igor/binarywave.py

   1 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
   2 #
   3 # This file is part of Hooke.
   4 #
   5 # Hooke is free software: you can redistribute it and/or modify it
   6 # under the terms of the GNU Lesser General Public License as
   7 # published by the Free Software Foundation, either version 3 of the
   8 # License, or (at your option) any later version.
   9 #
  10 # Hooke is distributed in the hope that it will be useful, but WITHOUT
  11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
  13 # Public License for more details.
  14 #
  15 # You should have received a copy of the GNU Lesser General Public
  16 # License along with Hooke.  If not, see
  17 # <http://www.gnu.org/licenses/>.
  18
  19 "Read IGOR Binary Wave files into Numpy arrays."
  20
  21 # Based on WaveMetrics' Technical Note 003, "Igor Binary Format"
  22 #   ftp://ftp.wavemetrics.net/IgorPro/Technical_Notes/TN003.zip
  23 # From ftp://ftp.wavemetrics.net/IgorPro/Technical_Notes/TN000.txt
  24 #   We place no restrictions on copying Technical Notes, with the
  25 #   exception that you cannot resell them. So read, enjoy, and
  26 #   share. We hope IGOR Technical Notes will provide you with lots of
  27 #   valuable information while you are developing IGOR applications.
  28
  29 import array as _array
  30 import sys as _sys
  31 import types as _types
  32
  33 import numpy as _numpy
  34
  35 from .struct import Structure as _Structure
  36 from .struct import Field as _Field
  37 from .util import assert_null as _assert_null
  38
  39
  40 # Numpy doesn't support complex integers by default, see
  41 #   http://mail.python.org/pipermail/python-dev/2002-April/022408.html
  42 #   http://mail.scipy.org/pipermail/numpy-discussion/2007-October/029447.html
  43 # So we roll our own types.  See
  44 #   http://docs.scipy.org/doc/numpy/user/basics.rec.html
  45 #   http://docs.scipy.org/doc/numpy/reference/generated/numpy.dtype.html
  46 complexInt8 = _numpy.dtype([('real', _numpy.int8), ('imag', _numpy.int8)])
  47 complexInt16 = _numpy.dtype([('real', _numpy.int16), ('imag', _numpy.int16)])
  48 complexInt32 = _numpy.dtype([('real', _numpy.int32), ('imag', _numpy.int32)])
  49 complexUInt8 = _numpy.dtype([('real', _numpy.uint8), ('imag', _numpy.uint8)])
  50 complexUInt16 = _numpy.dtype(
  51     [('real', _numpy.uint16), ('imag', _numpy.uint16)])
  52 complexUInt32 = _numpy.dtype(
  53     [('real', _numpy.uint32), ('imag', _numpy.uint32)])
  54
  55 # Begin IGOR constants and typedefs from IgorBin.h
  56
  57 # From IgorMath.h
  58 TYPE_TABLE = {        # (key: integer flag, value: numpy dtype)
  59     0:None,           # Text wave, not handled in ReadWave.c
  60     1:_numpy.complex, # NT_CMPLX, makes number complex.
  61     2:_numpy.float32, # NT_FP32, 32 bit fp numbers.
  62     3:_numpy.complex64,
  63     4:_numpy.float64, # NT_FP64, 64 bit fp numbers.
  64     5:_numpy.complex128,
  65     8:_numpy.int8,    # NT_I8, 8 bit signed integer. Requires Igor Pro
  66                       # 2.0 or later.
  67     9:complexInt8,
  68     0x10:_numpy.int16,# NT_I16, 16 bit integer numbers. Requires Igor
  69                       # Pro 2.0 or later.
  70     0x11:complexInt16,
  71     0x20:_numpy.int32,# NT_I32, 32 bit integer numbers. Requires Igor
  72                       # Pro 2.0 or later.
  73     0x21:complexInt32,
  74 #   0x40:None,        # NT_UNSIGNED, Makes above signed integers
  75 #                     # unsigned. Requires Igor Pro 3.0 or later.
  76     0x48:_numpy.uint8,
  77     0x49:complexUInt8,
  78     0x50:_numpy.uint16,
  79     0x51:complexUInt16,
  80     0x60:_numpy.uint32,
  81     0x61:complexUInt32,
  82 }
  83
  84 # From wave.h
  85 MAXDIMS = 4
  86
  87 # From binary.h
  88 BinHeaderCommon = _Structure(  # WTK: this one is mine.
  89     name='BinHeaderCommon',
  90     fields=[
  91         _Field('h', 'version', help='Version number for backwards compatibility.'),
  92         ])
  93
  94 BinHeader1 = _Structure(
  95     name='BinHeader1',
  96     fields=[
  97         _Field('h', 'version', help='Version number for backwards compatibility.'),
  98         _Field('l', 'wfmSize', help='The size of the WaveHeader2 data structure plus the wave data plus 16 bytes of padding.'),
  99         _Field('h', 'checksum', help='Checksum over this header and the wave header.'),
 100         ])
 101
 102 BinHeader2 = _Structure(
 103     name='BinHeader2',
 104     fields=[
 105         _Field('h', 'version', help='Version number for backwards compatibility.'),
 106         _Field('l', 'wfmSize', help='The size of the WaveHeader2 data structure plus the wave data plus 16 bytes of padding.'),
 107         _Field('l', 'noteSize', help='The size of the note text.'),
 108         _Field('l', 'pictSize', default=0, help='Reserved. Write zero. Ignore on read.'),
 109         _Field('h', 'checksum', help='Checksum over this header and the wave header.'),
 110         ])
 111
 112 BinHeader3 = _Structure(
 113     name='BinHeader3',
 114     fields=[
 115         _Field('h', 'version', help='Version number for backwards compatibility.'),
 116         _Field('h', 'wfmSize', help='The size of the WaveHeader2 data structure plus the wave data plus 16 bytes of padding.'),
 117         _Field('l', 'noteSize', help='The size of the note text.'),
 118         _Field('l', 'formulaSize', help='The size of the dependency formula, if any.'),
 119         _Field('l', 'pictSize', default=0, help='Reserved. Write zero. Ignore on read.'),
 120         _Field('h', 'checksum', help='Checksum over this header and the wave header.'),
 121         ])
 122
 123 BinHeader5 = _Structure(
 124     name='BinHeader5',
 125     fields=[
 126         _Field('h', 'version', help='Version number for backwards compatibility.'),
 127         _Field('h', 'checksum', help='Checksum over this header and the wave header.'),
 128         _Field('l', 'wfmSize', help='The size of the WaveHeader5 data structure plus the wave data.'),
 129         _Field('l', 'formulaSize', help='The size of the dependency formula, if any.'),
 130         _Field('l', 'noteSize', help='The size of the note text.'),
 131         _Field('l', 'dataEUnitsSize', help='The size of optional extended data units.'),
 132         _Field('l', 'dimEUnitsSize', help='The size of optional extended dimension units.', count=MAXDIMS),
 133         _Field('l', 'dimLabelsSize', help='The size of optional dimension labels.', count=MAXDIMS),
 134         _Field('l', 'sIndicesSize', help='The size of string indicies if this is a text wave.'),
 135         _Field('l', 'optionsSize1', default=0, help='Reserved. Write zero. Ignore on read.'),
 136         _Field('l', 'optionsSize2', default=0, help='Reserved. Write zero. Ignore on read.'),
 137         ])
 138
 139
 140 # From wave.h
 141 MAX_WAVE_NAME2 = 18 # Maximum length of wave name in version 1 and 2
 142                     # files. Does not include the trailing null.
 143 MAX_WAVE_NAME5 = 31 # Maximum length of wave name in version 5
 144                     # files. Does not include the trailing null.
 145 MAX_UNIT_CHARS = 3
 146
 147 # Header to an array of waveform data.
 148
 149 WaveHeader2 = _Structure(
 150     name='WaveHeader2',
 151     fields=[
 152         _Field('h', 'type', help='See types (e.g. NT_FP64) above. Zero for text waves.'),
 153         _Field('P', 'next', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 154         _Field('c', 'bname', help='Name of wave plus trailing null.', count=MAX_WAVE_NAME2+2),
 155         _Field('h', 'whVersion', default=0, help='Write 0. Ignore on read.'),
 156         _Field('h', 'srcFldr', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 157         _Field('P', 'fileName', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 158         _Field('c', 'dataUnits', default=0, help='Natural data units go here - null if none.', count=MAX_UNIT_CHARS+1),
 159         _Field('c', 'xUnits', default=0, help='Natural x-axis units go here - null if none.', count=MAX_UNIT_CHARS+1),
 160         _Field('l', 'npnts', help='Number of data points in wave.'),
 161         _Field('h', 'aModified', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 162         _Field('d', 'hsA', help='X value for point p = hsA*p + hsB'),
 163         _Field('d', 'hsB', help='X value for point p = hsA*p + hsB'),
 164         _Field('h', 'wModified', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 165         _Field('h', 'swModified', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 166         _Field('h', 'fsValid', help='True if full scale values have meaning.'),
 167         _Field('d', 'topFullScale', help='The min full scale value for wave.'), # sic, 'min' should probably be 'max'
 168         _Field('d', 'botFullScale', help='The min full scale value for wave.'),
 169         _Field('c', 'useBits', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 170         _Field('c', 'kindBits', default=0, help='Reserved. Write zero. Ignore on read.'),
 171         _Field('P', 'formula', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 172         _Field('l', 'depID', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 173         _Field('L', 'creationDate', help='DateTime of creation.  Not used in version 1 files.'),
 174         _Field('c', 'wUnused', default=0, help='Reserved. Write zero. Ignore on read.', count=2),
 175         _Field('L', 'modDate', help='DateTime of last modification.'),
 176         _Field('P', 'waveNoteH', help='Used in memory only. Write zero. Ignore on read.'),
 177         _Field('f', 'wData', help='The start of the array of waveform data.', count=4),
 178         ])
 179
 180 WaveHeader5 = _Structure(
 181     name='WaveHeader5',
 182     fields=[
 183         _Field('P', 'next', help='link to next wave in linked list.'),
 184         _Field('L', 'creationDate', help='DateTime of creation.'),
 185         _Field('L', 'modDate', help='DateTime of last modification.'),
 186         _Field('l', 'npnts', help='Total number of points (multiply dimensions up to first zero).'),
 187         _Field('h', 'type', help='See types (e.g. NT_FP64) above. Zero for text waves.'),
 188         _Field('h', 'dLock', default=0, help='Reserved. Write zero. Ignore on read.'),
 189         _Field('c', 'whpad1', default=0, help='Reserved. Write zero. Ignore on read.', count=6),
 190         _Field('h', 'whVersion', default=1, help='Write 1. Ignore on read.'),
 191         _Field('c', 'bname', help='Name of wave plus trailing null.', count=MAX_WAVE_NAME5+1),
 192         _Field('l', 'whpad2', default=0, help='Reserved. Write zero. Ignore on read.'),
 193         _Field('P', 'dFolder', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 194         # Dimensioning info. [0] == rows, [1] == cols etc
 195         _Field('l', 'nDim', help='Number of of items in a dimension -- 0 means no data.', count=MAXDIMS),
 196         _Field('d', 'sfA', help='Index value for element e of dimension d = sfA[d]*e + sfB[d].', count=MAXDIMS),
 197         _Field('d', 'sfB', help='Index value for element e of dimension d = sfA[d]*e + sfB[d].', count=MAXDIMS),
 198         # SI units
 199         _Field('c', 'dataUnits', default=0, help='Natural data units go here - null if none.', count=MAX_UNIT_CHARS+1),
 200         _Field('c', 'dimUnits', default=0, help='Natural dimension units go here - null if none.', count=(MAXDIMS, MAX_UNIT_CHARS+1)),
 201         _Field('h', 'fsValid', help='TRUE if full scale values have meaning.'),
 202         _Field('h', 'whpad3', default=0, help='Reserved. Write zero. Ignore on read.'),
 203         _Field('d', 'topFullScale', help='The max and max full scale value for wave'), # sic, probably "max and min"
 204         _Field('d', 'botFullScale', help='The max and max full scale value for wave.'), # sic, probably "max and min"
 205         _Field('P', 'dataEUnits', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 206         _Field('P', 'dimEUnits', default=0, help='Used in memory only. Write zero.  Ignore on read.', count=MAXDIMS),
 207         _Field('P', 'dimLabels', default=0, help='Used in memory only. Write zero.  Ignore on read.', count=MAXDIMS),
 208         _Field('P', 'waveNoteH', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 209         _Field('l', 'whUnused', default=0, help='Reserved. Write zero. Ignore on read.', count=16),
 210         # The following stuff is considered private to Igor.
 211         _Field('h', 'aModified', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 212         _Field('h', 'wModified', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 213         _Field('h', 'swModified', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 214         _Field('c', 'useBits', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 215         _Field('c', 'kindBits', default=0, help='Reserved. Write zero. Ignore on read.'),
 216         _Field('P', 'formula', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 217         _Field('l', 'depID', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 218         _Field('h', 'whpad4', default=0, help='Reserved. Write zero. Ignore on read.'),
 219         _Field('h', 'srcFldr', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 220         _Field('P', 'fileName', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 221         _Field('P', 'sIndices', default=0, help='Used in memory only. Write zero. Ignore on read.'),
 222         _Field('f', 'wData', help='The start of the array of data.  Must be 64 bit aligned.', count=1),
 223         ])
 224
 225 # End IGOR constants and typedefs from IgorBin.h
 226
 227 # Begin functions from ReadWave.c
 228
 229 def need_to_reorder_bytes(version):
 230     # If the low order byte of the version field of the BinHeader
 231     # structure is zero then the file is from a platform that uses
 232     # different byte-ordering and therefore all data will need to be
 233     # reordered.
 234     return version & 0xFF == 0
 235
 236 def byte_order(needToReorderBytes):
 237     little_endian = _sys.byteorder == 'little'
 238     if needToReorderBytes:
 239         little_endian = not little_endian
 240     if little_endian:
 241         return '<'  # little-endian
 242     return '>'  # big-endian
 243
 244 def version_structs(version, byte_order):
 245     if version == 1:
 246         bin = BinHeader1
 247         wave = WaveHeader2
 248     elif version == 2:
 249         bin = BinHeader2
 250         wave = WaveHeader2
 251     elif version == 3:
 252         bin = BinHeader3
 253         wave = WaveHeader2
 254     elif version == 5:
 255         bin = BinHeader5
 256         wave = WaveHeader5
 257     else:
 258         raise ValueError(
 259             ('This does not appear to be a valid Igor binary wave file. '
 260              'The version field = {}.\n').format(version))
 261     checkSumSize = bin.size + wave.size
 262     if version == 5:
 263         checkSumSize -= 4  # Version 5 checksum does not include the wData field.
 264     bin.set_byte_order(byte_order)
 265     wave.set_byte_order(byte_order)
 266     return (bin, wave, checkSumSize)
 267
 268 def checksum(buffer, byte_order, oldcksum, numbytes):
 269     x = _numpy.ndarray(
 270         (numbytes/2,), # 2 bytes to a short -- ignore trailing odd byte
 271         dtype=_numpy.dtype(byte_order+'h'),
 272         buffer=buffer)
 273     oldcksum += x.sum()
 274     if oldcksum > 2**31:  # fake the C implementation's int rollover
 275         oldcksum %= 2**32
 276         if oldcksum > 2**31:
 277             oldcksum -= 2**31
 278     return oldcksum & 0xffff
 279
 280 # Translated from ReadWave()
 281 def loadibw(filename, strict=True):
 282     if hasattr(filename, 'read'):
 283         f = filename  # filename is actually a stream object
 284     else:
 285         f = open(filename, 'rb')
 286     try:
 287         BinHeaderCommon.set_byte_order('=')
 288         b = buffer(f.read(BinHeaderCommon.size))
 289         version = BinHeaderCommon.unpack_dict_from(b)['version']
 290         needToReorderBytes = need_to_reorder_bytes(version)
 291         byteOrder = byte_order(needToReorderBytes)
 292
 293         if needToReorderBytes:
 294             BinHeaderCommon.set_byte_order(byteOrder)
 295             version = BinHeaderCommon.unpack_dict_from(b)['version']
 296         bin_struct,wave_struct,checkSumSize = version_structs(version, byteOrder)
 297
 298         b = buffer(b + f.read(bin_struct.size + wave_struct.size - BinHeaderCommon.size))
 299         c = checksum(b, byteOrder, 0, checkSumSize)
 300         if c != 0:
 301             raise ValueError(
 302                 ('This does not appear to be a valid Igor binary wave file.  '
 303                  'Error in checksum: should be 0, is {}.').format(c))
 304         bin_info = bin_struct.unpack_dict_from(b)
 305         wave_info = wave_struct.unpack_dict_from(b, offset=bin_struct.size)
 306         if version in [1,2,3]:
 307             tail = 16  # 16 = size of wData field in WaveHeader2 structure
 308             waveDataSize = bin_info['wfmSize'] - wave_struct.size
 309             # =  bin_info['wfmSize']-16 - (wave_struct.size - tail)
 310         else:
 311             assert version == 5, version
 312             tail = 4  # 4 = size of wData field in WaveHeader5 structure
 313             waveDataSize = bin_info['wfmSize'] - (wave_struct.size - tail)
 314         # dtype() wrapping to avoid numpy.generic and
 315         # getset_descriptor issues with the builtin numpy types
 316         # (e.g. int32).  It has no effect on our local complex
 317         # integers.
 318         if version == 5:
 319             shape = [n for n in wave_info['nDim'] if n > 0] or (0,)
 320         else:
 321             shape = (wave_info['npnts'],)
 322         t = _numpy.dtype(_numpy.int8)  # setup a safe default
 323         if wave_info['type'] == 0:  # text wave
 324             shape = (waveDataSize,)
 325         elif wave_info['type'] in TYPE_TABLE or wave_info['npnts']:
 326             t = _numpy.dtype(TYPE_TABLE[wave_info['type']])
 327             assert waveDataSize == wave_info['npnts'] * t.itemsize, (
 328                 '{}, {}, {}, {}'.format(
 329                     waveDataSize, wave_info['npnts'], t.itemsize, t))
 330         else:
 331             pass  # formula waves
 332         if wave_info['npnts'] == 0:
 333             data_b = buffer('')
 334         else:
 335             tail_data = _array.array('f', b[-tail:])
 336             data_b = buffer(buffer(tail_data) + f.read(waveDataSize-tail))
 337         data = _numpy.ndarray(
 338             shape=shape,
 339             dtype=t.newbyteorder(byteOrder),
 340             buffer=data_b,
 341             order='F',
 342             )
 343
 344         if version == 1:
 345             pass  # No post-data information
 346         elif version == 2:
 347             # Post-data info:
 348             #   * 16 bytes of padding
 349             #   * Optional wave note data
 350             pad_b = buffer(f.read(16))  # skip the padding
 351             _assert_null(pad_b, strict=strict)
 352             bin_info['note'] = str(f.read(bin_info['noteSize'])).strip()
 353         elif version == 3:
 354             # Post-data info:
 355             #   * 16 bytes of padding
 356             #   * Optional wave note data
 357             #   * Optional wave dependency formula
 358             """Excerpted from TN003:
 359
 360             A wave has a dependency formula if it has been bound by a
 361             statement such as "wave0 := sin(x)". In this example, the
 362             dependency formula is "sin(x)". The formula is stored with
 363             no trailing null byte.
 364             """
 365             pad_b = buffer(f.read(16))  # skip the padding
 366             _assert_null(pad_b, strict=strict)
 367             bin_info['note'] = str(f.read(bin_info['noteSize'])).strip()
 368             bin_info['formula'] = str(f.read(bin_info['formulaSize'])).strip()
 369         elif version == 5:
 370             # Post-data info:
 371             #   * Optional wave dependency formula
 372             #   * Optional wave note data
 373             #   * Optional extended data units data
 374             #   * Optional extended dimension units data
 375             #   * Optional dimension label data
 376             #   * String indices used for text waves only
 377             """Excerpted from TN003:
 378
 379             dataUnits - Present in versions 1, 2, 3, 5. The dataUnits
 380               field stores the units for the data represented by the
 381               wave. It is a C string terminated with a null
 382               character. This field supports units of 0 to 3 bytes. In
 383               version 1, 2 and 3 files, longer units can not be
 384               represented. In version 5 files, longer units can be
 385               stored using the optional extended data units section of
 386               the file.
 387
 388             xUnits - Present in versions 1, 2, 3. The xUnits field
 389               stores the X units for a wave. It is a C string
 390               terminated with a null character.  This field supports
 391               units of 0 to 3 bytes. In version 1, 2 and 3 files,
 392               longer units can not be represented.
 393
 394             dimUnits - Present in version 5 only. This field is an
 395               array of 4 strings, one for each possible wave
 396               dimension. Each string supports units of 0 to 3
 397               bytes. Longer units can be stored using the optional
 398               extended dimension units section of the file.
 399             """
 400             bin_info['formula'] = str(f.read(bin_info['formulaSize'])).strip()
 401             bin_info['note'] = str(f.read(bin_info['noteSize'])).strip()
 402             bin_info['dataEUnits'] = str(f.read(bin_info['dataEUnitsSize'])).strip()
 403             bin_info['dimEUnits'] = [
 404                 str(f.read(size)).strip() for size in bin_info['dimEUnitsSize']]
 405             bin_info['dimLabels'] = []
 406             for size in bin_info['dimLabelsSize']:
 407                 labels = str(f.read(size)).split(chr(0)) # split null-delimited strings
 408                 bin_info['dimLabels'].append([L for L in labels if len(L) > 0])
 409             if wave_info['type'] == 0:  # text wave
 410                 bin_info['sIndices'] = f.read(bin_info['sIndicesSize'])
 411
 412         if wave_info['type'] == 0:  # text wave
 413             # use sIndices to split data into strings
 414             strings = []
 415             start = 0
 416             for i,string_index in enumerate(bin_info['sIndices']):
 417                 offset = ord(string_index)
 418                 if offset > start:
 419                     string = data[start:offset]
 420                     strings.append(''.join(chr(x) for x in string))
 421                     start = offset
 422                 else:
 423                     assert offset == 0, offset
 424             data = _numpy.array(strings)
 425             shape = [n for n in wave_info['nDim'] if n > 0] or (0,)
 426             data.reshape(shape)
 427     finally:
 428         if not hasattr(filename, 'read'):
 429             f.close()
 430
 431     return data, bin_info, wave_info
 432
 433
 434 def saveibw(filename):
 435     raise NotImplementedError