igor/struct.py

   1 # Copyright
   2
   3 """Structure and Field classes for declaring structures
   4
   5 There are a few formats that can be used to represent the same data, a
   6 binary packed format with all the data in a buffer, a linearized
   7 format with each field in a single Python list, and a nested format
   8 with each field in a hierarchy of Python dictionaries.
   9 """
  10
  11 from __future__ import absolute_import
  12 import struct as _struct
  13
  14 import numpy as _numpy
  15
  16
  17 _buffer = buffer  # save builtin buffer for clobbered situations
  18
  19
  20 class Field (object):
  21     """Represent a Structure field.
  22
  23     The format argument can be a format character from the ``struct``
  24     documentation (e.g., ``c`` for ``char``, ``h`` for ``short``, ...)
  25     or ``Structure`` instance (for building nested structures).
  26
  27     Examples
  28     --------
  29
  30     >>> from pprint import pprint
  31     >>> import numpy
  32
  33     Example of an unsigned short integer field:
  34
  35     >>> time = Field(
  36     ...     'I', 'time', default=0, help='POSIX time')
  37     >>> time.total_count
  38     1
  39     >>> list(time.pack_data(1))
  40     [1]
  41     >>> list(time.pack_item(2))
  42     [2]
  43     >>> time.unpack_data([3])
  44     3
  45     >>> time.unpack_item([4])
  46     4
  47
  48     Example of a multi-dimensional float field:
  49
  50     >>> data = Field(
  51     ...     'f', 'data', help='example data', count=(2,3,4))
  52     >>> data.total_count
  53     24
  54     >>> list(data.indexes())  # doctest: +ELLIPSIS
  55     [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 1, 0], ..., [1, 2, 3]]
  56     >>> list(data.pack_data(
  57     ...     [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]],
  58     ...      [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]])
  59     ...     )  # doctest: +ELLIPSIS
  60     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ..., 19, 20, 21, 22, 23]
  61     >>> list(data.pack_item(3))
  62     [3]
  63     >>> data.unpack_data(range(data.total_count))
  64     array([[[ 0,  1,  2,  3],
  65             [ 4,  5,  6,  7],
  66             [ 8,  9, 10, 11]],
  67     <BLANKLINE>
  68            [[12, 13, 14, 15],
  69             [16, 17, 18, 19],
  70             [20, 21, 22, 23]]])
  71     >>> data.unpack_item([3])
  72     3
  73
  74     Example of a nested structure field:
  75
  76     >>> run = Structure('run', fields=[time, data])
  77     >>> runs = Field(run, 'runs', help='pair of runs', count=2)
  78     >>> runs.total_count  # = 2 * (1 + 24)
  79     50
  80     >>> data1 = numpy.arange(data.total_count).reshape(data.count)
  81     >>> data2 = data1 + data.total_count
  82     >>> list(runs.pack_data(
  83     ...     [{'time': 100, 'data': data1},
  84     ...      {'time': 101, 'data': data2}])
  85     ...     )  # doctest: +ELLIPSIS
  86     [100, 0, 1, 2, ..., 22, 23, 101, 24, 25, ..., 46, 47]
  87     >>> list(runs.pack_item({'time': 100, 'data': data1})
  88     ...     )  # doctest: +ELLIPSIS
  89     [100, 0, 1, 2, ..., 22, 23]
  90     >>> pprint(runs.unpack_data(range(runs.total_count)))
  91     [{'data': array([[[ 1,  2,  3,  4],
  92             [ 5,  6,  7,  8],
  93             [ 9, 10, 11, 12]],
  94     <BLANKLINE>
  95            [[13, 14, 15, 16],
  96             [17, 18, 19, 20],
  97             [21, 22, 23, 24]]]),
  98       'time': 0},
  99      {'data': array([[[26, 27, 28, 29],
 100             [30, 31, 32, 33],
 101             [34, 35, 36, 37]],
 102     <BLANKLINE>
 103            [[38, 39, 40, 41],
 104             [42, 43, 44, 45],
 105             [46, 47, 48, 49]]]),
 106       'time': 25}]
 107     >>> pprint(runs.unpack_item(range(runs.structure_count)))
 108     {'data': array([[[ 1,  2,  3,  4],
 109             [ 5,  6,  7,  8],
 110             [ 9, 10, 11, 12]],
 111     <BLANKLINE>
 112            [[13, 14, 15, 16],
 113             [17, 18, 19, 20],
 114             [21, 22, 23, 24]]]),
 115      'time': 0}
 116
 117     If you don't give enough values for an array field, the remaining
 118     values are filled in with their defaults.
 119
 120     >>> list(data.pack_data(
 121     ...     [[[0, 1, 2, 3], [4, 5, 6]], [[10]]]))  # doctest: +ELLIPSIS
 122     Traceback (most recent call last):
 123       ...
 124     ValueError: no default for <Field data ...>
 125     >>> data.default = 0
 126     >>> list(data.pack_data(
 127     ...     [[[0, 1, 2, 3], [4, 5, 6]], [[10]]]))
 128     [0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 129
 130     See Also
 131     --------
 132     Structure
 133     """
 134     def __init__(self, format, name, default=None, help=None, count=1):
 135         self.format = format
 136         self.name = name
 137         self.default = default
 138         self.help = help
 139         self.count = count
 140         self.item_count = _numpy.prod(count)  # number of item repeats
 141         if isinstance(self.format, Structure):
 142             self.structure_count = sum(f.total_count for f in format.fields)
 143             self.total_count = self.item_count * self.structure_count
 144         else:
 145             self.total_count = self.item_count  # struct.Struct format chars
 146
 147     def __str__(self):
 148         return self.__repr__()
 149
 150     def __repr__(self):
 151         return '<{} {} {}>'.format(
 152             self.__class__.__name__, self.name, id(self))
 153
 154     def indexes(self):
 155         """Iterate through indexes to a possibly multi-dimensional array"""
 156         assert self.item_count > 1, self
 157         try:
 158             i = [0] * len(self.count)
 159         except TypeError:  # non-iterable count
 160             for i in range(self.count):
 161                 yield i
 162         else:
 163             for i in range(self.item_count):
 164                 index = []
 165                 for j,c in enumerate(reversed(self.count)):
 166                     index.insert(0, i % c)
 167                     i /= c
 168                 yield index
 169
 170     def pack_data(self, data=None):
 171         """Linearize a single field's data to a flat list.
 172
 173         If the field is repeated (count > 1), the incoming data should
 174         be iterable with each iteration returning a single item.
 175         """
 176         if self.item_count > 1:
 177             if data is None:
 178                 data = []
 179             if hasattr(data, 'flat'):  # take advantage of numpy's ndarray.flat
 180                 items = 0
 181                 for item in data.flat:
 182                     items += 1
 183                     for arg in self.pack_item(item):
 184                         yield arg
 185                 if items < self.item_count:
 186                     if f.default is None:
 187                         raise ValueError(
 188                             'no default for {}.{}'.format(self, f))
 189                     for i in range(self.item_count - items):
 190                         yield f.default
 191             else:
 192                 for index in self.indexes():
 193                     try:
 194                         if isinstance(index, int):
 195                             item = data[index]
 196                         else:
 197                             item = data
 198                             for i in index:
 199                                 item = item[i]
 200                     except IndexError:
 201                         item = None
 202                     for arg in self.pack_item(item):
 203                         yield arg
 204         elif self.item_count:
 205             for arg in self.pack_item(data):
 206                 yield arg
 207
 208     def pack_item(self, item=None):
 209         """Linearize a single count of the field's data to a flat iterable
 210         """
 211         if isinstance(self.format, Structure):
 212             for i in self.format._pack_item(item):
 213                 yield i
 214         elif item is None:
 215             if self.default is None:
 216                 raise ValueError('no default for {}'.format(self))
 217             yield self.default
 218         else:
 219             yield item
 220
 221     def unpack_data(self, data):
 222         """Inverse of .pack_data"""
 223         iterator = iter(data)
 224         try:
 225             items = [iterator.next() for i in range(self.total_count)]
 226         except StopIteration:
 227             raise ValueError('not enough data to unpack {}'.format(self))
 228         try:
 229             iterator.next()
 230         except StopIteration:
 231             pass
 232         else:
 233             raise ValueError('too much data to unpack {}'.format(self))
 234         if isinstance(self.format, Structure):
 235             # break into per-structure clumps
 236             s = self.structure_count
 237             items = zip(*[items[i::s] for i in range(s)])
 238         else:
 239             items = [[i] for i in items]
 240         unpacked = [self.unpack_item(i) for i in items]
 241         if self.count == 1:
 242             return unpacked[0]
 243         if isinstance(self.format, Structure):
 244             try:
 245                 len(self.count)
 246             except TypeError:
 247                 pass
 248             else:
 249                 raise NotImplementedError('reshape Structure field')
 250         else:
 251             unpacked = _numpy.array(unpacked)
 252             unpacked = unpacked.reshape(self.count)
 253         return unpacked
 254
 255     def unpack_item(self, item):
 256         """Inverse of .unpack_item"""
 257         if isinstance(self.format, Structure):
 258             return self.format._unpack_item(item)
 259         else:
 260             assert len(item) == 1, item
 261             return item[0]
 262
 263
 264 class Structure (_struct.Struct):
 265     r"""Represent a C structure.
 266
 267     A convenient wrapper around struct.Struct that uses Fields and
 268     adds dict-handling methods for transparent name assignment.
 269
 270     See Also
 271     --------
 272     Field
 273
 274     Examples
 275     --------
 276
 277     >>> import array
 278     >>> from pprint import pprint
 279
 280     Represent the C structures::
 281
 282         struct run {
 283           unsigned int time;
 284           short data[2][3];
 285         }
 286
 287         struct experiment {
 288           unsigned short version;
 289           struct run runs[2];
 290         }
 291
 292     As
 293
 294     >>> time = Field('I', 'time', default=0, help='POSIX time')
 295     >>> data = Field(
 296     ...     'h', 'data', default=0, help='example data', count=(2,3))
 297     >>> run = Structure('run', fields=[time, data])
 298     >>> version = Field(
 299     ...     'H', 'version', default=1, help='example version')
 300     >>> runs = Field(run, 'runs', help='pair of runs', count=2)
 301     >>> experiment = Structure('experiment', fields=[version, runs])
 302
 303     The structures automatically calculate the flattened data format:
 304
 305     >>> run.format
 306     '=Ihhhhhh'
 307     >>> run.size  # 4 + 2*3*2
 308     16
 309     >>> experiment.format
 310     '=HIhhhhhhIhhhhhh'
 311     >>> experiment.size  # 2 + 2*(4 + 2*3*2)
 312     34
 313
 314     You can read data out of any object supporting the buffer
 315     interface:
 316
 317     >>> b = array.array('B', range(experiment.size))
 318     >>> experiment.set_byte_order('>')
 319     >>> d = experiment.unpack_from(buffer=b)
 320     >>> pprint(d)
 321     {'runs': [{'data': array([[1543, 2057, 2571],
 322            [3085, 3599, 4113]]),
 323                'time': 33752069},
 324               {'data': array([[5655, 6169, 6683],
 325            [7197, 7711, 8225]]),
 326                'time': 303240213}],
 327      'version': 1}
 328     >>> [hex(x) for x in d['runs'][0]['data'].flat]
 329     ['0x607L', '0x809L', '0xa0bL', '0xc0dL', '0xe0fL', '0x1011L']
 330
 331     You can also read out from strings:
 332
 333     >>> d = experiment.unpack(b.tostring())
 334     >>> pprint(d)
 335     {'runs': [{'data': array([[1543, 2057, 2571],
 336            [3085, 3599, 4113]]),
 337                'time': 33752069},
 338               {'data': array([[5655, 6169, 6683],
 339            [7197, 7711, 8225]]),
 340                'time': 303240213}],
 341      'version': 1}
 342
 343     If you don't give enough values for an array field, the remaining
 344     values are filled in with their defaults.
 345
 346     >>> experiment.pack_into(buffer=b, data=d)
 347     >>> b.tostring()[:17]
 348     '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10'
 349     >>> b.tostring()[17:]
 350     '\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !'
 351     >>> run0 = d['runs'].pop(0)
 352     >>> b = experiment.pack(data=d)
 353     >>> b[:17]
 354     '\x00\x01\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
 355     >>> b[17:]
 356     '!\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
 357
 358     If you set ``count=0``, the field is ignored.
 359
 360     >>> experiment2 = Structure('experiment', fields=[
 361     ...     version, Field('f', 'ignored', count=0), runs], byte_order='>')
 362     >>> experiment2.format
 363     '>HIhhhhhhIhhhhhh'
 364     >>> d = experiment2.unpack(b)
 365     >>> pprint(d)
 366     {'ignored': array([], dtype=float64),
 367      'runs': [{'data': array([[5655, 6169, 6683],
 368            [7197, 7711, 8225]]),
 369                'time': 303240213},
 370               {'data': array([[0, 0, 0],
 371            [0, 0, 0]]), 'time': 0}],
 372      'version': 1}
 373     >>> del d['ignored']
 374     >>> b2 = experiment2.pack(d)
 375     >>> b2 == b
 376     True
 377     """
 378     def __init__(self, name, fields, byte_order='='):
 379         # '=' for native byte order, standard size and alignment
 380         # See http://docs.python.org/library/struct for details
 381         self.name = name
 382         self.fields = fields
 383         self.set_byte_order(byte_order)
 384
 385     def __str__(self):
 386         return self.name
 387
 388     def __repr__(self):
 389         return '<{} {} {}>'.format(
 390             self.__class__.__name__, self.name, id(self))
 391
 392     def set_byte_order(self, byte_order):
 393         """Allow changing the format byte_order on the fly.
 394         """
 395         if (hasattr(self, 'format') and self.format != None
 396             and self.format.startswith(byte_order)):
 397             return  # no need to change anything
 398         format = []
 399         for field in self.fields:
 400             if isinstance(field.format, Structure):
 401                 field_format = field.format.sub_format(
 402                     ) * field.item_count
 403             else:
 404                 field_format = [field.format]*field.item_count
 405             format.extend(field_format)
 406         super(Structure, self).__init__(
 407             format=byte_order+''.join(format).replace('P', 'L'))
 408
 409     def sub_format(self):
 410         return self.format.lstrip('=<>')  # byte order handled by parent
 411
 412     def _pack_item(self, item=None):
 413         """Linearize a single count of the structure's data to a flat iterable
 414         """
 415         if item is None:
 416             item = {}
 417         for f in self.fields:
 418             try:
 419                 data = item[f.name]
 420             except TypeError:
 421                 raise ValueError((f.name, item))
 422             except KeyError:
 423                 data = None
 424             for arg in f.pack_data(data):
 425                 yield arg
 426
 427     def _unpack_item(self, args):
 428         """Inverse of ._unpack_item"""
 429         data = {}
 430         iterator = iter(args)
 431         for f in self.fields:
 432             try:
 433                 items = [iterator.next() for i in range(f.total_count)]
 434             except StopIteration:
 435                 raise ValueError('not enough data to unpack {}.{}'.format(
 436                         self, f))
 437             data[f.name] = f.unpack_data(items)
 438         try:
 439             iterator.next()
 440         except StopIteration:
 441             pass
 442         else:
 443             raise ValueError('too much data to unpack {}'.format(self))
 444         return data
 445
 446     def pack(self, data):
 447         args = list(self._pack_item(data))
 448         return super(Structure, self).pack(*args)
 449
 450     def pack_into(self, buffer, offset=0, data={}):
 451         args = list(self._pack_item(data))
 452         return super(Structure, self).pack_into(
 453             buffer, offset, *args)
 454
 455     def unpack(self, *args, **kwargs):
 456         args = super(Structure, self).unpack(*args, **kwargs)
 457         return self._unpack_item(args)
 458
 459     def unpack_from(self, buffer, offset=0, *args, **kwargs):
 460         try:
 461             args = super(Structure, self).unpack_from(
 462                 buffer, offset, *args, **kwargs)
 463         except _struct.error as e:
 464             if not self.name in ('WaveHeader2', 'WaveHeader5'):
 465                 raise
 466             # HACK!  For WaveHeader5, when npnts is 0, wData is
 467             # optional.  If we couldn't unpack the structure, fill in
 468             # wData with zeros and try again, asserting that npnts is
 469             # zero.
 470             if len(buffer) - offset < self.size:
 471                 # missing wData?  Pad with zeros
 472                 buffer += _buffer('\x00'*(self.size + offset - len(buffer)))
 473             args = super(Structure, self).unpack_from(buffer, offset)
 474             data = self._unpack_item(args)
 475             assert data['npnts'] == 0, data['npnts']
 476         return self._unpack_item(args)