3 """Structure and Field classes for declaring structures
5 There are a few formats that can be used to represent the same data, a
6 binary packed format with all the data in a buffer, a linearized
7 format with each field in a single Python list, and a nested format
8 with each field in a hierarchy of Python dictionaries.
11 from __future__ import absolute_import
13 import logging as _logging
14 import pprint as _pprint
15 import struct as _struct
17 import numpy as _numpy
19 from . import LOG as _LOG
# Keep a module-level alias to the builtin ``buffer``.  Methods in this
# module (e.g. ``pack_into`` and ``unpack_from``) take a parameter that is
# itself named ``buffer`` and therefore hides the builtin inside their bodies.
_buffer = buffer # save builtin buffer for clobbered situations
26 """Represent a Structure field.
28 The format argument can be a format character from the ``struct``
29 documentation (e.g., ``c`` for ``char``, ``h`` for ``short``, ...)
30 or ``Structure`` instance (for building nested structures).
35 >>> from pprint import pprint
Example of an unsigned integer field:
41 ... 'I', 'time', default=0, help='POSIX time')
44 >>> list(time.pack_data(1))
46 >>> list(time.pack_item(2))
48 >>> time.unpack_data([3])
50 >>> time.unpack_item([4])
53 Example of a multi-dimensional float field:
56 ... 'f', 'data', help='example data', count=(2,3,4))
59 >>> list(data.indexes()) # doctest: +ELLIPSIS
60 [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 1, 0], ..., [1, 2, 3]]
61 >>> list(data.pack_data(
62 ... [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]],
63 ... [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]])
64 ... ) # doctest: +ELLIPSIS
65 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ..., 19, 20, 21, 22, 23]
66 >>> list(data.pack_item(3))
68 >>> data.unpack_data(range(data.arg_count))
69 array([[[ 0, 1, 2, 3],
76 >>> data.unpack_item([3])
79 Example of a nested structure field:
81 >>> run = Structure('run', fields=[time, data])
82 >>> runs = Field(run, 'runs', help='pair of runs', count=2)
83 >>> runs.arg_count # = 2 * (1 + 24)
85 >>> data1 = numpy.arange(data.arg_count).reshape(data.count)
86 >>> data2 = data1 + data.arg_count
87 >>> list(runs.pack_data(
88 ... [{'time': 100, 'data': data1},
89 ... {'time': 101, 'data': data2}])
90 ... ) # doctest: +ELLIPSIS
91 [100, 0, 1, 2, ..., 22, 23, 101, 24, 25, ..., 46, 47]
92 >>> list(runs.pack_item({'time': 100, 'data': data1})
93 ... ) # doctest: +ELLIPSIS
94 [100, 0, 1, 2, ..., 22, 23]
95 >>> pprint(runs.unpack_data(range(runs.arg_count)))
96 [{'data': array([[[ 1, 2, 3, 4],
104 {'data': array([[[26, 27, 28, 29],
112 >>> pprint(runs.unpack_item(range(runs.structure_count)))
113 {'data': array([[[ 1, 2, 3, 4],
122 If you don't give enough values for an array field, the remaining
123 values are filled in with their defaults.
125 >>> list(data.pack_data(
126 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]])) # doctest: +ELLIPSIS
127 Traceback (most recent call last):
129 ValueError: no default for <Field data ...>
131 >>> list(data.pack_data(
132 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]]))
133 [0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
139 def __init__(self, format, name, default=None, help=None, count=1):
142 self.default = default
148 """Setup any dynamic properties of a field.
Use this method to recalculate dynamic properties after
151 changing the basic properties set during initialization.
153 _LOG.debug('setup {}'.format(self))
154 self.item_count = _numpy.prod(self.count) # number of item repeats
155 if isinstance(self.format, Structure):
156 self.structure_count = sum(
157 f.arg_count for f in self.format.fields)
158 self.arg_count = self.item_count * self.structure_count
159 elif self.format == 'x':
160 self.arg_count = 0 # no data in padding bytes
162 self.arg_count = self.item_count # struct.Struct format args
165 return self.__repr__()
168 return '<{} {} {}>'.format(
169 self.__class__.__name__, self.name, id(self))
172 """Iterate through indexes to a possibly multi-dimensional array"""
173 assert self.item_count > 1, self
175 i = [0] * len(self.count)
176 except TypeError: # non-iterable count
177 for i in range(self.count):
180 for i in range(self.item_count):
182 for j,c in enumerate(reversed(self.count)):
183 index.insert(0, i % c)
187 def pack_data(self, data=None):
188 """Linearize a single field's data to a flat list.
190 If the field is repeated (count > 1), the incoming data should
191 be iterable with each iteration returning a single item.
193 if self.item_count > 1:
196 if hasattr(data, 'flat'): # take advantage of numpy's ndarray.flat
198 for item in data.flat:
200 for arg in self.pack_item(item):
202 if items < self.item_count:
203 if f.default is None:
205 'no default for {}.{}'.format(self, f))
206 for i in range(self.item_count - items):
209 for index in self.indexes():
211 if isinstance(index, int):
219 for arg in self.pack_item(item):
221 elif self.item_count:
222 for arg in self.pack_item(data):
225 def pack_item(self, item=None):
226 """Linearize a single count of the field's data to a flat iterable
228 if isinstance(self.format, Structure):
229 for i in self.format._pack_item(item):
232 if self.default is None:
233 raise ValueError('no default for {}'.format(self))
238 def unpack_data(self, data):
239 """Inverse of .pack_data"""
240 _LOG.debug('unpack {} for {} {}'.format(data, self, self.format))
241 iterator = iter(data)
243 items = [iterator.next() for i in range(self.arg_count)]
244 except StopIteration:
245 raise ValueError('not enough data to unpack {}'.format(self))
248 except StopIteration:
251 raise ValueError('too much data to unpack {}'.format(self))
252 if isinstance(self.format, Structure):
253 # break into per-structure clumps
254 s = self.structure_count
255 items = zip(*[items[i::s] for i in range(s)])
257 items = [[i] for i in items]
258 unpacked = [self.unpack_item(i) for i in items]
262 count = 0 # padding bytes, etc.
265 if isinstance(self.format, Structure):
271 raise NotImplementedError('reshape Structure field')
273 unpacked = _numpy.array(unpacked)
274 _LOG.debug('reshape {} data from {} to {}'.format(
275 self, unpacked.shape, count))
276 unpacked = unpacked.reshape(count)
279 def unpack_item(self, item):
"""Inverse of .pack_item"""
281 if isinstance(self.format, Structure):
282 return self.format._unpack_item(item)
284 assert len(item) == 1, item
288 class DynamicField (Field):
289 """Represent a DynamicStructure field with a dynamic definition.
291 Adds the methods ``.pre_pack``, ``pre_unpack``, and
292 ``post_unpack``, all of which are called when a ``DynamicField``
293 is used by a ``DynamicStructure``. Each method takes the
294 arguments ``(parents, data)``, where ``parents`` is a list of
295 ``DynamicStructure``\s that own the field and ``data`` is a dict
296 hierarchy of the structure data.
298 See the ``DynamicStructure`` docstring for the exact timing of the
303 Field, DynamicStructure
305 def pre_pack(self, parents, data):
309 def pre_unpack(self, parents, data):
310 "React to previously unpacked data"
313 def post_unpack(self, parents, data):
314 "React to our own data"
317 def _get_structure_data(self, parents, data, structure):
318 """Extract the data belonging to a particular ancestor structure.
324 for p in parents[1:]:
330 assert s == p, (s, p)
336 class Structure (_struct.Struct):
337 r"""Represent a C structure.
339 A convenient wrapper around struct.Struct that uses Fields and
340 adds dict-handling methods for transparent name assignment.
350 >>> from pprint import pprint
352 Represent the C structures::
360 unsigned short version;
366 >>> time = Field('I', 'time', default=0, help='POSIX time')
368 ... 'h', 'data', default=0, help='example data', count=(2,3))
369 >>> run = Structure('run', fields=[time, data])
371 ... 'H', 'version', default=1, help='example version')
372 >>> runs = Field(run, 'runs', help='pair of runs', count=2)
373 >>> experiment = Structure('experiment', fields=[version, runs])
375 The structures automatically calculate the flattened data format:
379 >>> run.size # 4 + 2*3*2
381 >>> experiment.format
383 >>> experiment.size # 2 + 2 + 2*(4 + 2*3*2)
386 The first two elements in the above size calculation are 2 (for
387 the unsigned short, 'H') and 2 (padding so the unsigned int aligns
388 with a 4-byte block). If you select a byte ordering that doesn't
389 mess with alignment and recalculate the format, the padding goes
392 >>> experiment.set_byte_order('>')
393 >>> experiment.get_format()
398 You can read data out of any object supporting the buffer
401 >>> b = array.array('B', range(experiment.size))
402 >>> d = experiment.unpack_from(buffer=b)
404 {'runs': [{'data': array([[1543, 2057, 2571],
405 [3085, 3599, 4113]]),
407 {'data': array([[5655, 6169, 6683],
408 [7197, 7711, 8225]]),
411 >>> [hex(x) for x in d['runs'][0]['data'].flat]
412 ['0x607L', '0x809L', '0xa0bL', '0xc0dL', '0xe0fL', '0x1011L']
414 You can also read out from strings:
416 >>> d = experiment.unpack(b.tostring())
418 {'runs': [{'data': array([[1543, 2057, 2571],
419 [3085, 3599, 4113]]),
421 {'data': array([[5655, 6169, 6683],
422 [7197, 7711, 8225]]),
426 If you don't give enough values for an array field, the remaining
427 values are filled in with their defaults.
429 >>> experiment.pack_into(buffer=b, data=d)
430 >>> b.tostring()[:17]
431 '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10'
432 >>> b.tostring()[17:]
433 '\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !'
434 >>> run0 = d['runs'].pop(0)
435 >>> b = experiment.pack(data=d)
437 '\x00\x01\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
439 '!\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
441 If you set ``count=0``, the field is ignored.
443 >>> experiment2 = Structure('experiment', fields=[
444 ... version, Field('f', 'ignored', count=0), runs], byte_order='>')
445 >>> experiment2.format
447 >>> d = experiment2.unpack(b)
449 {'ignored': array([], dtype=float64),
450 'runs': [{'data': array([[5655, 6169, 6683],
451 [7197, 7711, 8225]]),
453 {'data': array([[0, 0, 0],
454 [0, 0, 0]]), 'time': 0}],
457 >>> b2 = experiment2.pack(d)
461 _byte_order_symbols = '@=<>!'
463 def __init__(self, name, fields, byte_order='@'):
# '@' (the default): native byte order, size, and alignment;
# '=': native byte order, standard size, no alignment padding.
465 # See http://docs.python.org/library/struct for details
468 self.byte_order = byte_order
475 return '<{} {} {}>'.format(
476 self.__class__.__name__, self.name, id(self))
479 """Setup any dynamic properties of a structure.
Use this method to recalculate dynamic properties after
482 changing the basic properties set during initialization.
484 _LOG.debug('setup {!r}'.format(self))
485 self.set_byte_order(self.byte_order)
488 def set_byte_order(self, byte_order):
489 """Allow changing the format byte_order on the fly.
491 _LOG.debug('set byte order for {!r} to {}'.format(self, byte_order))
492 self.byte_order = byte_order
493 for field in self.fields:
494 if isinstance(field.format, Structure):
495 field.format.set_byte_order(byte_order)
497 def get_format(self):
498 format = self.byte_order + ''.join(self.sub_format())
499 # P format only allowed for native byte ordering
500 # Convert P to I for ILP32 compatibility when running on a LP64.
501 format = format.replace('P', 'I')
503 super(Structure, self).__init__(format=format)
504 except _struct.error as e:
505 raise ValueError((e, format))
508 def sub_format(self):
509 _LOG.debug('calculate sub-format for {!r}'.format(self))
510 for field in self.fields:
511 if isinstance(field.format, Structure):
513 field.format.sub_format()) * field.item_count
515 field_format = [field.format]*field.item_count
516 for fmt in field_format:
519 def _pack_item(self, item=None):
520 """Linearize a single count of the structure's data to a flat iterable
524 for f in self.fields:
528 raise ValueError((f.name, item))
531 for arg in f.pack_data(data):
534 def _unpack_item(self, args):
"""Inverse of ._pack_item"""
537 iterator = iter(args)
538 for f in self.fields:
540 items = [iterator.next() for i in range(f.arg_count)]
541 except StopIteration:
542 raise ValueError('not enough data to unpack {}.{}'.format(
544 data[f.name] = f.unpack_data(items)
547 except StopIteration:
550 raise ValueError('too much data to unpack {}'.format(self))
553 def pack(self, data):
554 args = list(self._pack_item(data))
556 return super(Structure, self).pack(*args)
558 raise ValueError(self.format)
def pack_into(self, buffer, offset=0, data=None):
    """Pack `data` into `buffer`, starting at byte `offset`.

    The nested `data` is first linearized with ``._pack_item``; the
    resulting flat argument list is then handed to the parent class's
    ``pack_into`` (``struct.Struct.pack_into``).

    Arguments:

    buffer -- destination object, passed straight to
        ``struct.Struct.pack_into``
    offset -- byte position in `buffer` where writing starts
    data -- mapping of field names to values; ``None`` (the default)
        is treated as an empty mapping

    Returns whatever ``struct.Struct.pack_into`` returns.
    """
    # Build a fresh dict on every call.  The previous ``data={}``
    # default was a single dict object shared by all calls that
    # omitted `data`, so any mutation would leak between calls.
    if data is None:
        data = {}
    args = list(self._pack_item(data))
    return super(Structure, self).pack_into(buffer, offset, *args)
def unpack(self, *args, **kwargs):
    """Unpack packed data and rebuild it with ``._unpack_item``.

    All positional and keyword arguments are forwarded unchanged to
    the parent class's ``unpack`` (``struct.Struct.unpack``).  The flat
    sequence of values it produces is passed to ``._unpack_item`` and
    that method's result is returned.
    """
    base_unpack = super(Structure, self).unpack
    return self._unpack_item(base_unpack(*args, **kwargs))
569 def unpack_from(self, buffer, offset=0, *args, **kwargs):
571 'unpack {!r} for {!r} ({}, offset={}) with {} ({})'.format(
572 buffer, self, len(buffer), offset, self.format, self.size))
573 args = super(Structure, self).unpack_from(
574 buffer, offset, *args, **kwargs)
575 return self._unpack_item(args)
def get_field(self, name):
    """Return the first entry of ``self.fields`` whose ``.name`` is `name`.

    Every field is examined; when several share the name, the earliest
    one wins.  Raises ``IndexError`` if no field matches.
    """
    matches = []
    for candidate in self.fields:
        if candidate.name == name:
            matches.append(candidate)
    return matches[0]
581 class DebuggingStream (object):
582 def __init__(self, stream):
585 def read(self, size):
586 data = self.stream.read(size)
587 _LOG.debug('read {} from {}: ({}) {!r}'.format(
588 size, self.stream, len(data), data))
592 class DynamicStructure (Structure):
593 r"""Represent a C structure field with a dynamic definition.
595 Any dynamic fields have their ``.pre_pack`` called before any
596 structure packing is done. ``.pre_unpack`` is called for a
597 particular field just before that field's ``.unpack_data`` call.
598 ``.post_unpack`` is called for a particular field just after
599 ``.unpack_data``. If ``.post_unpack`` returns ``True``, the same
600 field is unpacked again.
605 >>> from pprint import pprint
607 This allows you to define structures where some portion of the
608 global structure depends on earlier data. For example, in the
616 You can generate a Python version of this structure in two ways,
617 with a dynamic ``length``, or with a dynamic ``data``. In both
618 cases, the required methods are the same, the only difference is
619 where you attach them.
621 >>> def packer(self, parents, data):
622 ... vector_structure = parents[-1]
623 ... vector_data = self._get_structure_data(
624 ... parents, data, vector_structure)
625 ... length = len(vector_data['data'])
626 ... vector_data['length'] = length
627 ... data_field = vector_structure.get_field('data')
628 ... data_field.count = length
629 ... data_field.setup()
630 >>> def unpacker(self, parents, data):
631 ... vector_structure = parents[-1]
632 ... vector_data = self._get_structure_data(
633 ... parents, data, vector_structure)
634 ... length = vector_data['length']
635 ... data_field = vector_structure.get_field('data')
636 ... data_field.count = length
637 ... data_field.setup()
639 >>> class DynamicLengthField (DynamicField):
640 ... def pre_pack(self, parents, data):
641 ... packer(self, parents, data)
642 ... def post_unpack(self, parents, data):
643 ... unpacker(self, parents, data)
644 >>> dynamic_length_vector = DynamicStructure('vector',
646 ... DynamicLengthField('I', 'length'),
647 ... Field('h', 'data', count=0),
650 >>> class DynamicDataField (DynamicField):
651 ... def pre_pack(self, parents, data):
652 ... packer(self, parents, data)
653 ... def pre_unpack(self, parents, data):
654 ... unpacker(self, parents, data)
655 >>> dynamic_data_vector = DynamicStructure('vector',
657 ... Field('I', 'length'),
658 ... DynamicDataField('h', 'data', count=0),
662 >>> b = '\x00\x00\x00\x02\x01\x02\x03\x04'
663 >>> d = dynamic_length_vector.unpack(b)
665 {'data': array([258, 772]), 'length': 2}
666 >>> d = dynamic_data_vector.unpack(b)
668 {'data': array([258, 772]), 'length': 2}
670 >>> d['data'] = [1,2,3,4]
671 >>> dynamic_length_vector.pack(d)
672 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
673 >>> dynamic_data_vector.pack(d)
674 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
676 The implementation is a good deal more complicated than the one
677 for ``Structure``, because we must make multiple calls to
678 ``struct.Struct.unpack`` to unpack the data.
680 #def __init__(self, *args, **kwargs):
681 # pass #self.parent = ..
683 def _pre_pack(self, parents=None, data=None):
687 parents = parents + [self]
688 for f in self.fields:
689 if hasattr(f, 'pre_pack'):
690 _LOG.debug('pre-pack {}'.format(f))
691 f.pre_pack(parents=parents, data=data)
692 if isinstance(f.format, DynamicStructure):
693 _LOG.debug('pre-pack {!r}'.format(f.format))
694 f._pre_pack(parents=parents, data=data)
696 def pack(self, data):
697 self._pre_pack(data=data)
699 return super(DynamicStructure, self).pack(data)
701 def pack_into(self, buffer, offset=0, data={}):
702 self._pre_pack(data=data)
704 return super(DynamicStructure, self).pack_into(
705 buffer=buffer, offset=offset, data=data)
707 def unpack_stream(self, stream, parents=None, data=None, d=None):
# `d` is the working data dictionary
712 if _LOG.level == _logging.DEBUG:
713 stream = DebuggingStream(stream)
715 parents = parents + [self]
717 for f in self.fields:
718 _LOG.debug('parsing {!r}.{} (count={}, item_count={})'.format(
719 self, f, f.count, f.item_count))
720 if _LOG.level <= _logging.DEBUG:
721 _LOG.debug('data:\n{}'.format(_pprint.pformat(data)))
722 if hasattr(f, 'pre_unpack'):
723 _LOG.debug('pre-unpack {}'.format(f))
724 f.pre_unpack(parents=parents, data=data)
726 if hasattr(f, 'unpack'): # override default unpacking
727 _LOG.debug('override unpack for {}'.format(f))
728 d[f.name] = f.unpack(stream)
731 # setup for unpacking loop
732 if isinstance(f.format, Structure):
733 f.format.set_byte_order(self.byte_order)
736 if isinstance(f.format, DynamicStructure):
737 if f.item_count == 1:
738 # TODO, fix in case we *want* an array
740 f.format.unpack_stream(
741 stream, parents=parents, data=data, d=d[f.name])
744 for i in range(f.item_count):
747 f.format.unpack_stream(
748 stream, parents=parents, data=data, d=x)
749 if hasattr(f, 'post_unpack'):
750 _LOG.debug('post-unpack {}'.format(f))
751 repeat = f.post_unpack(parents=parents, data=data)
753 raise NotImplementedError(
754 'cannot repeat unpack for dynamic structures')
756 if isinstance(f.format, Structure):
757 _LOG.debug('parsing {} bytes for {}'.format(
758 f.format.size, f.format.format))
759 bs = [stream.read(f.format.size) for i in range(f.item_count)]
761 f.format.set_byte_order(self.byte_order)
764 x = [f.format.unpack_from(b) for b in bs]
765 if len(x) == 1: # TODO, fix in case we *want* an array
769 field_format = self.byte_order + f.format*f.item_count
770 field_format = field_format.replace('P', 'I')
772 size = _struct.calcsize(field_format)
773 except _struct.error as e:
775 _LOG.error('{}.{}: {}'.format(self, f, field_format))
777 _LOG.debug('parsing {} bytes for preliminary {}'.format(
779 raw = stream.read(size)
782 'not enough data to unpack {}.{} ({} < {})'.format(
783 self, f, len(raw), size))
785 field_format = self.byte_order + f.format*f.item_count
786 field_format = field_format.replace('P', 'I')
787 _LOG.debug('parse previous bytes using {}'.format(
789 struct = _struct.Struct(field_format)
790 items = struct.unpack(raw)
791 return f.unpack_data(items)
797 if hasattr(f, 'post_unpack'):
798 _LOG.debug('post-unpack {}'.format(f))
799 repeat = f.post_unpack(parents=parents, data=data)
803 _LOG.debug('repeat unpack for {}'.format(f))
def unpack(self, string):
    """Unpack `string` by reading it through an in-memory byte stream.

    `string` is wrapped in a ``BytesIO`` object and handed to
    ``.unpack_stream``; that method's return value is returned as is.
    """
    return self.unpack_stream(_io.BytesIO(string))
def unpack_from(self, buffer, offset=0, *args, **kwargs):
    """Unpack from `buffer` at `offset` using the compiled struct format.

    ``super(Structure, self)`` starts the method lookup *after*
    ``Structure``, so ``Structure.unpack_from`` is skipped and the
    underlying ``struct.Struct.unpack_from`` is called directly.  The
    flat values it returns are rebuilt with ``._unpack_item``.  This
    path does not go through ``.unpack_stream``.
    """
    raw_unpack_from = super(Structure, self).unpack_from
    return self._unpack_item(
        raw_unpack_from(buffer, offset, *args, **kwargs))