1 # Copyright (C) 2012 W. Trevor King <wking@tremily.us>
3 # This file is part of igor.
5 # igor is free software: you can redistribute it and/or modify it under the
6 # terms of the GNU Lesser General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option) any
10 # igor is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with igor. If not, see <http://www.gnu.org/licenses/>.
18 """Structure and Field classes for declaring structures
20 There are a few formats that can be used to represent the same data, a
21 binary packed format with all the data in a buffer, a linearized
22 format with each field in a single Python list, and a nested format
23 with each field in a hierarchy of Python dictionaries.
26 from __future__ import absolute_import
28 import logging as _logging
29 import pprint as _pprint
30 import struct as _struct
32 import numpy as _numpy
34 from . import LOG as _LOG
38 """Represent a Structure field.
40 The format argument can be a format character from the ``struct``
41 documentation (e.g., ``c`` for ``char``, ``h`` for ``short``, ...)
42 or ``Structure`` instance (for building nested structures).
47 >>> from pprint import pprint
50 Example of an unsigned integer field:
53 ... 'I', 'time', default=0, help='POSIX time')
56 >>> list(time.pack_data(1))
58 >>> list(time.pack_item(2))
60 >>> time.unpack_data([3])
62 >>> time.unpack_item([4])
65 Example of a multi-dimensional float field:
68 ... 'f', 'data', help='example data', count=(2,3,4), array=True)
71 >>> list(data.indexes()) # doctest: +ELLIPSIS
72 [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 1, 0], ..., [1, 2, 3]]
73 >>> list(data.pack_data(
74 ... [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]],
75 ... [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]])
76 ... ) # doctest: +ELLIPSIS
77 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ..., 19, 20, 21, 22, 23]
78 >>> list(data.pack_item(3))
80 >>> data.unpack_data(range(data.arg_count))
81 array([[[ 0, 1, 2, 3],
88 >>> data.unpack_item([3])
91 Example of a nested structure field:
93 >>> run = Structure('run', fields=[time, data])
94 >>> runs = Field(run, 'runs', help='pair of runs', count=2, array=True)
95 >>> runs.arg_count # = 2 * (1 + 24)
97 >>> data1 = numpy.arange(data.arg_count).reshape(data.count)
98 >>> data2 = data1 + data.arg_count
99 >>> list(runs.pack_data(
100 ... [{'time': 100, 'data': data1},
101 ... {'time': 101, 'data': data2}])
102 ... ) # doctest: +ELLIPSIS
103 [100, 0, 1, 2, ..., 22, 23, 101, 24, 25, ..., 46, 47]
104 >>> list(runs.pack_item({'time': 100, 'data': data1})
105 ... ) # doctest: +ELLIPSIS
106 [100, 0, 1, 2, ..., 22, 23]
107 >>> pprint(runs.unpack_data(range(runs.arg_count)))
108 [{'data': array([[[ 1, 2, 3, 4],
116 {'data': array([[[26, 27, 28, 29],
124 >>> pprint(runs.unpack_item(range(runs.structure_count)))
125 {'data': array([[[ 1, 2, 3, 4],
134 If you don't give enough values for an array field, the remaining
135 values are filled in with their defaults.
137 >>> list(data.pack_data(
138 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]])) # doctest: +ELLIPSIS
139 Traceback (most recent call last):
141 ValueError: no default for <Field data ...>
143 >>> list(data.pack_data(
144 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]]))
145 [0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
151 def __init__(self, format, name, default=None, help=None, count=1,
155 self.default = default
162 """Setup any dynamic properties of a field.
164 Use this method to recalculate dynamic properties after
165 changing the basic properties set during initialization.
167 _LOG.debug('setup {}'.format(self))
168 self.item_count = _numpy.prod(self.count) # number of item repeats
169 if not self.array and self.item_count != 1:
171 '{} must be an array field to have a count of {}'.format(
173 if isinstance(self.format, Structure):
174 self.structure_count = sum(
175 f.arg_count for f in self.format.fields)
176 self.arg_count = self.item_count * self.structure_count
177 elif self.format == 'x':
178 self.arg_count = 0 # no data in padding bytes
180 self.arg_count = self.item_count # struct.Struct format args
183 return self.__repr__()
186 return '<{} {} {}>'.format(
187 self.__class__.__name__, self.name, id(self))
190 """Iterate through indexes to a possibly multi-dimensional array"""
191 assert self.array, self
193 i = [0] * len(self.count)
194 except TypeError: # non-iterable count
195 for i in range(self.count):
198 for i in range(self.item_count):
200 for j,c in enumerate(reversed(self.count)):
201 index.insert(0, i % c)
205 def pack_data(self, data=None):
206 """Linearize a single field's data to a flat list.
208 If the field is repeated (count > 1), the incoming data should
209 be iterable with each iteration returning a single item.
214 if hasattr(data, 'flat'): # take advantage of numpy's ndarray.flat
216 for item in data.flat:
218 for arg in self.pack_item(item):
220 if items < self.item_count:
221 if f.default is None:
223 'no default for {}.{}'.format(self, f))
224 for i in range(self.item_count - items):
227 for index in self.indexes():
229 if isinstance(index, int):
237 for arg in self.pack_item(item):
240 for arg in self.pack_item(data):
243 def pack_item(self, item=None):
244 """Linearize a single count of the field's data to a flat iterable
246 if isinstance(self.format, Structure):
247 for i in self.format._pack_item(item):
250 if self.default is None:
251 raise ValueError('no default for {}'.format(self))
256 def unpack_data(self, data):
257 """Inverse of .pack_data"""
258 _LOG.debug('unpack {} for {} {}'.format(data, self, self.format))
259 iterator = iter(data)
261 items = [next(iterator) for i in range(self.arg_count)]
262 except StopIteration:
263 raise ValueError('not enough data to unpack {}'.format(self))
266 except StopIteration:
269 raise ValueError('too much data to unpack {}'.format(self))
270 if isinstance(self.format, Structure):
271 # break into per-structure clumps
272 s = self.structure_count
273 items = zip(*[items[i::s] for i in range(s)])
275 items = [[i] for i in items]
276 unpacked = [self.unpack_item(i) for i in items]
280 count = 0 # padding bytes, etc.
282 assert count == 1, (self, self.count)
284 if isinstance(self.format, Structure):
290 raise NotImplementedError('reshape Structure field')
292 unpacked = _numpy.array(unpacked)
293 _LOG.debug('reshape {} data from {} to {}'.format(
294 self, unpacked.shape, count))
295 unpacked = unpacked.reshape(count)
298 def unpack_item(self, item):
299 """Inverse of .pack_item"""
300 if isinstance(self.format, Structure):
301 return self.format._unpack_item(item)
303 assert len(item) == 1, item
307 class DynamicField (Field):
308 """Represent a DynamicStructure field with a dynamic definition.
310 Adds the methods ``.pre_pack``, ``.pre_unpack``, and
311 ``.post_unpack``, all of which are called when a ``DynamicField``
312 is used by a ``DynamicStructure``. Each method takes the
313 arguments ``(parents, data)``, where ``parents`` is a list of
314 ``DynamicStructure``\s that own the field and ``data`` is a dict
315 hierarchy of the structure data.
317 See the ``DynamicStructure`` docstring for the exact timing of the
322 Field, DynamicStructure
324 def pre_pack(self, parents, data):
328 def pre_unpack(self, parents, data):
329 "React to previously unpacked data"
332 def post_unpack(self, parents, data):
333 "React to our own data"
336 def _get_structure_data(self, parents, data, structure):
337 """Extract the data belonging to a particular ancestor structure.
343 for p in parents[1:]:
349 assert s == p, (s, p)
355 class Structure (_struct.Struct):
356 r"""Represent a C structure.
358 A convenient wrapper around struct.Struct that uses Fields and
359 adds dict-handling methods for transparent name assignment.
369 >>> from pprint import pprint
371 Represent the C structures::
379 unsigned short version;
385 >>> time = Field('I', 'time', default=0, help='POSIX time')
387 ... 'h', 'data', default=0, help='example data', count=(2,3),
389 >>> run = Structure('run', fields=[time, data])
391 ... 'H', 'version', default=1, help='example version')
392 >>> runs = Field(run, 'runs', help='pair of runs', count=2, array=True)
393 >>> experiment = Structure('experiment', fields=[version, runs])
395 The structures automatically calculate the flattened data format:
399 >>> run.size # 4 + 2*3*2
401 >>> experiment.format
403 >>> experiment.size # 2 + 2 + 2*(4 + 2*3*2)
406 The first two elements in the above size calculation are 2 (for
407 the unsigned short, 'H') and 2 (padding so the unsigned int aligns
408 with a 4-byte block). If you select a byte ordering that doesn't
409 mess with alignment and recalculate the format, the padding goes
412 >>> experiment.set_byte_order('>')
413 >>> experiment.get_format()
418 You can read data out of any object supporting the buffer
421 >>> b = array.array('B', range(experiment.size))
422 >>> d = experiment.unpack_from(buffer=b)
424 {'runs': [{'data': array([[1543, 2057, 2571],
425 [3085, 3599, 4113]]),
427 {'data': array([[5655, 6169, 6683],
428 [7197, 7711, 8225]]),
431 >>> [hex(x) for x in d['runs'][0]['data'].flat]
432 ['0x607L', '0x809L', '0xa0bL', '0xc0dL', '0xe0fL', '0x1011L']
434 You can also read out from strings:
436 >>> d = experiment.unpack(b.tostring())
438 {'runs': [{'data': array([[1543, 2057, 2571],
439 [3085, 3599, 4113]]),
441 {'data': array([[5655, 6169, 6683],
442 [7197, 7711, 8225]]),
446 If you don't give enough values for an array field, the remaining
447 values are filled in with their defaults.
449 >>> experiment.pack_into(buffer=b, data=d)
450 >>> b.tostring()[:17]
451 '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10'
452 >>> b.tostring()[17:]
453 '\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !'
454 >>> run0 = d['runs'].pop(0)
455 >>> b = experiment.pack(data=d)
457 '\x00\x01\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
459 '!\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
461 If you set ``count=0``, the field is ignored.
463 >>> experiment2 = Structure('experiment', fields=[
464 ... version, Field('f', 'ignored', count=0, array=True), runs],
466 >>> experiment2.format
468 >>> d = experiment2.unpack(b)
470 {'ignored': array([], dtype=float64),
471 'runs': [{'data': array([[5655, 6169, 6683],
472 [7197, 7711, 8225]]),
474 {'data': array([[0, 0, 0],
475 [0, 0, 0]]), 'time': 0}],
478 >>> b2 = experiment2.pack(d)
482 _byte_order_symbols = '@=<>!'
484 def __init__(self, name, fields, byte_order='@'):
485 # '=' for native byte order, standard size and alignment
486 # See http://docs.python.org/library/struct for details
489 self.byte_order = byte_order
496 return '<{} {} {}>'.format(
497 self.__class__.__name__, self.name, id(self))
500 """Setup any dynamic properties of a structure.
502 Use this method to recalculate dynamic properties after
503 changing the basic properties set during initialization.
505 _LOG.debug('setup {!r}'.format(self))
506 self.set_byte_order(self.byte_order)
509 def set_byte_order(self, byte_order):
510 """Allow changing the format byte_order on the fly.
512 _LOG.debug('set byte order for {!r} to {}'.format(self, byte_order))
513 self.byte_order = byte_order
514 for field in self.fields:
515 if isinstance(field.format, Structure):
516 field.format.set_byte_order(byte_order)
518 def get_format(self):
519 format = self.byte_order + ''.join(self.sub_format())
520 # P format only allowed for native byte ordering
521 # Convert P to I for ILP32 compatibility when running on a LP64.
522 format = format.replace('P', 'I')
524 super(Structure, self).__init__(format=format)
525 except _struct.error as e:
526 raise ValueError((e, format))
529 def sub_format(self):
530 _LOG.debug('calculate sub-format for {!r}'.format(self))
531 for field in self.fields:
532 if isinstance(field.format, Structure):
534 field.format.sub_format()) * field.item_count
536 field_format = [field.format]*field.item_count
537 for fmt in field_format:
540 def _pack_item(self, item=None):
541 """Linearize a single count of the structure's data to a flat iterable
545 for f in self.fields:
549 raise ValueError((f.name, item))
552 for arg in f.pack_data(data):
555 def _unpack_item(self, args):
556 """Inverse of ._pack_item"""
558 iterator = iter(args)
559 for f in self.fields:
561 items = [next(iterator) for i in range(f.arg_count)]
562 except StopIteration:
563 raise ValueError('not enough data to unpack {}.{}'.format(
565 data[f.name] = f.unpack_data(items)
568 except StopIteration:
571 raise ValueError('too much data to unpack {}'.format(self))
574 def pack(self, data):
575 args = list(self._pack_item(data))
577 return super(Structure, self).pack(*args)
579 raise ValueError(self.format)
def pack_into(self, buffer, offset=0, data=None):
    """Pack nested field data into ``buffer`` in place.

    Parameters
    ----------
    buffer : writable buffer
        Destination handed straight to ``struct.Struct.pack_into``.
    offset : int
        Position in ``buffer`` at which writing starts.
    data : dict or None
        Structure data passed to ``self._pack_item`` for
        linearization.  ``None`` (the default) is treated as an empty
        dict.

    Returns whatever ``struct.Struct.pack_into`` returns.
    """
    # A literal ``{}`` default would be a single dict shared by every
    # call; build a fresh one per call instead.
    if data is None:
        data = {}
    args = list(self._pack_item(data))
    return super(Structure, self).pack_into(
        buffer, offset, *args)
def unpack(self, *args, **kwargs):
    """Unpack packed data into the structure's nested representation.

    All positional and keyword arguments are forwarded unchanged to
    the base-class ``unpack``; the flat sequence it returns is then
    regrouped by ``self._unpack_item``.
    """
    flat_values = super(Structure, self).unpack(*args, **kwargs)
    return self._unpack_item(flat_values)
590 def unpack_from(self, buffer, offset=0, *args, **kwargs):
592 'unpack {!r} for {!r} ({}, offset={}) with {} ({})'.format(
593 buffer, self, len(buffer), offset, self.format, self.size))
594 args = super(Structure, self).unpack_from(
595 buffer, offset, *args, **kwargs)
596 return self._unpack_item(args)
def get_field(self, name):
    """Return the first field in ``self.fields`` named ``name``.

    Raises
    ------
    IndexError
        If no field has that name.  The exception type matches what
        indexing an empty match list would raise, but the message now
        identifies the missing field.
    """
    for field in self.fields:
        if field.name == name:
            # stop at the first match instead of scanning every field
            return field
    raise IndexError('no field named {!r} in {!r}'.format(name, self))
602 class DebuggingStream (object):
603 def __init__(self, stream):
606 def read(self, size):
607 data = self.stream.read(size)
608 _LOG.debug('read {} from {}: ({}) {!r}'.format(
609 size, self.stream, len(data), data))
613 class DynamicStructure (Structure):
614 r"""Represent a C structure field with a dynamic definition.
616 Any dynamic fields have their ``.pre_pack`` called before any
617 structure packing is done. ``.pre_unpack`` is called for a
618 particular field just before that field's ``.unpack_data`` call.
619 ``.post_unpack`` is called for a particular field just after
620 ``.unpack_data``. If ``.post_unpack`` returns ``True``, the same
621 field is unpacked again.
626 >>> from pprint import pprint
628 This allows you to define structures where some portion of the
629 global structure depends on earlier data. For example, in the
637 You can generate a Python version of this structure in two ways,
638 with a dynamic ``length``, or with a dynamic ``data``. In both
639 cases, the required methods are the same, the only difference is
640 where you attach them.
642 >>> def packer(self, parents, data):
643 ... vector_structure = parents[-1]
644 ... vector_data = self._get_structure_data(
645 ... parents, data, vector_structure)
646 ... length = len(vector_data['data'])
647 ... vector_data['length'] = length
648 ... data_field = vector_structure.get_field('data')
649 ... data_field.count = length
650 ... data_field.setup()
651 >>> def unpacker(self, parents, data):
652 ... vector_structure = parents[-1]
653 ... vector_data = self._get_structure_data(
654 ... parents, data, vector_structure)
655 ... length = vector_data['length']
656 ... data_field = vector_structure.get_field('data')
657 ... data_field.count = length
658 ... data_field.setup()
660 >>> class DynamicLengthField (DynamicField):
661 ... def pre_pack(self, parents, data):
662 ... packer(self, parents, data)
663 ... def post_unpack(self, parents, data):
664 ... unpacker(self, parents, data)
665 >>> dynamic_length_vector = DynamicStructure('vector',
667 ... DynamicLengthField('I', 'length'),
668 ... Field('h', 'data', count=0, array=True),
671 >>> class DynamicDataField (DynamicField):
672 ... def pre_pack(self, parents, data):
673 ... packer(self, parents, data)
674 ... def pre_unpack(self, parents, data):
675 ... unpacker(self, parents, data)
676 >>> dynamic_data_vector = DynamicStructure('vector',
678 ... Field('I', 'length'),
679 ... DynamicDataField('h', 'data', count=0, array=True),
683 >>> b = b'\x00\x00\x00\x02\x01\x02\x03\x04'
684 >>> d = dynamic_length_vector.unpack(b)
686 {'data': array([258, 772]), 'length': 2}
687 >>> d = dynamic_data_vector.unpack(b)
689 {'data': array([258, 772]), 'length': 2}
691 >>> d['data'] = [1,2,3,4]
692 >>> dynamic_length_vector.pack(d)
693 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
694 >>> dynamic_data_vector.pack(d)
695 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
697 The implementation is a good deal more complicated than the one
698 for ``Structure``, because we must make multiple calls to
699 ``struct.Struct.unpack`` to unpack the data.
701 #def __init__(self, *args, **kwargs):
702 # pass #self.parent = ..
704 def _pre_pack(self, parents=None, data=None):
708 parents = parents + [self]
709 for f in self.fields:
710 if hasattr(f, 'pre_pack'):
711 _LOG.debug('pre-pack {}'.format(f))
712 f.pre_pack(parents=parents, data=data)
713 if isinstance(f.format, DynamicStructure):
714 _LOG.debug('pre-pack {!r}'.format(f.format))
715 f._pre_pack(parents=parents, data=data)
717 def pack(self, data):
718 self._pre_pack(data=data)
720 return super(DynamicStructure, self).pack(data)
722 def pack_into(self, buffer, offset=0, data={}):
723 self._pre_pack(data=data)
725 return super(DynamicStructure, self).pack_into(
726 buffer=buffer, offset=offset, data=data)
728 def unpack_stream(self, stream, parents=None, data=None, d=None):
729 # `d` is the working data dictionary
733 if _LOG.level <= _logging.DEBUG:
734 stream = DebuggingStream(stream)
736 parents = parents + [self]
738 for f in self.fields:
739 _LOG.debug('parsing {!r}.{} (count={}, item_count={})'.format(
740 self, f, f.count, f.item_count))
741 if _LOG.level <= _logging.DEBUG:
742 _LOG.debug('data:\n{}'.format(_pprint.pformat(data)))
743 if hasattr(f, 'pre_unpack'):
744 _LOG.debug('pre-unpack {}'.format(f))
745 f.pre_unpack(parents=parents, data=data)
747 if hasattr(f, 'unpack'): # override default unpacking
748 _LOG.debug('override unpack for {}'.format(f))
749 d[f.name] = f.unpack(stream)
752 # setup for unpacking loop
753 if isinstance(f.format, Structure):
754 f.format.set_byte_order(self.byte_order)
757 if isinstance(f.format, DynamicStructure):
760 for i in range(f.item_count):
763 f.format.unpack_stream(
764 stream, parents=parents, data=data, d=x)
766 assert f.item_count == 1, (f, f.count)
768 f.format.unpack_stream(
769 stream, parents=parents, data=data, d=d[f.name])
770 if hasattr(f, 'post_unpack'):
771 _LOG.debug('post-unpack {}'.format(f))
772 repeat = f.post_unpack(parents=parents, data=data)
774 raise NotImplementedError(
775 'cannot repeat unpack for dynamic structures')
777 if isinstance(f.format, Structure):
778 _LOG.debug('parsing {} bytes for {}'.format(
779 f.format.size, f.format.format))
780 bs = [stream.read(f.format.size) for i in range(f.item_count)]
782 f.format.set_byte_order(self.byte_order)
785 x = [f.format.unpack_from(b) for b in bs]
787 assert len(x) == 1, (f, f.count, x)
791 field_format = self.byte_order + f.format*f.item_count
792 field_format = field_format.replace('P', 'I')
794 size = _struct.calcsize(field_format)
795 except _struct.error as e:
797 _LOG.error('{}.{}: {}'.format(self, f, field_format))
799 _LOG.debug('parsing {} bytes for preliminary {}'.format(
801 raw = stream.read(size)
804 'not enough data to unpack {}.{} ({} < {})'.format(
805 self, f, len(raw), size))
807 field_format = self.byte_order + f.format*f.item_count
808 field_format = field_format.replace('P', 'I')
809 _LOG.debug('parse previous bytes using {}'.format(
811 struct = _struct.Struct(field_format)
812 items = struct.unpack(raw)
813 return f.unpack_data(items)
819 if hasattr(f, 'post_unpack'):
820 _LOG.debug('post-unpack {}'.format(f))
821 repeat = f.post_unpack(parents=parents, data=data)
825 _LOG.debug('repeat unpack for {}'.format(f))
def unpack(self, string):
    """Unpack ``string`` by streaming it through ``unpack_stream``.

    The bytes are wrapped in an in-memory ``BytesIO`` so that
    ``self.unpack_stream`` can read them incrementally; its return
    value is passed back unchanged.
    """
    return self.unpack_stream(_io.BytesIO(string))
def unpack_from(self, buffer, offset=0, *args, **kwargs):
    """Unpack from ``buffer`` starting at ``offset`` in a single pass.

    Arguments are forwarded to the inherited ``unpack_from`` and the
    flat result is regrouped by ``self._unpack_item``.
    """
    # ``super(Structure, self)`` starts the method lookup after
    # ``Structure``, so ``Structure.unpack_from`` is bypassed here.
    # NOTE(review): confirm that skipping it is intentional.
    flat_values = super(Structure, self).unpack_from(
        buffer, offset, *args, **kwargs)
    return self._unpack_item(flat_values)