1 # Copyright (C) 2012 W. Trevor King <wking@tremily.us>
3 # This file is part of igor.
5 # igor is free software: you can redistribute it and/or modify it under the
6 # terms of the GNU Lesser General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option) any
10 # igor is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with igor. If not, see <http://www.gnu.org/licenses/>.
18 """Structure and Field classes for declaring structures
20 There are a few formats that can be used to represent the same data, a
21 binary packed format with all the data in a buffer, a linearized
22 format with each field in a single Python list, and a nested format
23 with each field in a hierarchy of Python dictionaries.
26 from __future__ import absolute_import
28 import logging as _logging
29 import pprint as _pprint
30 import struct as _struct
32 import numpy as _numpy
34 from . import LOG as _LOG
37 _buffer = buffer # save builtin buffer for clobbered situations
41 """Represent a Structure field.
43 The format argument can be a format character from the ``struct``
44 documentation (e.g., ``c`` for ``char``, ``h`` for ``short``, ...)
45 or ``Structure`` instance (for building nested structures).
50 >>> from pprint import pprint
53 Example of an unsigned integer field:
56 ... 'I', 'time', default=0, help='POSIX time')
59 >>> list(time.pack_data(1))
61 >>> list(time.pack_item(2))
63 >>> time.unpack_data([3])
65 >>> time.unpack_item([4])
68 Example of a multi-dimensional float field:
71 ... 'f', 'data', help='example data', count=(2,3,4))
74 >>> list(data.indexes()) # doctest: +ELLIPSIS
75 [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 1, 0], ..., [1, 2, 3]]
76 >>> list(data.pack_data(
77 ... [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]],
78 ... [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]])
79 ... ) # doctest: +ELLIPSIS
80 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ..., 19, 20, 21, 22, 23]
81 >>> list(data.pack_item(3))
83 >>> data.unpack_data(range(data.arg_count))
84 array([[[ 0, 1, 2, 3],
91 >>> data.unpack_item([3])
94 Example of a nested structure field:
96 >>> run = Structure('run', fields=[time, data])
97 >>> runs = Field(run, 'runs', help='pair of runs', count=2)
98 >>> runs.arg_count # = 2 * (1 + 24)
100 >>> data1 = numpy.arange(data.arg_count).reshape(data.count)
101 >>> data2 = data1 + data.arg_count
102 >>> list(runs.pack_data(
103 ... [{'time': 100, 'data': data1},
104 ... {'time': 101, 'data': data2}])
105 ... ) # doctest: +ELLIPSIS
106 [100, 0, 1, 2, ..., 22, 23, 101, 24, 25, ..., 46, 47]
107 >>> list(runs.pack_item({'time': 100, 'data': data1})
108 ... ) # doctest: +ELLIPSIS
109 [100, 0, 1, 2, ..., 22, 23]
110 >>> pprint(runs.unpack_data(range(runs.arg_count)))
111 [{'data': array([[[ 1, 2, 3, 4],
119 {'data': array([[[26, 27, 28, 29],
127 >>> pprint(runs.unpack_item(range(runs.structure_count)))
128 {'data': array([[[ 1, 2, 3, 4],
137 If you don't give enough values for an array field, the remaining
138 values are filled in with their defaults.
140 >>> list(data.pack_data(
141 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]])) # doctest: +ELLIPSIS
142 Traceback (most recent call last):
144 ValueError: no default for <Field data ...>
146 >>> list(data.pack_data(
147 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]]))
148 [0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
154 def __init__(self, format, name, default=None, help=None, count=1):
157 self.default = default
163 """Setup any dynamic properties of a field.
165 Use this method to recalculate dynamic properties after
166 changing the basic properties set during initialization.
168 _LOG.debug('setup {}'.format(self))
169 self.item_count = _numpy.prod(self.count) # number of item repeats
170 if isinstance(self.format, Structure):
171 self.structure_count = sum(
172 f.arg_count for f in self.format.fields)
173 self.arg_count = self.item_count * self.structure_count
174 elif self.format == 'x':
175 self.arg_count = 0 # no data in padding bytes
177 self.arg_count = self.item_count # struct.Struct format args
180 return self.__repr__()
183 return '<{} {} {}>'.format(
184 self.__class__.__name__, self.name, id(self))
187 """Iterate through indexes to a possibly multi-dimensional array"""
188 assert self.item_count > 1, self
190 i = [0] * len(self.count)
191 except TypeError: # non-iterable count
192 for i in range(self.count):
195 for i in range(self.item_count):
197 for j,c in enumerate(reversed(self.count)):
198 index.insert(0, i % c)
202 def pack_data(self, data=None):
203 """Linearize a single field's data to a flat list.
205 If the field is repeated (count > 1), the incoming data should
206 be iterable with each iteration returning a single item.
208 if self.item_count > 1:
211 if hasattr(data, 'flat'): # take advantage of numpy's ndarray.flat
213 for item in data.flat:
215 for arg in self.pack_item(item):
217 if items < self.item_count:
218 if f.default is None:
220 'no default for {}.{}'.format(self, f))
221 for i in range(self.item_count - items):
224 for index in self.indexes():
226 if isinstance(index, int):
234 for arg in self.pack_item(item):
236 elif self.item_count:
237 for arg in self.pack_item(data):
240 def pack_item(self, item=None):
241 """Linearize a single count of the field's data to a flat iterable
243 if isinstance(self.format, Structure):
244 for i in self.format._pack_item(item):
247 if self.default is None:
248 raise ValueError('no default for {}'.format(self))
253 def unpack_data(self, data):
254 """Inverse of .pack_data"""
255 _LOG.debug('unpack {} for {} {}'.format(data, self, self.format))
256 iterator = iter(data)
258 items = [iterator.next() for i in range(self.arg_count)]
259 except StopIteration:
260 raise ValueError('not enough data to unpack {}'.format(self))
263 except StopIteration:
266 raise ValueError('too much data to unpack {}'.format(self))
267 if isinstance(self.format, Structure):
268 # break into per-structure clumps
269 s = self.structure_count
270 items = zip(*[items[i::s] for i in range(s)])
272 items = [[i] for i in items]
273 unpacked = [self.unpack_item(i) for i in items]
277 count = 0 # padding bytes, etc.
280 if isinstance(self.format, Structure):
286 raise NotImplementedError('reshape Structure field')
288 unpacked = _numpy.array(unpacked)
289 _LOG.debug('reshape {} data from {} to {}'.format(
290 self, unpacked.shape, count))
291 unpacked = unpacked.reshape(count)
294 def unpack_item(self, item):
295 """Inverse of .pack_item"""
296 if isinstance(self.format, Structure):
297 return self.format._unpack_item(item)
299 assert len(item) == 1, item
303 class DynamicField (Field):
304 """Represent a DynamicStructure field with a dynamic definition.
306 Adds the methods ``.pre_pack``, ``pre_unpack``, and
307 ``post_unpack``, all of which are called when a ``DynamicField``
308 is used by a ``DynamicStructure``. Each method takes the
309 arguments ``(parents, data)``, where ``parents`` is a list of
310 ``DynamicStructure``\s that own the field and ``data`` is a dict
311 hierarchy of the structure data.
313 See the ``DynamicStructure`` docstring for the exact timing of the
318 Field, DynamicStructure
320 def pre_pack(self, parents, data):
324 def pre_unpack(self, parents, data):
325 "React to previously unpacked data"
328 def post_unpack(self, parents, data):
329 "React to our own data"
332 def _get_structure_data(self, parents, data, structure):
333 """Extract the data belonging to a particular ancestor structure.
339 for p in parents[1:]:
345 assert s == p, (s, p)
351 class Structure (_struct.Struct):
352 r"""Represent a C structure.
354 A convenient wrapper around struct.Struct that uses Fields and
355 adds dict-handling methods for transparent name assignment.
365 >>> from pprint import pprint
367 Represent the C structures::
375 unsigned short version;
381 >>> time = Field('I', 'time', default=0, help='POSIX time')
383 ... 'h', 'data', default=0, help='example data', count=(2,3))
384 >>> run = Structure('run', fields=[time, data])
386 ... 'H', 'version', default=1, help='example version')
387 >>> runs = Field(run, 'runs', help='pair of runs', count=2)
388 >>> experiment = Structure('experiment', fields=[version, runs])
390 The structures automatically calculate the flattened data format:
394 >>> run.size # 4 + 2*3*2
396 >>> experiment.format
398 >>> experiment.size # 2 + 2 + 2*(4 + 2*3*2)
401 The first two elements in the above size calculation are 2 (for
402 the unsigned short, 'H') and 2 (padding so the unsigned int aligns
403 with a 4-byte block). If you select a byte ordering that doesn't
404 mess with alignment and recalculate the format, the padding goes
407 >>> experiment.set_byte_order('>')
408 >>> experiment.get_format()
413 You can read data out of any object supporting the buffer
416 >>> b = array.array('B', range(experiment.size))
417 >>> d = experiment.unpack_from(buffer=b)
419 {'runs': [{'data': array([[1543, 2057, 2571],
420 [3085, 3599, 4113]]),
422 {'data': array([[5655, 6169, 6683],
423 [7197, 7711, 8225]]),
426 >>> [hex(x) for x in d['runs'][0]['data'].flat]
427 ['0x607L', '0x809L', '0xa0bL', '0xc0dL', '0xe0fL', '0x1011L']
429 You can also read out from strings:
431 >>> d = experiment.unpack(b.tostring())
433 {'runs': [{'data': array([[1543, 2057, 2571],
434 [3085, 3599, 4113]]),
436 {'data': array([[5655, 6169, 6683],
437 [7197, 7711, 8225]]),
441 If you don't give enough values for an array field, the remaining
442 values are filled in with their defaults.
444 >>> experiment.pack_into(buffer=b, data=d)
445 >>> b.tostring()[:17]
446 '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10'
447 >>> b.tostring()[17:]
448 '\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !'
449 >>> run0 = d['runs'].pop(0)
450 >>> b = experiment.pack(data=d)
452 '\x00\x01\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
454 '!\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
456 If you set ``count=0``, the field is ignored.
458 >>> experiment2 = Structure('experiment', fields=[
459 ... version, Field('f', 'ignored', count=0), runs], byte_order='>')
460 >>> experiment2.format
462 >>> d = experiment2.unpack(b)
464 {'ignored': array([], dtype=float64),
465 'runs': [{'data': array([[5655, 6169, 6683],
466 [7197, 7711, 8225]]),
468 {'data': array([[0, 0, 0],
469 [0, 0, 0]]), 'time': 0}],
472 >>> b2 = experiment2.pack(d)
476 _byte_order_symbols = '@=<>!'
478 def __init__(self, name, fields, byte_order='@'):
479 # '=' for native byte order, standard size and alignment
480 # See http://docs.python.org/library/struct for details
483 self.byte_order = byte_order
490 return '<{} {} {}>'.format(
491 self.__class__.__name__, self.name, id(self))
494 """Setup any dynamic properties of a structure.
496 Use this method to recalculate dynamic properties after
497 changing the basic properties set during initialization.
499 _LOG.debug('setup {!r}'.format(self))
500 self.set_byte_order(self.byte_order)
503 def set_byte_order(self, byte_order):
504 """Allow changing the format byte_order on the fly.
506 _LOG.debug('set byte order for {!r} to {}'.format(self, byte_order))
507 self.byte_order = byte_order
508 for field in self.fields:
509 if isinstance(field.format, Structure):
510 field.format.set_byte_order(byte_order)
512 def get_format(self):
513 format = self.byte_order + ''.join(self.sub_format())
514 # P format only allowed for native byte ordering
515 # Convert P to I for ILP32 compatibility when running on a LP64.
516 format = format.replace('P', 'I')
518 super(Structure, self).__init__(format=format)
519 except _struct.error as e:
520 raise ValueError((e, format))
523 def sub_format(self):
524 _LOG.debug('calculate sub-format for {!r}'.format(self))
525 for field in self.fields:
526 if isinstance(field.format, Structure):
528 field.format.sub_format()) * field.item_count
530 field_format = [field.format]*field.item_count
531 for fmt in field_format:
534 def _pack_item(self, item=None):
535 """Linearize a single count of the structure's data to a flat iterable
539 for f in self.fields:
543 raise ValueError((f.name, item))
546 for arg in f.pack_data(data):
549 def _unpack_item(self, args):
550 """Inverse of ._pack_item"""
552 iterator = iter(args)
553 for f in self.fields:
555 items = [iterator.next() for i in range(f.arg_count)]
556 except StopIteration:
557 raise ValueError('not enough data to unpack {}.{}'.format(
559 data[f.name] = f.unpack_data(items)
562 except StopIteration:
565 raise ValueError('too much data to unpack {}'.format(self))
568 def pack(self, data):
569 args = list(self._pack_item(data))
571 return super(Structure, self).pack(*args)
573 raise ValueError(self.format)
def pack_into(self, buffer, offset=0, data=None):
    """Pack a data hierarchy into ``buffer``, starting at ``offset``.

    ``data`` is linearized with ``self._pack_item`` and the resulting
    flat argument list is handed to the base ``struct.Struct.pack_into``.

    ``buffer`` and ``offset`` are forwarded unchanged.  ``data``
    defaults to a new, empty dict on every call.

    Returns whatever the base ``pack_into`` returns.
    """
    if data is None:
        # A literal ``{}`` default would be one dict object shared by
        # every call; build a fresh one per call instead.
        data = {}
    args = list(self._pack_item(data))
    return super(Structure, self).pack_into(buffer, offset, *args)
def unpack(self, *args, **kwargs):
    """Unpack packed data and regroup it by field.

    All arguments are forwarded to the base ``struct.Struct.unpack``.
    The flat values it returns are passed to ``self._unpack_item``,
    whose result is returned (the class doctests show a dict keyed by
    field name).
    """
    flat_values = super(Structure, self).unpack(*args, **kwargs)
    return self._unpack_item(flat_values)
584 def unpack_from(self, buffer, offset=0, *args, **kwargs):
586 'unpack {!r} for {!r} ({}, offset={}) with {} ({})'.format(
587 buffer, self, len(buffer), offset, self.format, self.size))
588 args = super(Structure, self).unpack_from(
589 buffer, offset, *args, **kwargs)
590 return self._unpack_item(args)
def get_field(self, name):
    """Return the first entry of ``self.fields`` whose ``.name`` is ``name``.

    Raises ``IndexError`` when no field has that name.
    """
    matches = [field for field in self.fields if field.name == name]
    return matches[0]
596 class DebuggingStream (object):
597 def __init__(self, stream):
600 def read(self, size):
601 data = self.stream.read(size)
602 _LOG.debug('read {} from {}: ({}) {!r}'.format(
603 size, self.stream, len(data), data))
607 class DynamicStructure (Structure):
608 r"""Represent a C structure field with a dynamic definition.
610 Any dynamic fields have their ``.pre_pack`` called before any
611 structure packing is done. ``.pre_unpack`` is called for a
612 particular field just before that field's ``.unpack_data`` call.
613 ``.post_unpack`` is called for a particular field just after
614 ``.unpack_data``. If ``.post_unpack`` returns ``True``, the same
615 field is unpacked again.
620 >>> from pprint import pprint
622 This allows you to define structures where some portion of the
623 global structure depends on earlier data. For example, in the
631 You can generate a Python version of this structure in two ways,
632 with a dynamic ``length``, or with a dynamic ``data``. In both
633 cases, the required methods are the same, the only difference is
634 where you attach them.
636 >>> def packer(self, parents, data):
637 ... vector_structure = parents[-1]
638 ... vector_data = self._get_structure_data(
639 ... parents, data, vector_structure)
640 ... length = len(vector_data['data'])
641 ... vector_data['length'] = length
642 ... data_field = vector_structure.get_field('data')
643 ... data_field.count = length
644 ... data_field.setup()
645 >>> def unpacker(self, parents, data):
646 ... vector_structure = parents[-1]
647 ... vector_data = self._get_structure_data(
648 ... parents, data, vector_structure)
649 ... length = vector_data['length']
650 ... data_field = vector_structure.get_field('data')
651 ... data_field.count = length
652 ... data_field.setup()
654 >>> class DynamicLengthField (DynamicField):
655 ... def pre_pack(self, parents, data):
656 ... packer(self, parents, data)
657 ... def post_unpack(self, parents, data):
658 ... unpacker(self, parents, data)
659 >>> dynamic_length_vector = DynamicStructure('vector',
661 ... DynamicLengthField('I', 'length'),
662 ... Field('h', 'data', count=0),
665 >>> class DynamicDataField (DynamicField):
666 ... def pre_pack(self, parents, data):
667 ... packer(self, parents, data)
668 ... def pre_unpack(self, parents, data):
669 ... unpacker(self, parents, data)
670 >>> dynamic_data_vector = DynamicStructure('vector',
672 ... Field('I', 'length'),
673 ... DynamicDataField('h', 'data', count=0),
677 >>> b = '\x00\x00\x00\x02\x01\x02\x03\x04'
678 >>> d = dynamic_length_vector.unpack(b)
680 {'data': array([258, 772]), 'length': 2}
681 >>> d = dynamic_data_vector.unpack(b)
683 {'data': array([258, 772]), 'length': 2}
685 >>> d['data'] = [1,2,3,4]
686 >>> dynamic_length_vector.pack(d)
687 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
688 >>> dynamic_data_vector.pack(d)
689 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
691 The implementation is a good deal more complicated than the one
692 for ``Structure``, because we must make multiple calls to
693 ``struct.Struct.unpack`` to unpack the data.
695 #def __init__(self, *args, **kwargs):
696 # pass #self.parent = ..
698 def _pre_pack(self, parents=None, data=None):
702 parents = parents + [self]
703 for f in self.fields:
704 if hasattr(f, 'pre_pack'):
705 _LOG.debug('pre-pack {}'.format(f))
706 f.pre_pack(parents=parents, data=data)
707 if isinstance(f.format, DynamicStructure):
708 _LOG.debug('pre-pack {!r}'.format(f.format))
709 f._pre_pack(parents=parents, data=data)
711 def pack(self, data):
712 self._pre_pack(data=data)
714 return super(DynamicStructure, self).pack(data)
716 def pack_into(self, buffer, offset=0, data={}):
717 self._pre_pack(data=data)
719 return super(DynamicStructure, self).pack_into(
720 buffer=buffer, offset=offset, data=data)
722 def unpack_stream(self, stream, parents=None, data=None, d=None):
723 # `d` is the working data directory
727 if _LOG.level <= _logging.DEBUG:
728 stream = DebuggingStream(stream)
730 parents = parents + [self]
732 for f in self.fields:
733 _LOG.debug('parsing {!r}.{} (count={}, item_count={})'.format(
734 self, f, f.count, f.item_count))
735 if _LOG.level <= _logging.DEBUG:
736 _LOG.debug('data:\n{}'.format(_pprint.pformat(data)))
737 if hasattr(f, 'pre_unpack'):
738 _LOG.debug('pre-unpack {}'.format(f))
739 f.pre_unpack(parents=parents, data=data)
741 if hasattr(f, 'unpack'): # override default unpacking
742 _LOG.debug('override unpack for {}'.format(f))
743 d[f.name] = f.unpack(stream)
746 # setup for unpacking loop
747 if isinstance(f.format, Structure):
748 f.format.set_byte_order(self.byte_order)
751 if isinstance(f.format, DynamicStructure):
752 if f.item_count == 1:
753 # TODO, fix in case we *want* an array
755 f.format.unpack_stream(
756 stream, parents=parents, data=data, d=d[f.name])
759 for i in range(f.item_count):
762 f.format.unpack_stream(
763 stream, parents=parents, data=data, d=x)
764 if hasattr(f, 'post_unpack'):
765 _LOG.debug('post-unpack {}'.format(f))
766 repeat = f.post_unpack(parents=parents, data=data)
768 raise NotImplementedError(
769 'cannot repeat unpack for dynamic structures')
771 if isinstance(f.format, Structure):
772 _LOG.debug('parsing {} bytes for {}'.format(
773 f.format.size, f.format.format))
774 bs = [stream.read(f.format.size) for i in range(f.item_count)]
776 f.format.set_byte_order(self.byte_order)
779 x = [f.format.unpack_from(b) for b in bs]
780 if len(x) == 1: # TODO, fix in case we *want* an array
784 field_format = self.byte_order + f.format*f.item_count
785 field_format = field_format.replace('P', 'I')
787 size = _struct.calcsize(field_format)
788 except _struct.error as e:
790 _LOG.error('{}.{}: {}'.format(self, f, field_format))
792 _LOG.debug('parsing {} bytes for preliminary {}'.format(
794 raw = stream.read(size)
797 'not enough data to unpack {}.{} ({} < {})'.format(
798 self, f, len(raw), size))
800 field_format = self.byte_order + f.format*f.item_count
801 field_format = field_format.replace('P', 'I')
802 _LOG.debug('parse previous bytes using {}'.format(
804 struct = _struct.Struct(field_format)
805 items = struct.unpack(raw)
806 return f.unpack_data(items)
812 if hasattr(f, 'post_unpack'):
813 _LOG.debug('post-unpack {}'.format(f))
814 repeat = f.post_unpack(parents=parents, data=data)
818 _LOG.debug('repeat unpack for {}'.format(f))
def unpack(self, string):
    """Unpack ``string`` by streaming it through ``unpack_stream``.

    The input is wrapped in an in-memory ``BytesIO`` and the result of
    ``self.unpack_stream`` on that stream is returned unchanged.
    """
    return self.unpack_stream(_io.BytesIO(string))
def unpack_from(self, buffer, offset=0, *args, **kwargs):
    """Unpack from ``buffer`` at ``offset`` and regroup the values by field.

    The ``super(Structure, self)`` lookup starts after ``Structure`` in
    the MRO, so this calls the base ``struct.Struct.unpack_from``
    directly and skips ``Structure.unpack_from``.  The flat values are
    then passed to ``self._unpack_item``, whose result is returned.
    """
    flat_values = super(Structure, self).unpack_from(
        buffer, offset, *args, **kwargs)
    return self._unpack_item(flat_values)