1 # Copyright (C) 2012 W. Trevor King <wking@tremily.us>
3 # This file is part of igor.
5 # igor is free software: you can redistribute it and/or modify it under the
6 # terms of the GNU Lesser General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option) any
10 # igor is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with igor. If not, see <http://www.gnu.org/licenses/>.
18 """Structure and Field classes for declaring structures
20 There are a few formats that can be used to represent the same data, a
21 binary packed format with all the data in a buffer, a linearized
22 format with each field in a single Python list, and a nested format
23 with each field in a hierarchy of Python dictionaries.
26 from __future__ import absolute_import
28 import logging as _logging
29 import pprint as _pprint
30 import struct as _struct
32 import numpy as _numpy
34 from . import LOG as _LOG
38 """Represent a Structure field.
40 The format argument can be a format character from the ``struct``
41 documentation (e.g., ``c`` for ``char``, ``h`` for ``short``, ...)
42 or ``Structure`` instance (for building nested structures).
47 >>> from pprint import pprint
50 Example of an unsigned integer field:
53 ... 'I', 'time', default=0, help='POSIX time')
56 >>> list(time.pack_data(1))
58 >>> list(time.pack_item(2))
60 >>> time.unpack_data([3])
62 >>> time.unpack_item([4])
65 Example of a multi-dimensional float field:
68 ... 'f', 'data', help='example data', count=(2,3,4))
71 >>> list(data.indexes()) # doctest: +ELLIPSIS
72 [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 1, 0], ..., [1, 2, 3]]
73 >>> list(data.pack_data(
74 ... [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]],
75 ... [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]])
76 ... ) # doctest: +ELLIPSIS
77 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ..., 19, 20, 21, 22, 23]
78 >>> list(data.pack_item(3))
80 >>> data.unpack_data(range(data.arg_count))
81 array([[[ 0, 1, 2, 3],
88 >>> data.unpack_item([3])
91 Example of a nested structure field:
93 >>> run = Structure('run', fields=[time, data])
94 >>> runs = Field(run, 'runs', help='pair of runs', count=2)
95 >>> runs.arg_count # = 2 * (1 + 24)
97 >>> data1 = numpy.arange(data.arg_count).reshape(data.count)
98 >>> data2 = data1 + data.arg_count
99 >>> list(runs.pack_data(
100 ... [{'time': 100, 'data': data1},
101 ... {'time': 101, 'data': data2}])
102 ... ) # doctest: +ELLIPSIS
103 [100, 0, 1, 2, ..., 22, 23, 101, 24, 25, ..., 46, 47]
104 >>> list(runs.pack_item({'time': 100, 'data': data1})
105 ... ) # doctest: +ELLIPSIS
106 [100, 0, 1, 2, ..., 22, 23]
107 >>> pprint(runs.unpack_data(range(runs.arg_count)))
108 [{'data': array([[[ 1, 2, 3, 4],
116 {'data': array([[[26, 27, 28, 29],
124 >>> pprint(runs.unpack_item(range(runs.structure_count)))
125 {'data': array([[[ 1, 2, 3, 4],
134 If you don't give enough values for an array field, the remaining
135 values are filled in with their defaults.
137 >>> list(data.pack_data(
138 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]])) # doctest: +ELLIPSIS
139 Traceback (most recent call last):
141 ValueError: no default for <Field data ...>
143 >>> list(data.pack_data(
144 ... [[[0, 1, 2, 3], [4, 5, 6]], [[10]]]))
145 [0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
151 def __init__(self, format, name, default=None, help=None, count=1):
154 self.default = default
160 """Setup any dynamic properties of a field.
162 Use this method to recalculate dynamic properties after
163 changing the basic properties set during initialization.
165 _LOG.debug('setup {}'.format(self))
166 self.item_count = _numpy.prod(self.count) # number of item repeats
167 if isinstance(self.format, Structure):
168 self.structure_count = sum(
169 f.arg_count for f in self.format.fields)
170 self.arg_count = self.item_count * self.structure_count
171 elif self.format == 'x':
172 self.arg_count = 0 # no data in padding bytes
174 self.arg_count = self.item_count # struct.Struct format args
177 return self.__repr__()
180 return '<{} {} {}>'.format(
181 self.__class__.__name__, self.name, id(self))
184 """Iterate through indexes to a possibly multi-dimensional array"""
185 assert self.item_count > 1, self
187 i = [0] * len(self.count)
188 except TypeError: # non-iterable count
189 for i in range(self.count):
192 for i in range(self.item_count):
194 for j,c in enumerate(reversed(self.count)):
195 index.insert(0, i % c)
199 def pack_data(self, data=None):
200 """Linearize a single field's data to a flat list.
202 If the field is repeated (count > 1), the incoming data should
203 be iterable with each iteration returning a single item.
205 if self.item_count > 1:
208 if hasattr(data, 'flat'): # take advantage of numpy's ndarray.flat
210 for item in data.flat:
212 for arg in self.pack_item(item):
214 if items < self.item_count:
215 if f.default is None:
217 'no default for {}.{}'.format(self, f))
218 for i in range(self.item_count - items):
221 for index in self.indexes():
223 if isinstance(index, int):
231 for arg in self.pack_item(item):
233 elif self.item_count:
234 for arg in self.pack_item(data):
237 def pack_item(self, item=None):
238 """Linearize a single count of the field's data to a flat iterable
240 if isinstance(self.format, Structure):
241 for i in self.format._pack_item(item):
244 if self.default is None:
245 raise ValueError('no default for {}'.format(self))
250 def unpack_data(self, data):
251 """Inverse of .pack_data"""
252 _LOG.debug('unpack {} for {} {}'.format(data, self, self.format))
253 iterator = iter(data)
255 items = [next(iterator) for i in range(self.arg_count)]
256 except StopIteration:
257 raise ValueError('not enough data to unpack {}'.format(self))
260 except StopIteration:
263 raise ValueError('too much data to unpack {}'.format(self))
264 if isinstance(self.format, Structure):
265 # break into per-structure clumps
266 s = self.structure_count
267 items = zip(*[items[i::s] for i in range(s)])
269 items = [[i] for i in items]
270 unpacked = [self.unpack_item(i) for i in items]
274 count = 0 # padding bytes, etc.
277 if isinstance(self.format, Structure):
283 raise NotImplementedError('reshape Structure field')
285 unpacked = _numpy.array(unpacked)
286 _LOG.debug('reshape {} data from {} to {}'.format(
287 self, unpacked.shape, count))
288 unpacked = unpacked.reshape(count)
291 def unpack_item(self, item):
292 """Inverse of .pack_item"""
293 if isinstance(self.format, Structure):
294 return self.format._unpack_item(item)
296 assert len(item) == 1, item
300 class DynamicField (Field):
301 """Represent a DynamicStructure field with a dynamic definition.
303 Adds the methods ``.pre_pack``, ``pre_unpack``, and
304 ``post_unpack``, all of which are called when a ``DynamicField``
305 is used by a ``DynamicStructure``. Each method takes the
306 arguments ``(parents, data)``, where ``parents`` is a list of
307 ``DynamicStructure``\s that own the field and ``data`` is a dict
308 hierarchy of the structure data.
310 See the ``DynamicStructure`` docstring for the exact timing of the
315 Field, DynamicStructure
317 def pre_pack(self, parents, data):
321 def pre_unpack(self, parents, data):
322 "React to previously unpacked data"
325 def post_unpack(self, parents, data):
326 "React to our own data"
329 def _get_structure_data(self, parents, data, structure):
330 """Extract the data belonging to a particular ancestor structure.
336 for p in parents[1:]:
342 assert s == p, (s, p)
348 class Structure (_struct.Struct):
349 r"""Represent a C structure.
351 A convenient wrapper around struct.Struct that uses Fields and
352 adds dict-handling methods for transparent name assignment.
362 >>> from pprint import pprint
364 Represent the C structures::
372 unsigned short version;
378 >>> time = Field('I', 'time', default=0, help='POSIX time')
380 ... 'h', 'data', default=0, help='example data', count=(2,3))
381 >>> run = Structure('run', fields=[time, data])
383 ... 'H', 'version', default=1, help='example version')
384 >>> runs = Field(run, 'runs', help='pair of runs', count=2)
385 >>> experiment = Structure('experiment', fields=[version, runs])
387 The structures automatically calculate the flattened data format:
391 >>> run.size # 4 + 2*3*2
393 >>> experiment.format
395 >>> experiment.size # 2 + 2 + 2*(4 + 2*3*2)
398 The first two elements in the above size calculation are 2 (for
399 the unsigned short, 'H') and 2 (padding so the unsigned int aligns
400 with a 4-byte block). If you select a byte ordering that doesn't
401 mess with alignment and recalculate the format, the padding goes
404 >>> experiment.set_byte_order('>')
405 >>> experiment.get_format()
410 You can read data out of any object supporting the buffer
413 >>> b = array.array('B', range(experiment.size))
414 >>> d = experiment.unpack_from(buffer=b)
416 {'runs': [{'data': array([[1543, 2057, 2571],
417 [3085, 3599, 4113]]),
419 {'data': array([[5655, 6169, 6683],
420 [7197, 7711, 8225]]),
423 >>> [hex(x) for x in d['runs'][0]['data'].flat]
424 ['0x607L', '0x809L', '0xa0bL', '0xc0dL', '0xe0fL', '0x1011L']
426 You can also read out from strings:
428 >>> d = experiment.unpack(b.tostring())
430 {'runs': [{'data': array([[1543, 2057, 2571],
431 [3085, 3599, 4113]]),
433 {'data': array([[5655, 6169, 6683],
434 [7197, 7711, 8225]]),
438 If you don't give enough values for an array field, the remaining
439 values are filled in with their defaults.
441 >>> experiment.pack_into(buffer=b, data=d)
442 >>> b.tostring()[:17]
443 '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10'
444 >>> b.tostring()[17:]
445 '\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !'
446 >>> run0 = d['runs'].pop(0)
447 >>> b = experiment.pack(data=d)
449 '\x00\x01\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
451 '!\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
453 If you set ``count=0``, the field is ignored.
455 >>> experiment2 = Structure('experiment', fields=[
456 ... version, Field('f', 'ignored', count=0), runs], byte_order='>')
457 >>> experiment2.format
459 >>> d = experiment2.unpack(b)
461 {'ignored': array([], dtype=float64),
462 'runs': [{'data': array([[5655, 6169, 6683],
463 [7197, 7711, 8225]]),
465 {'data': array([[0, 0, 0],
466 [0, 0, 0]]), 'time': 0}],
469 >>> b2 = experiment2.pack(d)
473 _byte_order_symbols = '@=<>!'
475 def __init__(self, name, fields, byte_order='@'):
476 # '=' for native byte order, standard size and alignment
477 # See http://docs.python.org/library/struct for details
480 self.byte_order = byte_order
487 return '<{} {} {}>'.format(
488 self.__class__.__name__, self.name, id(self))
491 """Setup any dynamic properties of a structure.
493 Use this method to recalculate dynamic properties after
494 changing the basic properties set during initialization.
496 _LOG.debug('setup {!r}'.format(self))
497 self.set_byte_order(self.byte_order)
def set_byte_order(self, byte_order):
    """Switch this structure, and every nested structure, to ``byte_order``.

    ``byte_order`` is stored on ``self.byte_order`` and then pushed
    down recursively into each field whose format is itself a
    ``Structure``.  The struct format string is not rebuilt here; call
    ``get_format()`` afterwards to recalculate it.
    """
    _LOG.debug('set byte order for {!r} to {}'.format(self, byte_order))
    self.byte_order = byte_order
    # Lazily walk the nested structures so each isinstance check is
    # interleaved with the recursive call.
    nested = (
        f.format for f in self.fields if isinstance(f.format, Structure))
    for structure in nested:
        structure.set_byte_order(byte_order)
509 def get_format(self):
510 format = self.byte_order + ''.join(self.sub_format())
511 # P format only allowed for native byte ordering
512 # Convert P to I for ILP32 compatibility when running on a LP64.
513 format = format.replace('P', 'I')
515 super(Structure, self).__init__(format=format)
516 except _struct.error as e:
517 raise ValueError((e, format))
520 def sub_format(self):
521 _LOG.debug('calculate sub-format for {!r}'.format(self))
522 for field in self.fields:
523 if isinstance(field.format, Structure):
525 field.format.sub_format()) * field.item_count
527 field_format = [field.format]*field.item_count
528 for fmt in field_format:
531 def _pack_item(self, item=None):
532 """Linearize a single count of the structure's data to a flat iterable
536 for f in self.fields:
540 raise ValueError((f.name, item))
543 for arg in f.pack_data(data):
546 def _unpack_item(self, args):
547 """Inverse of ._pack_item"""
549 iterator = iter(args)
550 for f in self.fields:
552 items = [next(iterator) for i in range(f.arg_count)]
553 except StopIteration:
554 raise ValueError('not enough data to unpack {}.{}'.format(
556 data[f.name] = f.unpack_data(items)
559 except StopIteration:
562 raise ValueError('too much data to unpack {}'.format(self))
565 def pack(self, data):
566 args = list(self._pack_item(data))
568 return super(Structure, self).pack(*args)
570 raise ValueError(self.format)
def pack_into(self, buffer, offset=0, data=None):
    """Pack ``data`` into the writable ``buffer`` starting at ``offset``.

    ``data`` is the nested, name-keyed representation of the structure.
    It is linearized with ``_pack_item`` and the resulting flat argument
    list is handed to ``struct.Struct.pack_into``.  When ``data`` is
    omitted an empty dict is used.

    Returns whatever ``struct.Struct.pack_into`` returns.
    """
    if data is None:
        # A fresh dict per call; a ``{}`` default would be one object
        # shared by every call that omits ``data``.
        data = {}
    args = list(self._pack_item(data))
    return super(Structure, self).pack_into(buffer, offset, *args)
def unpack(self, *args, **kwargs):
    """Unpack packed data into the nested, name-keyed representation.

    All arguments are forwarded unchanged to ``struct.Struct.unpack``;
    the flat tuple it returns is then regrouped by ``_unpack_item``.
    """
    flat_values = super(Structure, self).unpack(*args, **kwargs)
    return self._unpack_item(flat_values)
581 def unpack_from(self, buffer, offset=0, *args, **kwargs):
583 'unpack {!r} for {!r} ({}, offset={}) with {} ({})'.format(
584 buffer, self, len(buffer), offset, self.format, self.size))
585 args = super(Structure, self).unpack_from(
586 buffer, offset, *args, **kwargs)
587 return self._unpack_item(args)
def get_field(self, name):
    """Return the first field in ``self.fields`` whose name is ``name``.

    The search stops at the first match, so later fields with the same
    name are never examined.

    Raises ``IndexError`` if no field has that name.  The exception
    type is the one the previous list-indexing implementation raised;
    the message now identifies the missing field.
    """
    for field in self.fields:
        if field.name == name:
            return field
    raise IndexError('no field named {!r} in {!r}'.format(name, self))
593 class DebuggingStream (object):
594 def __init__(self, stream):
597 def read(self, size):
598 data = self.stream.read(size)
599 _LOG.debug('read {} from {}: ({}) {!r}'.format(
600 size, self.stream, len(data), data))
604 class DynamicStructure (Structure):
605 r"""Represent a C structure field with a dynamic definition.
607 Any dynamic fields have their ``.pre_pack`` called before any
608 structure packing is done. ``.pre_unpack`` is called for a
609 particular field just before that field's ``.unpack_data`` call.
610 ``.post_unpack`` is called for a particular field just after
611 ``.unpack_data``. If ``.post_unpack`` returns ``True``, the same
612 field is unpacked again.
617 >>> from pprint import pprint
619 This allows you to define structures where some portion of the
620 global structure depends on earlier data. For example, in the
628 You can generate a Python version of this structure in two ways,
629 with a dynamic ``length``, or with a dynamic ``data``. In both
630 cases, the required methods are the same, the only difference is
631 where you attach them.
633 >>> def packer(self, parents, data):
634 ... vector_structure = parents[-1]
635 ... vector_data = self._get_structure_data(
636 ... parents, data, vector_structure)
637 ... length = len(vector_data['data'])
638 ... vector_data['length'] = length
639 ... data_field = vector_structure.get_field('data')
640 ... data_field.count = length
641 ... data_field.setup()
642 >>> def unpacker(self, parents, data):
643 ... vector_structure = parents[-1]
644 ... vector_data = self._get_structure_data(
645 ... parents, data, vector_structure)
646 ... length = vector_data['length']
647 ... data_field = vector_structure.get_field('data')
648 ... data_field.count = length
649 ... data_field.setup()
651 >>> class DynamicLengthField (DynamicField):
652 ... def pre_pack(self, parents, data):
653 ... packer(self, parents, data)
654 ... def post_unpack(self, parents, data):
655 ... unpacker(self, parents, data)
656 >>> dynamic_length_vector = DynamicStructure('vector',
658 ... DynamicLengthField('I', 'length'),
659 ... Field('h', 'data', count=0),
662 >>> class DynamicDataField (DynamicField):
663 ... def pre_pack(self, parents, data):
664 ... packer(self, parents, data)
665 ... def pre_unpack(self, parents, data):
666 ... unpacker(self, parents, data)
667 >>> dynamic_data_vector = DynamicStructure('vector',
669 ... Field('I', 'length'),
670 ... DynamicDataField('h', 'data', count=0),
674 >>> b = b'\x00\x00\x00\x02\x01\x02\x03\x04'
675 >>> d = dynamic_length_vector.unpack(b)
677 {'data': array([258, 772]), 'length': 2}
678 >>> d = dynamic_data_vector.unpack(b)
680 {'data': array([258, 772]), 'length': 2}
682 >>> d['data'] = [1,2,3,4]
683 >>> dynamic_length_vector.pack(d)
684 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
685 >>> dynamic_data_vector.pack(d)
686 '\x00\x00\x00\x04\x00\x01\x00\x02\x00\x03\x00\x04'
688 The implementation is a good deal more complicated than the one
689 for ``Structure``, because we must make multiple calls to
690 ``struct.Struct.unpack`` to unpack the data.
692 #def __init__(self, *args, **kwargs):
693 # pass #self.parent = ..
695 def _pre_pack(self, parents=None, data=None):
699 parents = parents + [self]
700 for f in self.fields:
701 if hasattr(f, 'pre_pack'):
702 _LOG.debug('pre-pack {}'.format(f))
703 f.pre_pack(parents=parents, data=data)
704 if isinstance(f.format, DynamicStructure):
705 _LOG.debug('pre-pack {!r}'.format(f.format))
706 f._pre_pack(parents=parents, data=data)
708 def pack(self, data):
709 self._pre_pack(data=data)
711 return super(DynamicStructure, self).pack(data)
713 def pack_into(self, buffer, offset=0, data={}):
714 self._pre_pack(data=data)
716 return super(DynamicStructure, self).pack_into(
717 buffer=buffer, offset=offset, data=data)
719 def unpack_stream(self, stream, parents=None, data=None, d=None):
720 # `d` is the working data dictionary
724 if _LOG.level <= _logging.DEBUG:
725 stream = DebuggingStream(stream)
727 parents = parents + [self]
729 for f in self.fields:
730 _LOG.debug('parsing {!r}.{} (count={}, item_count={})'.format(
731 self, f, f.count, f.item_count))
732 if _LOG.level <= _logging.DEBUG:
733 _LOG.debug('data:\n{}'.format(_pprint.pformat(data)))
734 if hasattr(f, 'pre_unpack'):
735 _LOG.debug('pre-unpack {}'.format(f))
736 f.pre_unpack(parents=parents, data=data)
738 if hasattr(f, 'unpack'): # override default unpacking
739 _LOG.debug('override unpack for {}'.format(f))
740 d[f.name] = f.unpack(stream)
743 # setup for unpacking loop
744 if isinstance(f.format, Structure):
745 f.format.set_byte_order(self.byte_order)
748 if isinstance(f.format, DynamicStructure):
749 if f.item_count == 1:
750 # TODO, fix in case we *want* an array
752 f.format.unpack_stream(
753 stream, parents=parents, data=data, d=d[f.name])
756 for i in range(f.item_count):
759 f.format.unpack_stream(
760 stream, parents=parents, data=data, d=x)
761 if hasattr(f, 'post_unpack'):
762 _LOG.debug('post-unpack {}'.format(f))
763 repeat = f.post_unpack(parents=parents, data=data)
765 raise NotImplementedError(
766 'cannot repeat unpack for dynamic structures')
768 if isinstance(f.format, Structure):
769 _LOG.debug('parsing {} bytes for {}'.format(
770 f.format.size, f.format.format))
771 bs = [stream.read(f.format.size) for i in range(f.item_count)]
773 f.format.set_byte_order(self.byte_order)
776 x = [f.format.unpack_from(b) for b in bs]
777 if len(x) == 1: # TODO, fix in case we *want* an array
781 field_format = self.byte_order + f.format*f.item_count
782 field_format = field_format.replace('P', 'I')
784 size = _struct.calcsize(field_format)
785 except _struct.error as e:
787 _LOG.error('{}.{}: {}'.format(self, f, field_format))
789 _LOG.debug('parsing {} bytes for preliminary {}'.format(
791 raw = stream.read(size)
794 'not enough data to unpack {}.{} ({} < {})'.format(
795 self, f, len(raw), size))
797 field_format = self.byte_order + f.format*f.item_count
798 field_format = field_format.replace('P', 'I')
799 _LOG.debug('parse previous bytes using {}'.format(
801 struct = _struct.Struct(field_format)
802 items = struct.unpack(raw)
803 return f.unpack_data(items)
809 if hasattr(f, 'post_unpack'):
810 _LOG.debug('post-unpack {}'.format(f))
811 repeat = f.post_unpack(parents=parents, data=data)
815 _LOG.debug('repeat unpack for {}'.format(f))
def unpack(self, string):
    """Unpack a byte string by streaming it through ``unpack_stream``.

    ``string`` is wrapped in an in-memory ``BytesIO`` so the
    field-by-field stream parser can consume it incrementally.
    Returns whatever ``unpack_stream`` returns.
    """
    return self.unpack_stream(_io.BytesIO(string))
def unpack_from(self, buffer, offset=0, *args, **kwargs):
    """Unpack from ``buffer`` at ``offset`` using the compiled format.

    The flat values produced by the underlying ``unpack_from`` are
    regrouped by ``_unpack_item``.

    NOTE(review): ``super(Structure, self)`` starts the method lookup
    *after* ``Structure``, so ``Structure.unpack_from`` is skipped, and
    no ``pre_unpack``/``post_unpack`` hooks run on this path -- confirm
    this is intended.
    """
    flat_values = super(Structure, self).unpack_from(
        buffer, offset, *args, **kwargs)
    return self._unpack_item(flat_values)