I don't know if encoding information is embedded in the IGOR files or
not. Currently the stock parser just leaves everything it reads in as
bytes. For compatibility, the igorpy module attempts to convert those
byte strings to Unicode, but it just assumes that the encoding used in
the file matches the locale or default encoding used by your system.
This could be a portability issue. Until commit

    commit fe7006e3e2d741b6d80767b1aac53394ff1e7e76
    Author: W. Trevor King <wking@tremily.us>
    Date:   Sat Jul 21 07:50:09 2012 -0400

        Replace igor.igorpy parsing with translations from igor.packed.load.

the igorpy parser used sys.getfilesystemencoding() to guess the
encoding, but that encoding is actually used to encode file names, not
file contents. locale.getpreferredencoding is a better guess, but
it's still just a guess.
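
For illustration only, a minimal sketch of that guessing strategy and
of how a decoded name might be produced; the byte string here is made
up:

    import locale
    import sys

    # Prefer the locale's preferred encoding; fall back to the
    # interpreter default if the locale query comes back empty.
    encoding = locale.getpreferredencoding() or sys.getdefaultencoding()

    raw_name = b'W_plrX5'  # hypothetical bytes read from a .pxp file
    name = raw_name.decode(encoding)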
        wave_data[self.name] = d

    def _normalize_string(self, d):
-        if hasattr(d, 'tostring'):
+        if isinstance(d, bytes):
+            pass
+        elif hasattr(d, 'tobytes'):
+            d = d.tobytes()
+        elif hasattr(d, 'tostring'):  # Python 2 compatibility
             d = d.tostring()
         else:
-            d = ''.join(d)
+            d = b''.join(d)
         if self._array_size_field:
             start = 0
             strings = []
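
As a rough illustration of the three normalization paths above (bytes
pass through untouched, array buffers are dumped with tobytes() or the
older tostring(), and any other sequence of byte strings is joined):

    import numpy

    numpy.array([104, 105], dtype=numpy.uint8).tobytes()  # -> b'hi'
    b''.join([b'h', b'i'])                                # -> b'hi'
    b'hi'                                                 # already bytes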
        wave_structure = parents[-1]
        wave_data = self._get_structure_data(parents, data, wave_structure)
        bin_header = wave_data['bin_header']
-        d = ''.join(wave_data[self.name])
+        d = b''.join(wave_data[self.name])
        dim_labels = []
        start = 0
        for size in bin_header[self._size_field]:
            if end > start:
                dim_data = d[start:end]
                # split null-delimited strings
-                labels = dim_data.split(chr(0))
+                labels = dim_data.split(b'\x00')
                start = end
            else:
                labels = []
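
Under Python 3 a bytes object has to be split on a bytes separator;
chr(0) is a str there and would raise a TypeError. A made-up label
blob for reference:

    dim_data = b'time\x00volts\x00'  # hypothetical null-delimited labels
    dim_data.split(b'\x00')          # -> [b'time', b'volts', b'']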
            for i,offset in enumerate(wave_data['sIndices']):
                if offset > start:
                    chars = wdata[start:offset]
-                    strings.append(''.join(chars))
+                    strings.append(b''.join(chars))
                    start = offset
                elif offset == start:
-                    strings.append('')
+                    strings.append(b'')
                else:
                    raise ValueError((offset, wave_data['sIndices']))
            wdata = _numpy.array(strings)
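
A simplified sketch of the offset-based splitting above, slicing a
flat bytes buffer directly instead of joining individual characters;
the buffer and offsets are invented:

    wdata = b'redgreenblue'  # hypothetical flat text-wave buffer
    s_indices = (3, 8, 12)   # hypothetical end offset of each string

    strings = []
    start = 0
    for offset in s_indices:
        strings.append(wdata[start:offset])
        start = offset
    # strings == [b'red', b'green', b'blue']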
"""
from __future__ import absolute_import
import io as _io
+import locale as _locale
import re as _re
import sys as _sys
__version__='0.10'
+ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding()
PYKEYWORDS = set(('and','as','assert','break','class','continue',
                  'def','elif','else','except','exec','finally',
                  'for','global','if','import','in','is','lambda',
"""
    def __init__(self, record):
        d = record.wave['wave']
-        self.name = d['wave_header']['bname']
+        self.name = d['wave_header']['bname'].decode(ENCODING)
        self.data = d['wData']
        self.fs = d['wave_header']['fsValid']
        self.fstop = d['wave_header']['topFullScale']
        sfA = d['wave_header']['sfA']
        sfB = d['wave_header']['sfB']
        # TODO find example with multiple data units
-        self.data_units = [d['data_units']]
-        self.axis_units = [d['dimension_units']]
+        self.data_units = [d['data_units'].decode(ENCODING)]
+        self.axis_units = [d['dimension_units'].decode(ENCODING)]
        self.data_units.extend(['']*(_MAXDIMS-len(self.data_units)))
        self.data_units = tuple(self.data_units)
        self.axis_units.extend(['']*(_MAXDIMS-len(self.axis_units)))
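
The unit lists are padded out to a fixed number of dimensions before
being frozen into tuples, which is why the doctest below shows
four-element tuples; a sketch with an invented unit:

    _MAXDIMS = 4
    data_units = ['V']  # hypothetical decoded unit for dimension 0
    data_units.extend([''] * (_MAXDIMS - len(data_units)))
    data_units = tuple(data_units)  # -> ('V', '', '', '')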
        r = None
        if isinstance(record, _FolderStartRecord):
-            path = stack[-1].path+[record.null_terminated_text]
+            path = stack[-1].path + [
+                record.null_terminated_text.decode(ENCODING)]
            folder = Folder(path)
            stack[-1].append(folder)
            stack.append(folder)
class TextRecord (Record):
    def __init__(self, *args, **kwargs):
        super(TextRecord, self).__init__(*args, **kwargs)
-        self.text = str(self.data).replace('\r\n', '\n').replace('\r', '\n')
-        self.null_terminated_text = self.text.split('\x00', 1)[0]
+        self.text = bytes(self.data).replace(
+            b'\r\n', b'\n').replace(b'\r', b'\n')
+        self.null_terminated_text = self.text.split(b'\x00', 1)[0]
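
A quick sketch of the byte-level normalization above, with a made-up
record payload:

    data = b'line one\r\nline two\rend\x00pad'  # hypothetical raw text
    text = bytes(data).replace(b'\r\n', b'\n').replace(b'\r', b'\n')
    null_terminated_text = text.split(b'\x00', 1)[0]
    # text                 == b'line one\nline two\nend\x00pad'
    # null_terminated_text == b'line one\nline two\nend'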
>>> from pprint import pprint
>>> import igor.igorpy as igor
+>>> igor.ENCODING = 'UTF-8'
Load a packed experiment:
...
0.00077303, 0.00038651, 0. ]), array([], dtype=float64), array([], dtype=float64), array([], dtype=float64)]
>>> d.W_plrX5.data_units
-('', '', '', '')
+(u'', '', '', '')
>>> d.W_plrX5.axis_units
-('', '', '', '')
+(u'', '', '', '')
>>> d.W_plrX5.data # doctest: +ELLIPSIS
array([ 1.83690956e-17, 2.69450769e-02, 7.65399113e-02,
1.44305170e-01, 2.23293692e-01, 3.04783821e-01,