From 7612a4622392bd4599dc5bbcdd0d6e5b5397e6d8 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sat, 21 Jul 2012 09:54:20 -0400 Subject: [PATCH] Fixes to string/byte handling for Python 3 compatibility. I don't know if encoding information is embedded in the IGOR files or not. Currently the stock parser just leaves everything it reads in as bytes. For compatibility, the igorpy module attempts to convert those byte strings to Unicode, but it just assumes that the encoding used in the file matches the locale or default encoding used by your system. This could be a portability issue. Until commit fe7006e3e2d741b6d80767b1aac53394ff1e7e76 (Author: W. Trevor King; Date: Sat Jul 21 07:50:09 2012 -0400; "Replace igor.igorpy parsing with translations from igor.packed.load."), the igorpy parser used sys.getfilesystemencoding() to guess the encoding, but that encoding is actually used to encode file names, not file contents. locale.getpreferredencoding is a better guess, but it's still just a guess. 
--- igor/binarywave.py | 16 ++++++++++------ igor/igorpy.py | 11 +++++++---- igor/record/base.py | 5 +++-- test/test-igorpy.py | 5 +++-- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/igor/binarywave.py b/igor/binarywave.py index 513148b..c9d1ff9 100644 --- a/igor/binarywave.py +++ b/igor/binarywave.py @@ -70,10 +70,14 @@ class StaticStringField (_DynamicField): wave_data[self.name] = d def _normalize_string(self, d): - if hasattr(d, 'tostring'): + if isinstance(d, bytes): + pass + elif hasattr(d, 'tobytes'): + d = d.tobytes() + elif hasattr(d, 'tostring'): # Python 2 compatibility d = d.tostring() else: - d = ''.join(d) + d = b''.join(d) if self._array_size_field: start = 0 strings = [] @@ -449,7 +453,7 @@ class DynamicLabelsField (DynamicStringField): wave_structure = parents[-1] wave_data = self._get_structure_data(parents, data, wave_structure) bin_header = wave_data['bin_header'] - d = ''.join(wave_data[self.name]) + d = b''.join(wave_data[self.name]) dim_labels = [] start = 0 for size in bin_header[self._size_field]: @@ -457,7 +461,7 @@ class DynamicLabelsField (DynamicStringField): if end > start: dim_data = d[start:end] # split null-delimited strings - labels = dim_data.split(chr(0)) + labels = dim_data.split(b'\x00') start = end else: labels = [] @@ -494,10 +498,10 @@ class DynamicStringIndicesDataField (_DynamicField): for i,offset in enumerate(wave_data['sIndices']): if offset > start: chars = wdata[start:offset] - strings.append(''.join(chars)) + strings.append(b''.join(chars)) start = offset elif offset == start: - strings.append('') + strings.append(b'') else: raise ValueError((offset, wave_data['sIndices'])) wdata = _numpy.array(strings) diff --git a/igor/igorpy.py b/igor/igorpy.py index ede660f..f9e0961 100644 --- a/igor/igorpy.py +++ b/igor/igorpy.py @@ -15,6 +15,7 @@ PTN003.ifn and TN003.ifn. 
""" from __future__ import absolute_import import io as _io +import locale as _locale import re as _re import sys as _sys @@ -37,6 +38,7 @@ from .record.variables import VariablesRecord as _VariablesRecord __version__='0.10' +ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding() PYKEYWORDS = set(('and','as','assert','break','class','continue', 'def','elif','else','except','exec','finally', 'for','global','if','import','in','is','lambda', @@ -84,7 +86,7 @@ class Wave(IgorObject): """ def __init__(self, record): d = record.wave['wave'] - self.name = d['wave_header']['bname'] + self.name = d['wave_header']['bname'].decode(ENCODING) self.data = d['wData'] self.fs = d['wave_header']['fsValid'] self.fstop = d['wave_header']['topFullScale'] @@ -100,8 +102,8 @@ class Wave(IgorObject): sfA = d['wave_header']['sfA'] sfB = d['wave_header']['sfB'] # TODO find example with multiple data units - self.data_units = [d['data_units']] - self.axis_units = [d['dimension_units']] + self.data_units = [d['data_units'].decode(ENCODING)] + self.axis_units = [d['dimension_units'].decode(ENCODING)] self.data_units.extend(['']*(_MAXDIMS-len(self.data_units))) self.data_units = tuple(self.data_units) self.axis_units.extend(['']*(_MAXDIMS-len(self.axis_units))) @@ -257,7 +259,8 @@ def _convert(packed_experiment, ignore_unknown=True): r = None if isinstance(record, _FolderStartRecord): - path = stack[-1].path+[record.null_terminated_text] + path = stack[-1].path + [ + record.null_terminated_text.decode(ENCODING)] folder = Folder(path) stack[-1].append(folder) stack.append(folder) diff --git a/igor/record/base.py b/igor/record/base.py index eebd923..6b168cf 100644 --- a/igor/record/base.py +++ b/igor/record/base.py @@ -42,5 +42,6 @@ class UnusedRecord (Record): class TextRecord (Record): def __init__(self, *args, **kwargs): super(TextRecord, self).__init__(*args, **kwargs) - self.text = str(self.data).replace('\r\n', '\n').replace('\r', '\n') - self.null_terminated_text = 
self.text.split('\x00', 1)[0] + self.text = bytes(self.data).replace( + b'\r\n', b'\n').replace(b'\r', b'\n') + self.null_terminated_text = self.text.split(b'\x00', 1)[0] diff --git a/test/test-igorpy.py b/test/test-igorpy.py index 423ac3c..1f88927 100644 --- a/test/test-igorpy.py +++ b/test/test-igorpy.py @@ -4,6 +4,7 @@ r"""Test the igor.igorpy compatibility layer by loading sample files. >>> from pprint import pprint >>> import igor.igorpy as igor +>>> igor.ENCODING = 'UTF-8' Load a packed experiment: @@ -74,9 +75,9 @@ Waves: ... 0.00077303, 0.00038651, 0. ]), array([], dtype=float64), array([], dtype=float64), array([], dtype=float64)] >>> d.W_plrX5.data_units -('', '', '', '') +(u'', '', '', '') >>> d.W_plrX5.axis_units -('', '', '', '') +(u'', '', '', '') >>> d.W_plrX5.data # doctest: +ELLIPSIS array([ 1.83690956e-17, 2.69450769e-02, 7.65399113e-02, 1.44305170e-01, 2.23293692e-01, 3.04783821e-01, -- 2.26.2