From: W. Trevor King <wking@tremily.us>
Date: Sat, 21 Jul 2012 13:54:20 +0000 (-0400)
Subject: Fixes to string/byte handling for Python 3 compatibility.
X-Git-Tag: v0.2~2
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=7612a4622392bd4599dc5bbcdd0d6e5b5397e6d8;p=igor.git

Fixes to string/byte handling for Python 3 compatibility.

I don't know if encoding information is embedded in the IGOR files or
not.  Currently the stock parser just leaves everything it reads as
bytes.  For compatibility, the igorpy module attempts to convert those
byte strings to Unicode, but it just assumes that the encoding used in
the file matches the locale or default encoding used by your system.
This could be a portability issue.  Until commit

  commit fe7006e3e2d741b6d80767b1aac53394ff1e7e76
  Author: W. Trevor King <wking@tremily.us>
  Date:   Sat Jul 21 07:50:09 2012 -0400

    Replace igor.igorpy parsing with translations from igor.packed.load.

the igorpy parser used sys.getfilesystemencoding() to guess the
encoding, but that encoding is actually used to encode file names, not
file contents.  locale.getpreferredencoding() is a better guess, but
it's still just a guess.
---

diff --git a/igor/binarywave.py b/igor/binarywave.py
index 513148b..c9d1ff9 100644
--- a/igor/binarywave.py
+++ b/igor/binarywave.py
@@ -70,10 +70,14 @@ class StaticStringField (_DynamicField):
         wave_data[self.name] = d
 
     def _normalize_string(self, d):
-        if hasattr(d, 'tostring'):
+        if isinstance(d, bytes):
+            pass
+        elif hasattr(d, 'tobytes'):
+            d = d.tobytes()
+        elif hasattr(d, 'tostring'):  # Python 2 compatibility
             d = d.tostring()
         else:
-            d = ''.join(d)
+            d = b''.join(d)
         if self._array_size_field:
             start = 0
             strings = []
@@ -449,7 +453,7 @@ class DynamicLabelsField (DynamicStringField):
         wave_structure = parents[-1]
         wave_data = self._get_structure_data(parents, data, wave_structure)
         bin_header = wave_data['bin_header']
-        d = ''.join(wave_data[self.name])
+        d = b''.join(wave_data[self.name])
         dim_labels = []
         start = 0
         for size in bin_header[self._size_field]:
@@ -457,7 +461,7 @@ class DynamicLabelsField (DynamicStringField):
             if end > start:
                 dim_data = d[start:end]
                 # split null-delimited strings
-                labels = dim_data.split(chr(0))
+                labels = dim_data.split(b'\x00')
                 start = end
             else:
                 labels = []
@@ -494,10 +498,10 @@ class DynamicStringIndicesDataField (_DynamicField):
         for i,offset in enumerate(wave_data['sIndices']):
             if offset > start:
                 chars = wdata[start:offset]
-                strings.append(''.join(chars))
+                strings.append(b''.join(chars))
                 start = offset
             elif offset == start:
-                strings.append('')
+                strings.append(b'')
             else:
                 raise ValueError((offset, wave_data['sIndices']))
         wdata = _numpy.array(strings)
diff --git a/igor/igorpy.py b/igor/igorpy.py
index ede660f..f9e0961 100644
--- a/igor/igorpy.py
+++ b/igor/igorpy.py
@@ -15,6 +15,7 @@ PTN003.ifn and TN003.ifn.
 """
 from __future__ import absolute_import
 import io as _io
+import locale as _locale
 import re as _re
 import sys as _sys
 
@@ -37,6 +38,7 @@ from .record.variables import VariablesRecord as _VariablesRecord
 __version__='0.10'
 
 
+ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding()
 PYKEYWORDS = set(('and','as','assert','break','class','continue',
                   'def','elif','else','except','exec','finally',
                   'for','global','if','import','in','is','lambda',
@@ -84,7 +86,7 @@ class Wave(IgorObject):
     """
     def __init__(self, record):
         d = record.wave['wave']
-        self.name = d['wave_header']['bname']
+        self.name = d['wave_header']['bname'].decode(ENCODING)
         self.data = d['wData']
         self.fs = d['wave_header']['fsValid']
         self.fstop = d['wave_header']['topFullScale']
@@ -100,8 +102,8 @@ class Wave(IgorObject):
             sfA = d['wave_header']['sfA']
             sfB = d['wave_header']['sfB']
             # TODO find example with multiple data units
-            self.data_units = [d['data_units']]
-            self.axis_units = [d['dimension_units']]
+            self.data_units = [d['data_units'].decode(ENCODING)]
+            self.axis_units = [d['dimension_units'].decode(ENCODING)]
         self.data_units.extend(['']*(_MAXDIMS-len(self.data_units)))
         self.data_units = tuple(self.data_units)
         self.axis_units.extend(['']*(_MAXDIMS-len(self.axis_units)))
@@ -257,7 +259,8 @@ def _convert(packed_experiment, ignore_unknown=True):
             r = None
 
         if isinstance(record, _FolderStartRecord):
-            path = stack[-1].path+[record.null_terminated_text]
+            path = stack[-1].path + [
+                record.null_terminated_text.decode(ENCODING)]
             folder = Folder(path)
             stack[-1].append(folder)
             stack.append(folder)
diff --git a/igor/record/base.py b/igor/record/base.py
index eebd923..6b168cf 100644
--- a/igor/record/base.py
+++ b/igor/record/base.py
@@ -42,5 +42,6 @@ class UnusedRecord (Record):
 class TextRecord (Record):
     def __init__(self, *args, **kwargs):
         super(TextRecord, self).__init__(*args, **kwargs)
-        self.text = str(self.data).replace('\r\n', '\n').replace('\r', '\n')
-        self.null_terminated_text = self.text.split('\x00', 1)[0]
+        self.text = bytes(self.data).replace(
+            b'\r\n', b'\n').replace(b'\r', b'\n')
+        self.null_terminated_text = self.text.split(b'\x00', 1)[0]
diff --git a/test/test-igorpy.py b/test/test-igorpy.py
index 423ac3c..1f88927 100644
--- a/test/test-igorpy.py
+++ b/test/test-igorpy.py
@@ -4,6 +4,7 @@ r"""Test the igor.igorpy compatibility layer by loading sample files.
 
 >>> from pprint import pprint
 >>> import igor.igorpy as igor
+>>> igor.ENCODING = 'UTF-8'
 
 Load a packed experiment:
 
@@ -74,9 +75,9 @@ Waves:
         ...
         0.00077303,  0.00038651,  0.        ]), array([], dtype=float64), array([], dtype=float64), array([], dtype=float64)]
 >>> d.W_plrX5.data_units
-('', '', '', '')
+(u'', '', '', '')
 >>> d.W_plrX5.axis_units
-('', '', '', '')
+(u'', '', '', '')
 >>> d.W_plrX5.data  # doctest: +ELLIPSIS
 array([  1.83690956e-17,   2.69450769e-02,   7.65399113e-02,
          1.44305170e-01,   2.23293692e-01,   3.04783821e-01,