Don't distinguish between ASCII unicode and strings in YAML output.
[hooke.git] / hooke / util / yaml.py
index e7f99806a49c6382716f49764d8ac72e3d1092f1..25d62fc428acf57c3c26d7fa2ca88669dbc47dd1 100644 (file)
@@ -18,7 +18,7 @@ The default behavior is to crash.
 
 >>> yaml.Dumper.yaml_representers.pop(numpy.ndarray)  # doctest: +ELLIPSIS
 <function none_representer at 0x...>
->>> print yaml.dump(a)
+>>> print yaml.dump(a)  # doctest: +REPORT_UDIFF
 !!python/object/apply:numpy.core.multiarray._reconstruct
 args:
 - !!python/name:numpy.ndarray ''
@@ -27,7 +27,9 @@ args:
 state: !!python/tuple
 - 1
 - !!python/tuple [3]
-- null
+- !!python/object/apply:numpy.dtype
+  args: [i4, 0, 1]
+  state: !!python/tuple [3, <, null, null, null, -1, -1, 0]
 - false
 - "\\x01\\0\\0\\0\\x02\\0\\0\\0\\x03\\0\\0\\0"
 <BLANKLINE>
@@ -44,6 +46,19 @@ Must be because of the other representers I've loaded since.
 Restore the representer for future tests.
 
 >>> yaml.add_representer(numpy.ndarray, none_representer)
+
+We also avoid !!python/unicode tags by sacrificing the string/unicode
+distinction.
+
+>>> yaml.dump('ascii', allow_unicode=True)
+'ascii\\n...\\n'
+>>> yaml.dump(u'ascii', allow_unicode=True)
+'ascii\\n...\\n'
+>>> a = yaml.dump(u'Fran\\xe7ois', allow_unicode=True)
+>>> a
+'Fran\\xc3\\xa7ois\\n...\\n'
+>>> unicode(a, 'utf-8')
+u'Fran\\xe7ois\\n...\\n'
 """
 
 from __future__ import absolute_import
@@ -54,12 +69,16 @@ import types
 import numpy
 import yaml
 import yaml.constructor
+from yaml.constructor import ConstructorError
 import yaml.representer
 
 from ..curve import Data, Curve
 from ..playlist import FilePlaylist
 
 
+DATA_INFO_TAG = u'!hooke.curve.DataInfo'
+
+
 if False: # YAML dump debugging code
     """To help isolate data types etc. that give YAML problems.
 
@@ -91,11 +110,13 @@ else:
     yaml.representer.SafeRepresenter.ignore_aliases = staticmethod(
         ignore_aliases)
 
+def unicode_representer(dumper, data):  # dump unicode under the plain YAML str tag, avoiding !!python/unicode
+    return dumper.represent_scalar(u'tag:yaml.org,2002:str', data)
+yaml.add_representer(unicode, unicode_representer)  # register for the `unicode` type
 
 def none_representer(dumper, data):
     return dumper.represent_none(None)
 yaml.add_representer(numpy.ndarray, none_representer)
-yaml.add_representer(numpy.dtype, none_representer)
 
 def bool_representer(dumper, data):
     return dumper.represent_bool(data)
@@ -120,9 +141,14 @@ def data_representer(dumper, data):
     for key in info.keys():
         if key.startswith('raw '):
             del(info[key])
-    return dumper.represent_mapping(u'!hooke.curve.DataInfo', info)
+    return dumper.represent_mapping(DATA_INFO_TAG, info)
 yaml.add_representer(Data, data_representer)
 
+def data_constructor(loader, node):  # load a DATA_INFO_TAG mapping node back into a Data object
+    info = loader.construct_mapping(node)  # NOTE(review): deep=False by default, nested collections are populated later -- confirm Data does not copy `info`
+    return Data(shape=(0,0), dtype=numpy.float32, info=info)  # only `info` comes from the YAML; the array is a (0,0) float32 placeholder
+yaml.add_constructor(DATA_INFO_TAG, data_constructor)  # same tag that data_representer emits
+
 def object_representer(dumper, data):
     cls = type(data)
     if cls in copy_reg.dispatch_table: