Don't distinguish between ASCII unicode and strings in YAML output.
author: W. Trevor King <wking@drexel.edu>
Sat, 28 Aug 2010 22:58:38 +0000 (18:58 -0400)
committer: W. Trevor King <wking@drexel.edu>
Sat, 28 Aug 2010 22:58:38 +0000 (18:58 -0400)
hooke/util/yaml.py

index b4fa7decaa9762eb16e2b6e906c013784ab18d0a..25d62fc428acf57c3c26d7fa2ca88669dbc47dd1 100644 (file)
@@ -18,7 +18,7 @@ The default behavior is to crash.
 
 >>> yaml.Dumper.yaml_representers.pop(numpy.ndarray)  # doctest: +ELLIPSIS
 <function none_representer at 0x...>
->>> print yaml.dump(a)
+>>> print yaml.dump(a)  # doctest: +REPORT_UDIFF
 !!python/object/apply:numpy.core.multiarray._reconstruct
 args:
 - !!python/name:numpy.ndarray ''
@@ -27,7 +27,9 @@ args:
 state: !!python/tuple
 - 1
 - !!python/tuple [3]
-- null
+- !!python/object/apply:numpy.dtype
+  args: [i4, 0, 1]
+  state: !!python/tuple [3, <, null, null, null, -1, -1, 0]
 - false
 - "\\x01\\0\\0\\0\\x02\\0\\0\\0\\x03\\0\\0\\0"
 <BLANKLINE>
@@ -44,6 +46,19 @@ Must be because of the other representers I've loaded since.
 Restore the representer for future tests.
 
 >>> yaml.add_representer(numpy.ndarray, none_representer)
+
+We also avoid !!python/unicode tags by sacrificing the string/unicode
+distinction.
+
+>>> yaml.dump('ascii', allow_unicode=True)
+'ascii\\n...\\n'
+>>> yaml.dump(u'ascii', allow_unicode=True)
+'ascii\\n...\\n'
+>>> a = yaml.dump(u'Fran\\xe7ois', allow_unicode=True)
+>>> a
+'Fran\\xc3\\xa7ois\\n...\\n'
+>>> unicode(a, 'utf-8')
+u'Fran\\xe7ois\\n...\\n'
 """
 
 from __future__ import absolute_import
@@ -95,6 +110,9 @@ else:
     yaml.representer.SafeRepresenter.ignore_aliases = staticmethod(
         ignore_aliases)
 
+def unicode_representer(dumper, data):
+    return dumper.represent_scalar(u'tag:yaml.org,2002:str', data)
+yaml.add_representer(unicode, unicode_representer)
 
 def none_representer(dumper, data):
     return dumper.represent_none(None)