Don't distinguish between ASCII unicode and strings in YAML output.
[hooke.git] / hooke / util / yaml.py
1 # Copyright
2
3 """Add representers to YAML to support Hooke.
4
5 Without introspection, YAML cannot decide how to save some
6 objects.  By refusing to save these objects, we obviously lose
7 that information, so make sure the things you drop are either
8 stored somewhere else or not important.
9
10 >>> import yaml
11 >>> a = numpy.array([1,2,3])
12 >>> print yaml.dump(a)
13 null
14 ...
15 <BLANKLINE>
16
17 The default behavior is to crash.
18
19 >>> yaml.Dumper.yaml_representers.pop(numpy.ndarray)  # doctest: +ELLIPSIS
20 <function none_representer at 0x...>
21 >>> print yaml.dump(a)  # doctest: +REPORT_UDIFF
22 !!python/object/apply:numpy.core.multiarray._reconstruct
23 args:
24 - !!python/name:numpy.ndarray ''
25 - !!python/tuple [0]
26 - b
27 state: !!python/tuple
28 - 1
29 - !!python/tuple [3]
30 - !!python/object/apply:numpy.dtype
31   args: [i4, 0, 1]
32   state: !!python/tuple [3, <, null, null, null, -1, -1, 0]
33 - false
34 - "\\x01\\0\\0\\0\\x02\\0\\0\\0\\x03\\0\\0\\0"
35 <BLANKLINE>
36
37 Hmm, at one point that crashed like this::
38
39     Traceback (most recent call last):
40       ...
41         if data in [None, ()]:
42     TypeError: data type not understood
43
44 Must be because of the other representers I've loaded since.
45
46 Restore the representer for future tests.
47
48 >>> yaml.add_representer(numpy.ndarray, none_representer)
49
50 We also avoid !!python/unicode tags by sacrificing the string/unicode
51 distinction.
52
53 >>> yaml.dump('ascii', allow_unicode=True)
54 'ascii\\n...\\n'
55 >>> yaml.dump(u'ascii', allow_unicode=True)
56 'ascii\\n...\\n'
57 >>> a = yaml.dump(u'Fran\\xe7ois', allow_unicode=True)
58 >>> a
59 'Fran\\xc3\\xa7ois\\n...\\n'
60 >>> unicode(a, 'utf-8')
61 u'Fran\\xe7ois\\n...\\n'
62 """
63
64 from __future__ import absolute_import
65 import copy_reg
66 import sys
67 import types
68
69 import numpy
70 import yaml
71 import yaml.constructor
72 from yaml.constructor import ConstructorError
73 import yaml.representer
74
75 from ..curve import Data, Curve
76 from ..playlist import FilePlaylist
77
78
79 DATA_INFO_TAG = u'!hooke.curve.DataInfo'
80
81
82 if False: # YAML dump debugging code
83     """To help isolate data types etc. that give YAML problems.
84
85     This is usually caused by external C modules (e.g. numpy) that
86     define new types (e.g. numpy.ndarray) which YAML cannot inspect.
87     """
88     def ignore_aliases(data):
89         print data, repr(data), type(data), repr(type(data))
90         sys.stdout.flush()
91         if data in [None, ()]:
92             return True
93         if isinstance(data, (str, unicode, bool, int, float)):
94             return True
95     yaml.representer.SafeRepresenter.ignore_aliases = staticmethod(
96         ignore_aliases)
97 else:
98     # Avoid error with
99     #   numpy.dtype(numpy.int32) in [None, ()]
100     # See
101     #   http://projects.scipy.org/numpy/ticket/1001
102     def ignore_aliases(data):
103         try:
104             if data in [None, ()]:
105                 return True
106             if isinstance(data, (str, unicode, bool, int, float)):
107                 return True
108         except TypeError, e:
109             pass
110     yaml.representer.SafeRepresenter.ignore_aliases = staticmethod(
111         ignore_aliases)
112
def unicode_representer(dumper, data):
    """Represent a `unicode` object as an ordinary YAML string.

    Using the standard `str` tag keeps `!!python/unicode` tags out of
    the output, at the cost of the str/unicode distinction.
    """
    plain_string_tag = u'tag:yaml.org,2002:str'
    return dumper.represent_scalar(plain_string_tag, data)
yaml.add_representer(unicode, unicode_representer)
116
def none_representer(dumper, data):
    """Represent `data` as YAML null, ignoring its contents."""
    null_node = dumper.represent_none(None)
    return null_node
yaml.add_representer(numpy.ndarray, none_representer)
120
def bool_representer(dumper, data):
    """Represent `data` with the dumper's boolean representer."""
    bool_node = dumper.represent_bool(data)
    return bool_node
yaml.add_representer(numpy.bool_, bool_representer)
124
def int_representer(dumper, data):
    """Represent `data` with the dumper's integer representer."""
    int_node = dumper.represent_int(data)
    return int_node
yaml.add_representer(numpy.int32, int_representer)
# NOTE(review): the key below is a dtype *instance*, not a type.
# Presumably representers are looked up by type(data); confirm that
# this registration has any effect.
yaml.add_representer(numpy.dtype(numpy.int32), int_representer)
129
def long_representer(dumper, data):
    """Represent `data` with the dumper's long-integer representer."""
    long_node = dumper.represent_long(data)
    return long_node
# numpy.int64 is registered with int_representer, not long_representer;
# long_representer itself is not registered for any type here.
yaml.add_representer(numpy.int64, int_representer)
133
def float_representer(dumper, data):
    """Represent `data` with the dumper's float representer."""
    float_node = dumper.represent_float(data)
    return float_node
yaml.add_representer(numpy.float32, float_representer)
yaml.add_representer(numpy.float64, float_representer)
138
def data_representer(dumper, data):
    """Represent a `Data` object by its `.info` mapping.

    Only a copy of `data.info` is written, under `DATA_INFO_TAG`;
    entries whose key starts with 'raw ' are left out of that copy.
    """
    kept_info = dict(
        (key, value) for key, value in dict(data.info).items()
        if not key.startswith('raw '))
    return dumper.represent_mapping(DATA_INFO_TAG, kept_info)
yaml.add_representer(Data, data_representer)
146
def data_constructor(loader, node):
    """Build a `Data` object from a `DATA_INFO_TAG` mapping node.

    The mapping becomes the new object's `info`; the object itself is
    created with shape (0, 0) and dtype float32.
    """
    restored_info = loader.construct_mapping(node)
    placeholder = Data(shape=(0,0), dtype=numpy.float32, info=restored_info)
    return placeholder
yaml.add_constructor(DATA_INFO_TAG, data_constructor)
151
def object_representer(dumper, data):
    """Represent `data` via its pickle-style reduction, minus defaults.

    The reduction is taken from `copy_reg.dispatch_table`,
    `__reduce_ex__(2)` or `__reduce__()`, in that order of preference.
    If the resulting state is a dict containing a `_default_attrs`
    mapping, every state entry equal to its default is dropped, along
    with `_default_attrs` itself, before the node is built.

    Returns the node produced by `dumper.represent_mapping` or
    `dumper.represent_sequence`.

    Raises `yaml.representer.RepresenterError` if `data` offers none of
    the reduction hooks.
    """
    cls = type(data)
    if cls in copy_reg.dispatch_table:
        reduction = copy_reg.dispatch_table[cls](data)
    elif hasattr(data, '__reduce_ex__'):
        reduction = data.__reduce_ex__(2)
    elif hasattr(data, '__reduce__'):
        reduction = data.__reduce__()
    else:
        raise yaml.representer.RepresenterError(
            "cannot represent object: %r" % data)
    # Normalise to exactly five slots, padding missing ones with None.
    reduction = (list(reduction)+[None]*5)[:5]
    function, args, state, listitems, dictitems = reduction
    args = list(args)
    if state is None:
        state = {}
    if isinstance(state, dict) and '_default_attrs' in state:
        # Prune a copy: the reduction may hand back the instance's own
        # attribute dict, and deleting from that would strip attributes
        # from the live object.
        state = dict(state)
        defaults = state['_default_attrs']
        for key in defaults:
            if key in state and state[key] == defaults[key]:
                del state[key]
        del state['_default_attrs']
    if listitems is not None:
        listitems = list(listitems)
    if dictitems is not None:
        dictitems = dict(dictitems)
    if function.__name__ == '__newobj__':
        # For a __newobj__ reduction the first argument is the class.
        function = args[0]
        args = args[1:]
        tag = u'tag:yaml.org,2002:python/object/new:'
        newobj = True
    else:
        tag = u'tag:yaml.org,2002:python/object/apply:'
        newobj = False
    function_name = u'%s.%s' % (function.__module__, function.__name__)
    if not args and not listitems and not dictitems \
            and isinstance(state, dict) and newobj:
        # Only a dict state: use the short python/object form.
        return dumper.represent_mapping(
                u'tag:yaml.org,2002:python/object:'+function_name, state)
    if not listitems and not dictitems  \
            and isinstance(state, dict) and not state:
        # Nothing but arguments: a plain sequence is enough.
        return dumper.represent_sequence(tag+function_name, args)
    value = {}
    if args:
        value['args'] = args
    if state or not isinstance(state, dict):
        value['state'] = state
    if listitems:
        value['listitems'] = listitems
    if dictitems:
        value['dictitems'] = dictitems
    return dumper.represent_mapping(tag+function_name, value)
yaml.add_representer(FilePlaylist, object_representer)
yaml.add_representer(Curve, object_representer)
204
205
206 # Monkey patch PyYAML bug 159.
207 #   Yaml failed to restore loops in objects when __setstate__ is defined
208 #   http://pyyaml.org/ticket/159
209 # With viktor.x.voroshylo@jpmchase.com's patch
def construct_object(self, node, deep=False):
    """Construct the Python object for `node`.

    Installed below as `yaml.constructor.BaseConstructor.construct_object`.

    `node` is the YAML node to construct.  If `deep` is true,
    `self.deep_construct` is switched on for the duration of the call.

    Returns the cached object if `node` has already been constructed,
    or the partially built object if `node` is currently being
    constructed further up the call stack; otherwise the newly
    constructed object.

    Raises `ConstructorError` if `node` refers back to itself before
    any object has been created for it.
    """
    # NOTE(review): the two early returns below, and the raise, leave
    # self.deep_construct set to True when `deep` is true, because the
    # restore at the end is skipped.  Confirm whether that is intended.
    if deep:
        old_deep = self.deep_construct
        self.deep_construct = True
    if node in self.constructed_objects:
        return self.constructed_objects[node]
    if node in self.recursive_objects:
        obj = self.recursive_objects[node]
        if obj is None :
            raise ConstructorError(None, None,
                 "found unconstructable recursive node", node.start_mark)
        return obj
    # Mark the node as under construction, with no object available yet.
    self.recursive_objects[node] = None
    constructor = None
    tag_suffix = None
    if node.tag in self.yaml_constructors:
        constructor = self.yaml_constructors[node.tag]
    else:
        for tag_prefix in self.yaml_multi_constructors:
            if node.tag.startswith(tag_prefix):
                tag_suffix = node.tag[len(tag_prefix):]
                constructor = self.yaml_multi_constructors[tag_prefix]
                break
        else:
            # No exact or prefix match: fall back to the catch-all
            # constructors, then to a default chosen by node kind.
            if None in self.yaml_multi_constructors:
                tag_suffix = node.tag
                constructor = self.yaml_multi_constructors[None]
            elif None in self.yaml_constructors:
                constructor = self.yaml_constructors[None]
            elif isinstance(node, yaml.nodes.ScalarNode):
                constructor = self.__class__.construct_scalar
            elif isinstance(node, yaml.nodes.SequenceNode):
                constructor = self.__class__.construct_sequence
            elif isinstance(node, yaml.nodes.MappingNode):
                constructor = self.__class__.construct_mapping
    if tag_suffix is None:
        data = constructor(self, node)
    else:
        data = constructor(self, tag_suffix, node)
    if isinstance(data, types.GeneratorType):
        # The first value the generator yields is the object itself.
        generator = data
        data = generator.next()
        if self.deep_construct:
            # Publish the object before running the rest of the
            # generator, so a reference back to this node made
            # meanwhile gets it instead of raising.
            self.recursive_objects[node] = data
            for dummy in generator:
                pass
        else:
            # Queue the rest of the generator to be run later.
            self.state_generators.append(generator)
    self.constructed_objects[node] = data
    del self.recursive_objects[node]
    if deep:
        self.deep_construct = old_deep
    return data
yaml.constructor.BaseConstructor.construct_object = construct_object