2 A simple XPath-like language for tree traversal.
4 This works by creating a filter chain of generator functions. Each
5 function selects a part of the expression, e.g. a child node, a
6 specific descendant or a node that holds an attribute.
12 path_tokenizer = re.compile(
19 "([^/\[\]\(\)@=\s]+)|"
23 def iterchildren(node, attr_name):
24 # returns an iterable of all child nodes of that name
25 child = getattr(node, attr_name)
27 if type(child) is list:
34 def _get_first_or_none(it):
38 except AttributeError:
46 return node.__class__.__name__.split('.')[-1]
48 def parse_func(next, token):
52 raise ValueError("Expected '(' after function name '%s'" % name)
53 predicate = handle_predicate(next, token)
54 return name, predicate
56 def handle_func_not(next, token):
60 name, predicate = parse_func(next, token)
64 if _get_first_or_none(predicate([node])) is None:
68 def handle_name(next, token):
76 return functions[name](next, token)
79 for attr_name in node.child_attrs:
80 for child in iterchildren(node, attr_name):
81 if type_name(child) == name:
85 def handle_star(next, token):
91 for name in node.child_attrs:
92 for child in iterchildren(node, name):
96 def handle_dot(next, token):
104 def handle_descendants(next, token):
110 def iter_recursive(node):
111 for name in node.child_attrs:
112 for child in iterchildren(node, name):
114 for c in iter_recursive(child):
118 def iter_recursive(node):
119 for name in node.child_attrs:
120 for child in iterchildren(node, name):
121 if type_name(child) == node_name:
123 for c in iter_recursive(child):
126 raise ValueError("Expected node name after '//'")
130 for child in iter_recursive(node):
135 def handle_attribute(next, token):
138 raise ValueError("Expected attribute name")
143 except StopIteration:
147 value = parse_path_value(next)
148 if sys.version_info >= (2,6) or (sys.version_info >= (2,4) and '.' not in name):
150 readattr = operator.attrgetter(name)
152 name_path = name.split('.')
155 for attr in name_path:
156 attr_value = getattr(attr_value, attr)
162 attr_value = readattr(node)
163 except AttributeError:
165 if attr_value is not None:
171 attr_value = readattr(node)
172 except AttributeError:
174 if attr_value == value:
178 def parse_path_value(next):
182 if value[:1] == "'" or value[:1] == '"':
189 name = token[1].lower()
192 elif name == 'false':
194 raise ValueError("Invalid attribute predicate: '%s'" % value)
196 def handle_predicate(next, token):
199 while token[0] != ']':
200 selector.append( operations[token[0]](next, token) )
203 except StopIteration:
209 if not token[0] and token[1] == 'and':
210 return logical_and(selector, handle_predicate(next, token))
214 subresult = iter((node,))
215 for select in selector:
216 subresult = select(subresult)
217 predicate_result = _get_first_or_none(subresult)
218 if predicate_result is not None:
222 def logical_and(lhs_selects, rhs_select):
225 subresult = iter((node,))
226 for select in lhs_selects:
227 subresult = select(subresult)
228 predicate_result = _get_first_or_none(subresult)
229 subresult = iter((node,))
230 if predicate_result is not None:
231 for result_node in rhs_select(subresult):
237 "@": handle_attribute,
241 "//": handle_descendants,
242 "[": handle_predicate,
246 'not' : handle_func_not
249 def _build_path_iterator(path):
251 stream = iter([ (special,text)
252 for (special,text) in path_tokenizer(path)
253 if special or text ])
256 except AttributeError:
264 selector.append(operations[token[0]](_next, token))
265 except StopIteration:
266 raise ValueError("invalid path")
271 except StopIteration:
277 def iterfind(node, path):
278 selector_chain = _build_path_iterator(path)
279 result = iter((node,))
280 for select in selector_chain:
281 result = select(result)
284 def find_first(node, path):
285 return _get_first_or_none(iterfind(node, path))
287 def find_all(node, path):
288 return list(iterfind(node, path))