# lxml.objectify -- Cython implementation of the Python object API for XML.
# cython: binding=True
# cython: auto_pickle=False
# cython: language_level=3
"""
The ``lxml.objectify`` module implements a Python object API for XML.
It is based on `lxml.etree`.
"""
cimport cython
from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup
from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode
from lxml.includes.tree cimport const_xmlChar, _xcstr
from lxml cimport python
from lxml.includes cimport tree
cimport lxml.includes.etreepublic as cetree
cimport libc.string as cstring_h # not to be confused with stdlib 'string'
from libc.string cimport const_char
# Names exported by ``from lxml.objectify import *``.
__all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker',
           'FloatElement', 'IntElement', 'NoneElement',
           'NumberElement', 'ObjectPath', 'ObjectifiedDataElement',
           'ObjectifiedElement', 'ObjectifyElementClassLookup',
           'PYTYPE_ATTRIBUTE', 'PyType', 'StringElement', 'SubElement',
           'XML', 'annotate', 'deannotate', 'dump', 'enable_recursive_str',
           'fromstring', 'getRegisteredTypes', 'makeparser', 'parse',
           'pyannotate', 'pytypename', 'set_default_parser',
           'set_pytype_attribute_tag', 'xsiannotate']

cdef object etree
from lxml import etree
# initialize C-API of lxml.etree
import_lxml__etree()

# This module reports the same version as lxml.etree.
__version__ = etree.__version__

cdef object _float_is_inf, _float_is_nan
from math import isinf as _float_is_inf, isnan as _float_is_nan

cdef object re
import re

# Exceptions that are swallowed when a type check function rejects a value
# during type guessing.
cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError)
# Matcher for names that start and end with a double underscore.
cdef object is_special_method = re.compile('__.*__$').match
cdef object _typename(object t):
    """Return the type name of ``t`` without any dotted prefix.

    The name is taken from ``python._fqtypename()``; everything up to and
    including the last '.' is dropped.
    """
    cdef const_char* c_name = python._fqtypename(t)
    c_last_dot = cstring_h.strrchr(c_name, c'.')
    if c_last_dot is not NULL:
        # skip past the dot to the bare name
        c_name = c_last_dot + 1
    return pyunicode(<const_xmlChar*>c_name)
# namespace/name for "pytype" hint attribute
# All of these are (re)assigned by set_pytype_attribute_tag(); each value is
# kept both as a Python object and as a raw C string pointer into the
# corresponding UTF-8 bytes object.
cdef object PYTYPE_NAMESPACE
cdef bytes PYTYPE_NAMESPACE_UTF8
cdef const_xmlChar* _PYTYPE_NAMESPACE

cdef object PYTYPE_ATTRIBUTE_NAME
cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8
cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME

# Namespaced attribute name; public, see __all__.
PYTYPE_ATTRIBUTE = None

# Reserved type name: PyType.register() refuses to register a type with it.
cdef unicode TREE_PYTYPE_NAME = "TREE"
cdef tuple _unicodeAndUtf8(s):
    """Return ``s`` together with its UTF-8 encoded bytes form."""
    utf8 = python.PyUnicode_AsUTF8String(s)
    return (s, utf8)
def set_pytype_attribute_tag(attribute_tag=None):
    """set_pytype_attribute_tag(attribute_tag=None)

    Change name and namespace of the XML attribute that holds Python type
    information.

    Do not use this unless you know what you are doing.

    Calling it without an argument restores the default:
    "{http://codespeak.net/lxml/objectify/pytype}pytype"
    """
    global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME
    global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8
    global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8

    if attribute_tag is not None:
        # user supplied tag: split it and derive the unicode forms
        PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
            cetree.getNsTag(attribute_tag)
        PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8')
        PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8')
    else:
        # reset to the built-in default
        PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \
            _unicodeAndUtf8("http://codespeak.net/lxml/objectify/pytype")
        PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
            _unicodeAndUtf8("pytype")

    # C pointers into the bytes objects stored in the module globals above
    _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8
    _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8
    PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName(
        _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
# Install the default pytype attribute name at import time.
set_pytype_attribute_tag()

# namespaces for XML Schema
cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8
XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \
    _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema")
cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8)

cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8
XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \
    _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema-instance")
cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8)

# Namespaced names of the "nil" and "type" attributes in the
# XML Schema instance namespace.
cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
################################################################################
# Element class for the main API
cdef class ObjectifiedElement(ElementBase):
    """Main XML Element class.

    Element children are accessed as object attributes.  Multiple children
    with the same name are available through a list index.  Example::

        >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>")
        >>> second_c2 = root.c1.c2[1]
        >>> print(second_c2.text)
        1

    Note that you cannot (and must not) instantiate this class or its
    subclasses.
    """
    def __iter__(self):
        """Iterate over self and all siblings with the same tag.
        """
        parent = self.getparent()
        if parent is None:
            # no parent => no siblings, iterate over self only
            return iter([self])
        return etree.ElementChildIterator(parent, tag=self.tag)

    def __str__(self):
        # With the module flag __RECURSIVE_STR set, return the recursive
        # dump of the subtree; otherwise the element text (or '').
        if __RECURSIVE_STR:
            return _dump(self, 0)
        else:
            return textOf(self._c_node) or ''

    # pickle support for objectified Element
    def __reduce__(self):
        # pickled as the serialised XML, restored through fromstring()
        return fromstring, (etree.tostring(self),)

    @property
    def text(self):
        # read-only: __setattr__ rejects assignments to 'text'
        return textOf(self._c_node)

    @property
    def __dict__(self):
        """A fake implementation for __dict__ to support dir() etc.

        Note that this only considers the first child with a given name.
        """
        cdef _Element child
        cdef dict children
        c_ns = tree._getNs(self._c_node)
        # restrict the iteration to the namespace of self, if it has one
        tag = "{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None
        children = {}
        for child in etree.ElementChildIterator(self, tag=tag):
            if c_ns is NULL and tree._getNs(child._c_node) is not NULL:
                # self has no namespace => skip namespaced children
                continue
            name = pyunicode(child._c_node.name)
            if name not in children:
                children[name] = child
        return children

    def __len__(self):
        """Count self and siblings with the same tag.
        """
        return _countSiblings(self._c_node)

    def countchildren(self):
        """countchildren(self)

        Return the number of children of this element, regardless of their
        name.
        """
        # copied from etree
        cdef Py_ssize_t c
        cdef tree.xmlNode* c_node
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                c += 1
            c_node = c_node.next
        return c

    def getchildren(self):
        """getchildren(self)

        Returns a sequence of all direct children.  The elements are
        returned in document order.
        """
        cdef tree.xmlNode* c_node
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                result.append(cetree.elementFactory(self._doc, c_node))
            c_node = c_node.next
        return result

    def __getattr__(self, tag):
        """Return the (first) child with the given tag name.  If no namespace
        is provided, the child will be looked up in the same one as self.

        Raises AttributeError if there is no such child.
        """
        return _lookupChildOrRaise(self, tag)

    def __setattr__(self, tag, value):
        """Set the value of the (first) child with the given tag name.  If no
        namespace is provided, the child will be looked up in the same one as
        self.

        'text' and 'pyval' are read-only and raise TypeError; 'tail', 'tag'
        and 'base' are set on the element itself instead of on a child.
        """
        cdef _Element element
        # properties are looked up /after/ __setattr__, so we must emulate them
        if tag == 'text' or tag == 'pyval':
            # read-only !
            raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable"
        elif tag == 'tail':
            cetree.setTailText(self._c_node, value)
            return
        elif tag == 'tag':
            ElementBase.tag.__set__(self, value)
            return
        elif tag == 'base':
            ElementBase.base.__set__(self, value)
            return
        tag = _buildChildTag(self, tag)
        element = _lookupChild(self, tag)
        if element is None:
            # no such child yet => append a new one
            _appendValue(self, tag, value)
        else:
            _replaceElement(element, value)

    def __delattr__(self, tag):
        # removes the first child with that tag, AttributeError if missing
        child = _lookupChildOrRaise(self, tag)
        self.remove(child)

    def addattr(self, tag, value):
        """addattr(self, tag, value)

        Add a child value to the element.

        As opposed to append(), it sets a data value, not an element.
        """
        _appendValue(self, _buildChildTag(self, tag), value)

    def __getitem__(self, key):
        """Return a sibling, counting from the first child of the parent.  The
        method behaves like both a dict and a sequence.

        * If argument is an integer, returns the sibling at that position.

        * If argument is a string, does the same as getattr().  This can be
          used to provide namespaces for element lookup, or to look up
          children with special names (``text`` etc.).

        * If argument is a slice object, returns the matching slice.
        """
        cdef tree.xmlNode* c_self_node
        cdef tree.xmlNode* c_parent
        cdef tree.xmlNode* c_node
        cdef Py_ssize_t c_index
        if python._isString(key):
            return _lookupChildOrRaise(self, key)
        elif isinstance(key, slice):
            return list(self)[key]
        # normal item access
        c_index = key   # raises TypeError if necessary
        c_self_node = self._c_node
        c_parent = c_self_node.parent
        if c_parent is NULL:
            # without a parent, self is the only "sibling"
            if c_index == 0 or c_index == -1:
                return self
            raise IndexError, unicode(key)
        if c_index < 0:
            # negative indices are searched backwards from the last child
            c_node = c_parent.last
        else:
            c_node = c_parent.children
        c_node = _findFollowingSibling(
            c_node, tree._getNs(c_self_node), c_self_node.name, c_index)
        if c_node is NULL:
            raise IndexError, unicode(key)
        return elementFactory(self._doc, c_node)

    def __setitem__(self, key, value):
        """Set the value of a sibling, counting from the first child of the
        parent.  Implements key assignment, item assignment and slice
        assignment.

        * If argument is an integer, sets the sibling at that position.

        * If argument is a string, does the same as setattr().  This is used
          to provide namespaces for element lookup.

        * If argument is a sequence (list, tuple, etc.), assign the contained
          items to the siblings.
        """
        cdef _Element element
        cdef tree.xmlNode* c_node
        if python._isString(key):
            key = _buildChildTag(self, key)
            element = _lookupChild(self, key)
            if element is None:
                _appendValue(self, key, value)
            else:
                _replaceElement(element, value)
            return

        if self._c_node.parent is NULL:
            # the 'root[i] = ...' case
            raise TypeError, "assignment to root element is invalid"

        if isinstance(key, slice):
            # slice assignment
            _setSlice(key, self, value)
        else:
            # normal index assignment
            if key < 0:
                c_node = self._c_node.parent.last
            else:
                c_node = self._c_node.parent.children
            c_node = _findFollowingSibling(
                c_node, tree._getNs(self._c_node), self._c_node.name, key)
            if c_node is NULL:
                raise IndexError, unicode(key)
            element = elementFactory(self._doc, c_node)
            _replaceElement(element, value)

    def __delitem__(self, key):
        # Delete a sibling (integer key) or a slice of siblings from the
        # parent; raises TypeError when self has no parent.
        parent = self.getparent()
        if parent is None:
            raise TypeError, "deleting items not supported by root element"
        if isinstance(key, slice):
            # slice deletion
            del_items = list(self)[key]
            remove = parent.remove
            for el in del_items:
                remove(el)
        else:
            # normal index deletion
            sibling = self.__getitem__(key)
            parent.remove(sibling)

    def descendantpaths(self, prefix=None):
        """descendantpaths(self, prefix=None)

        Returns a list of object path expressions for all descendants.

        A non-string ``prefix`` is joined with '.' before use.
        """
        if prefix is not None and not python._isString(prefix):
            prefix = '.'.join(prefix)
        return _build_descendant_paths(self._c_node, prefix)
cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    # Test whether c_node carries the given name and namespace.
    #
    # The name is compared by pointer, not by content.  A NULL c_href
    # matches any namespace; an empty c_href matches only nodes without
    # a namespace.
    if c_node.name != c_name:
        return False
    if c_href == NULL:
        return True
    c_ns = tree._getNs(c_node)
    if c_ns != NULL:
        return tree.xmlStrcmp(c_ns, c_href) == 0
    return c_href[0] == c'\0'
cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
    # Count c_start_node plus all of its sibling elements that have the
    # same name and namespace.
    cdef tree.xmlNode* c_sibling
    cdef Py_ssize_t total = 1  # the start node itself
    c_name = c_start_node.name
    c_ns = tree._getNs(c_start_node)

    # siblings after the start node
    c_sibling = c_start_node.next
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and \
                _tagMatches(c_sibling, c_ns, c_name):
            total += 1
        c_sibling = c_sibling.next

    # siblings before the start node
    c_sibling = c_start_node.prev
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and \
                _tagMatches(c_sibling, c_ns, c_name):
            total += 1
        c_sibling = c_sibling.prev
    return total
cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node,
                                         const_xmlChar* href, const_xmlChar* name,
                                         Py_ssize_t index):
    # Starting at c_node, return the index-th element that matches
    # href/name, or NULL if there are not enough matches.
    #
    # A non-negative index walks forwards; a negative index walks
    # backwards, with -1 meaning the first match in that direction.
    cdef tree.xmlNode* (*advance)(tree.xmlNode*)
    if index < 0:
        index = -1 - index
        advance = cetree.previousElement
    else:
        advance = cetree.nextElement
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE and \
                _tagMatches(c_node, href, name):
            index -= 1
            if index < 0:
                return c_node
        c_node = advance(c_node)
    return NULL
cdef object _lookupChild(_Element parent, tag):
    # Return the first child of parent that matches tag, or None.
    # A tag without namespace is looked up in the namespace of parent.
    cdef tree.xmlNode* c_found
    cdef tree.xmlNode* c_parent_node = parent._c_node
    ns, tag = cetree.getNsTagWithEmptyNs(tag)
    c_tag = tree.xmlDictExists(
        c_parent_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag))
    if c_tag is NULL:
        return None  # not in the hash map => not in the tree
    if ns is not None:
        c_href = _xcstr(ns)
    else:
        # either inherit ns from parent or use empty (i.e. no) namespace
        c_href = tree._getNs(c_parent_node) or <const_xmlChar*>''
    c_found = _findFollowingSibling(c_parent_node.children, c_href, c_tag, 0)
    if c_found is not NULL:
        return elementFactory(parent._doc, c_found)
    return None
cdef object _lookupChildOrRaise(_Element parent, tag):
    # Like _lookupChild(), but raise AttributeError for a missing child.
    child = _lookupChild(parent, tag)
    if child is not None:
        return child
    raise AttributeError, "no such child: " + _buildChildTag(parent, tag)
cdef object _buildChildTag(_Element parent, tag):
    # Build the namespaced name for a child tag; a tag without namespace
    # takes over the namespace of parent.
    cdef const_xmlChar* c_href
    ns, tag = cetree.getNsTag(tag)
    c_tag = _xcstr(tag)
    if ns is None:
        c_href = tree._getNs(parent._c_node)
    else:
        c_href = _xcstr(ns)
    return cetree.namespacedNameFromNsName(c_href, c_tag)
cdef _replaceElement(_Element element, value):
    # Replace element in its parent by a new element built from value.
    # Lists and tuples are handed over to slice assignment instead.
    cdef _Element replacement
    if isinstance(value, (list, tuple)):
        element[:] = value
        return
    if isinstance(value, _Element):
        # deep copy the new element
        replacement = cetree.deepcopyNodeToDocument(
            element._doc, (<_Element>value)._c_node)
        replacement.tag = element.tag
    else:
        replacement = element.makeelement(element.tag)
        _setElementValue(replacement, value)
    element.getparent().replace(element, replacement)
cdef _appendValue(_Element parent, tag, value):
    # Append value to parent as a new child named tag.  Lists and tuples
    # are appended item by item.
    cdef _Element child
    if isinstance(value, (list, tuple)):
        for entry in value:
            _appendValue(parent, tag, entry)
        return
    if isinstance(value, _Element):
        # deep copy the new element
        child = cetree.deepcopyNodeToDocument(
            parent._doc, (<_Element>value)._c_node)
        child.tag = tag
    else:
        child = cetree.makeElement(
            tag, parent._doc, None, None, None, None, None)
        _setElementValue(child, value)
    cetree.appendChildToElement(parent, child)
cdef _setElementValue(_Element element, value):
    # Store value as the text of element and maintain the xsi:nil and
    # pytype attributes accordingly.  An _Element value replaces element.
    if isinstance(value, _Element):
        _replaceElement(element, value)
        return

    if value is None:
        cetree.setAttributeValue(
            element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
    else:
        cetree.delAttributeFromNsName(
            element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
        is_string = python._isString(value)
        pytype_name = "str" if is_string else _typename(value)
        py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
        if not is_string:
            # strings are stored as they are, everything else is converted
            if py_type is None:
                value = unicode(value)
            else:
                value = py_type.stringify(value)
        if py_type is None:
            # unknown type => drop any pytype hint
            cetree.delAttributeFromNsName(
                element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
    cetree.setNodeText(element._c_node, value)
cdef _setSlice(sliceobject, _Element target, items):
    # Implements slice assignment ``target[sliceobject] = items`` on the
    # siblings of target that share its tag.
    #
    # New elements are created for all items (deep copies for elements,
    # data elements otherwise).  They replace the currently selected
    # siblings pairwise; surplus old siblings are removed and surplus new
    # elements are inserted.
    #
    # Raises ValueError for a zero step and for an extended slice whose
    # length differs from the number of items.
    cdef _Element parent
    cdef tree.xmlNode* c_node
    cdef Py_ssize_t c_step, c_start, pos  # NOTE(review): c_start is never used
    # collect existing slice
    if (<slice>sliceobject).step is None:
        c_step = 1
    else:
        c_step = (<slice>sliceobject).step
    if c_step == 0:
        raise ValueError, "Invalid slice"
    cdef list del_items = target[sliceobject]

    # collect new values
    new_items = []
    tag = target.tag
    for item in items:
        if isinstance(item, _Element):
            # deep copy the new element
            new_element = cetree.deepcopyNodeToDocument(
                target._doc, (<_Element>item)._c_node)
            new_element.tag = tag
        else:
            new_element = cetree.makeElement(
                tag, target._doc, None, None, None, None, None)
            _setElementValue(new_element, item)
        new_items.append(new_element)

    # sanity check - raise what a list would raise
    if c_step != 1 and len(del_items) != len(new_items):
        raise ValueError, \
            f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}"

    # replace existing items
    pos = 0
    parent = target.getparent()
    replace = parent.replace
    while pos < len(new_items) and pos < len(del_items):
        replace(del_items[pos], new_items[pos])
        pos += 1
    # remove leftover items
    if pos < len(del_items):
        remove = parent.remove
        while pos < len(del_items):
            remove(del_items[pos])
            pos += 1
    # append remaining new items
    if pos < len(new_items):
        # the sanity check above guarantees (step == 1)
        if pos > 0:
            # continue right after the last element that was put in place
            item = new_items[pos-1]
        else:
            # Nothing was replaced: find the sibling at position (start - 1)
            # and insert after it.
            # NOTE(review): slice.start may be None here (e.g. for
            # ``el[:0] = [...]``); the comparison and the subtraction below
            # would then raise TypeError on Python 3 -- confirm and decide
            # on the intended insert position.
            if (<slice>sliceobject).start > 0:
                c_node = parent._c_node.children
            else:
                c_node = parent._c_node.last
            c_node = _findFollowingSibling(
                c_node, tree._getNs(target._c_node), target._c_node.name,
                (<slice>sliceobject).start - 1)
            if c_node is NULL:
                # no such sibling => append everything to the parent
                while pos < len(new_items):
                    cetree.appendChildToElement(parent, new_items[pos])
                    pos += 1
                return
            item = cetree.elementFactory(parent._doc, c_node)
        while pos < len(new_items):
            # chain the insertions: each new element goes after the previous
            add = item.addnext
            item = new_items[pos]
            add(item)
            pos += 1
################################################################################
# Data type support in subclasses
cdef class ObjectifiedDataElement(ObjectifiedElement):
    """Common base class of all data type Elements.

    Subclasses should override the 'pyval' property and possibly the
    __str__ method.
    """
    @property
    def pyval(self):
        # plain element text, may be None
        return textOf(self._c_node)

    def __str__(self):
        text = textOf(self._c_node)
        return text or ''

    def __repr__(self):
        text = textOf(self._c_node)
        return text or ''

    def _setText(self, s):
        """Replace the text content of the element.

        For use in subclasses only.  Don't use unless you know what you are
        doing.
        """
        cetree.setNodeText(self._c_node, s)
cdef class NumberElement(ObjectifiedDataElement):
    # Base class of the numeric data elements.
    #
    # The Python value is obtained by calling the stored parser function on
    # the element text (see _parseNumber()).  Operators unwrap their
    # operands with _numericValueOf() and return plain Python values.

    # callable that converts the element text into a Python value
    cdef object _parse_value

    def _setValueParser(self, function):
        """Set the function that parses the Python value from a string.

        Do not use this unless you know what you are doing.
        """
        self._parse_value = function

    @property
    def pyval(self):
        return _parseNumber(self)

    # --- conversions -------------------------------------------------------

    def __int__(self):
        return int(_parseNumber(self))

    def __float__(self):
        return float(_parseNumber(self))

    def __complex__(self):
        return complex(_parseNumber(self))

    def __str__(self):
        return unicode(_parseNumber(self))

    def __repr__(self):
        return repr(_parseNumber(self))

    def __oct__(self):
        return oct(_parseNumber(self))

    def __hex__(self):
        return hex(_parseNumber(self))

    # --- comparison and hashing are based on the parsed value -------------

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(_parseNumber(self))

    # --- arithmetic --------------------------------------------------------

    def __add__(self, other):
        return _numericValueOf(self) + _numericValueOf(other)

    def __radd__(self, other):
        return _numericValueOf(other) + _numericValueOf(self)

    def __sub__(self, other):
        return _numericValueOf(self) - _numericValueOf(other)

    def __rsub__(self, other):
        return _numericValueOf(other) - _numericValueOf(self)

    def __mul__(self, other):
        return _numericValueOf(self) * _numericValueOf(other)

    def __rmul__(self, other):
        return _numericValueOf(other) * _numericValueOf(self)

    def __div__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rdiv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __truediv__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rtruediv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __floordiv__(self, other):
        return _numericValueOf(self) // _numericValueOf(other)

    def __rfloordiv__(self, other):
        return _numericValueOf(other) // _numericValueOf(self)

    def __mod__(self, other):
        return _numericValueOf(self) % _numericValueOf(other)

    def __rmod__(self, other):
        return _numericValueOf(other) % _numericValueOf(self)

    def __divmod__(self, other):
        return divmod(_numericValueOf(self), _numericValueOf(other))

    def __rdivmod__(self, other):
        return divmod(_numericValueOf(other), _numericValueOf(self))

    def __pow__(self, other, modulo):
        # the modulo operand is passed on as it is, without unwrapping
        if modulo is None:
            return _numericValueOf(self) ** _numericValueOf(other)
        else:
            return pow(_numericValueOf(self), _numericValueOf(other), modulo)

    def __rpow__(self, other, modulo):
        if modulo is None:
            return _numericValueOf(other) ** _numericValueOf(self)
        else:
            return pow(_numericValueOf(other), _numericValueOf(self), modulo)

    # --- unary operators ---------------------------------------------------

    def __neg__(self):
        return - _numericValueOf(self)

    def __pos__(self):
        return + _numericValueOf(self)

    def __abs__(self):
        return abs( _numericValueOf(self) )

    def __bool__(self):
        return bool(_numericValueOf(self))

    def __invert__(self):
        return ~ _numericValueOf(self)

    # --- bit operations ----------------------------------------------------

    def __lshift__(self, other):
        return _numericValueOf(self) << _numericValueOf(other)

    def __rlshift__(self, other):
        return _numericValueOf(other) << _numericValueOf(self)

    def __rshift__(self, other):
        return _numericValueOf(self) >> _numericValueOf(other)

    def __rrshift__(self, other):
        return _numericValueOf(other) >> _numericValueOf(self)

    def __and__(self, other):
        return _numericValueOf(self) & _numericValueOf(other)

    def __rand__(self, other):
        return _numericValueOf(other) & _numericValueOf(self)

    def __or__(self, other):
        return _numericValueOf(self) | _numericValueOf(other)

    def __ror__(self, other):
        return _numericValueOf(other) | _numericValueOf(self)

    def __xor__(self, other):
        return _numericValueOf(self) ^ _numericValueOf(other)

    def __rxor__(self, other):
        return _numericValueOf(other) ^ _numericValueOf(self)
cdef class IntElement(NumberElement):
    # Number element whose text is parsed with int().

    def _init(self):
        # install int() as the value parser
        self._parse_value = int

    def __index__(self):
        return int(_parseNumber(self))
cdef class FloatElement(NumberElement):
    # Number element whose text is parsed with float().

    def _init(self):
        # install float() as the value parser
        self._parse_value = float
cdef class StringElement(ObjectifiedDataElement):
    """String data class.

    Note that this class does *not* support the sequence protocol of strings:
    len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported.
    Instead, use the .text attribute to get a 'real' string.
    """
    @property
    def pyval(self):
        # missing text is reported as the empty string
        text = textOf(self._c_node)
        return text or ''

    def __repr__(self):
        text = textOf(self._c_node)
        return repr(text or '')

    def strlen(self):
        # length of the text, 0 if there is none
        text = textOf(self._c_node)
        return 0 if text is None else len(text)

    def __bool__(self):
        text = textOf(self._c_node)
        return bool(text)

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        text = textOf(self._c_node)
        return hash(text or '')

    def __add__(self, other):
        left = _strValueOf(self)
        right = _strValueOf(other)
        return left + right

    def __radd__(self, other):
        right = _strValueOf(self)
        left = _strValueOf(other)
        return left + right

    def __mul__(self, other):
        # either operand may be the StringElement here
        if isinstance(self, StringElement):
            return (textOf((<StringElement>self)._c_node) or '') * _numericValueOf(other)
        if isinstance(other, StringElement):
            return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or '')
        return NotImplemented

    def __rmul__(self, other):
        return _numericValueOf(other) * (textOf((<StringElement>self)._c_node) or '')

    def __mod__(self, other):
        template = _strValueOf(self) or ''
        return template % other

    def __int__(self):
        return int(textOf(self._c_node))

    def __float__(self):
        return float(textOf(self._c_node))

    def __complex__(self):
        return complex(textOf(self._c_node))
cdef class NoneElement(ObjectifiedDataElement):
    # Data element that behaves like None in str(), repr(), bool(),
    # hash() and comparisons.

    def __str__(self):
        return "None"

    def __repr__(self):
        return "None"

    def __bool__(self):
        return False

    def __richcmp__(self, other, int op):
        # substitute None for whichever operand is the NoneElement
        if self is None or other is None:
            left = right = None
        elif isinstance(self, NoneElement):
            left, right = None, other
        else:
            left, right = self, None
        return python.PyObject_RichCompare(left, right, op)

    def __hash__(self):
        return hash(None)

    @property
    def pyval(self):
        return None
cdef class BoolElement(IntElement):
    """Boolean type based on string values: 'true' or 'false'.

    Note that this inherits from IntElement to mimic the behaviour of
    Python's bool type.
    """
    def _init(self):
        self._parse_value = _parseBool  # wraps as Python callable

    def __bool__(self):
        return _parseBool(textOf(self._c_node))

    def __int__(self):
        # adding to 0 turns the C boolean into a Python int
        return 0 + _parseBool(textOf(self._c_node))

    def __float__(self):
        return 0.0 + _parseBool(textOf(self._c_node))

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(_parseBool(textOf(self._c_node)))

    def __str__(self):
        return unicode(_parseBool(textOf(self._c_node)))

    def __repr__(self):
        return repr(_parseBool(textOf(self._c_node)))

    @property
    def pyval(self):
        return _parseBool(textOf(self._c_node))
cdef _checkBool(s):
    # Type check function for booleans: raise ValueError unless s is a
    # recognised boolean literal.  None is rejected as well.
    if s is None or __parseBoolAsInt(s) == -1:
        raise ValueError
cdef bint _parseBool(s) except -1:
    # Parse a boolean literal.  None counts as False; anything that is
    # not a recognised literal raises ValueError.
    cdef int parsed
    if s is not None:
        parsed = __parseBoolAsInt(s)
        if parsed == -1:
            raise ValueError, f"Invalid boolean value: '{s}'"
        return parsed
    return False
cdef inline int __parseBoolAsInt(text) except -2:
    # Map 'false'/'0' to 0 and 'true'/'1' to 1; return -1 for anything else.
    if text == 'false' or text == '0':
        return 0
    if text == 'true' or text == '1':
        return 1
    return -1
cdef object _parseNumber(NumberElement element):
    # Run the parser function of the element on its text content.
    text = textOf(element._c_node)
    return element._parse_value(text)
# States of the number syntax checker in _checkNumber().
cdef enum NumberParserState:
    NPS_SPACE_PRE = 0    # initial state, leading whitespace
    NPS_SIGN = 1         # after a leading '+' or '-'
    NPS_DIGITS = 2       # inside the integer digits
    NPS_POINT_LEAD = 3   # after a '.' that was not preceded by digits
    NPS_POINT = 4        # after a '.' that followed digits
    NPS_FRACTION = 5     # inside the digits after the '.'
    NPS_EXP = 6          # after the exponent marker 'E'
    NPS_EXP_SIGN = 7     # after the sign of the exponent
    NPS_DIGITS_EXP = 8   # inside the exponent digits
    NPS_SPACE_TAIL = 9   # trailing whitespace
    NPS_INF1 = 20        # after 'i' of "inf"
    NPS_INF2 = 21        # after "in"
    NPS_INF3 = 22        # after "inf"
    NPS_NAN1 = 23        # after 'n' of "nan"
    NPS_NAN2 = 24        # after "na"
    NPS_NAN3 = 25        # after "nan"
    NPS_ERROR = 99       # invalid input
# Fused type that lets _checkNumber() be specialised for bytes and for
# unicode strings.
ctypedef fused bytes_unicode:
    bytes
    unicode
cdef _checkNumber(bytes_unicode s, bint allow_float):
    # Check that s has the syntax of a number and raise ValueError if not.
    #
    # The string is scanned character by character through the states of
    # NumberParserState.  Surrounding whitespace is accepted.  With
    # allow_float being false, '.', 'E', "inf" and "nan" are rejected.
    cdef Py_UCS4 c
    cdef NumberParserState state = NPS_SPACE_PRE

    for c in s:
        if c in '0123456789':
            if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP):
                pass
            elif state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_DIGITS
            elif state in (NPS_POINT_LEAD, NPS_POINT):
                state = NPS_FRACTION
            elif state in (NPS_EXP, NPS_EXP_SIGN):
                state = NPS_DIGITS_EXP
            else:
                state = NPS_ERROR
        else:
            if c == '.':
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_POINT_LEAD
                elif state == NPS_DIGITS:
                    state = NPS_POINT
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            elif c in '-+':
                if state == NPS_SPACE_PRE:
                    state = NPS_SIGN
                elif state == NPS_EXP:
                    state = NPS_EXP_SIGN
                else:
                    state = NPS_ERROR
            # NOTE(review): only an uppercase 'E' starts an exponent; a
            # lowercase 'e' ends up in the error branch below -- confirm
            # that this is intended.
            elif c == 'E':
                if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION):
                    state = NPS_EXP
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            # Allow INF and NaN. XMLSchema requires case, we don't, like Python.
            elif c in 'iI':
                state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR
            elif c in 'fF':
                state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR
            elif c in 'aA':
                state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR
            elif c in 'nN':
                # Python also allows [+-]NaN, so let's accept that.
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_NAN1 if allow_float else NPS_ERROR
                elif state == NPS_NAN2:
                    state = NPS_NAN3
                elif state == NPS_INF1:
                    state = NPS_INF2
                else:
                    state = NPS_ERROR
            # Allow spaces around text values.
            else:
                if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20':
                    if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL):
                        pass
                    elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3):
                        state = NPS_SPACE_TAIL
                    else:
                        state = NPS_ERROR
                else:
                    state = NPS_ERROR

            # no way out of the error state, stop scanning
            if state == NPS_ERROR:
                break

    # only these states mark a complete number
    if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL):
        raise ValueError
cdef _checkInt(s):
    # Type check function for integers: raises ValueError unless s has
    # integer syntax.
    return _checkNumber(<unicode>s, False)
cdef _checkFloat(s):
    # Type check function for floats: raises ValueError unless s has
    # number syntax (including fraction, exponent, inf and nan).
    return _checkNumber(<unicode>s, True)
cdef object _strValueOf(obj):
    # Return the string value of obj: strings as they are, elements by
    # their text (or ''), None as '', everything else through unicode().
    if obj is None:
        return ''
    if python._isString(obj):
        return obj
    if isinstance(obj, _Element):
        text = textOf((<_Element>obj)._c_node)
        return text or ''
    return unicode(obj)
cdef object _numericValueOf(obj):
    # Unwrap obj for use in arithmetic: the parsed value of number
    # elements, the 'pyval' of other objects that have one, else obj.
    if isinstance(obj, NumberElement):
        return _parseNumber(<NumberElement>obj)
    try:
        # not always numeric, but Python will raise the right exception
        return obj.pyval
    except AttributeError:
        return obj
cdef _richcmpPyvals(left, right, int op):
    # Compare two objects by their 'pyval' where they have one.
    left_value = getattr(left, 'pyval', left)
    right_value = getattr(right, 'pyval', right)
    return python.PyObject_RichCompare(left_value, right_value, op)
################################################################################
# Python type registry
cdef class PyType:
    """PyType(self, name, type_check, type_class, stringify=None)
    User defined type.

    Named type that contains a type check function, a type class that
    inherits from ObjectifiedDataElement and an optional "stringification"
    function.  The type check must take a string as argument and raise
    ValueError or TypeError if it cannot handle the string value.  It may be
    None in which case it is not considered for type guessing.  For registered
    named types, the 'stringify' function (or unicode() if None) is used to
    convert a Python object with type name 'name' to the string representation
    stored in the XML tree.

    Example::

        PyType('int', int, MyIntClass).register()

    Note that the order in which types are registered matters.  The first
    matching type will be used.
    """
    cdef readonly object name        # type name (unicode)
    cdef readonly object type_check  # check function or None
    cdef readonly object stringify   # value -> string conversion function
    cdef object _type                # the element class
    cdef list _schema_types          # XML Schema type names, see xmlSchemaTypes

    def __init__(self, name, type_check, type_class, stringify=None):
        # byte string names are accepted if they are plain ASCII
        if isinstance(name, bytes):
            name = (<bytes>name).decode('ascii')
        elif not isinstance(name, unicode):
            raise TypeError, "Type name must be a string"
        if type_check is not None and not callable(type_check):
            raise TypeError, "Type check function must be callable (or None)"
        # the tree type is the only one that is not a data element class
        if name != TREE_PYTYPE_NAME and \
               not issubclass(type_class, ObjectifiedDataElement):
            raise TypeError, \
                "Data classes must inherit from ObjectifiedDataElement"
        self.name = name
        self._type = type_class
        self.type_check = type_check
        if stringify is None:
            stringify = unicode
        self.stringify = stringify
        self._schema_types = []

    def __repr__(self):
        return "PyType(%s, %s)" % (self.name, self._type.__name__)

    def register(self, before=None, after=None):
        """register(self, before=None, after=None)

        Register the type.

        The additional keyword arguments 'before' and 'after' accept a
        sequence of type names that must appear before/after the new type in
        the type list.  If any of them is not currently known, it is simply
        ignored.  Raises ValueError if the dependencies cannot be fulfilled.
        """
        if self.name == TREE_PYTYPE_NAME:
            raise ValueError, "Cannot register tree type"
        if self.type_check is not None:
            # drop an existing entry that uses the same check function
            # NOTE(review): this happens before the dependency check below
            # can raise, so a failed register() leaves the old entry removed
            # -- confirm that this is acceptable.
            for item in _TYPE_CHECKS:
                if item[0] is self.type_check:
                    _TYPE_CHECKS.remove(item)
                    break

            entry = (self.type_check, self)
            first_pos = 0   # lowest allowed position (behind all 'after' types)
            last_pos = -1   # position of the first 'before' type, -1 if none
            if before or after:
                # at most one of the two can still be None at this point
                if before is None:
                    before = ()
                elif after is None:
                    after = ()
                for i, (check, pytype) in enumerate(_TYPE_CHECKS):
                    if last_pos == -1 and pytype.name in before:
                        last_pos = i
                    if pytype.name in after:
                        first_pos = i+1
            if last_pos == -1:
                _TYPE_CHECKS.append(entry)
            elif first_pos > last_pos:
                raise ValueError, "inconsistent before/after dependencies"
            else:
                _TYPE_CHECKS.insert(last_pos, entry)

        _PYTYPE_DICT[self.name] = self
        for xs_type in self._schema_types:
            _SCHEMA_TYPE_DICT[xs_type] = self

    def unregister(self):
        "unregister(self)"
        # only remove registry entries that actually refer to this object
        if _PYTYPE_DICT.get(self.name) is self:
            del _PYTYPE_DICT[self.name]
        # iterate over a copy since entries are deleted on the way
        for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()):
            if pytype is self:
                del _SCHEMA_TYPE_DICT[xs_type]
        if self.type_check is None:
            return
        try:
            _TYPE_CHECKS.remove( (self.type_check, self) )
        except ValueError:
            # was not in the list of type checks
            pass

    property xmlSchemaTypes:
        """The list of XML Schema datatypes this Python type maps to.

        Note that this must be set before registering the type!
        """
        def __get__(self):
            return self._schema_types
        def __set__(self, types):
            self._schema_types = list(map(unicode, types))
# Registries filled by PyType.register():
cdef dict _PYTYPE_DICT = {}        # type name -> PyType
cdef dict _SCHEMA_TYPE_DICT = {}   # XML Schema type name -> PyType
cdef list _TYPE_CHECKS = []        # ordered (type_check, PyType) pairs
cdef unicode _xml_bool(value):
    # Stringify a truth value as "true" or "false".
    if value:
        return "true"
    return "false"
cdef unicode _xml_float(value):
    # Stringify a float: infinities as "INF"/"-INF", not-a-number as
    # "NaN", everything else through repr().
    if _float_is_inf(value):
        return "INF" if value > 0 else "-INF"
    if _float_is_nan(value):
        return "NaN"
    text = repr(value)
    return unicode(text)
cdef _pytypename(obj):
    # All strings map to "str"; other objects to their bare type name.
    if python._isString(obj):
        return "str"
    return _typename(obj)
def pytypename(obj):
    """pytypename(obj)

    Find the name of the corresponding PyType for a Python object.
    """
    name = _pytypename(obj)
    return name
cdef _registerPyTypes():
    # Create and register the built-in types.  The registration order is
    # significant for type guessing and must be kept.
    int_type = PyType('int', _checkInt, IntElement)  # wraps functions for Python
    int_type.xmlSchemaTypes = ("integer", "int", "short", "byte", "unsignedShort",
                               "unsignedByte", "nonPositiveInteger",
                               "negativeInteger", "long", "nonNegativeInteger",
                               "unsignedLong", "unsignedInt", "positiveInteger",)
    int_type.register()

    # 'long' type just for backwards compatibility
    long_type = PyType('long', None, IntElement)
    long_type.register()

    float_type = PyType('float', _checkFloat, FloatElement, _xml_float)  # wraps functions for Python
    float_type.xmlSchemaTypes = ("double", "float")
    float_type.register()

    bool_type = PyType('bool', _checkBool, BoolElement, _xml_bool)  # wraps functions for Python
    bool_type.xmlSchemaTypes = ("boolean",)
    bool_type.register()

    str_type = PyType('str', None, StringElement)
    str_type.xmlSchemaTypes = ("string", "normalizedString", "token", "language",
                               "Name", "NCName", "ID", "IDREF", "ENTITY",
                               "NMTOKEN", )
    str_type.register()

    # since lxml 2.0
    none_type = PyType('NoneType', None, NoneElement)
    none_type.register()

    # backwards compatibility
    old_none_type = PyType('none', None, NoneElement)
    old_none_type.register()
# non-registered PyType for inner tree elements
# (PyType.register() raises ValueError for this type name)
cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement)

# Fill the type registries with the built-in types at import time.
_registerPyTypes()
def getRegisteredTypes():
    """getRegisteredTypes()

    Returns a list of the currently registered PyType objects.

    To add a new type, retrieve this list and call unregister() for all
    entries.  Then add the new type at a suitable position (possibly replacing
    an existing one) and call register() for all entries.

    This is necessary if the new type interferes with the type check functions
    of existing ones (normally only int/float/bool) and must be tried before
    other types.  To add a type that is not yet parsable by the current type
    check functions, you can simply register() it, which will append it to the
    end of the type list.
    """
    cdef list types = []
    cdef set known = set()
    # Types that have a check function come first (in check order), followed
    # by the entries of the name registry.  Each type name is reported once.
    candidates = [pytype for check, pytype in _TYPE_CHECKS]
    candidates.extend(_PYTYPE_DICT.values())
    for pytype in candidates:
        name = pytype.name
        if name in known:
            continue
        known.add(name)
        types.append(pytype)
    return types
cdef PyType _guessPyType(value, PyType defaulttype):
    # Return the PyType of the first registered type check that accepts
    # 'value'.  None is returned for a None value, 'defaulttype' if no
    # check accepts the value.
    if value is None:
        return None
    for type_check, tested_pytype in _TYPE_CHECKS:
        try:
            type_check(value)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => try the next one
            continue
        return <PyType>tested_pytype
    return defaulttype
cdef object _guessElementClass(tree.xmlNode* c_node):
    # Pick an element class from the text content of the node: None if
    # there is no text, StringElement for the empty string, otherwise the
    # class of the first registered type check that accepts the text
    # (None if no check accepts it).
    text = textOf(c_node)
    if text is None:
        return None
    if text == '':
        return StringElement
    for type_check, pytype in _TYPE_CHECKS:
        try:
            type_check(text)
        except IGNORABLE_ERRORS:
            continue
        return (<PyType>pytype)._type
    return None
################################################################################
# adapted ElementMaker supports registered PyTypes
@cython.final
@cython.internal
cdef class _ObjectifyElementMakerCaller:
    # Callable created by ElementMaker for a single tag name.  Calling it
    # builds an element with that tag from the given children and attributes.
    cdef object _tag              # tag name of the element to build
    cdef object _nsmap            # namespace mapping passed to the element factory
    cdef object _element_factory  # custom factory callable, or None to use _makeElement()
    cdef bint _annotate           # whether to set PYTYPE_ATTRIBUTE on the result

    def __call__(self, *children, **attrib):
        """__call__(self, *children, **attrib)

        Build a new element.  Keyword arguments become attributes.  Each
        positional child is handled according to its type:

        * None: if it is the only child, XML_SCHEMA_INSTANCE_NIL_ATTR is set
          to "true"; otherwise it is skipped
        * string: added as text (or tail text of the last child)
        * _Element: appended as a child element
        * another element maker caller: an empty child with its tag is added
        * dict: its items become attributes unless overridden by a keyword
        * anything else: converted to text using the registered PyType for
          its type name, falling back to unicode()
        """
        cdef _ObjectifyElementMakerCaller elementMaker
        cdef _Element element
        cdef _Element childElement
        cdef bint has_children
        cdef bint has_string_value
        if self._element_factory is None:
            element = _makeElement(self._tag, None, attrib, self._nsmap)
        else:
            element = self._element_factory(self._tag, attrib, self._nsmap)

        # type name of the last non-string data value, used for annotation
        pytype_name = None
        has_children = False
        has_string_value = False
        for child in children:
            if child is None:
                if len(children) == 1:
                    cetree.setAttributeValue(
                        element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
            elif python._isString(child):
                _add_text(element, child)
                has_string_value = True
            elif isinstance(child, _Element):
                cetree.appendChildToElement(element, <_Element>child)
                has_children = True
            elif isinstance(child, _ObjectifyElementMakerCaller):
                # an uncalled element maker stands for an empty element
                elementMaker = <_ObjectifyElementMakerCaller>child
                if elementMaker._element_factory is None:
                    cetree.makeSubElement(element, elementMaker._tag,
                                          None, None, None, None)
                else:
                    childElement = elementMaker._element_factory(
                        elementMaker._tag)
                    cetree.appendChildToElement(element, childElement)
                has_children = True
            elif isinstance(child, dict):
                for name, value in child.items():
                    # keyword arguments in attrib take precedence
                    if name in attrib:
                        continue
                    # serialise the value with its registered PyType, if any
                    pytype = _PYTYPE_DICT.get(_typename(value))
                    if pytype is not None:
                        value = (<PyType>pytype).stringify(value)
                    elif not python._isString(value):
                        value = unicode(value)
                    cetree.setAttributeValue(element, name, value)
            else:
                if pytype_name is not None:
                    # concatenation always makes the result a string
                    has_string_value = True
                pytype_name = _typename(child)
                pytype = _PYTYPE_DICT.get(_typename(child))
                if pytype is not None:
                    _add_text(element, (<PyType>pytype).stringify(child))
                else:
                    # no registered type => treat the value as a plain string
                    has_string_value = True
                    child = unicode(child)
                    _add_text(element, child)

        # only data elements (no child elements) receive a type annotation
        if self._annotate and not has_children:
            if has_string_value:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str")
            elif pytype_name is not None:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)

        return element
cdef _add_text(_Element elem, text):
    # Append text to the tree under construction.  If findChildBackwards()
    # finds a child node, the text is appended to that child's tail text;
    # otherwise it is appended to the text of the element itself.
    cdef tree.xmlNode* c_last_child
    c_last_child = cetree.findChildBackwards(elem._c_node, 0)
    if c_last_child is NULL:
        previous = cetree.textOf(elem._c_node)
        if previous is not None:
            text = previous + text
        cetree.setNodeText(elem._c_node, text)
        return
    previous = cetree.tailOf(c_last_child)
    if previous is not None:
        text = previous + text
    cetree.setTailText(c_last_child, text)
cdef class ElementMaker:
    """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)

    An ElementMaker that can be used for constructing trees.

    Example::

      >>> M = ElementMaker(annotate=False)
      >>> attributes = {'class': 'par'}
      >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )

      >>> from lxml.etree import tostring
      >>> print(tostring(html, method='html').decode('ascii'))
      <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>

    To create tags that are not valid Python identifiers, call the factory
    directly and pass the tag name as first argument::

      >>> root = M('tricky-tag', 'some text')
      >>> print(root.tag)
      tricky-tag
      >>> print(root.text)
      some text

    Attribute access with a special method name (``__name__`` style) raises
    AttributeError instead of creating an element maker for that name.

    Note that this module has a predefined ElementMaker instance called ``E``.
    """
    cdef object _makeelement  # custom element factory or None
    cdef object _namespace    # "{namespace}" prefix for unqualified tags, or None
    cdef object _nsmap        # namespace mapping passed on to created elements
    cdef bint _annotate       # whether created data elements get a pytype attribute
    cdef dict _cache          # tag name -> _ObjectifyElementMakerCaller

    def __init__(self, *, namespace=None, nsmap=None, annotate=True,
                 makeelement=None):
        if nsmap is None:
            nsmap = _DEFAULT_NSMAP if annotate else {}
        self._nsmap = nsmap
        self._namespace = None if namespace is None else "{%s}" % namespace
        self._annotate = annotate
        if makeelement is not None:
            if not callable(makeelement):
                raise TypeError(
                    f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
            self._makeelement = makeelement
        else:
            self._makeelement = None
        self._cache = {}

    @cython.final
    cdef _build_element_maker(self, tag, bint caching):
        # Create the callable for 'tag' and optionally remember it in the
        # cache.  Tags that do not start with "{" are qualified with the
        # configured namespace, if any.
        cdef _ObjectifyElementMakerCaller element_maker
        element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
        if self._namespace is not None and tag[0] != "{":
            element_maker._tag = self._namespace + tag
        else:
            element_maker._tag = tag
        element_maker._nsmap = self._nsmap
        element_maker._annotate = self._annotate
        element_maker._element_factory = self._makeelement
        if caching:
            # crude size limit: start over once the cache has grown too large
            if len(self._cache) > 200:
                self._cache.clear()
            self._cache[tag] = element_maker
        return element_maker

    def __getattr__(self, tag):
        element_maker = self._cache.get(tag)
        if element_maker is None:
            if is_special_method(tag):
                # Lookups of special method names must fail like on any other
                # object.  Returning an element maker here would make this
                # object appear to implement arbitrary protocols.
                raise AttributeError(
                    f"'{type(self).__name__}' object has no attribute '{tag}'")
            return self._build_element_maker(tag, caching=True)
        return element_maker

    def __call__(self, tag, *args, **kwargs):
        element_maker = self._cache.get(tag)
        if element_maker is None:
            # special method names are never cached, so that attribute
            # lookup cannot find them in the cache later on
            element_maker = self._build_element_maker(
                tag, caching=not is_special_method(tag))
        return element_maker(*args, **kwargs)
################################################################################
# Recursive element dumping
# Module-wide switch set by enable_recursive_str().
cdef bint __RECURSIVE_STR = 0 # default: off

def enable_recursive_str(on=True):
    """enable_recursive_str(on=True)

    Switch the recursively generated tree representation for str(element)
    on or off.  The representation is based on objectify.dump(element).
    """
    global __RECURSIVE_STR
    __RECURSIVE_STR = on
def dump(_Element element not None):
    """dump(_Element element not None)

    Build and return a string that describes the element and, recursively,
    all of its children.
    """
    text = _dump(element, 0)
    return text
cdef object _dump(_Element element, int indent):
    # Build the description of 'element' at the given nesting level: one
    # line for the element itself, one line per attribute, then the
    # descriptions of all children at the next level.
    cdef list parts
    pad = " " * indent
    if isinstance(element, ObjectifiedDataElement):
        value = repr(element)
    else:
        value = textOf(element._c_node)
        if value is not None:
            # whitespace-only text is shown as None
            value = repr(value) if value.strip() else None
    parts = [f"{pad}{element.tag} = {value} [{_typename(element)}]\n"]

    xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS
    pytype_ns = "{%s}" % PYTYPE_NAMESPACE
    for name, value in sorted(cetree.iterattributes(element, 3)):
        if '{' in name:
            if name == PYTYPE_ATTRIBUTE:
                # the annotation of plain tree elements is not shown
                if value == TREE_PYTYPE_NAME:
                    continue
                name = name.replace(pytype_ns, 'py:')
            name = name.replace(xsi_ns, 'xsi:')
        parts.append(f"{pad} * {name} = {value!r}\n")

    for child in element.iterchildren():
        parts.append(_dump(child, indent + 1))

    result = "".join(parts)
    if indent == 0:
        return result[:-1] # strip last '\n'
    return result
################################################################################
# Pickle support for objectified ElementTree
def __unpickleElementTree(data):
    # Rebuild an ElementTree from serialised data: parse it with this
    # module's fromstring() and wrap the resulting root element.
    root = fromstring(data)
    return etree.ElementTree(root)
cdef _setupPickle(elementTreeReduceFunction):
    # Register the given reduce function with copyreg for the
    # etree._ElementTree class.
    import copyreg
    tree_class = etree._ElementTree
    copyreg.pickle(tree_class, elementTreeReduceFunction, __unpickleElementTree)
def pickleReduceElementTree(obj):
    # Reduce function: the tree is pickled as its serialised form, which
    # __unpickleElementTree() parses again on unpickling.
    serialised = etree.tostring(obj)
    return __unpickleElementTree, (serialised,)

# register the reduce function, then remove its name from the module
_setupPickle(pickleReduceElementTree)
del pickleReduceElementTree
################################################################################
# Element class lookup
cdef class ObjectifyElementClassLookup(ElementClassLookup):
    """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)

    Element class lookup method that uses the objectify classes.
    """
    cdef object empty_data_class
    cdef object tree_class

    def __init__(self, tree_class=None, empty_data_class=None):
        """Lookup mechanism for objectify.

        Subclasses of ObjectifiedElement and ObjectifiedDataElement can be
        passed as keyword arguments to replace the default Element classes.
        'tree_class' is the class used for inner tree elements (default:
        ObjectifiedElement), 'empty_data_class' is the class used for empty
        data elements (default: StringElement).
        """
        self.tree_class = (
            ObjectifiedElement if tree_class is None else tree_class)
        self.empty_data_class = (
            StringElement if empty_data_class is None else empty_data_class)
        self._lookup_function = _lookupElementClass
cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node):
    # Select the element class for a node.  The criteria are tried in this
    # order: child elements, xsi:nil, the pytype hint, the xsi:type hint,
    # the text content, and finally the position in the tree.
    cdef ObjectifyElementClassLookup lookup = <ObjectifyElementClassLookup>state

    # elements with children are never data elements
    if cetree.hasChild(c_node):
        return lookup.tree_class

    # xsi:nil="true" => NoneElement
    nil_value = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
    if nil_value == "true":
        return NoneElement

    # Python type hint
    pytype_hint = cetree.attributeValueFromNsName(
        c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    if pytype_hint is not None:
        if pytype_hint == TREE_PYTYPE_NAME:
            return lookup.tree_class
        py_type = <PyType>_PYTYPE_DICT.get(pytype_hint)
        if py_type is not None:
            return py_type._type
        # unknown hint => fall through to the remaining criteria

    # XML Schema type hint, looked up with and without its prefix
    xsi_hint = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
    if xsi_hint is not None:
        schema_type = <PyType>_SCHEMA_TYPE_DICT.get(xsi_hint)
        if schema_type is None and ':' in xsi_hint:
            local_name = xsi_hint.split(':', 1)[1]
            schema_type = <PyType>_SCHEMA_TYPE_DICT.get(local_name)
        if schema_type is not None:
            return schema_type._type

    # guess from the text content
    guessed_class = _guessElementClass(c_node)
    if guessed_class is not None:
        return guessed_class

    # a node without an element parent defaults to the tree class
    if c_node.parent is NULL or not tree._isElement(c_node.parent):
        return lookup.tree_class
    return lookup.empty_data_class
################################################################################
# Type annotations
cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
    # Return 'pytype' if its type check accepts the text of the node,
    # None otherwise (or if no type was given).
    if pytype is None:
        return None
    text = textOf(c_node)
    try:
        pytype.type_check(text)
    except IGNORABLE_ERRORS:
        # could not be parsed as the specified type => reject it
        return None
    return pytype
def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
               empty_pytype=None):
    """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)

    Recursively annotates the elements of an XML tree with 'pytype'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'pytype'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Setting the keyword argument ``ignore_xsi`` to True makes the function
    additionally ignore existing ``xsi:type`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'str', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate with pytype only; 'ignore_old' refers to old pytype values
    _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)
def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
                empty_type=None):
    """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'xsi:type'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    Setting the keyword argument ``ignore_pytype`` to True makes the function
    additionally ignore existing ``pytype`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate with xsi:type only; 'ignore_old' refers to old xsi:type values
    _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)
def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
             empty_pytype=None, empty_type=None, annotate_xsi=0,
             annotate_pytype=1):
    """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    and/or 'py:pytype' attributes.

    With ``ignore_old=True`` (the default), existing 'py:pytype' attributes
    are not considered for the type annotation.  Pass False to reuse
    existing 'py:pytype' information where it fits the element text value.

    With ``ignore_xsi=False`` (the default), existing 'xsi:type' attributes
    are used for the type annotation if they fit the element text values.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    ``empty_pytype`` sets the default 'py:pytype' annotation of empty
    elements.  Pass 'str', for example, to make string values the default.

    ``empty_type`` sets the default 'xsi:type' annotation of empty elements.
    The default is not to annotate empty elements.  Pass 'string', for
    example, to make string values the default.

    ``annotate_xsi`` (default: 0) and ``annotate_pytype`` (default: 1)
    select which kind(s) of annotation to use.
    """
    cdef _Element root
    root = cetree.rootNodeOrRaise(element_or_tree)
    _annotate(root, annotate_xsi, annotate_pytype, ignore_xsi,
              ignore_old, empty_type, empty_pytype)
cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    # Resolve the default type for empty elements, then annotate every
    # element node visited by the tree traversal that starts at 'element'.
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, str_pytype, none_pytype

    if not (annotate_xsi or annotate_pytype):
        return

    # an XML Schema type name takes precedence over a pytype name;
    # byte string names are decoded as ASCII before the lookup
    empty_pytype = None
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)

    str_pytype = <PyType>_PYTYPE_DICT.get('str')
    none_pytype = <PyType>_PYTYPE_DICT.get('NoneType')

    doc = element._doc
    c_node = element._c_node
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype,
                          str_pytype, none_pytype)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
cdef int _annotate_element(tree.xmlNode* c_node, _Document doc,
                           bint annotate_xsi, bint annotate_pytype,
                           bint ignore_xsi, bint ignore_pytype,
                           empty_type_name, PyType empty_pytype,
                           PyType StrType, PyType NoneType) except -1:
    # Determine the type of a single element node and write it back as an
    # xsi:type and/or pytype attribute.
    #
    # The type is taken from the first source that provides one:
    #   1. xsi:nil="true"             => NoneType
    #   2. existing xsi:type value    (unless ignore_xsi), if it passes its check
    #   3. existing pytype value      (unless ignore_pytype), if it passes its check
    #   4. guess from the text content (only for elements without children)
    #   5. StrType if the element has text, else the 'empty_pytype' default
    #
    # Returns 0; -1 is reserved for signalling an exception.
    cdef tree.xmlNs* c_ns
    cdef PyType pytype = None
    # XML Schema type name that goes into xsi:type (None: not known)
    typename = None
    # set when the element has children and no type was found for it
    istree = 0

    # if element is defined as xsi:nil, represent it as None
    if cetree.attributeValueFromNsName(
            c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true":
        pytype = NoneType

    if pytype is None and not ignore_xsi:
        # check that old xsi type value is valid
        typename = cetree.attributeValueFromNsName(
            c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        if typename is not None:
            pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is None and ':' in typename:
                # retry the lookup without the namespace prefix
                prefix, typename = typename.split(':', 1)
                pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is not None and pytype is not StrType:
                # StrType does not have a typecheck but is the default
                # anyway, so just accept it if given as type
                # information
                pytype = _check_type(c_node, pytype)
                if pytype is None:
                    # the text does not fit the declared type => forget it
                    typename = None

    if pytype is None and not ignore_pytype:
        # check that old pytype value is valid
        old_pytypename = cetree.attributeValueFromNsName(
            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        if old_pytypename is not None:
            if old_pytypename == TREE_PYTYPE_NAME:
                if not cetree.hasChild(c_node):
                    # only case where we should keep it,
                    # everything else is clear enough
                    pytype = TREE_PYTYPE
            else:
                if old_pytypename == 'none':
                    # transition from lxml 1.x
                    old_pytypename = "NoneType"
                pytype = <PyType>_PYTYPE_DICT.get(old_pytypename)
                if pytype is not None and pytype is not StrType:
                    # StrType does not have a typecheck but is the
                    # default anyway, so just accept it if given as
                    # type information
                    pytype = _check_type(c_node, pytype)

    if pytype is None:
        # try to guess type
        if not cetree.hasChild(c_node):
            # element has no children => data class
            pytype = _guessPyType(textOf(c_node), StrType)
        else:
            istree = 1

    if pytype is None:
        # use default type for empty elements
        if cetree.hasText(c_node):
            pytype = StrType
        else:
            pytype = empty_pytype
            if typename is None:
                typename = empty_type_name

    # make the XML Schema type name agree with the selected type
    if pytype is not None:
        if typename is None:
            if not istree:
                if pytype._schema_types:
                    # pytype->xsi:type is a 1:n mapping
                    # simply take the first
                    typename = pytype._schema_types[0]
        elif typename not in pytype._schema_types:
            typename = pytype._schema_types[0]

    if annotate_xsi:
        if typename is None or istree:
            cetree.delAttributeFromNsName(
                c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        else:
            # update or create attribute
            typename_utf8 = cetree.utf8(typename)
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd')
            if c_ns is not NULL:
                # adapt the prefix of the type name to the prefix of the
                # namespace that was found (or built) for this node
                if b':' in typename_utf8:
                    prefix, name = typename_utf8.split(b':', 1)
                    if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0':
                        # namespace has no prefix => use the bare name
                        typename_utf8 = name
                    elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0:
                        # different prefix => replace it
                        typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name
                elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0':
                    # unprefixed name, prefixed namespace => add the prefix
                    typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
            tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8))

    if annotate_pytype:
        if pytype is None:
            # delete attribute if it exists
            cetree.delAttributeFromNsName(
                c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            # update or create attribute
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py')
            pytype_name = cetree.utf8(pytype.name)
            tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME,
                              _xcstr(pytype_name))
            if pytype is NoneType:
                # None values additionally carry xsi:nil="true"
                c_ns = cetree.findOrBuildNodeNsPrefix(
                    doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
                tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true")

    return 0
# module-level references to the etree functions used by deannotate()
cdef object _strip_attributes = etree.strip_attributes
cdef object _cleanup_namespaces = etree.cleanup_namespaces
def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
               bint xsi_nil=False, bint cleanup_namespaces=False):
    """deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

    Recursively de-annotate the elements of an XML tree by removing 'py:pytype'
    and/or 'xsi:type' attributes and/or 'xsi:nil' attributes.

    ``pytype`` (default: True) selects the removal of 'py:pytype' attributes,
    ``xsi`` (default: True) the removal of 'xsi:type' attributes and
    ``xsi_nil`` (default: False) the removal of 'xsi:nil' attributes.

    Namespace declarations are left alone by default.  Pass
    ``cleanup_namespaces=True`` to also remove unused namespace declarations
    from the tree.
    """
    cdef list attribute_names
    selection = (
        (pytype, PYTYPE_ATTRIBUTE),
        (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
        (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR),
    )
    attribute_names = [name for enabled, name in selection if enabled]

    _strip_attributes(element_or_tree, *attribute_names)
    if cleanup_namespaces:
        _cleanup_namespaces(element_or_tree)
################################################################################
# Module level parser setup
# The original default parser: an XMLParser that removes blank text and
# uses the objectify element class lookup.
cdef object __DEFAULT_PARSER
__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() )

# The parser currently in use; replaced by set_default_parser().
cdef object objectify_parser
objectify_parser = __DEFAULT_PARSER
def set_default_parser(new_parser = None):
    """set_default_parser(new_parser = None)

    Replace the default parser used by objectify's Element() and
    fromstring() functions.

    The new parser must be an etree.XMLParser, otherwise a TypeError is
    raised.

    Call without arguments to reset to the original parser.
    """
    global objectify_parser
    if new_parser is None:
        objectify_parser = __DEFAULT_PARSER
        return
    if not isinstance(new_parser, etree.XMLParser):
        raise TypeError("parser must inherit from lxml.etree.XMLParser")
    objectify_parser = new_parser
def makeparser(**kw):
    """makeparser(remove_blank_text=True, **kw)

    Create a new XML parser for objectify trees.

    All keyword arguments supported by ``etree.XMLParser()`` are accepted.
    Unlike a plain XMLParser, this parser removes blank text unless you pass
    the ``remove_blank_text`` boolean keyword option yourself.
    """
    kw.setdefault('remove_blank_text', True)
    new_parser = etree.XMLParser(**kw)
    new_parser.set_element_class_lookup( ObjectifyElementClassLookup() )
    return new_parser
cdef _Element _makeElement(tag, text, attrib, nsmap):
    # Create a new element through the parser that is currently set as the
    # objectify default.
    cdef _Element new_element
    new_element = cetree.makeElement(
        tag, None, objectify_parser, text, None, attrib, nsmap)
    return new_element
################################################################################
# Module level factory functions
# module-level reference to etree.fromstring(), used by fromstring() and XML()
cdef object _fromstring
_fromstring = etree.fromstring

# SubElement is exported unchanged from lxml.etree
SubElement = etree.SubElement
def fromstring(xml, parser=None, *, base_url=None):
    """fromstring(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree fromstring() function
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
def XML(xml, parser=None, *, base_url=None):
    """XML(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree XML() literal factory
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
# module-level reference to etree.parse(), used by parse()
cdef object _parse
_parse = etree.parse
def parse(f, parser=None, *, base_url=None):
    """parse(f, parser=None, base_url=None)

    Parse a file or file-like object with the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword sets a URL for the document when parsing from
    a file-like object.  This is needed when looking up external entities
    (DTD, XInclude, ...) with relative paths.
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _parse(f, chosen_parser, base_url=base_url)
# Prefix -> namespace mapping that is used when the caller does not
# provide an nsmap of their own.
cdef dict _DEFAULT_NSMAP = {
    "py" : PYTYPE_NAMESPACE,
    "xsi" : XML_SCHEMA_INSTANCE_NS,
    "xsd" : XML_SCHEMA_NS
}

# predefined ElementMaker instance with default settings
E = ElementMaker()
def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
    """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)

    Objectify specific version of the lxml.etree Element() factory that
    always creates a structural (tree) element.

    Attributes can be passed as a mapping (``attrib``) and/or as keyword
    arguments; keyword arguments take precedence.  The mapping passed as
    ``attrib`` is not modified.

    NOTE: requires parser based element class lookup activated in lxml.etree!
    """
    if attrib is not None:
        # Always merge into a private copy.  The pytype attribute is added
        # below and must not leak into the mapping owned by the caller.
        attrib = dict(attrib)
        attrib.update(_attributes)
        _attributes = attrib
    if _pytype is None:
        _pytype = TREE_PYTYPE_NAME
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    _attributes[PYTYPE_ATTRIBUTE] = _pytype
    return _makeElement(_tag, None, _attributes, nsmap)
def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
                **_attributes):
    """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)

    Create a new element from a Python value and XML attributes taken from
    keyword arguments or a dictionary passed as second argument.  The
    mapping passed as ``attrib`` is not modified.

    Automatically adds a 'pytype' attribute for the Python type of the value,
    if the type can be identified.  If '_pytype' or '_xsi' are among the
    keyword arguments, they will be used instead.

    If the _value argument is an ObjectifiedDataElement instance, its py:pytype,
    xsi:type and other attributes and nsmap are reused unless they are redefined
    in attrib and/or keyword arguments.

    Raises ValueError if an '_xsi' type is given but the XML Schema namespace
    cannot be resolved from the nsmap.
    """
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    if attrib is not None and attrib:
        # Always merge into a private copy.  Type attributes are added
        # below and must not leak into the mapping owned by the caller
        # (which may be the live attributes of another element).
        attrib = dict(attrib)
        attrib.update(_attributes)
        _attributes = attrib
    if isinstance(_value, ObjectifiedElement):
        if _pytype is None:
            if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP:
                # special case: no change!
                return _value.__copy__()
    if isinstance(_value, ObjectifiedDataElement):
        # reuse existing nsmap unless redefined in nsmap parameter
        temp = _value.nsmap
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(nsmap)
            nsmap = temp
        # reuse existing attributes unless redefined in attrib/_attributes
        temp = _value.attrib
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(_attributes)
            _attributes = temp
        # reuse existing xsi:type or py:pytype attributes, unless provided as
        # arguments
        if _xsi is None and _pytype is None:
            _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
            _pytype = _attributes.get(PYTYPE_ATTRIBUTE)

    if _xsi is not None:
        # 'name' is the type name without prefix, '_xsi' the prefixed name
        # that is written into the attribute
        if ':' in _xsi:
            prefix, name = _xsi.split(':', 1)
            ns = nsmap.get(prefix)
            if ns != XML_SCHEMA_NS:
                raise ValueError("XSD types require the XSD namespace")
        elif nsmap is _DEFAULT_NSMAP:
            name = _xsi
            _xsi = 'xsd:' + _xsi
        else:
            name = _xsi
            # find the prefix under which the XSD namespace is mapped
            for prefix, ns in nsmap.items():
                if ns == XML_SCHEMA_NS:
                    if prefix is not None and prefix:
                        _xsi = prefix + ':' + _xsi
                    break
            else:
                raise ValueError("XSD types require the XSD namespace")
        _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi
        if _pytype is None:
            # allow using unregistered or even wrong xsi:type names
            py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi)
            if py_type is None:
                py_type = <PyType>_SCHEMA_TYPE_DICT.get(name)
            if py_type is not None:
                _pytype = py_type.name

    if _pytype is None:
        _pytype = _pytypename(_value)

    # convert the value to its text representation
    if _value is None and _pytype != "str":
        _pytype = _pytype or "NoneType"
        strval = None
    elif python._isString(_value):
        strval = _value
    elif isinstance(_value, bool):
        if _value:
            strval = "true"
        else:
            strval = "false"
    else:
        py_type = <PyType>_PYTYPE_DICT.get(_pytype)
        stringify = unicode if py_type is None else py_type.stringify
        strval = stringify(_value)

    if _pytype is not None:
        if _pytype == "NoneType" or _pytype == "none":
            strval = None
            _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true"
        else:
            # check if type information from arguments is valid
            py_type = <PyType>_PYTYPE_DICT.get(_pytype)
            if py_type is not None:
                if py_type.type_check is not None:
                    py_type.type_check(strval)
                _attributes[PYTYPE_ATTRIBUTE] = _pytype

    return _makeElement("value", strval, _attributes, nsmap)
################################################################################
# ObjectPath
include "objectpath.pxi"