Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
|
|
@ -0,0 +1,10 @@
|
|||
# -*- test-case-name: twisted.words.test -*-
|
||||
# Copyright (c) Twisted Matrix Laboratories.
|
||||
# See LICENSE for details.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
Twisted X-ish: XML-ish DOM and XPath-ish engine
|
||||
|
||||
"""
|
||||
899
venv/lib/python3.9/site-packages/twisted/words/xish/domish.py
Normal file
899
venv/lib/python3.9/site-packages/twisted/words/xish/domish.py
Normal file
|
|
@ -0,0 +1,899 @@
|
|||
# -*- test-case-name: twisted.words.test.test_domish -*-
|
||||
# Copyright (c) Twisted Matrix Laboratories.
|
||||
# See LICENSE for details.
|
||||
|
||||
"""
|
||||
DOM-like XML processing support.
|
||||
|
||||
This module provides support for parsing XML into DOM-like object structures
|
||||
and serializing such structures to an XML string representation, optimized
|
||||
for use in streaming XML applications.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division
|
||||
|
||||
from zope.interface import implementer, Interface, Attribute
|
||||
|
||||
from twisted.python.compat import (_PY3, StringType, _coercedUnicode,
|
||||
iteritems, itervalues, unicode)
|
||||
|
||||
def _splitPrefix(name):
|
||||
""" Internal method for splitting a prefixed Element name into its
|
||||
respective parts """
|
||||
ntok = name.split(":", 1)
|
||||
if len(ntok) == 2:
|
||||
return ntok
|
||||
else:
|
||||
return (None, ntok[0])
|
||||
|
||||
# Global map of prefixes that always get injected
# into the serializers prefix map (note, that doesn't
# mean they're always _USED_)
G_PREFIXES = { "http://www.w3.org/XML/1998/namespace":"xml" }
|
||||
|
||||
class _ListSerializer:
    """ Internal class which serializes an Element tree into a buffer """

    def __init__(self, prefixes=None, prefixesInScope=None):
        # writelist accumulates serialized fragments; joined in getValue().
        self.writelist = []
        # Maps namespace URI -> prefix; seeded from caller plus G_PREFIXES.
        self.prefixes = {}
        if prefixes:
            self.prefixes.update(prefixes)
        self.prefixes.update(G_PREFIXES)
        # Stack of prefix lists currently declared; bottom frame holds the
        # globally injected prefixes.
        self.prefixStack = [G_PREFIXES.values()] + (prefixesInScope or [])
        # Counter used to synthesize prefixes ("xn0", "xn1", ...) for URIs
        # without a suggested prefix.
        self.prefixCounter = 0

    def getValue(self):
        # Join all accumulated fragments into the final unicode result.
        return u"".join(self.writelist)

    def getPrefix(self, uri):
        # Return the prefix mapped to uri, inventing a new "xn<N>" prefix
        # on first sight of an unknown URI.
        if uri not in self.prefixes:
            self.prefixes[uri] = "xn%d" % (self.prefixCounter)
            self.prefixCounter = self.prefixCounter + 1
        return self.prefixes[uri]

    def prefixInScope(self, prefix):
        # Walk the prefix stack from the innermost frame outwards looking
        # for a frame that declared this prefix.
        stack = self.prefixStack
        for i in range(-1, (len(self.prefixStack)+1) * -1, -1):
            if prefix in stack[i]:
                return True
        return False

    def serialize(self, elem, closeElement=1, defaultUri=''):
        """ Serialize elem (Element, unicode CData or SerializedXML) into
        the internal buffer.

        @param closeElement: when 0, emit only the start tag (streaming).
        @param defaultUri: default namespace inherited from the logical
                           parent element.
        """
        # Optimization shortcuts
        write = self.writelist.append

        # Shortcut, check to see if elem is actually a chunk o' serialized XML
        if isinstance(elem, SerializedXML):
            write(elem)
            return

        # Shortcut, check to see if elem is actually a string (aka Cdata)
        if isinstance(elem, StringType):
            write(escapeToXml(elem))
            return

        # Further optimizations
        name = elem.name
        uri = elem.uri
        defaultUri, currentDefaultUri = elem.defaultUri, defaultUri

        # Register this element's locally declared prefixes and push a new
        # scope frame for them.
        for p, u in iteritems(elem.localPrefixes):
            self.prefixes[u] = p
        self.prefixStack.append(list(elem.localPrefixes.keys()))

        # Inherit the default namespace
        if defaultUri is None:
            defaultUri = currentDefaultUri

        if uri is None:
            uri = defaultUri

        prefix = None
        if uri != defaultUri or uri in self.prefixes:
            prefix = self.getPrefix(uri)
            inScope = self.prefixInScope(prefix)

        # Create the starttag

        if not prefix:
            write("<%s" % (name))
        else:
            write("<%s:%s" % (prefix, name))

            # Declare the prefix here if no ancestor already did.
            if not inScope:
                write(" xmlns:%s='%s'" % (prefix, uri))
                self.prefixStack[-1].append(prefix)
                inScope = True

        if defaultUri != currentDefaultUri and \
           (uri != defaultUri or not prefix or not inScope):
            write(" xmlns='%s'" % (defaultUri))

        for p, u in iteritems(elem.localPrefixes):
            write(" xmlns:%s='%s'" % (p, u))

        # Serialize attributes
        for k,v in elem.attributes.items():
            # If the attribute name is a tuple, it's a qualified attribute
            if isinstance(k, tuple):
                attr_uri, attr_name = k
                attr_prefix = self.getPrefix(attr_uri)

                if not self.prefixInScope(attr_prefix):
                    write(" xmlns:%s='%s'" % (attr_prefix, attr_uri))
                    self.prefixStack[-1].append(attr_prefix)

                write(" %s:%s='%s'" % (attr_prefix, attr_name,
                                       escapeToXml(v, 1)))
            else:
                write((" %s='%s'" % ( k, escapeToXml(v, 1))))

        # Shortcut out if this is only going to return
        # the element (i.e. no children)
        if closeElement == 0:
            write(">")
            return

        # Serialize children
        if len(elem.children) > 0:
            write(">")
            for c in elem.children:
                self.serialize(c, defaultUri=defaultUri)
            # Add closing tag
            if not prefix:
                write("</%s>" % (name))
            else:
                write("</%s:%s>" % (prefix, name))
        else:
            write("/>")

        # NOTE(review): the scope frame pushed above is not popped on the
        # closeElement == 0 early return -- presumably intentional for
        # streaming (the element stays "open"); confirm against callers.
        self.prefixStack.pop()
|
||||
|
||||
|
||||
# Serializer implementation used by Element.toXml(). Module-level alias
# (NOTE(review): presumably a hook so an alternative serializer can be
# swapped in -- confirm before relying on it).
SerializerClass = _ListSerializer
|
||||
|
||||
def escapeToXml(text, isattrib = 0):
    """ Escape text to proper XML form, per section 2.3 in the XML specification.

    @type text: C{str}
    @param text: Text to escape

    @type isattrib: C{bool}
    @param isattrib: Triggers escaping of characters necessary for use as
                     attribute values
    """
    # '&' must be escaped first, otherwise the '&' of the entities written
    # by the subsequent replacements would be escaped again.
    # (The replacement strings here were corrupted to bare characters by
    # HTML-unescaping of the source; restored to the XML entities.)
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    if isattrib == 1:
        # Quotes only need escaping inside attribute values.
        text = text.replace("'", "&apos;")
        text = text.replace("\"", "&quot;")
    return text
|
||||
|
||||
def unescapeFromXml(text):
    """ Reverse the predefined-entity escaping performed by L{escapeToXml}.

    @type text: C{str}
    @param text: Escaped XML text.
    @return: Text with the five predefined XML entities replaced by their
             literal characters.
    """
    # (The entity strings here were corrupted to bare characters by
    # HTML-unescaping of the source; restored.)
    text = text.replace("&lt;", "<")
    text = text.replace("&gt;", ">")
    text = text.replace("&apos;", "'")
    text = text.replace("&quot;", "\"")
    # '&amp;' is resolved last so it cannot create new entity sequences
    # that the earlier replacements would then (wrongly) resolve.
    text = text.replace("&amp;", "&")
    return text
|
||||
|
||||
def generateOnlyInterface(list, int):
    """ Filters items in a list by class
    """
    # Yield only the items for which the given interface reports providedBy.
    return (item for item in list if int.providedBy(item))
|
||||
|
||||
def generateElementsQNamed(list, name, uri):
    """ Filters Element items in a list with matching name and URI. """
    # Only nodes providing IElement are considered; CData strings are skipped.
    return (node for node in list
            if IElement.providedBy(node)
            and node.name == name
            and node.uri == uri)
|
||||
|
||||
def generateElementsNamed(list, name):
    """ Filters Element items in a list with matching name, regardless of URI.
    """
    # Only nodes providing IElement are considered; CData strings are skipped.
    return (node for node in list
            if IElement.providedBy(node) and node.name == name)
|
||||
|
||||
|
||||
class SerializedXML(unicode):
    """ Marker class for pre-serialized XML in the DOM.

    The serializer writes instances through verbatim, without escaping.
    """
    pass
|
||||
|
||||
|
||||
class Namespace:
    """ Convenience object for tracking namespace declarations.

    Attribute or item access on an instance yields a fully qualified
    C{(uri, name)} tuple suitable for use as an Element qname.
    """

    def __init__(self, uri):
        self._uri = uri

    def __getattr__(self, name):
        # ns.foo -> (uri, 'foo')
        return (self._uri, name)

    def __getitem__(self, name):
        # ns['foo-bar'] -> (uri, 'foo-bar'); mirrors attribute access for
        # names that are not valid Python identifiers.
        return (self._uri, name)
|
||||
|
||||
class IElement(Interface):
    """
    Interface to XML element nodes.

    See L{Element} for a detailed example of its general use.

    Warning: this Interface is not yet complete!
    """

    uri = Attribute(""" Element's namespace URI """)
    name = Attribute(""" Element's local name """)
    defaultUri = Attribute(""" Default namespace URI of child elements """)
    attributes = Attribute(""" Dictionary of element attributes """)
    children = Attribute(""" List of child nodes """)
    parent = Attribute(""" Reference to element's parent element """)
    localPrefixes = Attribute(""" Dictionary of local prefixes """)

    def toXml(prefixes=None, closeElement=1, defaultUri='',
              prefixesInScope=None):
        """ Serializes object to a (partial) XML document

        @param prefixes: dictionary that maps namespace URIs to suggested
                         prefix names.
        @type prefixes: L{dict}

        @param closeElement: flag that determines whether to include the
            closing tag of the element in the serialized string. A value of
            C{0} only generates the element's start tag. A value of C{1} yields
            a complete serialization.
        @type closeElement: L{int}

        @param defaultUri: Initial default namespace URI. This is most useful
            for partial rendering, where the logical parent element (of which
            the starttag was already serialized) declares a default namespace
            that should be inherited.
        @type defaultUri: L{unicode}

        @param prefixesInScope: list of prefixes that are assumed to be
            declared by ancestors.
        @type prefixesInScope: C{list}

        @return: (partial) serialized XML
        @rtype: C{unicode}
        """

    def addElement(name, defaultUri=None, content=None):
        """
        Create an element and add as child.

        The new element is added to this element as a child, and will have
        this element as its parent.

        @param name: element name. This can be either a L{unicode} object that
            contains the local name, or a tuple of (uri, local_name) for a
            fully qualified name. In the former case, the namespace URI is
            inherited from this element.
        @type name: L{unicode} or L{tuple} of (L{unicode}, L{unicode})

        @param defaultUri: default namespace URI for child elements. If
            L{None}, this is inherited from this element.
        @type defaultUri: L{unicode}

        @param content: text contained by the new element.
        @type content: L{unicode}

        @return: the created element
        @rtype: object providing L{IElement}
        """

    def addChild(node):
        """
        Adds a node as child of this element.

        The C{node} will be added to the list of childs of this element, and
        will have this element set as its parent when C{node} provides
        L{IElement}. If C{node} is a L{unicode} and the current last child is
        character data (L{unicode}), the text from C{node} is appended to the
        existing last child.

        @param node: the child node.
        @type node: L{unicode} or object implementing L{IElement}
        """

    def addContent(text):
        """
        Adds character data to this element.

        If the current last child of this element is a string, the text will
        be appended to that string. Otherwise, the text will be added as a new
        child.

        @param text: The character data to be added to this element.
        @type text: L{unicode}
        """
|
||||
|
||||
|
||||
@implementer(IElement)
class Element(object):
    """ Represents an XML element node.

    An Element contains a series of attributes (name/value pairs), content
    (character data), and other child Element objects. When building a document
    with markup (such as HTML or XML), use this object as the starting point.

    Element objects fully support XML Namespaces. The fully qualified name of
    the XML Element it represents is stored in the C{uri} and C{name}
    attributes, where C{uri} holds the namespace URI. There is also a default
    namespace, for child elements. This is stored in the C{defaultUri}
    attribute. Note that C{''} means the empty namespace.

    Serialization of Elements through C{toXml()} will use these attributes
    for generating proper serialized XML. When both C{uri} and C{defaultUri}
    are not None in the Element and all of its descendents, serialization
    proceeds as expected:

        >>> from twisted.words.xish import domish
        >>> root = domish.Element(('myns', 'root'))
        >>> root.addElement('child', content='test')
        <twisted.words.xish.domish.Element object at 0x83002ac>
        >>> root.toXml()
        u"<root xmlns='myns'><child>test</child></root>"

    For partial serialization, needed for streaming XML, a special value for
    namespace URIs can be used: L{None}.

    Using L{None} as the value for C{uri} means: this element is in whatever
    namespace inherited by the closest logical ancestor when the complete XML
    document has been serialized. The serialized start tag will have a
    non-prefixed name, and no xmlns declaration will be generated.

    Similarly, L{None} for C{defaultUri} means: the default namespace for my
    child elements is inherited from the logical ancestors of this element,
    when the complete XML document has been serialized.

    To illustrate, an example from a Jabber stream. Assume the start tag of the
    root element of the stream has already been serialized, along with several
    complete child elements, and sent off, looking like this::

        <stream:stream xmlns:stream='http://etherx.jabber.org/streams'
                       xmlns='jabber:client' to='example.com'>
            ...

    Now suppose we want to send a complete element represented by an
    object C{message} created like:

        >>> message = domish.Element((None, 'message'))
        >>> message['to'] = 'user@example.com'
        >>> message.addElement('body', content='Hi!')
        <twisted.words.xish.domish.Element object at 0x8276e8c>
        >>> message.toXml()
        u"<message to='user@example.com'><body>Hi!</body></message>"

    As, you can see, this XML snippet has no xmlns declaration. When sent
    off, it inherits the C{jabber:client} namespace from the root element.
    Note that this renders the same as using C{''} instead of L{None}:

        >>> presence = domish.Element(('', 'presence'))
        >>> presence.toXml()
        u"<presence/>"

    However, if this object has a parent defined, the difference becomes
    clear:

        >>> child = message.addElement(('http://example.com/', 'envelope'))
        >>> child.addChild(presence)
        <twisted.words.xish.domish.Element object at 0x8276fac>
        >>> message.toXml()
        u"<message to='user@example.com'><body>Hi!</body><envelope xmlns='http://example.com/'><presence xmlns=''/></envelope></message>"

    As, you can see, the <presence/> element is now in the empty namespace, not
    in the default namespace of the parent or the streams'.

    @type uri: C{unicode} or None
    @ivar uri: URI of this Element's name

    @type name: C{unicode}
    @ivar name: Name of this Element

    @type defaultUri: C{unicode} or None
    @ivar defaultUri: URI this Element exists within

    @type children: C{list}
    @ivar children: List of child Elements and content

    @type parent: L{Element}
    @ivar parent: Reference to the parent Element, if any.

    @type attributes: L{dict}
    @ivar attributes: Dictionary of attributes associated with this Element.

    @type localPrefixes: L{dict}
    @ivar localPrefixes: Dictionary of namespace declarations on this
                         element. The key is the prefix to bind the
                         namespace uri to.
    """

    # Class-level counter used by addUniqueId() to mint session-unique ids.
    _idCounter = 0

    def __init__(self, qname, defaultUri=None, attribs=None,
                 localPrefixes=None):
        """
        @param qname: Tuple of (uri, name)
        @param defaultUri: The default URI of the element; defaults to the URI
                           specified in C{qname}
        @param attribs: Dictionary of attributes
        @param localPrefixes: Dictionary of namespace declarations on this
                              element. The key is the prefix to bind the
                              namespace uri to.
        """
        self.localPrefixes = localPrefixes or {}
        self.uri, self.name = qname
        # Inherit this element's own URI as the children's default, unless
        # that URI is already bound to a local prefix here.
        if defaultUri is None and \
           self.uri not in itervalues(self.localPrefixes):
            self.defaultUri = self.uri
        else:
            self.defaultUri = defaultUri
        self.attributes = attribs or {}
        self.children = []
        self.parent = None

    def __getattr__(self, key):
        """ Return the first child Element named C{key}, or L{None}. """
        # Check child list for first Element with a name matching the key
        for n in self.children:
            if IElement.providedBy(n) and n.name == key:
                return n

        # Tweak the behaviour so that it's more friendly about not
        # finding elements -- we need to document this somewhere :)
        if key.startswith('_'):
            raise AttributeError(key)
        else:
            return None

    def __getitem__(self, key):
        """ Return the value of the (possibly qualified) attribute C{key}. """
        return self.attributes[self._dqa(key)]

    def __delitem__(self, key):
        """ Remove the (possibly qualified) attribute C{key}. """
        del self.attributes[self._dqa(key)];

    def __setitem__(self, key, value):
        """ Set the (possibly qualified) attribute C{key} to C{value}. """
        self.attributes[self._dqa(key)] = value

    def __unicode__(self):
        """
        Retrieve the first CData (content) node
        """
        for n in self.children:
            if isinstance(n, StringType):
                return n
        return u""

    def __bytes__(self):
        """
        Retrieve the first character data node as UTF-8 bytes.
        """
        return unicode(self).encode('utf-8')

    # str() returns text on Python 3 and UTF-8 bytes on Python 2.
    if _PY3:
        __str__ = __unicode__
    else:
        __str__ = __bytes__

    def _dqa(self, attr):
        """ Dequalify an attribute key as needed """
        # A (uri, name) tuple with a falsy uri collapses to the bare name.
        if isinstance(attr, tuple) and not attr[0]:
            return attr[1]
        else:
            return attr

    def getAttribute(self, attribname, default = None):
        """ Retrieve the value of attribname, if it exists """
        return self.attributes.get(attribname, default)

    def hasAttribute(self, attrib):
        """ Determine if the specified attribute exists """
        return self._dqa(attrib) in self.attributes

    def compareAttribute(self, attrib, value):
        """ Safely compare the value of an attribute against a provided value.

        L{None}-safe.
        """
        return self.attributes.get(self._dqa(attrib), None) == value

    def swapAttributeValues(self, left, right):
        """ Swap the values of two attribute. """
        d = self.attributes
        l = d[left]
        d[left] = d[right]
        d[right] = l

    def addChild(self, node):
        """ Add a child to this Element. """
        if IElement.providedBy(node):
            node.parent = self
        self.children.append(node)
        return node

    def addContent(self, text):
        """ Add some text data to this Element. """
        text = _coercedUnicode(text)
        c = self.children
        # Coalesce with a trailing text node instead of appending a new one.
        if len(c) > 0 and isinstance(c[-1], unicode):
            c[-1] = c[-1] + text
        else:
            c.append(text)
        return c[-1]

    def addElement(self, name, defaultUri = None, content = None):
        """ Create a child element; see L{IElement.addElement}. """
        if isinstance(name, tuple):
            # Fully qualified (uri, name): default the children's namespace
            # to the new element's own URI.
            if defaultUri is None:
                defaultUri = name[0]
            child = Element(name, defaultUri)
        else:
            # Bare local name: inherit this element's default namespace.
            if defaultUri is None:
                defaultUri = self.defaultUri
            child = Element((defaultUri, name), defaultUri)

        self.addChild(child)

        if content:
            child.addContent(content)

        return child

    def addRawXml(self, rawxmlstring):
        """ Add a pre-serialized chunk o' XML as a child of this Element. """
        self.children.append(SerializedXML(rawxmlstring))

    def addUniqueId(self):
        """ Add a unique (across a given Python session) id attribute to this
        Element.
        """
        self.attributes["id"] = "H_%d" % Element._idCounter
        Element._idCounter = Element._idCounter + 1


    def elements(self, uri=None, name=None):
        """
        Iterate across all children of this Element that are Elements.

        Returns a generator over the child elements. If both the C{uri} and
        C{name} parameters are set, the returned generator will only yield
        on elements matching the qualified name.

        @param uri: Optional element URI.
        @type uri: C{unicode}
        @param name: Optional element name.
        @type name: C{unicode}
        @return: Iterator that yields objects implementing L{IElement}.
        """
        if name is None:
            return generateOnlyInterface(self.children, IElement)
        else:
            return generateElementsQNamed(self.children, name, uri)


    def toXml(self, prefixes=None, closeElement=1, defaultUri='',
              prefixesInScope=None):
        """ Serialize this Element and all children to a string. """
        s = SerializerClass(prefixes=prefixes, prefixesInScope=prefixesInScope)
        s.serialize(self, closeElement=closeElement, defaultUri=defaultUri)
        return s.getValue()

    def firstChildElement(self):
        """ Return the first child that is itself an Element, or L{None}. """
        for c in self.children:
            if IElement.providedBy(c):
                return c
        return None
|
||||
|
||||
|
||||
class ParserError(Exception):
    """ Exception thrown when a parsing error occurs """
    pass
|
||||
|
||||
def elementStream():
    """ Preferred method to construct an ElementStream

    Uses Expat-based stream if available, and falls back to Sux if necessary.

    @raise Exception: if neither pyexpat nor twisted.web.sux is importable.
    """
    try:
        # ExpatElementStream.__init__ imports pyexpat; ImportError here
        # means expat is unavailable on this interpreter.
        es = ExpatElementStream()
        return es
    except ImportError:
        if SuxElementStream is None:
            raise Exception("No parsers available :(")
        es = SuxElementStream()
        return es
|
||||
|
||||
# Fallback parser: twisted.web.sux is optional; when it cannot be imported
# SuxElementStream is left as None and elementStream() reports the absence.
try:
    from twisted.web import sux
except:
    SuxElementStream = None
else:
    class SuxElementStream(sux.XMLParser):
        """ ElementStream implementation backed by twisted.web.sux.

        Feeds bytes via parse() and fires the DocumentStartEvent,
        ElementEvent and DocumentEndEvent callbacks assigned by the user.
        """

        def __init__(self):
            self.connectionMade()
            # Callbacks to be assigned by the user of the stream.
            self.DocumentStartEvent = None
            self.ElementEvent = None
            self.DocumentEndEvent = None
            # Element currently being built (None between top-level packets).
            self.currElem = None
            # Root element of the document being parsed.
            self.rootElem = None
            self.documentStarted = False
            # Stacks tracking default namespaces and prefix declarations
            # per open element.
            self.defaultNsStack = []
            self.prefixStack = []

        def parse(self, buffer):
            """ Feed a chunk of XML; re-raise parse errors as ParserError. """
            try:
                self.dataReceived(buffer)
            except sux.ParseError as e:
                raise ParserError(str(e))


        def findUri(self, prefix):
            # Walk prefix stack backwards, looking for the uri
            # matching the specified prefix
            stack = self.prefixStack
            for i in range(-1, (len(self.prefixStack)+1) * -1, -1):
                if prefix in stack[i]:
                    return stack[i][prefix]
            return None

        def gotTagStart(self, name, attributes):
            defaultUri = None
            localPrefixes = {}
            attribs = {}
            uri = None

            # Pass 1 - Identify namespace decls
            for k, v in list(attributes.items()):
                if k.startswith("xmlns"):
                    x, p = _splitPrefix(k)
                    if (x is None): # I.e. default declaration
                        defaultUri = v
                    else:
                        localPrefixes[p] = v
                    del attributes[k]

            # Push namespace decls onto prefix stack
            self.prefixStack.append(localPrefixes)

            # Determine default namespace for this element; if there
            # is one
            if defaultUri is None:
                if len(self.defaultNsStack) > 0:
                    defaultUri = self.defaultNsStack[-1]
                else:
                    defaultUri = ''

            # Fix up name
            prefix, name = _splitPrefix(name)
            if prefix is None: # This element is in the default namespace
                uri = defaultUri
            else:
                # Find the URI for the prefix
                uri = self.findUri(prefix)

            # Pass 2 - Fix up and escape attributes
            for k, v in attributes.items():
                p, n = _splitPrefix(k)
                if p is None:
                    attribs[n] = v
                else:
                    attribs[(self.findUri(p)), n] = unescapeFromXml(v)

            # Construct the actual Element object
            e = Element((uri, name), defaultUri, attribs, localPrefixes)

            # Save current default namespace
            self.defaultNsStack.append(defaultUri)

            # Document already started
            if self.documentStarted:
                # Starting a new packet
                if self.currElem is None:
                    self.currElem = e
                # Adding to existing element
                else:
                    self.currElem = self.currElem.addChild(e)
            # New document
            else:
                self.rootElem = e
                self.documentStarted = True
                self.DocumentStartEvent(e)

        def gotText(self, data):
            # Character data outside any packet (currElem None) is dropped.
            if self.currElem != None:
                if isinstance(data, bytes):
                    data = data.decode('ascii')
                self.currElem.addContent(data)

        def gotCData(self, data):
            if self.currElem != None:
                if isinstance(data, bytes):
                    data = data.decode('ascii')
                self.currElem.addContent(data)

        def gotComment(self, data):
            # Ignore comments for the moment
            pass

        # The five predefined XML entities, mapped to their characters.
        entities = { "amp" : "&",
                     "lt"  : "<",
                     "gt"  : ">",
                     "apos": "'",
                     "quot": "\"" }

        def gotEntityReference(self, entityRef):
            # If this is an entity we know about, add it as content
            # to the current element
            if entityRef in SuxElementStream.entities:
                data = SuxElementStream.entities[entityRef]
                if isinstance(data, bytes):
                    data = data.decode('ascii')
                self.currElem.addContent(data)

        def gotTagEnd(self, name):
            # Ensure the document hasn't already ended
            if self.rootElem is None:
                # XXX: Write more legible explanation
                raise ParserError("Element closed after end of document.")

            # Fix up name
            prefix, name = _splitPrefix(name)
            if prefix is None:
                uri = self.defaultNsStack[-1]
            else:
                uri = self.findUri(prefix)

            # End of document
            if self.currElem is None:
                # Ensure element name and uri matches
                if self.rootElem.name != name or self.rootElem.uri != uri:
                    raise ParserError("Mismatched root elements")
                self.DocumentEndEvent()
                self.rootElem = None

            # Other elements
            else:
                # Ensure the tag being closed matches the name of the current
                # element
                if self.currElem.name != name or self.currElem.uri != uri:
                    # XXX: Write more legible explanation
                    raise ParserError("Malformed element close")

                # Pop prefix and default NS stack
                self.prefixStack.pop()
                self.defaultNsStack.pop()

                # Check for parent null parent of current elem;
                # that's the top of the stack
                if self.currElem.parent is None:
                    self.currElem.parent = self.rootElem
                    self.ElementEvent(self.currElem)
                    self.currElem = None

                # Anything else is just some element wrapping up
                else:
                    self.currElem = self.currElem.parent
|
||||
|
||||
|
||||
class ExpatElementStream:
    """ ElementStream implementation backed by pyexpat.

    Feed bytes via parse(); the DocumentStartEvent, ElementEvent and
    DocumentEndEvent callbacks (assigned by the user) fire as the root
    start tag, each complete top-level child, and the root end tag are
    parsed, respectively.
    """

    def __init__(self):
        # Imported lazily here so elementStream() can catch ImportError
        # and fall back to the sux-based parser.
        import pyexpat
        # Callbacks to be assigned by the user of the stream.
        self.DocumentStartEvent = None
        self.ElementEvent = None
        self.DocumentEndEvent = None
        self.error = pyexpat.error
        # " " as namespace separator: expat reports names as "uri name".
        self.parser = pyexpat.ParserCreate("UTF-8", " ")
        self.parser.StartElementHandler = self._onStartElement
        self.parser.EndElementHandler = self._onEndElement
        self.parser.CharacterDataHandler = self._onCdata
        self.parser.StartNamespaceDeclHandler = self._onStartNamespace
        self.parser.EndNamespaceDeclHandler = self._onEndNamespace
        self.currElem = None
        self.defaultNsStack = ['']
        self.documentStarted = 0
        # Prefix declarations collected for the next element to start.
        self.localPrefixes = {}

    def parse(self, buffer):
        """ Feed a chunk of XML; re-raise expat errors as ParserError. """
        try:
            self.parser.Parse(buffer)
        except self.error as e:
            raise ParserError(str(e))

    def _onStartElement(self, name, attrs):
        # Generate a qname tuple from the provided name. See
        # http://docs.python.org/library/pyexpat.html#xml.parsers.expat.ParserCreate
        # for an explanation of the formatting of name.
        qname = name.rsplit(" ", 1)
        if len(qname) == 1:
            qname = ('', name)

        # Process attributes: rewrite "uri name" keys into (uri, name)
        # tuple keys, deleting the originals afterwards.
        newAttrs = {}
        toDelete = []
        for k, v in attrs.items():
            if " " in k:
                aqname = k.rsplit(" ", 1)
                newAttrs[(aqname[0], aqname[1])] = v
                toDelete.append(k)

        attrs.update(newAttrs)

        for k in toDelete:
            del attrs[k]

        # Construct the new element
        e = Element(qname, self.defaultNsStack[-1], attrs, self.localPrefixes)
        self.localPrefixes = {}

        # Document already started
        if self.documentStarted == 1:
            if self.currElem != None:
                self.currElem.children.append(e)
                e.parent = self.currElem
            self.currElem = e

        # New document
        else:
            self.documentStarted = 1
            self.DocumentStartEvent(e)

    def _onEndElement(self, _):
        # Check for null current elem; end of doc
        if self.currElem is None:
            self.DocumentEndEvent()

        # Check for parent that is None; that's
        # the top of the stack
        elif self.currElem.parent is None:
            self.ElementEvent(self.currElem)
            self.currElem = None

        # Anything else is just some element in the current
        # packet wrapping up
        else:
            self.currElem = self.currElem.parent

    def _onCdata(self, data):
        # Character data outside any packet (currElem None) is dropped.
        if self.currElem != None:
            self.currElem.addContent(data)

    def _onStartNamespace(self, prefix, uri):
        # If this is the default namespace, put
        # it on the stack
        if prefix is None:
            self.defaultNsStack.append(uri)
        else:
            self.localPrefixes[prefix] = uri

    def _onEndNamespace(self, prefix):
        # Remove last element on the stack
        if prefix is None:
            self.defaultNsStack.pop()
|
||||
|
||||
## class FileParser(ElementStream):
|
||||
## def __init__(self):
|
||||
## ElementStream.__init__(self)
|
||||
## self.DocumentStartEvent = self.docStart
|
||||
## self.ElementEvent = self.elem
|
||||
## self.DocumentEndEvent = self.docEnd
|
||||
## self.done = 0
|
||||
|
||||
## def docStart(self, elem):
|
||||
## self.document = elem
|
||||
|
||||
## def elem(self, elem):
|
||||
## self.document.addChild(elem)
|
||||
|
||||
## def docEnd(self):
|
||||
## self.done = 1
|
||||
|
||||
## def parse(self, filename):
|
||||
## with open(filename) as f:
|
||||
## for l in f.readlines():
|
||||
## self.parser.Parse(l)
|
||||
## assert self.done == 1
|
||||
## return self.document
|
||||
|
||||
## def parseFile(filename):
|
||||
## return FileParser().parse(filename)
|
||||
|
||||
|
||||
375
venv/lib/python3.9/site-packages/twisted/words/xish/utility.py
Normal file
375
venv/lib/python3.9/site-packages/twisted/words/xish/utility.py
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
# -*- test-case-name: twisted.words.test.test_xishutil -*-
|
||||
#
|
||||
# Copyright (c) Twisted Matrix Laboratories.
|
||||
# See LICENSE for details.
|
||||
|
||||
"""
|
||||
Event Dispatching and Callback utilities.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division
|
||||
|
||||
from twisted.python import log
|
||||
from twisted.python.compat import iteritems
|
||||
from twisted.words.xish import xpath
|
||||
|
||||
class _MethodWrapper(object):
|
||||
"""
|
||||
Internal class for tracking method calls.
|
||||
"""
|
||||
def __init__(self, method, *args, **kwargs):
|
||||
self.method = method
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
nargs = self.args + args
|
||||
nkwargs = self.kwargs.copy()
|
||||
nkwargs.update(kwargs)
|
||||
self.method(*nargs, **nkwargs)
|
||||
|
||||
|
||||
|
||||
class CallbackList:
    """
    Container for callbacks.

    Event queries are linked to lists of callables. When a matching event
    occurs, these callables are called in sequence. One-time callbacks
    are removed from the list after the first time the event was triggered.

    Arguments to callbacks are spread across two sets. The first set,
    callback specific, is passed to C{addCallback} and is used for all
    subsequent event triggers. The second set is passed to C{callback} and is
    event specific. Positional arguments in the second set come after the
    positional arguments of the first set. Keyword arguments in the second set
    override those in the first set.

    @ivar callbacks: The registered callbacks as mapping from the callable to a
                     tuple of a wrapper for that callable that keeps the
                     callback specific arguments and a boolean that signifies
                     if it is to be called only once.
    @type callbacks: C{dict}
    """

    def __init__(self):
        self.callbacks = {}


    def addCallback(self, onetime, method, *args, **kwargs):
        """
        Add callback.

        The arguments passed are used as callback specific arguments.

        @param onetime: If C{True}, this callback is called at most once.
        @type onetime: C{bool}
        @param method: The callback callable to be added.
        @param args: Positional arguments to the callable.
        @type args: C{list}
        @param kwargs: Keyword arguments to the callable.
        @type kwargs: C{dict}
        """
        # Re-registering a known callable is a no-op; the original
        # registration and its arguments are kept.
        if method not in self.callbacks:
            self.callbacks[method] = (_MethodWrapper(method, *args, **kwargs),
                                      onetime)


    def removeCallback(self, method):
        """
        Remove callback.

        Unknown callables are silently ignored.

        @param method: The callable to be removed.
        """
        if method in self.callbacks:
            del self.callbacks[method]


    def callback(self, *args, **kwargs):
        """
        Call all registered callbacks.

        The passed arguments are event specific and augment and override
        the callback specific arguments as described above.

        @note: Exceptions raised by callbacks are trapped and logged. They will
               not propagate up to make sure other callbacks will still be
               called, and the event dispatching always succeeds.

        @param args: Positional arguments to the callable.
        @type args: C{list}
        @param kwargs: Keyword arguments to the callable.
        @type kwargs: C{dict}
        """
        # Iterate over a snapshot so one-time callbacks can be removed
        # from self.callbacks while iterating.
        for key, (methodwrapper, onetime) in list(self.callbacks.items()):
            try:
                methodwrapper(*args, **kwargs)
            except:
                # Deliberately broad: one failing observer must not prevent
                # the remaining observers from running.
                log.err()

            if onetime:
                del self.callbacks[key]


    def isEmpty(self):
        """
        Return if list of registered callbacks is empty.

        @rtype: C{bool}
        """
        return not self.callbacks
|
||||
|
||||
|
||||
|
||||
class EventDispatcher:
    """
    Event dispatching service.

    The C{EventDispatcher} allows observers to be registered for certain events
    that are dispatched. There are two types of events: XPath events and Named
    events.

    Every dispatch is triggered by calling L{dispatch} with a data object and,
    for named events, the name of the event.

    When an XPath type event is dispatched, the associated object is assumed to
    be an L{Element<twisted.words.xish.domish.Element>} instance, which is
    matched against all registered XPath queries. For every match, the
    respective observer will be called with the data object.

    A named event will simply call each registered observer for that particular
    event name, with the data object. Unlike XPath type events, the data object
    is not restricted to L{Element<twisted.words.xish.domish.Element>}, but can
    be anything.

    When registering observers, the event that is to be observed is specified
    using an L{xpath.XPathQuery} instance or a string. In the latter case, the
    string can also contain the string representation of an XPath expression.
    To distinguish these from named events, each named event should start with
    a special prefix that is stored in C{self.prefix}. It defaults to
    C{//event/}.

    Observers registered using L{addObserver} are persistent: after the
    observer has been triggered by a dispatch, it remains registered for a
    possible next dispatch. If instead L{addOnetimeObserver} was used to
    observe an event, the observer is removed from the list of observers after
    the first observed event.

    Observers can also be prioritized, by providing an optional C{priority}
    parameter to the L{addObserver} and L{addOnetimeObserver} methods. Higher
    priority observers are then called before lower priority observers.

    Finally, observers can be unregistered by using L{removeObserver}.
    """

    def __init__(self, eventprefix="//event/"):
        self.prefix = eventprefix
        # Observer maps: priority -> {event or query: CallbackList}.
        self._eventObservers = {}
        self._xpathObservers = {}
        # Number of nested dispatches currently in progress.
        self._dispatchDepth = 0
        # Observer (de)registrations deferred until dispatching finishes.
        self._updateQueue = []


    def _getEventAndObservers(self, event):
        """
        Resolve an event specifier to its canonical form and observer map.

        Strings starting with C{self.prefix} are named events; anything else
        is (interned as) an XPath query.
        """
        if isinstance(event, xpath.XPathQuery):
            # Treat as xpath
            observers = self._xpathObservers
        else:
            if event.startswith(self.prefix):
                # Treat as named event
                observers = self._eventObservers
            else:
                # Treat as xpath; intern so equal strings share one query
                # object (removeObserver relies on this).
                event = xpath.internQuery(event)
                observers = self._xpathObservers

        return event, observers


    def addOnetimeObserver(self, event, observerfn, priority=0, *args, **kwargs):
        """
        Register a one-time observer for an event.

        Like L{addObserver}, but is only triggered at most once. See there
        for a description of the parameters.
        """
        self._addObserver(True, event, observerfn, priority, *args, **kwargs)


    def addObserver(self, event, observerfn, priority=0, *args, **kwargs):
        """
        Register an observer for an event.

        Each observer will be registered with a certain priority. Higher
        priority observers get called before lower priority observers.

        @param event: Name or XPath query for the event to be monitored.
        @type event: C{str} or L{xpath.XPathQuery}.
        @param observerfn: Function to be called when the specified event
                           has been triggered. This callable takes
                           one parameter: the data object that triggered
                           the event. When specified, the C{*args} and
                           C{**kwargs} parameters to addObserver are being used
                           as additional parameters to the registered observer
                           callable.
        @param priority: (Optional) priority of this observer in relation to
                         other observer that match the same event. Defaults to
                         C{0}.
        @type priority: C{int}
        """
        self._addObserver(False, event, observerfn, priority, *args, **kwargs)


    def _addObserver(self, onetime, event, observerfn, priority, *args, **kwargs):
        # If this is happening in the middle of the dispatch, queue
        # it up for processing after the dispatch completes
        if self._dispatchDepth > 0:
            self._updateQueue.append(
                lambda: self._addObserver(onetime, event, observerfn,
                                          priority, *args, **kwargs))
            return

        event, observers = self._getEventAndObservers(event)

        # Get (or create) the callback list for this (priority, event) pair.
        priorityObservers = observers.setdefault(priority, {})
        if event not in priorityObservers:
            priorityObservers[event] = CallbackList()
        cbl = priorityObservers[event]

        cbl.addCallback(onetime, observerfn, *args, **kwargs)


    def removeObserver(self, event, observerfn):
        """
        Remove callable as observer for an event.

        The observer callable is removed for all priority levels for the
        specified event.

        @param event: Event for which the observer callable was registered.
        @type event: C{str} or L{xpath.XPathQuery}
        @param observerfn: Observer callable to be unregistered.
        """
        # If this is happening in the middle of the dispatch, queue
        # it up for processing after the dispatch completes
        if self._dispatchDepth > 0:
            self._updateQueue.append(
                lambda: self.removeObserver(event, observerfn))
            return

        event, observers = self._getEventAndObservers(event)

        # Collect now-empty callback lists and prune them afterwards, so the
        # observer maps are not mutated while being iterated.
        emptyLists = []
        for priority, priorityObservers in iteritems(observers):
            for query, callbacklist in iteritems(priorityObservers):
                if event == query:
                    callbacklist.removeCallback(observerfn)
                    if callbacklist.isEmpty():
                        emptyLists.append((priority, query))

        for priority, query in emptyLists:
            del observers[priority][query]


    def dispatch(self, obj, event=None):
        """
        Dispatch an event.

        When C{event} is L{None}, an XPath type event is triggered, and
        C{obj} is assumed to be an instance of
        L{Element<twisted.words.xish.domish.Element>}. Otherwise, C{event}
        holds the name of the named event being triggered. In the latter case,
        C{obj} can be anything.

        @param obj: The object to be dispatched.
        @param event: Optional event name.
        @type event: C{str}
        @return: Whether at least one observer was called.
        @rtype: C{bool}
        """
        foundTarget = False

        self._dispatchDepth += 1

        if event is not None:
            # Named event
            observers = self._eventObservers
            match = lambda query, obj: query == event
        else:
            # XPath event
            observers = self._xpathObservers
            match = lambda query, obj: query.matches(obj)

        # Call higher priority observers first.
        priorities = sorted(observers, reverse=True)

        emptyLists = []
        for priority in priorities:
            for query, callbacklist in iteritems(observers[priority]):
                if match(query, obj):
                    callbacklist.callback(obj)
                    foundTarget = True
                    if callbacklist.isEmpty():
                        emptyLists.append((priority, query))

        for priority, query in emptyLists:
            del observers[priority][query]

        self._dispatchDepth -= 1

        # If this is a dispatch within a dispatch, don't
        # do anything with the updateQueue -- it needs to
        # wait until we've backed all the way out of the stack
        if self._dispatchDepth == 0:
            # Deal with pending update operations
            for f in self._updateQueue:
                f()
            self._updateQueue = []

        return foundTarget
|
||||
|
||||
|
||||
|
||||
class XmlPipe(object):
    """
    XML stream pipe.

    Connects two objects that communicate stanzas through an XML stream like
    interface. Each of the ends of the pipe (sink and source) can be used to
    send XML stanzas to the other side, or add observers to process XML stanzas
    that were sent from the other side.

    XML pipes are usually used in place of regular XML streams that are
    transported over TCP. This is the reason for the use of the names source
    and sink for both ends of the pipe. The source side corresponds with the
    entity that initiated the TCP connection, whereas the sink corresponds with
    the entity that accepts that connection. In this object, though, the source
    and sink are treated equally.

    Unlike Jabber
    L{XmlStream<twisted.words.protocols.jabber.xmlstream.XmlStream>}s, the sink
    and source objects are assumed to represent an eternal connected and
    initialized XML stream. As such, events corresponding to connection,
    disconnection, initialization and stream errors are not dispatched or
    processed.

    @since: 8.2
    @ivar source: Source XML stream.
    @ivar sink: Sink XML stream.
    """

    def __init__(self):
        self.source = EventDispatcher()
        self.sink = EventDispatcher()

        # Sending on one end dispatches the stanza as an event on the other.
        def sendToSink(obj):
            self.sink.dispatch(obj)

        def sendToSource(obj):
            self.source.dispatch(obj)

        self.source.send = sendToSink
        self.sink.send = sendToSource
|
||||
279
venv/lib/python3.9/site-packages/twisted/words/xish/xmlstream.py
Normal file
279
venv/lib/python3.9/site-packages/twisted/words/xish/xmlstream.py
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
# -*- test-case-name: twisted.words.test.test_xmlstream -*-
|
||||
#
|
||||
# Copyright (c) Twisted Matrix Laboratories.
|
||||
# See LICENSE for details.
|
||||
|
||||
"""
|
||||
XML Stream processing.
|
||||
|
||||
An XML Stream is defined as a connection over which two XML documents are
|
||||
exchanged during the lifetime of the connection, one for each direction. The
|
||||
unit of interaction is a direct child element of the root element (stanza).
|
||||
|
||||
The most prominent use of XML Streams is Jabber, but this module is generically
|
||||
usable. See Twisted Words for Jabber specific protocol support.
|
||||
|
||||
Maintainer: Ralph Meijer
|
||||
|
||||
@var STREAM_CONNECTED_EVENT: This event signals that the connection has been
|
||||
established.
|
||||
@type STREAM_CONNECTED_EVENT: L{str}.
|
||||
|
||||
@var STREAM_END_EVENT: This event signals that the connection has been closed.
|
||||
@type STREAM_END_EVENT: L{str}.
|
||||
|
||||
@var STREAM_ERROR_EVENT: This event signals that a parse error occurred.
|
||||
@type STREAM_ERROR_EVENT: L{str}.
|
||||
|
||||
@var STREAM_START_EVENT: This event signals that the root element of the XML
|
||||
Stream has been received.
|
||||
For XMPP, this would be the C{<stream:stream ...>} opening tag.
|
||||
@type STREAM_START_EVENT: L{str}.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division
|
||||
|
||||
from twisted.python import failure
|
||||
from twisted.python.compat import intern, unicode
|
||||
from twisted.internet import protocol
|
||||
from twisted.words.xish import domish, utility
|
||||
|
||||
STREAM_CONNECTED_EVENT = intern("//event/stream/connected")
|
||||
STREAM_START_EVENT = intern("//event/stream/start")
|
||||
STREAM_END_EVENT = intern("//event/stream/end")
|
||||
STREAM_ERROR_EVENT = intern("//event/stream/error")
|
||||
|
||||
class XmlStream(protocol.Protocol, utility.EventDispatcher):
    """ Generic Streaming XML protocol handler.

    This protocol handler will parse incoming data as XML and dispatch events
    accordingly. Incoming stanzas can be handled by registering observers using
    XPath-like expressions that are matched against each stanza. See
    L{utility.EventDispatcher} for details.
    """
    def __init__(self):
        utility.EventDispatcher.__init__(self)
        # Streaming XML parser; (re)created per connection by
        # _initializeStream().
        self.stream = None
        # Optional hooks observing raw bytes written out / received in.
        self.rawDataOutFn = None
        self.rawDataInFn = None

    def _initializeStream(self):
        """ Sets up XML Parser. """
        self.stream = domish.elementStream()
        self.stream.DocumentStartEvent = self.onDocumentStart
        self.stream.ElementEvent = self.onElement
        self.stream.DocumentEndEvent = self.onDocumentEnd

    ### --------------------------------------------------------------
    ###
    ### Protocol events
    ###
    ### --------------------------------------------------------------

    def connectionMade(self):
        """ Called when a connection is made.

        Sets up the XML parser and dispatches the L{STREAM_CONNECTED_EVENT}
        event indicating the connection has been established.
        """
        self._initializeStream()
        self.dispatch(self, STREAM_CONNECTED_EVENT)

    def dataReceived(self, data):
        """ Called whenever data is received.

        Passes the data to the XML parser. This can result in calls to the
        DOM handlers. If a parse error occurs, the L{STREAM_ERROR_EVENT} event
        is called to allow for cleanup actions, followed by dropping the
        connection.
        """
        try:
            if self.rawDataInFn:
                self.rawDataInFn(data)
            self.stream.parse(data)
        except domish.ParserError:
            # Hand the failure to observers before tearing down the
            # connection.
            self.dispatch(failure.Failure(), STREAM_ERROR_EVENT)
            self.transport.loseConnection()

    def connectionLost(self, reason):
        """ Called when the connection is shut down.

        Dispatches the L{STREAM_END_EVENT}.
        """
        self.dispatch(reason, STREAM_END_EVENT)
        # Drop the parser so no further events can be generated.
        self.stream = None

    ### --------------------------------------------------------------
    ###
    ### DOM events
    ###
    ### --------------------------------------------------------------

    def onDocumentStart(self, rootElement):
        """ Called whenever the start tag of a root element has been received.

        Dispatches the L{STREAM_START_EVENT}.
        """
        self.dispatch(self, STREAM_START_EVENT)

    def onElement(self, element):
        """ Called whenever a direct child element of the root element has
        been received.

        Dispatches the received element.
        """
        self.dispatch(element)

    def onDocumentEnd(self):
        """ Called whenever the end tag of the root element has been received.

        Closes the connection. This causes C{connectionLost} being called.
        """
        self.transport.loseConnection()

    def setDispatchFn(self, fn):
        """ Set another function to handle elements. """
        self.stream.ElementEvent = fn

    def resetDispatchFn(self):
        """ Set the default function (C{onElement}) to handle elements. """
        self.stream.ElementEvent = self.onElement

    def send(self, obj):
        """ Send data over the stream.

        Sends the given C{obj} over the connection. C{obj} may be instances of
        L{domish.Element}, C{unicode} and C{str}. The first two will be
        properly serialized and/or encoded. C{str} objects must be in UTF-8
        encoding.

        Note: because it is easy to make mistakes in maintaining a properly
        encoded C{str} object, it is advised to use C{unicode} objects
        everywhere when dealing with XML Streams.

        @param obj: Object to be sent over the stream.
        @type obj: L{domish.Element}, L{domish} or C{str}

        """
        if domish.IElement.providedBy(obj):
            obj = obj.toXml()

        if isinstance(obj, unicode):
            obj = obj.encode('utf-8')

        if self.rawDataOutFn:
            self.rawDataOutFn(obj)

        self.transport.write(obj)
|
||||
|
||||
|
||||
|
||||
class BootstrapMixin(object):
    """
    XmlStream factory mixin to install bootstrap event observers.

    This mixin is for factories providing
    L{IProtocolFactory<twisted.internet.interfaces.IProtocolFactory>} to make
    sure bootstrap event observers are set up on protocols, before incoming
    data is processed. Such protocols typically derive from
    L{utility.EventDispatcher}, like L{XmlStream}.

    You can set up bootstrap event observers using C{addBootstrap}. The
    C{event} and C{fn} parameters correspond with the C{event} and
    C{observerfn} arguments to L{utility.EventDispatcher.addObserver}.

    @since: 8.2.
    @ivar bootstraps: The list of registered bootstrap event observers.
    @type bootstraps: C{list}
    """

    def __init__(self):
        self.bootstraps = []


    def installBootstraps(self, dispatcher):
        """
        Install registered bootstrap observers.

        @param dispatcher: Event dispatcher to add the observers to.
        @type dispatcher: L{utility.EventDispatcher}
        """
        for bootstrapEvent, observerfn in self.bootstraps:
            dispatcher.addObserver(bootstrapEvent, observerfn)


    def addBootstrap(self, event, fn):
        """
        Add a bootstrap event handler.

        @param event: The event to register an observer for.
        @type event: C{str} or L{xpath.XPathQuery}
        @param fn: The observer callable to be registered.
        """
        self.bootstraps.append((event, fn))


    def removeBootstrap(self, event, fn):
        """
        Remove a bootstrap event handler.

        @param event: The event the observer is registered for.
        @type event: C{str} or L{xpath.XPathQuery}
        @param fn: The registered observer callable.
        """
        self.bootstraps.remove((event, fn))
|
||||
|
||||
|
||||
|
||||
class XmlStreamFactoryMixin(BootstrapMixin):
    """
    XmlStream factory mixin that takes care of event handlers.

    All positional and keyword arguments passed to create this factory are
    passed on as-is to the protocol.

    @ivar args: Positional arguments passed to the protocol upon instantiation.
    @type args: C{tuple}.
    @ivar kwargs: Keyword arguments passed to the protocol upon instantiation.
    @type kwargs: C{dict}.
    """

    def __init__(self, *args, **kwargs):
        BootstrapMixin.__init__(self)
        # Remembered verbatim; handed to the protocol in buildProtocol.
        self.args = args
        self.kwargs = kwargs


    def buildProtocol(self, addr):
        """
        Create an instance of XmlStream.

        The returned instance will have bootstrap event observers registered
        and will proceed to handle input on an incoming connection.
        """
        proto = self.protocol(*self.args, **self.kwargs)
        proto.factory = self
        self.installBootstraps(proto)
        return proto
|
||||
|
||||
|
||||
|
||||
class XmlStreamFactory(XmlStreamFactoryMixin,
                       protocol.ReconnectingClientFactory):
    """
    Factory for XmlStream protocol objects as a reconnecting client.
    """

    protocol = XmlStream

    def buildProtocol(self, addr):
        """
        Create a protocol instance.

        Overrides L{XmlStreamFactoryMixin.buildProtocol} to work with
        a L{ReconnectingClientFactory}. As this is called upon having an
        connection established, we are resetting the delay for reconnection
        attempts when the connection is lost again.
        """
        # A successful connection resets the reconnect back-off delay.
        self.resetDelay()
        return XmlStreamFactoryMixin.buildProtocol(self, addr)
|
||||
337
venv/lib/python3.9/site-packages/twisted/words/xish/xpath.py
Normal file
337
venv/lib/python3.9/site-packages/twisted/words/xish/xpath.py
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
# -*- test-case-name: twisted.words.test.test_xpath -*-
|
||||
#
|
||||
# Copyright (c) Twisted Matrix Laboratories.
|
||||
# See LICENSE for details.
|
||||
|
||||
"""
|
||||
XPath query support.
|
||||
|
||||
This module provides L{XPathQuery} to match
|
||||
L{domish.Element<twisted.words.xish.domish.Element>} instances against
|
||||
XPath-like expressions.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division
|
||||
|
||||
from io import StringIO
|
||||
|
||||
from twisted.python.compat import StringType, unicode
|
||||
|
||||
class LiteralValue(unicode):
    """
    A string literal in an XPath expression.

    Subclasses C{unicode} so the literal can be compared directly, while
    providing the C{value} accessor shared by all expression node types.
    """
    def value(self, elem):
        # A literal's value is independent of the element under evaluation.
        return self
|
||||
|
||||
|
||||
class IndexValue:
    """
    A positional predicate in an XPath expression.

    XPath positions are 1-based; they are converted to 0-based child list
    indices at construction time.
    """
    def __init__(self, index):
        # Convert the 1-based XPath position to a 0-based list index.
        self.index = int(index) - 1

    def value(self, elem):
        return elem.children[self.index]
|
||||
|
||||
|
||||
class AttribValue:
    """
    An attribute reference in an XPath expression.

    The special name C{xmlns} refers to the element's namespace URI rather
    than a regular attribute.
    """
    def __init__(self, attribname):
        self.attribname = attribname
        if self.attribname == "xmlns":
            # Namespace lookups bypass the attribute dictionary entirely.
            self.value = self.value_ns

    def value_ns(self, elem):
        return elem.uri

    def value(self, elem):
        # Missing attributes evaluate to None rather than raising.
        return elem.attributes.get(self.attribname)
|
||||
|
||||
|
||||
class CompareValue:
    """
    A comparison (C{'='} or C{'!='}) between two XPath expression values.

    The comparison strategy is selected once, at construction time, and
    bound to C{value}.
    """
    def __init__(self, lhs, op, rhs):
        self.lhs = lhs
        self.rhs = rhs
        # Anything other than '=' is treated as inequality.
        self.value = self._compareEqual if op == "=" else self._compareNotEqual

    def _compareEqual(self, elem):
        return self.lhs.value(elem) == self.rhs.value(elem)

    def _compareNotEqual(self, elem):
        return self.lhs.value(elem) != self.rhs.value(elem)
|
||||
|
||||
|
||||
class BooleanValue:
    """
    Provide boolean XPath expression operators.

    @ivar lhs: Left hand side expression of the operator.
    @ivar op: The operator. One of C{'and'}, C{'or'}.
    @ivar rhs: Right hand side expression of the operator.
    @ivar value: Reference to the method that will calculate the value of
                 this expression given an element.
    """
    def __init__(self, lhs, op, rhs):
        self.lhs = lhs
        self.rhs = rhs
        # Bind the evaluation strategy once; evaluation short-circuits just
        # like Python's own 'and'/'or' operators.
        self.value = self._booleanAnd if op == "and" else self._booleanOr

    def _booleanAnd(self, elem):
        """
        Calculate boolean and of the given expressions given an element.

        @param elem: The element to calculate the value of the expression from.
        """
        return self.lhs.value(elem) and self.rhs.value(elem)

    def _booleanOr(self, elem):
        """
        Calculate boolean or of the given expressions given an element.

        @param elem: The element to calculate the value of the expression from.
        """
        return self.lhs.value(elem) or self.rhs.value(elem)
|
||||
|
||||
|
||||
def Function(fname):
    """
    Internal factory that maps an XPath function name to its implementation.

    Looks up a class named C{_<fname>_Function} in this module's globals and
    returns a fresh instance of it.

    @param fname: Name of the XPath function, e.g. C{'not'} or C{'text'}.
    @raise KeyError: If no implementation exists for C{fname}.
    """
    klassname = "_%s_Function" % fname
    c = globals()[klassname]()
    return c
|
||||
|
||||
|
||||
class _not_Function:
|
||||
def __init__(self):
|
||||
self.baseValue = None
|
||||
|
||||
def setParams(self, baseValue):
|
||||
self.baseValue = baseValue
|
||||
|
||||
def value(self, elem):
|
||||
return not self.baseValue.value(elem)
|
||||
|
||||
|
||||
class _text_Function:
    """
    Implementation of the XPath C{text()} function: the character data of
    the element under evaluation.
    """
    def setParams(self):
        # text() takes no arguments.
        pass

    def value(self, elem):
        return unicode(elem)
|
||||
|
||||
|
||||
class _Location:
|
||||
def __init__(self):
|
||||
self.predicates = []
|
||||
self.elementName = None
|
||||
self.childLocation = None
|
||||
|
||||
def matchesPredicates(self, elem):
|
||||
if self.elementName != None and self.elementName != elem.name:
|
||||
return 0
|
||||
|
||||
for p in self.predicates:
|
||||
if not p.value(elem):
|
||||
return 0
|
||||
|
||||
return 1
|
||||
|
||||
def matches(self, elem):
|
||||
if not self.matchesPredicates(elem):
|
||||
return 0
|
||||
|
||||
if self.childLocation != None:
|
||||
for c in elem.elements():
|
||||
if self.childLocation.matches(c):
|
||||
return 1
|
||||
else:
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
def queryForString(self, elem, resultbuf):
|
||||
if not self.matchesPredicates(elem):
|
||||
return
|
||||
|
||||
if self.childLocation != None:
|
||||
for c in elem.elements():
|
||||
self.childLocation.queryForString(c, resultbuf)
|
||||
else:
|
||||
resultbuf.write(unicode(elem))
|
||||
|
||||
def queryForNodes(self, elem, resultlist):
|
||||
if not self.matchesPredicates(elem):
|
||||
return
|
||||
|
||||
if self.childLocation != None:
|
||||
for c in elem.elements():
|
||||
self.childLocation.queryForNodes(c, resultlist)
|
||||
else:
|
||||
resultlist.append(elem)
|
||||
|
||||
def queryForStringList(self, elem, resultlist):
|
||||
if not self.matchesPredicates(elem):
|
||||
return
|
||||
|
||||
if self.childLocation != None:
|
||||
for c in elem.elements():
|
||||
self.childLocation.queryForStringList(c, resultlist)
|
||||
else:
|
||||
for c in elem.children:
|
||||
if isinstance(c, StringType):
|
||||
resultlist.append(c)
|
||||
|
||||
|
||||
class _AnyLocation:
    """
    A '//'-style location step: matches elements at any depth.

    Like L{_Location}, but instead of testing only direct children, the
    whole subtree is searched recursively for a match.
    """
    def __init__(self):
        self.predicates = []
        self.elementName = None
        self.childLocation = None

    def matchesPredicates(self, elem):
        # 1 only if every predicate holds for elem.
        for p in self.predicates:
            if not p.value(elem):
                return 0
        return 1

    def listParents(self, elem, parentlist):
        # Append the names of elem's ancestors (root first), then elem itself.
        # NOTE(review): not referenced elsewhere in this class -- possibly
        # used by the generated parser; confirm before removing.
        if elem.parent != None:
            self.listParents(elem.parent, parentlist)
        parentlist.append(elem.name)

    def isRootMatch(self, elem):
        """
        Return whether C{elem} itself (treated as the root of the search)
        satisfies this step, including its child location if any.
        """
        if (self.elementName == None or self.elementName == elem.name) and \
           self.matchesPredicates(elem):
            if self.childLocation != None:
                for c in elem.elements():
                    if self.childLocation.matches(c):
                        return True
            else:
                return True
        return False

    def findFirstRootMatch(self, elem):
        """
        Return the first element in C{elem}'s subtree satisfying this step,
        or L{None} when nothing matches.
        """
        if (self.elementName == None or self.elementName == elem.name) and \
           self.matchesPredicates(elem):
            # Thus far, the name matches and the predicates match,
            # now check into the children and find the first one
            # that matches the rest of the structure
            if self.childLocation != None:
                for c in elem.elements():
                    if self.childLocation.matches(c):
                        # NOTE(review): this returns the direct child whose
                        # subtree matches, not necessarily the deepest
                        # matching element -- confirm intended.
                        return c
                return None
            else:
                # No children locations; this is a match!
                return elem
        else:
            # Ok, predicates or name didn't match, so we need to start
            # down each child and treat it as the root and try
            # again
            for c in elem.elements():
                if self.matches(c):
                    return c
            # No children matched...
            return None

    def matches(self, elem):
        if self.isRootMatch(elem):
            return True
        else:
            # Ok, initial element isn't an exact match, walk
            # down each child and treat it as the root and try
            # again
            for c in elem.elements():
                if self.matches(c):
                    return True
            # No children matched...
            return False

    def queryForString(self, elem, resultbuf):
        """
        Not supported for '//' locations.

        @raise NotImplementedError: always.
        """
        raise NotImplementedError(
            "queryForString is not implemented for any location")

    def queryForNodes(self, elem, resultlist):
        # First check to see if _this_ element is a root
        if self.isRootMatch(elem):
            resultlist.append(elem)

        # Now check each child
        for c in elem.elements():
            self.queryForNodes(c, resultlist)


    def queryForStringList(self, elem, resultlist):
        # Collect string children of every matching element in the subtree.
        if self.isRootMatch(elem):
            for c in elem.children:
                if isinstance(c, StringType):
                    resultlist.append(c)
        for c in elem.elements():
            self.queryForStringList(c, resultlist)
|
||||
|
||||
|
||||
class XPathQuery:
    """
    A compiled XPath-like query over domish elements.

    The query string is parsed once, at construction time, into a chain of
    location objects rooted at C{baseLocation}.
    """
    def __init__(self, queryStr):
        self.queryStr = queryStr
        # Imported here to prevent a circular import, as xpathparser
        # imports this module.
        from twisted.words.xish.xpathparser import (XPathParser,
                                                    XPathParserScanner)
        scanner = XPathParserScanner(queryStr)
        self.baseLocation = XPathParser(scanner).XPATH()

    def __hash__(self):
        # Queries hash like their source string, so interned queries can
        # serve as dictionary keys.
        return hash(self.queryStr)

    def matches(self, elem):
        return self.baseLocation.matches(elem)

    def queryForString(self, elem):
        result = StringIO()
        self.baseLocation.queryForString(elem, result)
        return result.getvalue()

    def queryForNodes(self, elem):
        result = []
        self.baseLocation.queryForNodes(elem, result)
        # An empty result is reported as None, not an empty list.
        return result or None

    def queryForStringList(self, elem):
        result = []
        self.baseLocation.queryForStringList(elem, result)
        return result or None
|
||||
|
||||
|
||||
__internedQueries = {}

def internQuery(queryString):
    """
    Return a shared, cached L{XPathQuery} for the given query string.

    Equal query strings always yield the same query object, which lets
    queries be compared and used as dictionary keys by identity.
    """
    try:
        return __internedQueries[queryString]
    except KeyError:
        query = __internedQueries[queryString] = XPathQuery(queryString)
        return query
|
||||
|
||||
|
||||
def matches(xpathstr, elem):
|
||||
return internQuery(xpathstr).matches(elem)
|
||||
|
||||
|
||||
def queryForStringList(xpathstr, elem):
|
||||
return internQuery(xpathstr).queryForStringList(elem)
|
||||
|
||||
|
||||
def queryForString(xpathstr, elem):
|
||||
return internQuery(xpathstr).queryForString(elem)
|
||||
|
||||
|
||||
def queryForNodes(xpathstr, elem):
|
||||
return internQuery(xpathstr).queryForNodes(elem)
|
||||
|
|
@ -0,0 +1,524 @@
|
|||
# -*- test-case-name: twisted.words.test.test_xpath -*-
|
||||
# Copyright (c) Twisted Matrix Laboratories.
|
||||
# See LICENSE for details.
|
||||
|
||||
# pylint: disable=W9401,W9402
|
||||
|
||||
# DO NOT EDIT xpathparser.py!
|
||||
#
|
||||
# It is generated from xpathparser.g using Yapps. Make needed changes there.
|
||||
# This also means that the generated Python may not conform to Twisted's coding
|
||||
# standards, so it is wrapped in exec to prevent automated checkers from
|
||||
# complaining.
|
||||
|
||||
# HOWTO Generate me:
|
||||
#
|
||||
# 1.) Grab a copy of yapps2:
|
||||
# https://github.com/smurfix/yapps
|
||||
#
|
||||
# Note: Do NOT use the package in debian/ubuntu as it has incompatible
|
||||
# modifications. The original at http://theory.stanford.edu/~amitp/yapps/
|
||||
# hasn't been touched since 2003 and has not been updated to work with
|
||||
# Python 3.
|
||||
#
|
||||
# 2.) Generate the grammar:
|
||||
#
|
||||
# yapps2 xpathparser.g xpathparser.py.proto
|
||||
#
|
||||
# 3.) Edit the output to depend on the embedded runtime, and remove extraneous
|
||||
# imports:
|
||||
#
|
||||
# sed -e '/^# Begin/,${/^[^ ].*mport/d}' -e 's/runtime\.//g' \
|
||||
# -e "s/^\(from __future\)/exec(r'''\n\1/" -e"\$a''')"
|
||||
# xpathparser.py.proto > xpathparser.py
|
||||
|
||||
"""
|
||||
XPath Parser.
|
||||
|
||||
Besides the parser code produced by Yapps, this module also defines the
|
||||
parse-time exception classes, a scanner class, a base class for parsers
|
||||
produced by Yapps, and a context class that keeps track of the parse stack.
|
||||
These have been copied from the Yapps runtime module.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
import sys, re
|
||||
|
||||
MIN_WINDOW=4096
|
||||
# File lookup window
|
||||
|
||||
class SyntaxError(Exception):
|
||||
"""When we run into an unexpected token, this is the exception to use"""
|
||||
def __init__(self, pos=None, msg="Bad Token", context=None):
|
||||
Exception.__init__(self)
|
||||
self.pos = pos
|
||||
self.msg = msg
|
||||
self.context = context
|
||||
|
||||
def __str__(self):
|
||||
if not self.pos: return 'SyntaxError'
|
||||
else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)
|
||||
|
||||
class NoMoreTokens(Exception):
|
||||
"""Another exception object, for when we run out of tokens"""
|
||||
pass
|
||||
|
||||
class Token(object):
|
||||
"""Yapps token.
|
||||
|
||||
This is a container for a scanned token.
|
||||
"""
|
||||
|
||||
def __init__(self, type,value, pos=None):
|
||||
"""Initialize a token."""
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.pos = pos
|
||||
|
||||
def __repr__(self):
|
||||
output = '<%s: %s' % (self.type, repr(self.value))
|
||||
if self.pos:
|
||||
output += " @ "
|
||||
if self.pos[0]:
|
||||
output += "%s:" % self.pos[0]
|
||||
if self.pos[1]:
|
||||
output += "%d" % self.pos[1]
|
||||
if self.pos[2] is not None:
|
||||
output += ".%d" % self.pos[2]
|
||||
output += ">"
|
||||
return output
|
||||
|
||||
in_name=0
|
||||
class Scanner(object):
|
||||
"""Yapps scanner.
|
||||
|
||||
The Yapps scanner can work in context sensitive or context
|
||||
insensitive modes. The token(i) method is used to retrieve the
|
||||
i-th token. It takes a restrict set that limits the set of tokens
|
||||
it is allowed to return. In context sensitive mode, this restrict
|
||||
set guides the scanner. In context insensitive mode, there is no
|
||||
restriction (the set is always the full set of tokens).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, patterns, ignore, input="",
|
||||
file=None,filename=None,stacked=False):
|
||||
"""Initialize the scanner.
|
||||
|
||||
Parameters:
|
||||
patterns : [(terminal, uncompiled regex), ...] or None
|
||||
ignore : {terminal:None, ...}
|
||||
input : string
|
||||
|
||||
If patterns is None, we assume that the subclass has
|
||||
defined self.patterns : [(terminal, compiled regex), ...].
|
||||
Note that the patterns parameter expects uncompiled regexes,
|
||||
whereas the self.patterns field expects compiled regexes.
|
||||
|
||||
The 'ignore' value is either None or a callable, which is called
|
||||
with the scanner and the to-be-ignored match object; this can
|
||||
be used for include file or comment handling.
|
||||
"""
|
||||
|
||||
if not filename:
|
||||
global in_name
|
||||
filename="<f.%d>" % in_name
|
||||
in_name += 1
|
||||
|
||||
self.input = input
|
||||
self.ignore = ignore
|
||||
self.file = file
|
||||
self.filename = filename
|
||||
self.pos = 0
|
||||
self.del_pos = 0 # skipped
|
||||
self.line = 1
|
||||
self.del_line = 0 # skipped
|
||||
self.col = 0
|
||||
self.tokens = []
|
||||
self.stack = None
|
||||
self.stacked = stacked
|
||||
|
||||
self.last_read_token = None
|
||||
self.last_token = None
|
||||
self.last_types = None
|
||||
|
||||
if patterns is not None:
|
||||
# Compile the regex strings into regex objects
|
||||
self.patterns = []
|
||||
for terminal, regex in patterns:
|
||||
self.patterns.append( (terminal, re.compile(regex)) )
|
||||
|
||||
def stack_input(self, input="", file=None, filename=None):
|
||||
"""Temporarily parse from a second file."""
|
||||
|
||||
# Already reading from somewhere else: Go on top of that, please.
|
||||
if self.stack:
|
||||
# autogenerate a recursion-level-identifying filename
|
||||
if not filename:
|
||||
filename = 1
|
||||
else:
|
||||
try:
|
||||
filename += 1
|
||||
except TypeError:
|
||||
pass
|
||||
# now pass off to the include file
|
||||
self.stack.stack_input(input,file,filename)
|
||||
else:
|
||||
|
||||
try:
|
||||
filename += 0
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
filename = "<str_%d>" % filename
|
||||
|
||||
# self.stack = object.__new__(self.__class__)
|
||||
# Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True)
|
||||
|
||||
# Note that the pattern+ignore are added by the generated
|
||||
# scanner code
|
||||
self.stack = self.__class__(input,file,filename, stacked=True)
|
||||
|
||||
def get_pos(self):
|
||||
"""Return a file/line/char tuple."""
|
||||
if self.stack: return self.stack.get_pos()
|
||||
|
||||
return (self.filename, self.line+self.del_line, self.col)
|
||||
|
||||
# def __repr__(self):
|
||||
# """Print the last few tokens that have been scanned in"""
|
||||
# output = ''
|
||||
# for t in self.tokens:
|
||||
# output += '%s\n' % (repr(t),)
|
||||
# return output
|
||||
|
||||
def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
|
||||
"""Print the line of 'text' that includes position 'p',
|
||||
along with a second line with a single caret (^) at position p"""
|
||||
|
||||
file,line,p = pos
|
||||
if file != self.filename:
|
||||
if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out)
|
||||
print >>out, "(%s: not in input buffer)" % file
|
||||
return
|
||||
|
||||
text = self.input
|
||||
p += length-1 # starts at pos 1
|
||||
|
||||
origline=line
|
||||
line -= self.del_line
|
||||
spos=0
|
||||
if line > 0:
|
||||
while 1:
|
||||
line = line - 1
|
||||
try:
|
||||
cr = text.index("\n",spos)
|
||||
except ValueError:
|
||||
if line:
|
||||
text = ""
|
||||
break
|
||||
if line == 0:
|
||||
text = text[spos:cr]
|
||||
break
|
||||
spos = cr+1
|
||||
else:
|
||||
print >>out, "(%s:%d not in input buffer)" % (file,origline)
|
||||
return
|
||||
|
||||
# Now try printing part of the line
|
||||
text = text[max(p-80, 0):p+80]
|
||||
p = p - max(p-80, 0)
|
||||
|
||||
# Strip to the left
|
||||
i = text[:p].rfind('\n')
|
||||
j = text[:p].rfind('\r')
|
||||
if i < 0 or (0 <= j < i): i = j
|
||||
if 0 <= i < p:
|
||||
p = p - i - 1
|
||||
text = text[i+1:]
|
||||
|
||||
# Strip to the right
|
||||
i = text.find('\n', p)
|
||||
j = text.find('\r', p)
|
||||
if i < 0 or (0 <= j < i): i = j
|
||||
if i >= 0:
|
||||
text = text[:i]
|
||||
|
||||
# Now shorten the text
|
||||
while len(text) > 70 and p > 60:
|
||||
# Cut off 10 chars
|
||||
text = "..." + text[10:]
|
||||
p = p - 7
|
||||
|
||||
# Now print the string, along with an indicator
|
||||
print >>out, '> ',text
|
||||
print >>out, '> ',' '*p + '^'
|
||||
|
||||
def grab_input(self):
|
||||
"""Get more input if possible."""
|
||||
if not self.file: return
|
||||
if len(self.input) - self.pos >= MIN_WINDOW: return
|
||||
|
||||
data = self.file.read(MIN_WINDOW)
|
||||
if data is None or data == "":
|
||||
self.file = None
|
||||
|
||||
# Drop bytes from the start, if necessary.
|
||||
if self.pos > 2*MIN_WINDOW:
|
||||
self.del_pos += MIN_WINDOW
|
||||
self.del_line += self.input[:MIN_WINDOW].count("\n")
|
||||
self.pos -= MIN_WINDOW
|
||||
self.input = self.input[MIN_WINDOW:] + data
|
||||
else:
|
||||
self.input = self.input + data
|
||||
|
||||
def getchar(self):
|
||||
"""Return the next character."""
|
||||
self.grab_input()
|
||||
|
||||
c = self.input[self.pos]
|
||||
self.pos += 1
|
||||
return c
|
||||
|
||||
def token(self, restrict, context=None):
|
||||
"""Scan for another token."""
|
||||
|
||||
while 1:
|
||||
if self.stack:
|
||||
try:
|
||||
return self.stack.token(restrict, context)
|
||||
except StopIteration:
|
||||
self.stack = None
|
||||
|
||||
# Keep looking for a token, ignoring any in self.ignore
|
||||
self.grab_input()
|
||||
|
||||
# special handling for end-of-file
|
||||
if self.stacked and self.pos==len(self.input):
|
||||
raise StopIteration
|
||||
|
||||
# Search the patterns for the longest match, with earlier
|
||||
# tokens in the list having preference
|
||||
best_match = -1
|
||||
best_pat = '(error)'
|
||||
best_m = None
|
||||
for p, regexp in self.patterns:
|
||||
# First check to see if we're ignoring this token
|
||||
if restrict and p not in restrict and p not in self.ignore:
|
||||
continue
|
||||
m = regexp.match(self.input, self.pos)
|
||||
if m and m.end()-m.start() > best_match:
|
||||
# We got a match that's better than the previous one
|
||||
best_pat = p
|
||||
best_match = m.end()-m.start()
|
||||
best_m = m
|
||||
|
||||
# If we didn't find anything, raise an error
|
||||
if best_pat == '(error)' and best_match < 0:
|
||||
msg = 'Bad Token'
|
||||
if restrict:
|
||||
msg = 'Trying to find one of '+', '.join(restrict)
|
||||
raise SyntaxError(self.get_pos(), msg, context=context)
|
||||
|
||||
ignore = best_pat in self.ignore
|
||||
value = self.input[self.pos:self.pos+best_match]
|
||||
if not ignore:
|
||||
tok=Token(type=best_pat, value=value, pos=self.get_pos())
|
||||
|
||||
self.pos += best_match
|
||||
|
||||
npos = value.rfind("\n")
|
||||
if npos > -1:
|
||||
self.col = best_match-npos
|
||||
self.line += value.count("\n")
|
||||
else:
|
||||
self.col += best_match
|
||||
|
||||
# If we found something that isn't to be ignored, return it
|
||||
if not ignore:
|
||||
if len(self.tokens) >= 10:
|
||||
del self.tokens[0]
|
||||
self.tokens.append(tok)
|
||||
self.last_read_token = tok
|
||||
# print repr(tok)
|
||||
return tok
|
||||
else:
|
||||
ignore = self.ignore[best_pat]
|
||||
if ignore:
|
||||
ignore(self, best_m)
|
||||
|
||||
def peek(self, *types, **kw):
|
||||
"""Returns the token type for lookahead; if there are any args
|
||||
then the list of args is the set of token types to allow"""
|
||||
context = kw.get("context",None)
|
||||
if self.last_token is None:
|
||||
self.last_types = types
|
||||
self.last_token = self.token(types,context)
|
||||
elif self.last_types:
|
||||
for t in types:
|
||||
if t not in self.last_types:
|
||||
raise NotImplementedError("Unimplemented: restriction set changed")
|
||||
return self.last_token.type
|
||||
|
||||
def scan(self, type, **kw):
|
||||
"""Returns the matched text, and moves to the next token"""
|
||||
context = kw.get("context",None)
|
||||
|
||||
if self.last_token is None:
|
||||
tok = self.token([type],context)
|
||||
else:
|
||||
if self.last_types and type not in self.last_types:
|
||||
raise NotImplementedError("Unimplemented: restriction set changed")
|
||||
|
||||
tok = self.last_token
|
||||
self.last_token = None
|
||||
if tok.type != type:
|
||||
if not self.last_types: self.last_types=[]
|
||||
raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
|
||||
return tok.value
|
||||
|
||||
class Parser(object):
|
||||
"""Base class for Yapps-generated parsers.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, scanner):
|
||||
self._scanner = scanner
|
||||
|
||||
def _stack(self, input="",file=None,filename=None):
|
||||
"""Temporarily read from someplace else"""
|
||||
self._scanner.stack_input(input,file,filename)
|
||||
self._tok = None
|
||||
|
||||
def _peek(self, *types, **kw):
|
||||
"""Returns the token type for lookahead; if there are any args
|
||||
then the list of args is the set of token types to allow"""
|
||||
return self._scanner.peek(*types, **kw)
|
||||
|
||||
def _scan(self, type, **kw):
|
||||
"""Returns the matched text, and moves to the next token"""
|
||||
return self._scanner.scan(type, **kw)
|
||||
|
||||
class Context(object):
|
||||
"""Class to represent the parser's call stack.
|
||||
|
||||
Every rule creates a Context that links to its parent rule. The
|
||||
contexts can be used for debugging.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, parent, scanner, rule, args=()):
|
||||
"""Create a new context.
|
||||
|
||||
Args:
|
||||
parent: Context object or None
|
||||
scanner: Scanner object
|
||||
rule: string (name of the rule)
|
||||
args: tuple listing parameters to the rule
|
||||
|
||||
"""
|
||||
self.parent = parent
|
||||
self.scanner = scanner
|
||||
self.rule = rule
|
||||
self.args = args
|
||||
while scanner.stack: scanner = scanner.stack
|
||||
self.token = scanner.last_read_token
|
||||
|
||||
def __str__(self):
|
||||
output = ''
|
||||
if self.parent: output = str(self.parent) + ' > '
|
||||
output += self.rule
|
||||
return output
|
||||
|
||||
def print_error(err, scanner, max_ctx=None):
|
||||
"""Print error messages, the parser stack, and the input text -- for human-readable error messages."""
|
||||
# NOTE: this function assumes 80 columns :-(
|
||||
# Figure out the line number
|
||||
pos = err.pos
|
||||
if not pos:
|
||||
pos = scanner.get_pos()
|
||||
|
||||
file_name, line_number, column_number = pos
|
||||
print('%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg), file=sys.stderr)
|
||||
|
||||
scanner.print_line_with_pointer(pos)
|
||||
|
||||
context = err.context
|
||||
token = None
|
||||
while context:
|
||||
print('while parsing %s%s:' % (context.rule, tuple(context.args)), file=sys.stderr)
|
||||
if context.token:
|
||||
token = context.token
|
||||
if token:
|
||||
scanner.print_line_with_pointer(token.pos, length=len(token.value))
|
||||
context = context.parent
|
||||
if max_ctx:
|
||||
max_ctx = max_ctx-1
|
||||
if not max_ctx:
|
||||
break
|
||||
|
||||
def wrap_error_reporter(parser, rule, *args,**kw):
|
||||
try:
|
||||
return getattr(parser, rule)(*args,**kw)
|
||||
except SyntaxError as e:
|
||||
print_error(e, parser._scanner)
|
||||
except NoMoreTokens:
|
||||
print('Could not complete parsing; stopped around here:', file=sys.stderr)
|
||||
print(parser._scanner, file=sys.stderr)
|
||||
|
||||
from twisted.words.xish.xpath import AttribValue, BooleanValue, CompareValue
|
||||
from twisted.words.xish.xpath import Function, IndexValue, LiteralValue
|
||||
from twisted.words.xish.xpath import _AnyLocation, _Location
|
||||
|
||||
%%
|
||||
parser XPathParser:
|
||||
ignore: "\\s+"
|
||||
token INDEX: "[0-9]+"
|
||||
token WILDCARD: "\*"
|
||||
token IDENTIFIER: "[a-zA-Z][a-zA-Z0-9_\-]*"
|
||||
token ATTRIBUTE: "\@[a-zA-Z][a-zA-Z0-9_\-]*"
|
||||
token FUNCNAME: "[a-zA-Z][a-zA-Z0-9_]*"
|
||||
token CMP_EQ: "\="
|
||||
token CMP_NE: "\!\="
|
||||
token STR_DQ: '"([^"]|(\\"))*?"'
|
||||
token STR_SQ: "'([^']|(\\'))*?'"
|
||||
token OP_AND: "and"
|
||||
token OP_OR: "or"
|
||||
token END: "$"
|
||||
|
||||
rule XPATH: PATH {{ result = PATH; current = result }}
|
||||
( PATH {{ current.childLocation = PATH; current = current.childLocation }} ) * END
|
||||
{{ return result }}
|
||||
|
||||
rule PATH: ("/" {{ result = _Location() }} | "//" {{ result = _AnyLocation() }} )
|
||||
( IDENTIFIER {{ result.elementName = IDENTIFIER }} | WILDCARD {{ result.elementName = None }} )
|
||||
( "\[" PREDICATE {{ result.predicates.append(PREDICATE) }} "\]")*
|
||||
{{ return result }}
|
||||
|
||||
rule PREDICATE: EXPR {{ return EXPR }} |
|
||||
INDEX {{ return IndexValue(INDEX) }}
|
||||
|
||||
rule EXPR: FACTOR {{ e = FACTOR }}
|
||||
( BOOLOP FACTOR {{ e = BooleanValue(e, BOOLOP, FACTOR) }} )*
|
||||
{{ return e }}
|
||||
|
||||
rule BOOLOP: ( OP_AND {{ return OP_AND }} | OP_OR {{ return OP_OR }} )
|
||||
|
||||
rule FACTOR: TERM {{ return TERM }}
|
||||
| "\(" EXPR "\)" {{ return EXPR }}
|
||||
|
||||
rule TERM: VALUE {{ t = VALUE }}
|
||||
[ CMP VALUE {{ t = CompareValue(t, CMP, VALUE) }} ]
|
||||
{{ return t }}
|
||||
|
||||
rule VALUE: "@" IDENTIFIER {{ return AttribValue(IDENTIFIER) }} |
|
||||
FUNCNAME {{ f = Function(FUNCNAME); args = [] }}
|
||||
"\(" [ VALUE {{ args.append(VALUE) }}
|
||||
(
|
||||
"," VALUE {{ args.append(VALUE) }}
|
||||
)*
|
||||
] "\)" {{ f.setParams(*args); return f }} |
|
||||
STR {{ return LiteralValue(STR[1:len(STR)-1]) }}
|
||||
|
||||
rule CMP: (CMP_EQ {{ return CMP_EQ }} | CMP_NE {{ return CMP_NE }})
|
||||
rule STR: (STR_DQ {{ return STR_DQ }} | STR_SQ {{ return STR_SQ }})
|
||||
|
|
@ -0,0 +1,650 @@
|
|||
# -*- test-case-name: twisted.words.test.test_xpath -*-
|
||||
# Copyright (c) Twisted Matrix Laboratories.
|
||||
# See LICENSE for details.
|
||||
|
||||
# pylint: disable=W9401,W9402
|
||||
|
||||
# DO NOT EDIT xpathparser.py!
|
||||
#
|
||||
# It is generated from xpathparser.g using Yapps. Make needed changes there.
|
||||
# This also means that the generated Python may not conform to Twisted's coding
|
||||
# standards, so it is wrapped in exec to prevent automated checkers from
|
||||
# complaining.
|
||||
|
||||
# HOWTO Generate me:
|
||||
#
|
||||
# 1.) Grab a copy of yapps2:
|
||||
# https://github.com/smurfix/yapps
|
||||
#
|
||||
# Note: Do NOT use the package in debian/ubuntu as it has incompatible
|
||||
# modifications. The original at http://theory.stanford.edu/~amitp/yapps/
|
||||
# hasn't been touched since 2003 and has not been updated to work with
|
||||
# Python 3.
|
||||
#
|
||||
# 2.) Generate the grammar:
|
||||
#
|
||||
# yapps2 xpathparser.g xpathparser.py.proto
|
||||
#
|
||||
# 3.) Edit the output to depend on the embedded runtime, and remove extraneous
|
||||
# imports:
|
||||
#
|
||||
# sed -e '/^# Begin/,${/^[^ ].*mport/d}' -e '/^[^#]/s/runtime\.//g' \
|
||||
# -e "s/^\(from __future\)/exec(r'''\n\1/" -e"\$a''')"
|
||||
# xpathparser.py.proto > xpathparser.py
|
||||
|
||||
"""
|
||||
XPath Parser.
|
||||
|
||||
Besides the parser code produced by Yapps, this module also defines the
|
||||
parse-time exception classes, a scanner class, a base class for parsers
|
||||
produced by Yapps, and a context class that keeps track of the parse stack.
|
||||
These have been copied from the Yapps runtime module.
|
||||
"""
|
||||
|
||||
exec(r'''
|
||||
from __future__ import print_function
|
||||
import sys, re
|
||||
|
||||
MIN_WINDOW=4096
|
||||
# File lookup window
|
||||
|
||||
class SyntaxError(Exception):
|
||||
"""When we run into an unexpected token, this is the exception to use"""
|
||||
def __init__(self, pos=None, msg="Bad Token", context=None):
|
||||
Exception.__init__(self)
|
||||
self.pos = pos
|
||||
self.msg = msg
|
||||
self.context = context
|
||||
|
||||
def __str__(self):
|
||||
if not self.pos: return 'SyntaxError'
|
||||
else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)
|
||||
|
||||
class NoMoreTokens(Exception):
|
||||
"""Another exception object, for when we run out of tokens"""
|
||||
pass
|
||||
|
||||
class Token(object):
|
||||
"""Yapps token.
|
||||
|
||||
This is a container for a scanned token.
|
||||
"""
|
||||
|
||||
def __init__(self, type,value, pos=None):
|
||||
"""Initialize a token."""
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.pos = pos
|
||||
|
||||
def __repr__(self):
|
||||
output = '<%s: %s' % (self.type, repr(self.value))
|
||||
if self.pos:
|
||||
output += " @ "
|
||||
if self.pos[0]:
|
||||
output += "%s:" % self.pos[0]
|
||||
if self.pos[1]:
|
||||
output += "%d" % self.pos[1]
|
||||
if self.pos[2] is not None:
|
||||
output += ".%d" % self.pos[2]
|
||||
output += ">"
|
||||
return output
|
||||
|
||||
in_name=0
|
||||
class Scanner(object):
|
||||
"""Yapps scanner.
|
||||
|
||||
The Yapps scanner can work in context sensitive or context
|
||||
insensitive modes. The token(i) method is used to retrieve the
|
||||
i-th token. It takes a restrict set that limits the set of tokens
|
||||
it is allowed to return. In context sensitive mode, this restrict
|
||||
set guides the scanner. In context insensitive mode, there is no
|
||||
restriction (the set is always the full set of tokens).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, patterns, ignore, input="",
|
||||
file=None,filename=None,stacked=False):
|
||||
"""Initialize the scanner.
|
||||
|
||||
Parameters:
|
||||
patterns : [(terminal, uncompiled regex), ...] or None
|
||||
ignore : {terminal:None, ...}
|
||||
input : string
|
||||
|
||||
If patterns is None, we assume that the subclass has
|
||||
defined self.patterns : [(terminal, compiled regex), ...].
|
||||
Note that the patterns parameter expects uncompiled regexes,
|
||||
whereas the self.patterns field expects compiled regexes.
|
||||
|
||||
The 'ignore' value is either None or a callable, which is called
|
||||
with the scanner and the to-be-ignored match object; this can
|
||||
be used for include file or comment handling.
|
||||
"""
|
||||
|
||||
if not filename:
|
||||
global in_name
|
||||
filename="<f.%d>" % in_name
|
||||
in_name += 1
|
||||
|
||||
self.input = input
|
||||
self.ignore = ignore
|
||||
self.file = file
|
||||
self.filename = filename
|
||||
self.pos = 0
|
||||
self.del_pos = 0 # skipped
|
||||
self.line = 1
|
||||
self.del_line = 0 # skipped
|
||||
self.col = 0
|
||||
self.tokens = []
|
||||
self.stack = None
|
||||
self.stacked = stacked
|
||||
|
||||
self.last_read_token = None
|
||||
self.last_token = None
|
||||
self.last_types = None
|
||||
|
||||
if patterns is not None:
|
||||
# Compile the regex strings into regex objects
|
||||
self.patterns = []
|
||||
for terminal, regex in patterns:
|
||||
self.patterns.append( (terminal, re.compile(regex)) )
|
||||
|
||||
def stack_input(self, input="", file=None, filename=None):
|
||||
"""Temporarily parse from a second file."""
|
||||
|
||||
# Already reading from somewhere else: Go on top of that, please.
|
||||
if self.stack:
|
||||
# autogenerate a recursion-level-identifying filename
|
||||
if not filename:
|
||||
filename = 1
|
||||
else:
|
||||
try:
|
||||
filename += 1
|
||||
except TypeError:
|
||||
pass
|
||||
# now pass off to the include file
|
||||
self.stack.stack_input(input,file,filename)
|
||||
else:
|
||||
|
||||
try:
|
||||
filename += 0
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
filename = "<str_%d>" % filename
|
||||
|
||||
# self.stack = object.__new__(self.__class__)
|
||||
# Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True)
|
||||
|
||||
# Note that the pattern+ignore are added by the generated
|
||||
# scanner code
|
||||
self.stack = self.__class__(input,file,filename, stacked=True)
|
||||
|
||||
def get_pos(self):
|
||||
"""Return a file/line/char tuple."""
|
||||
if self.stack: return self.stack.get_pos()
|
||||
|
||||
return (self.filename, self.line+self.del_line, self.col)
|
||||
|
||||
# def __repr__(self):
|
||||
# """Print the last few tokens that have been scanned in"""
|
||||
# output = ''
|
||||
# for t in self.tokens:
|
||||
# output += '%s\n' % (repr(t),)
|
||||
# return output
|
||||
|
||||
def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
|
||||
"""Print the line of 'text' that includes position 'p',
|
||||
along with a second line with a single caret (^) at position p"""
|
||||
|
||||
file,line,p = pos
|
||||
if file != self.filename:
|
||||
if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out)
|
||||
print >>out, "(%s: not in input buffer)" % file
|
||||
return
|
||||
|
||||
text = self.input
|
||||
p += length-1 # starts at pos 1
|
||||
|
||||
origline=line
|
||||
line -= self.del_line
|
||||
spos=0
|
||||
if line > 0:
|
||||
while 1:
|
||||
line = line - 1
|
||||
try:
|
||||
cr = text.index("\n",spos)
|
||||
except ValueError:
|
||||
if line:
|
||||
text = ""
|
||||
break
|
||||
if line == 0:
|
||||
text = text[spos:cr]
|
||||
break
|
||||
spos = cr+1
|
||||
else:
|
||||
print >>out, "(%s:%d not in input buffer)" % (file,origline)
|
||||
return
|
||||
|
||||
# Now try printing part of the line
|
||||
text = text[max(p-80, 0):p+80]
|
||||
p = p - max(p-80, 0)
|
||||
|
||||
# Strip to the left
|
||||
i = text[:p].rfind('\n')
|
||||
j = text[:p].rfind('\r')
|
||||
if i < 0 or (0 <= j < i): i = j
|
||||
if 0 <= i < p:
|
||||
p = p - i - 1
|
||||
text = text[i+1:]
|
||||
|
||||
# Strip to the right
|
||||
i = text.find('\n', p)
|
||||
j = text.find('\r', p)
|
||||
if i < 0 or (0 <= j < i): i = j
|
||||
if i >= 0:
|
||||
text = text[:i]
|
||||
|
||||
# Now shorten the text
|
||||
while len(text) > 70 and p > 60:
|
||||
# Cut off 10 chars
|
||||
text = "..." + text[10:]
|
||||
p = p - 7
|
||||
|
||||
# Now print the string, along with an indicator
|
||||
print >>out, '> ',text
|
||||
print >>out, '> ',' '*p + '^'
|
||||
|
||||
def grab_input(self):
|
||||
"""Get more input if possible."""
|
||||
if not self.file: return
|
||||
if len(self.input) - self.pos >= MIN_WINDOW: return
|
||||
|
||||
data = self.file.read(MIN_WINDOW)
|
||||
if data is None or data == "":
|
||||
self.file = None
|
||||
|
||||
# Drop bytes from the start, if necessary.
|
||||
if self.pos > 2*MIN_WINDOW:
|
||||
self.del_pos += MIN_WINDOW
|
||||
self.del_line += self.input[:MIN_WINDOW].count("\n")
|
||||
self.pos -= MIN_WINDOW
|
||||
self.input = self.input[MIN_WINDOW:] + data
|
||||
else:
|
||||
self.input = self.input + data
|
||||
|
||||
def getchar(self):
|
||||
"""Return the next character."""
|
||||
self.grab_input()
|
||||
|
||||
c = self.input[self.pos]
|
||||
self.pos += 1
|
||||
return c
|
||||
|
||||
def token(self, restrict, context=None):
|
||||
"""Scan for another token."""
|
||||
|
||||
while 1:
|
||||
if self.stack:
|
||||
try:
|
||||
return self.stack.token(restrict, context)
|
||||
except StopIteration:
|
||||
self.stack = None
|
||||
|
||||
# Keep looking for a token, ignoring any in self.ignore
|
||||
self.grab_input()
|
||||
|
||||
# special handling for end-of-file
|
||||
if self.stacked and self.pos==len(self.input):
|
||||
raise StopIteration
|
||||
|
||||
# Search the patterns for the longest match, with earlier
|
||||
# tokens in the list having preference
|
||||
best_match = -1
|
||||
best_pat = '(error)'
|
||||
best_m = None
|
||||
for p, regexp in self.patterns:
|
||||
# First check to see if we're ignoring this token
|
||||
if restrict and p not in restrict and p not in self.ignore:
|
||||
continue
|
||||
m = regexp.match(self.input, self.pos)
|
||||
if m and m.end()-m.start() > best_match:
|
||||
# We got a match that's better than the previous one
|
||||
best_pat = p
|
||||
best_match = m.end()-m.start()
|
||||
best_m = m
|
||||
|
||||
# If we didn't find anything, raise an error
|
||||
if best_pat == '(error)' and best_match < 0:
|
||||
msg = 'Bad Token'
|
||||
if restrict:
|
||||
msg = 'Trying to find one of '+', '.join(restrict)
|
||||
raise SyntaxError(self.get_pos(), msg, context=context)
|
||||
|
||||
ignore = best_pat in self.ignore
|
||||
value = self.input[self.pos:self.pos+best_match]
|
||||
if not ignore:
|
||||
tok=Token(type=best_pat, value=value, pos=self.get_pos())
|
||||
|
||||
self.pos += best_match
|
||||
|
||||
npos = value.rfind("\n")
|
||||
if npos > -1:
|
||||
self.col = best_match-npos
|
||||
self.line += value.count("\n")
|
||||
else:
|
||||
self.col += best_match
|
||||
|
||||
# If we found something that isn't to be ignored, return it
|
||||
if not ignore:
|
||||
if len(self.tokens) >= 10:
|
||||
del self.tokens[0]
|
||||
self.tokens.append(tok)
|
||||
self.last_read_token = tok
|
||||
# print repr(tok)
|
||||
return tok
|
||||
else:
|
||||
ignore = self.ignore[best_pat]
|
||||
if ignore:
|
||||
ignore(self, best_m)
|
||||
|
||||
def peek(self, *types, **kw):
|
||||
"""Returns the token type for lookahead; if there are any args
|
||||
then the list of args is the set of token types to allow"""
|
||||
context = kw.get("context",None)
|
||||
if self.last_token is None:
|
||||
self.last_types = types
|
||||
self.last_token = self.token(types,context)
|
||||
elif self.last_types:
|
||||
for t in types:
|
||||
if t not in self.last_types:
|
||||
raise NotImplementedError("Unimplemented: restriction set changed")
|
||||
return self.last_token.type
|
||||
|
||||
def scan(self, type, **kw):
|
||||
"""Returns the matched text, and moves to the next token"""
|
||||
context = kw.get("context",None)
|
||||
|
||||
if self.last_token is None:
|
||||
tok = self.token([type],context)
|
||||
else:
|
||||
if self.last_types and type not in self.last_types:
|
||||
raise NotImplementedError("Unimplemented: restriction set changed")
|
||||
|
||||
tok = self.last_token
|
||||
self.last_token = None
|
||||
if tok.type != type:
|
||||
if not self.last_types: self.last_types=[]
|
||||
raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
|
||||
return tok.value
|
||||
|
||||
class Parser(object):
|
||||
"""Base class for Yapps-generated parsers.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, scanner):
|
||||
self._scanner = scanner
|
||||
|
||||
def _stack(self, input="",file=None,filename=None):
|
||||
"""Temporarily read from someplace else"""
|
||||
self._scanner.stack_input(input,file,filename)
|
||||
self._tok = None
|
||||
|
||||
def _peek(self, *types, **kw):
|
||||
"""Returns the token type for lookahead; if there are any args
|
||||
then the list of args is the set of token types to allow"""
|
||||
return self._scanner.peek(*types, **kw)
|
||||
|
||||
def _scan(self, type, **kw):
|
||||
"""Returns the matched text, and moves to the next token"""
|
||||
return self._scanner.scan(type, **kw)
|
||||
|
||||
class Context(object):
    """One frame of the parser's rule-call stack.

    Every rule invocation creates a Context linked to the frame of the
    rule that invoked it; the resulting chain is walked when printing
    parse-error diagnostics.
    """

    def __init__(self, parent, scanner, rule, args=()):
        """Create a new context frame.

        Args:
            parent: Context object or None
            scanner: Scanner object
            rule: string (name of the rule)
            args: tuple listing parameters to the rule
        """
        self.parent = parent
        self.scanner = scanner
        self.rule = rule
        self.args = args
        # Descend to the innermost stacked scanner; its most recently
        # read token marks where this rule began.
        innermost = scanner
        while innermost.stack:
            innermost = innermost.stack
        self.token = innermost.last_read_token

    def __str__(self):
        # Render the chain root-first, e.g. "XPATH > PATH > PREDICATE".
        if self.parent:
            return str(self.parent) + ' > ' + self.rule
        return self.rule
def print_error(err, scanner, max_ctx=None):
    """Print error messages, the parser stack, and the input text -- for
    human-readable error messages.

    Args:
        err: the syntax error being reported (uses .pos, .msg, .context)
        scanner: the Scanner, used to locate and display input lines
        max_ctx: optional cap on how many stack frames to display
    """
    # NOTE: this function assumes 80 columns :-(
    # Fall back to the scanner's current position when the error does
    # not carry one of its own.
    pos = err.pos or scanner.get_pos()

    file_name, line_number, column_number = pos
    print('%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg),
          file=sys.stderr)

    scanner.print_line_with_pointer(pos)

    # Walk the rule-call chain outward, naming each enclosing rule and
    # pointing at the token where it started (reusing the last known
    # token for frames that recorded none).
    context = err.context
    token = None
    remaining = max_ctx
    while context:
        print('while parsing %s%s:' % (context.rule, tuple(context.args)),
              file=sys.stderr)
        if context.token:
            token = context.token
        if token:
            scanner.print_line_with_pointer(token.pos,
                                            length=len(token.value))
        context = context.parent
        if remaining:
            remaining = remaining - 1
            if not remaining:
                break
def wrap_error_reporter(parser, rule, *args, **kw):
    """Invoke grammar rule *rule* on *parser*, reporting failures.

    Returns the rule's result on success; on a parse failure the
    diagnostics are printed to stderr and None is returned.
    """
    rule_method = getattr(parser, rule)
    try:
        return rule_method(*args, **kw)
    except SyntaxError as e:
        # Yapps' SyntaxError carries pos/msg/context for print_error.
        print_error(e, parser._scanner)
    except NoMoreTokens:
        print('Could not complete parsing; stopped around here:',
              file=sys.stderr)
        print(parser._scanner, file=sys.stderr)
from twisted.words.xish.xpath import AttribValue, BooleanValue, CompareValue
|
||||
from twisted.words.xish.xpath import Function, IndexValue, LiteralValue
|
||||
from twisted.words.xish.xpath import _AnyLocation, _Location
|
||||
|
||||
|
||||
# Begin -- grammar generated by Yapps
|
||||
|
||||
class XPathParserScanner(Scanner):
    """Tokenizer for the XPath-ish grammar (generated by Yapps).

    ``patterns`` pairs each token name with its compiled regular
    expression; XPathParser requests tokens by these names.
    """
    patterns = [
        ('","', re.compile(',')),
        ('"@"', re.compile('@')),
        ('"\\)"', re.compile('\\)')),
        ('"\\("', re.compile('\\(')),
        ('"\\]"', re.compile('\\]')),
        ('"\\["', re.compile('\\[')),
        ('"//"', re.compile('//')),
        ('"/"', re.compile('/')),
        ('\\s+', re.compile('\\s+')),
        ('INDEX', re.compile('[0-9]+')),
        ('WILDCARD', re.compile('\\*')),
        ('IDENTIFIER', re.compile('[a-zA-Z][a-zA-Z0-9_\\-]*')),
        ('ATTRIBUTE', re.compile('\\@[a-zA-Z][a-zA-Z0-9_\\-]*')),
        ('FUNCNAME', re.compile('[a-zA-Z][a-zA-Z0-9_]*')),
        ('CMP_EQ', re.compile('\\=')),
        ('CMP_NE', re.compile('\\!\\=')),
        ('STR_DQ', re.compile('"([^"]|(\\"))*?"')),
        ('STR_SQ', re.compile("'([^']|(\\'))*?'")),
        ('OP_AND', re.compile('and')),
        ('OP_OR', re.compile('or')),
        ('END', re.compile('$')),
    ]
    def __init__(self, str,*args,**kw):
        # '\\s+' mapped to None -- presumably marks whitespace as an
        # ignored token class; confirm against the Scanner base class.
        Scanner.__init__(self,None,{'\\s+':None,},str,*args,**kw)
class XPathParser(Parser):
    """Recursive-descent parser for the XPath-ish grammar.

    Generated by Yapps: each method parses one grammar rule, using
    _peek for lookahead and _scan to consume tokens, and builds result
    nodes from twisted.words.xish.xpath.
    """
    # Rule methods instantiate this to record the call stack for
    # error reporting.
    Context = Context

    def XPATH(self, _parent=None):
        """XPATH: one or more PATHs followed by END; chains the PATHs
        through their childLocation attributes and returns the first."""
        _context = self.Context(_parent, self._scanner, 'XPATH', [])
        PATH = self.PATH(_context)
        result = PATH; current = result
        while self._peek('END', '"/"', '"//"', context=_context) != 'END':
            PATH = self.PATH(_context)
            current.childLocation = PATH; current = current.childLocation
        END = self._scan('END', context=_context)
        return result

    def PATH(self, _parent=None):
        """PATH: '/' or '//' plus an element name (or wildcard) and any
        number of [PREDICATE] qualifiers; returns a _Location or
        _AnyLocation."""
        _context = self.Context(_parent, self._scanner, 'PATH', [])
        _token = self._peek('"/"', '"//"', context=_context)
        if _token == '"/"':
            self._scan('"/"', context=_context)
            result = _Location()
        else: # == '"//"'
            self._scan('"//"', context=_context)
            result = _AnyLocation()
        _token = self._peek('IDENTIFIER', 'WILDCARD', context=_context)
        if _token == 'IDENTIFIER':
            IDENTIFIER = self._scan('IDENTIFIER', context=_context)
            result.elementName = IDENTIFIER
        else: # == 'WILDCARD'
            WILDCARD = self._scan('WILDCARD', context=_context)
            # Wildcard match: no specific element name recorded.
            result.elementName = None
        while self._peek('"\\["', 'END', '"/"', '"//"', context=_context) == '"\\["':
            self._scan('"\\["', context=_context)
            PREDICATE = self.PREDICATE(_context)
            result.predicates.append(PREDICATE)
            self._scan('"\\]"', context=_context)
        return result

    def PREDICATE(self, _parent=None):
        """PREDICATE: either a numeric INDEX or a boolean EXPR."""
        _context = self.Context(_parent, self._scanner, 'PREDICATE', [])
        _token = self._peek('INDEX', '"\\("', '"@"', 'FUNCNAME', 'STR_DQ', 'STR_SQ', context=_context)
        if _token != 'INDEX':
            EXPR = self.EXPR(_context)
            return EXPR
        else: # == 'INDEX'
            INDEX = self._scan('INDEX', context=_context)
            return IndexValue(INDEX)

    def EXPR(self, _parent=None):
        """EXPR: FACTORs joined left-associatively by and/or; each join
        wraps the accumulated expression in a BooleanValue."""
        _context = self.Context(_parent, self._scanner, 'EXPR', [])
        FACTOR = self.FACTOR(_context)
        e = FACTOR
        while self._peek('OP_AND', 'OP_OR', '"\\)"', '"\\]"', context=_context) in ['OP_AND', 'OP_OR']:
            BOOLOP = self.BOOLOP(_context)
            FACTOR = self.FACTOR(_context)
            e = BooleanValue(e, BOOLOP, FACTOR)
        return e

    def BOOLOP(self, _parent=None):
        """BOOLOP: the matched text of an OP_AND or OP_OR token."""
        _context = self.Context(_parent, self._scanner, 'BOOLOP', [])
        _token = self._peek('OP_AND', 'OP_OR', context=_context)
        if _token == 'OP_AND':
            OP_AND = self._scan('OP_AND', context=_context)
            return OP_AND
        else: # == 'OP_OR'
            OP_OR = self._scan('OP_OR', context=_context)
            return OP_OR

    def FACTOR(self, _parent=None):
        """FACTOR: a TERM, or a parenthesized EXPR."""
        _context = self.Context(_parent, self._scanner, 'FACTOR', [])
        _token = self._peek('"\\("', '"@"', 'FUNCNAME', 'STR_DQ', 'STR_SQ', context=_context)
        if _token != '"\\("':
            TERM = self.TERM(_context)
            return TERM
        else: # == '"\\("'
            self._scan('"\\("', context=_context)
            EXPR = self.EXPR(_context)
            self._scan('"\\)"', context=_context)
            return EXPR

    def TERM(self, _parent=None):
        """TERM: a VALUE, optionally compared (CMP_EQ/CMP_NE) against a
        second VALUE; the comparison case returns a CompareValue."""
        _context = self.Context(_parent, self._scanner, 'TERM', [])
        VALUE = self.VALUE(_context)
        t = VALUE
        if self._peek('CMP_EQ', 'CMP_NE', 'OP_AND', 'OP_OR', '"\\)"', '"\\]"', context=_context) in ['CMP_EQ', 'CMP_NE']:
            CMP = self.CMP(_context)
            VALUE = self.VALUE(_context)
            t = CompareValue(t, CMP, VALUE)
        return t

    def VALUE(self, _parent=None):
        """VALUE: an @attribute reference (AttribValue), a function call
        with comma-separated VALUE arguments (Function), or a quoted
        string (LiteralValue)."""
        _context = self.Context(_parent, self._scanner, 'VALUE', [])
        _token = self._peek('"@"', 'FUNCNAME', 'STR_DQ', 'STR_SQ', context=_context)
        if _token == '"@"':
            self._scan('"@"', context=_context)
            IDENTIFIER = self._scan('IDENTIFIER', context=_context)
            return AttribValue(IDENTIFIER)
        elif _token == 'FUNCNAME':
            FUNCNAME = self._scan('FUNCNAME', context=_context)
            f = Function(FUNCNAME); args = []
            self._scan('"\\("', context=_context)
            if self._peek('"\\)"', '"@"', 'FUNCNAME', '","', 'STR_DQ', 'STR_SQ', context=_context) not in ['"\\)"', '","']:
                VALUE = self.VALUE(_context)
                args.append(VALUE)
                while self._peek('","', '"\\)"', context=_context) == '","':
                    self._scan('","', context=_context)
                    VALUE = self.VALUE(_context)
                    args.append(VALUE)
            self._scan('"\\)"', context=_context)
            f.setParams(*args); return f
        else: # in ['STR_DQ', 'STR_SQ']
            STR = self.STR(_context)
            # Strip the surrounding quote characters from the token.
            return LiteralValue(STR[1:len(STR)-1])

    def CMP(self, _parent=None):
        """CMP: the matched text of a CMP_EQ or CMP_NE token."""
        _context = self.Context(_parent, self._scanner, 'CMP', [])
        _token = self._peek('CMP_EQ', 'CMP_NE', context=_context)
        if _token == 'CMP_EQ':
            CMP_EQ = self._scan('CMP_EQ', context=_context)
            return CMP_EQ
        else: # == 'CMP_NE'
            CMP_NE = self._scan('CMP_NE', context=_context)
            return CMP_NE

    def STR(self, _parent=None):
        """STR: a double- or single-quoted string token, quotes
        included (the caller strips them)."""
        _context = self.Context(_parent, self._scanner, 'STR', [])
        _token = self._peek('STR_DQ', 'STR_SQ', context=_context)
        if _token == 'STR_DQ':
            STR_DQ = self._scan('STR_DQ', context=_context)
            return STR_DQ
        else: # == 'STR_SQ'
            STR_SQ = self._scan('STR_SQ', context=_context)
            return STR_SQ
def parse(rule, text):
    """Parse *text* starting from grammar rule *rule*.

    Returns the rule's result, or None after printing diagnostics when
    parsing fails (see wrap_error_reporter).
    """
    P = XPathParser(XPathParserScanner(text))
    return wrap_error_reporter(P, rule)
if __name__ == '__main__':
    # Command-line driver (generated by Yapps): parse a named file, or
    # stdin when no filename is given, starting from the rule named on
    # the command line, and print the resulting parse.
    from sys import argv, stdin
    if len(argv) >= 2:
        if len(argv) >= 3:
            f = open(argv[2],'r')
        else:
            f = stdin
        print(parse(argv[1], f.read()))
    else: print ('Args: <rule> [<filename>]', file=sys.stderr)
# End -- grammar generated by Yapps
|
||||
''')
|
||||
Loading…
Add table
Add a link
Reference in a new issue