Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
61
venv/lib/python3.9/site-packages/parsel/xpathfuncs.py
Normal file
61
venv/lib/python3.9/site-packages/parsel/xpathfuncs.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import re
|
||||
from lxml import etree
|
||||
|
||||
from six import string_types
|
||||
|
||||
from w3lib.html import HTML5_WHITESPACE
|
||||
|
||||
# Pattern source: a character class built from w3lib's HTML5_WHITESPACE,
# matching one or more consecutive characters from that set.
regex = '[{}]+'.format(HTML5_WHITESPACE)

# Compile once at import time and expose the bound ``sub`` method, so callers
# write ``replace_html5_whitespaces(replacement, text)``.
_html5_whitespace_run = re.compile(regex)
replace_html5_whitespaces = _html5_whitespace_run.sub
|
||||
|
||||
|
||||
def set_xpathfunc(fname, func):
    """Register or remove a custom extension function for XPath expressions.

    When ``func`` is not ``None`` it is stored under the identifier ``fname``
    in the namespace returned by ``etree.FunctionNamespace(None)``. The
    registered function will be called for every matching node, being passed
    a ``context`` parameter as well as any parameters passed from the
    corresponding XPath expression.

    When ``func`` is ``None`` the entry registered under ``fname`` is deleted
    instead. Deleting a name that was never registered propagates whatever
    error the lxml namespace object raises (presumably ``KeyError`` --
    confirm against lxml).

    See more `in lxml documentation`_.

    .. _`in lxml documentation`: http://lxml.de/extensions.html#xpath-extension-functions

    """
    registry = etree.FunctionNamespace(None)
    if func is None:
        # Removal request: drop the existing registration and stop here.
        del registry[fname]
        return
    registry[fname] = func
|
||||
|
||||
|
||||
def setup():
    """Register this module's XPath extension functions.

    Currently this makes ``has_class`` available to XPath expressions under
    the name ``has-class``.
    """
    set_xpathfunc(fname='has-class', func=has_class)
|
||||
|
||||
|
||||
def has_class(context, *classes):
    """has-class function.

    Return True if every name in ``classes`` is present in the ``class``
    attribute of the context node. Return False if the node has no ``class``
    attribute or if any requested name is missing.

    Arguments are validated until one validation succeeds; the result is
    remembered under the ``'args_checked'`` key of ``context.eval_context``
    and later calls sharing that mapping skip the check.

    Raises ``ValueError`` if no class name is given or if any argument is not
    a string.

    """
    eval_state = context.eval_context
    if not eval_state.get('args_checked'):
        if not classes:
            raise ValueError(
                'XPath error: has-class must have at least 1 argument')
        if not all(isinstance(name, string_types) for name in classes):
            raise ValueError(
                'XPath error: has-class arguments must be strings')
        eval_state['args_checked'] = True

    class_attr = context.context_node.get('class')
    if class_attr is None:
        return False

    # Pad with spaces and collapse every whitespace run to a single space so
    # each class name can be looked up as the exact token ' name '.
    padded = replace_html5_whitespaces(' ', ' ' + class_attr + ' ')
    return all(' ' + name + ' ' in padded for name in classes)
|
||||
Loading…
Add table
Add a link
Reference in a new issue