Ausgabe der neuen DB Einträge

2022-01-02 21:50:48 +01:00 · 2022-01-02 21:50:48 +01:00 · cfbbb9ee3d
commit cfbbb9ee3d
parent bad48e1627
2399 changed files with 843193 additions and 43 deletions
--- a/venv/lib/python3.9/site-packages/scrapy/selector/__init__.py
+++ b/venv/lib/python3.9/site-packages/scrapy/selector/__init__.py
@@ -0,0 +1,6 @@
+"""
+Selectors
+"""
+
+# top-level imports
+from scrapy.selector.unified import Selector, SelectorList
--- a/venv/lib/python3.9/site-packages/scrapy/selector/unified.py
+++ b/venv/lib/python3.9/site-packages/scrapy/selector/unified.py
@@ -0,0 +1,82 @@
+"""
+XPath selectors based on lxml
+"""
+
+from parsel import Selector as _ParselSelector
+from scrapy.utils.trackref import object_ref
+from scrapy.utils.python import to_bytes
+from scrapy.http import HtmlResponse, XmlResponse
+
+
+__all__ = ['Selector', 'SelectorList']
+
+
+def _st(response, st):
+    """Pick the selector type: ``st`` if given, else by response class."""
+    inferred = 'xml' if isinstance(response, XmlResponse) else 'html'
+    return inferred if st is None else st
+
+
+def _response_from_text(text, st):
+    """Wrap ``text`` in an about:blank XmlResponse ('xml') or HtmlResponse."""
+    response_cls = XmlResponse if st == 'xml' else HtmlResponse
+    return response_cls(url='about:blank', encoding='utf-8', body=to_bytes(text, 'utf-8'))
+
+
+class SelectorList(_ParselSelector.selectorlist_cls, object_ref):
+    """
+    A subclass of the builtin ``list`` with a few additional methods; it is
+    the ``selectorlist_cls`` of :class:`Selector` and also an ``object_ref``.
+    """
+
+
+class Selector(_ParselSelector, object_ref):
+    """
+    A wrapper over a response used to select certain parts of its content.
+
+    ``response`` is the :class:`~scrapy.http.HtmlResponse` or
+    :class:`~scrapy.http.XmlResponse` object whose data is selected and
+    extracted.
+
+    ``text`` is a unicode string or utf-8 encoded text, for cases when a
+    ``response`` isn't available; it is wrapped in an ``about:blank``
+    response. Passing both ``text`` and ``response`` raises
+    :exc:`ValueError`.
+
+    ``type`` is the selector type: ``"html"``, ``"xml"`` or ``None``
+    (default). A set ``type`` is forced and no detection occurs. With
+    ``None`` and no ``_default_type`` set, the type follows the response:
+
+    * ``"xml"`` for :class:`~scrapy.http.XmlResponse`
+    * ``"html"`` for :class:`~scrapy.http.HtmlResponse`
+    * ``"html"`` for anything else, including when only ``text`` is given
+
+    ``root`` and extra keyword arguments are passed on to the parent
+    selector; ``base_url`` defaults to the response URL, if there is one.
+
+    The response in use is kept on the ``response`` attribute.
+    """
+
+    __slots__ = ['response']
+    selectorlist_cls = SelectorList
+
+    def __init__(self, response=None, text=None, type=None, root=None, **kwargs):
+        if not (response is None or text is None):
+            cls_name = self.__class__.__name__
+            raise ValueError(f'{cls_name}.__init__() received both response and text')
+
+        # An explicit ``type`` wins, then ``self._default_type``, then detection.
+        selector_type = _st(response, type or self._default_type)
+
+        if text is not None:
+            # Raw text is wrapped in a synthetic response of the chosen type.
+            response = _response_from_text(text, selector_type)
+
+        if response is not None:
+            # Parse the response's own text and keep its URL as the base URL.
+            text = response.text
+            kwargs.setdefault('base_url', response.url)
+
+        # Stays None when neither ``response`` nor ``text`` was given.
+        self.response = response
+        super().__init__(text=text, type=selector_type, root=root, **kwargs)