Ausgabe der neuen DB Einträge

2022-01-02 21:50:48 +01:00 · 2022-01-02 21:50:48 +01:00 · cfbbb9ee3d
commit cfbbb9ee3d
parent bad48e1627
2399 changed files with 843193 additions and 43 deletions
--- a/venv/lib/python3.9/site-packages/scrapy/utils/response.py
+++ b/venv/lib/python3.9/site-packages/scrapy/utils/response.py
@ -0,0 +1,83 @@
+"""
+This module provides some useful functions for working with
+scrapy.http.Response objects
+"""
+import os
+import weakref
+import webbrowser
+import tempfile
+
+from twisted.web import http
+from scrapy.utils.python import to_bytes, to_unicode
+from w3lib import html
+
+
+_baseurl_cache = weakref.WeakKeyDictionary()
+
+
+def get_base_url(response):
+    """Return the base url of the given response, joined with the response url"""
+    if response not in _baseurl_cache:
+        text = response.text[0:4096]
+        _baseurl_cache[response] = html.get_base_url(text, response.url, response.encoding)
+    return _baseurl_cache[response]
+
+
+_metaref_cache = weakref.WeakKeyDictionary()
+
+
+def get_meta_refresh(response, ignore_tags=('script', 'noscript')):
+    """Parse the http-equiv refrsh parameter from the given response"""
+    if response not in _metaref_cache:
+        text = response.text[0:4096]
+        _metaref_cache[response] = html.get_meta_refresh(
+            text, response.url, response.encoding, ignore_tags=ignore_tags)
+    return _metaref_cache[response]
+
+
+def response_status_message(status):
+    """Return status code plus status text descriptive message
+    """
+    message = http.RESPONSES.get(int(status), "Unknown Status")
+    return f'{status} {to_unicode(message)}'
+
+
+def response_httprepr(response):
+    """Return raw HTTP representation (as bytes) of the given response. This
+    is provided only for reference, since it's not the exact stream of bytes
+    that was received (that's not exposed by Twisted).
+    """
+    values = [
+        b"HTTP/1.1 ",
+        to_bytes(str(response.status)),
+        b" ",
+        to_bytes(http.RESPONSES.get(response.status, b'')),
+        b"\r\n",
+    ]
+    if response.headers:
+        values.extend([response.headers.to_string(), b"\r\n"])
+    values.extend([b"\r\n", response.body])
+    return b"".join(values)
+
+
+def open_in_browser(response, _openfunc=webbrowser.open):
+    """Open the given response in a local web browser, populating the <base>
+    tag for external links to work
+    """
+    from scrapy.http import HtmlResponse, TextResponse
+    # XXX: this implementation is a bit dirty and could be improved
+    body = response.body
+    if isinstance(response, HtmlResponse):
+        if b'<base' not in body:
+            repl = f'<head><base href="{response.url}">'
+            body = body.replace(b'<head>', to_bytes(repl))
+        ext = '.html'
+    elif isinstance(response, TextResponse):
+        ext = '.txt'
+    else:
+        raise TypeError("Unsupported response type: "
+                        f"{response.__class__.__name__}")
+    fd, fname = tempfile.mkstemp(ext)
+    os.write(fd, body)
+    os.close(fd)
+    return _openfunc(f"file://{fname}")