Ausgabe der neuen DB Einträge

This commit is contained in:
hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions

View file

@ -0,0 +1,83 @@
"""
This module provides some useful functions for working with
scrapy.http.Response objects
"""
import os
import weakref
import webbrowser
import tempfile
from twisted.web import http
from scrapy.utils.python import to_bytes, to_unicode
from w3lib import html
_baseurl_cache = weakref.WeakKeyDictionary()
def get_base_url(response):
"""Return the base url of the given response, joined with the response url"""
if response not in _baseurl_cache:
text = response.text[0:4096]
_baseurl_cache[response] = html.get_base_url(text, response.url, response.encoding)
return _baseurl_cache[response]
_metaref_cache = weakref.WeakKeyDictionary()
def get_meta_refresh(response, ignore_tags=('script', 'noscript')):
"""Parse the http-equiv refrsh parameter from the given response"""
if response not in _metaref_cache:
text = response.text[0:4096]
_metaref_cache[response] = html.get_meta_refresh(
text, response.url, response.encoding, ignore_tags=ignore_tags)
return _metaref_cache[response]
def response_status_message(status):
"""Return status code plus status text descriptive message
"""
message = http.RESPONSES.get(int(status), "Unknown Status")
return f'{status} {to_unicode(message)}'
def response_httprepr(response):
"""Return raw HTTP representation (as bytes) of the given response. This
is provided only for reference, since it's not the exact stream of bytes
that was received (that's not exposed by Twisted).
"""
values = [
b"HTTP/1.1 ",
to_bytes(str(response.status)),
b" ",
to_bytes(http.RESPONSES.get(response.status, b'')),
b"\r\n",
]
if response.headers:
values.extend([response.headers.to_string(), b"\r\n"])
values.extend([b"\r\n", response.body])
return b"".join(values)
def open_in_browser(response, _openfunc=webbrowser.open):
"""Open the given response in a local web browser, populating the <base>
tag for external links to work
"""
from scrapy.http import HtmlResponse, TextResponse
# XXX: this implementation is a bit dirty and could be improved
body = response.body
if isinstance(response, HtmlResponse):
if b'<base' not in body:
repl = f'<head><base href="{response.url}">'
body = body.replace(b'<head>', to_bytes(repl))
ext = '.html'
elif isinstance(response, TextResponse):
ext = '.txt'
else:
raise TypeError("Unsupported response type: "
f"{response.__class__.__name__}")
fd, fname = tempfile.mkstemp(ext)
os.write(fd, body)
os.close(fd)
return _openfunc(f"file://{fname}")