Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
83
venv/lib/python3.9/site-packages/scrapy/utils/response.py
Normal file
83
venv/lib/python3.9/site-packages/scrapy/utils/response.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
"""
|
||||
This module provides some useful functions for working with
|
||||
scrapy.http.Response objects
|
||||
"""
|
||||
import os
|
||||
import weakref
|
||||
import webbrowser
|
||||
import tempfile
|
||||
|
||||
from twisted.web import http
|
||||
from scrapy.utils.python import to_bytes, to_unicode
|
||||
from w3lib import html
|
||||
|
||||
|
||||
# Memo for get_base_url(): maps a response object to its computed base URL.
# Keys are held weakly, so an entry does not keep its response alive.
_baseurl_cache = weakref.WeakKeyDictionary()
|
||||
|
||||
|
||||
def get_base_url(response):
    """Return the base url of ``response``, resolved against ``response.url``.

    The value is computed once per response object and memoized in
    ``_baseurl_cache``; only the first 4096 characters of the response text
    are handed to ``html.get_base_url``.
    """
    # Fast path: this response has been resolved before.
    if response in _baseurl_cache:
        return _baseurl_cache[response]

    head_chunk = response.text[0:4096]
    base_url = html.get_base_url(head_chunk, response.url, response.encoding)
    _baseurl_cache[response] = base_url
    return base_url
|
||||
|
||||
|
||||
# Memo for get_meta_refresh(), keyed by response object.
# Keys are held weakly, so an entry does not keep its response alive.
_metaref_cache = weakref.WeakKeyDictionary()
|
||||
|
||||
|
||||
def get_meta_refresh(response, ignore_tags=('script', 'noscript')):
    """Parse the http-equiv refresh parameter from the given response.

    Only the first 4096 characters of ``response.text`` are examined.

    Results are memoized in ``_metaref_cache`` per response *and* per
    ``ignore_tags`` value, so a later call with a different set of tags is
    recomputed instead of silently returning the result of an earlier call
    made with other tags.

    ``response`` supplies the text, url and encoding passed to
    ``html.get_meta_refresh``; ``ignore_tags`` is forwarded unchanged as its
    ``ignore_tags`` keyword. The return value is whatever
    ``html.get_meta_refresh`` returns.
    """
    # Normalise to a hashable key; a falsy value (None, empty) maps to ().
    tags_key = tuple(ignore_tags or ())
    # One small dict per response, still weakly keyed by the response itself.
    per_response = _metaref_cache.setdefault(response, {})
    if tags_key not in per_response:
        text = response.text[0:4096]
        per_response[tags_key] = html.get_meta_refresh(
            text, response.url, response.encoding, ignore_tags=ignore_tags)
    return per_response[tags_key]
|
||||
|
||||
|
||||
def response_status_message(status):
    """Return the status code followed by its descriptive status text.

    The text is looked up in ``http.RESPONSES`` by ``int(status)``; codes
    missing from that table are described as "Unknown Status".
    """
    reason = http.RESPONSES.get(int(status), "Unknown Status")
    reason_text = to_unicode(reason)
    return f'{status} {reason_text}'
|
||||
|
||||
|
||||
def response_httprepr(response):
    """Build a raw HTTP representation (as bytes) of the given response.

    This is for reference only: it is reconstructed from the response's
    status, headers and body, and is not the exact stream of bytes that was
    received (that's not exposed by Twisted).
    """
    status_code = to_bytes(str(response.status))
    reason = to_bytes(http.RESPONSES.get(response.status, b''))

    # Status line first, e.g. b"HTTP/1.1 200 OK\r\n".
    chunks = [b"HTTP/1.1 ", status_code, b" ", reason, b"\r\n"]

    # The header block is emitted only when the response has headers.
    if response.headers:
        chunks += [response.headers.to_string(), b"\r\n"]

    # A blank line separates the head from the body.
    chunks += [b"\r\n", response.body]
    return b"".join(chunks)
|
||||
|
||||
|
||||
def open_in_browser(response, _openfunc=webbrowser.open):
    """Open the given response in a local web browser, populating the <base>
    tag for external links to work.

    The response body is written to a temporary file (``.html`` for an
    ``HtmlResponse``, ``.txt`` for any other ``TextResponse``) and
    ``_openfunc`` is called with a ``file://`` URL pointing at it. The result
    of ``_openfunc`` is returned.

    Raises ``TypeError`` when ``response`` is neither an ``HtmlResponse`` nor
    a ``TextResponse``.
    """
    # Imported here rather than at module level, as in the original code.
    from scrapy.http import HtmlResponse, TextResponse
    # XXX: this implementation is a bit dirty and could be improved
    body = response.body
    if isinstance(response, HtmlResponse):
        # Inject a <base> only when the document does not already have one.
        if b'<base' not in body:
            repl = f'<head><base href="{response.url}">'
            body = body.replace(b'<head>', to_bytes(repl))
        ext = '.html'
    elif isinstance(response, TextResponse):
        ext = '.txt'
    else:
        raise TypeError("Unsupported response type: "
                        f"{response.__class__.__name__}")
    fd, fname = tempfile.mkstemp(ext)
    # Wrap the descriptor in a file object: the context manager closes it even
    # if the write fails, and the buffered write() emits the whole body
    # (a bare os.write() may write only part of it).
    with os.fdopen(fd, 'wb') as tmp_file:
        tmp_file.write(body)
    return _openfunc(f"file://{fname}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue