Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
58
venv/lib/python3.9/site-packages/scrapy/utils/gz.py
Normal file
58
venv/lib/python3.9/site-packages/scrapy/utils/gz.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
from gzip import GzipFile
|
||||
from io import BytesIO
|
||||
import re
|
||||
import struct
|
||||
|
||||
from scrapy.utils.decorators import deprecated
|
||||
|
||||
|
||||
# GzipFile.read() has trouble handing back leftover uncompressed data when the
# input is corrupted, whereas read1() fetches the data first and only raises
# EOFError on the following call, so read1() is the method that works here.
@deprecated('GzipFile.read1')
def read1(gzf, size=-1):
    """Deprecated wrapper that delegates to ``gzf.read1(size)``.

    :param gzf: object exposing a ``read1`` method (a ``GzipFile``)
    :param size: forwarded unchanged to ``gzf.read1``
    :return: whatever ``gzf.read1(size)`` returns
    """
    data = gzf.read1(size)
    return data
|
||||
|
||||
|
||||
def gunzip(data):
    """Gunzip the given data and return as much data as possible.

    This is resilient to CRC checksum errors and to input whose trailer is
    missing: when decompression fails after some bytes were already
    recovered, those bytes are returned instead of raising.

    :param data: the gzip-compressed payload, as bytes
    :return: the decompressed bytes that could be recovered from ``data``
    :raises IOError, EOFError, struct.error: when decompression fails before
        any data could be recovered
    """
    output_list = []
    with GzipFile(fileobj=BytesIO(data)) as f:
        while True:
            try:
                chunk = f.read1(8196)
            except (IOError, EOFError, struct.error):
                # complete only if there is some data, otherwise re-raise
                # see issue 87 about catching struct.error
                # some pages are quite small so output_list is empty and
                # f.extrabuf contains the whole page content
                leftover = getattr(f, 'extrabuf', None)
                if not output_list and not leftover:
                    raise
                # The GzipFile object may not expose extrabuf/extrasize at
                # all, so look them up explicitly instead of relying on a
                # ``finally: break`` to swallow the resulting error.
                extrasize = getattr(f, 'extrasize', None)
                if leftover and extrasize is not None:
                    output_list.append(leftover[-extrasize:])
                break
            if not chunk:
                # An empty chunk marks the end of the decompressed stream.
                break
            output_list.append(chunk)
    return b''.join(output_list)
|
||||
|
||||
|
||||
# Case-insensitive Content-Type matchers over bytes. Each name is the bound
# ``search`` method of a compiled pattern, so calling it returns a match
# object or None.
_is_gzipped = re.compile(
    br'^application/(x-)?gzip\b',
    flags=re.IGNORECASE,
).search
_is_octetstream = re.compile(
    br'^(application|binary)/octet-stream\b',
    flags=re.IGNORECASE,
).search
|
||||
|
||||
|
||||
@deprecated
def is_gzipped(response):
    """Tell whether the response headers mark the body as gzipped.

    The result is truthy when the Content-Type matches the gzip pattern, or
    when it matches the octet-stream pattern and the lower-cased
    Content-Encoding is ``gzip`` or ``x-gzip``; otherwise it is falsy.
    Missing headers are treated as empty bytes.
    """
    content_type = response.headers.get('Content-Type', b'')
    content_encoding = response.headers.get('Content-Encoding', b'').lower()

    gzip_type = _is_gzipped(content_type)
    if gzip_type:
        return gzip_type

    octet_type = _is_octetstream(content_type)
    if not octet_type:
        return octet_type
    return content_encoding in (b'gzip', b'x-gzip')
|
||||
|
||||
|
||||
def gzip_magic_number(response):
    """Return whether ``response.body`` begins with the gzip signature.

    Only the first three bytes of the body are compared, against the
    byte values 0x1f, 0x8b, 0x08.
    """
    signature = b'\x1f\x8b\x08'
    return response.body[:len(signature)] == signature
|
||||
Loading…
Add table
Add a link
Reference in a new issue