Output of the new DB entries
This commit is contained in:
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
125
venv/lib/python3.9/site-packages/scrapy/squeues.py
Normal file
@@ -0,0 +1,125 @@
"""
Scheduler queues
"""

import marshal
import os
import pickle

from queuelib import queue

from scrapy.utils.reqser import request_to_dict, request_from_dict


def _with_mkdir(queue_class):

    class DirectoriesCreated(queue_class):

        def __init__(self, path, *args, **kwargs):
            dirname = os.path.dirname(path)
            if not os.path.exists(dirname):
                os.makedirs(dirname, exist_ok=True)
            super().__init__(path, *args, **kwargs)

    return DirectoriesCreated


def _serializable_queue(queue_class, serialize, deserialize):

    class SerializableQueue(queue_class):

        def push(self, obj):
            s = serialize(obj)
            super().push(s)

        def pop(self):
            s = super().pop()
            if s:
                return deserialize(s)

    return SerializableQueue


def _scrapy_serialization_queue(queue_class):

    class ScrapyRequestQueue(queue_class):

        def __init__(self, crawler, key):
            self.spider = crawler.spider
            super().__init__(key)

        @classmethod
        def from_crawler(cls, crawler, key, *args, **kwargs):
            return cls(crawler, key)

        def push(self, request):
            request = request_to_dict(request, self.spider)
            return super().push(request)

        def pop(self):
            request = super().pop()

            if not request:
                return None

            request = request_from_dict(request, self.spider)
            return request

    return ScrapyRequestQueue


def _scrapy_non_serialization_queue(queue_class):

    class ScrapyRequestQueue(queue_class):
        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            return cls()

    return ScrapyRequestQueue


def _pickle_serialize(obj):
    try:
        return pickle.dumps(obj, protocol=4)
    # Both pickle.PicklingError and AttributeError can be raised by pickle.dump(s)
    # TypeError is raised from parsel.Selector
    except (pickle.PicklingError, AttributeError, TypeError) as e:
        raise ValueError(str(e)) from e


PickleFifoDiskQueueNonRequest = _serializable_queue(
    _with_mkdir(queue.FifoDiskQueue),
    _pickle_serialize,
    pickle.loads
)
PickleLifoDiskQueueNonRequest = _serializable_queue(
    _with_mkdir(queue.LifoDiskQueue),
    _pickle_serialize,
    pickle.loads
)
MarshalFifoDiskQueueNonRequest = _serializable_queue(
    _with_mkdir(queue.FifoDiskQueue),
    marshal.dumps,
    marshal.loads
)
MarshalLifoDiskQueueNonRequest = _serializable_queue(
    _with_mkdir(queue.LifoDiskQueue),
    marshal.dumps,
    marshal.loads
)

PickleFifoDiskQueue = _scrapy_serialization_queue(
    PickleFifoDiskQueueNonRequest
)
PickleLifoDiskQueue = _scrapy_serialization_queue(
    PickleLifoDiskQueueNonRequest
)
MarshalFifoDiskQueue = _scrapy_serialization_queue(
    MarshalFifoDiskQueueNonRequest
)
MarshalLifoDiskQueue = _scrapy_serialization_queue(
    MarshalLifoDiskQueueNonRequest
)
FifoMemoryQueue = _scrapy_non_serialization_queue(queue.FifoMemoryQueue)
LifoMemoryQueue = _scrapy_non_serialization_queue(queue.LifoMemoryQueue)
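
For context beyond the diff itself: the *NonRequest classes built above are plain serialized-object queues, while the request-aware wrappers are what Scrapy's scheduler loads via the SCHEDULER_DISK_QUEUE and SCHEDULER_MEMORY_QUEUE settings (PickleLifoDiskQueue and LifoMemoryQueue are the defaults in this Scrapy line). A minimal sketch of driving the pickle-backed FIFO queue directly, outside a crawl; the path crawls/requests.queue is a hypothetical example, not something this commit creates:

# Sketch (assumptions noted above): any writable path works, since the
# _with_mkdir wrapper creates the missing parent directory before queuelib
# opens the queue.
from scrapy.squeues import PickleFifoDiskQueueNonRequest

q = PickleFifoDiskQueueNonRequest('crawls/requests.queue')  # hypothetical path
q.push({'url': 'https://example.com', 'retries': 0})  # serialized by _pickle_serialize
obj = q.pop()  # deserialized by pickle.loads; pop() returns None once the queue is empty
q.close()
print(obj)

Unpicklable objects raise ValueError from push() rather than pickle.PicklingError, because _pickle_serialize re-wraps the serialization errors it catches.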