Ausgabe der neuen DB Einträge

This commit is contained in:
hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions

View file

@ -0,0 +1,125 @@
"""
Scheduler queues
"""
import marshal
import os
import pickle
from queuelib import queue
from scrapy.utils.reqser import request_to_dict, request_from_dict
def _with_mkdir(queue_class):
    """Return a subclass of *queue_class* that creates its parent directory.

    The returned class takes the queue's filesystem path as its first
    constructor argument, makes sure the directory containing that path
    exists, and then forwards every argument unchanged to *queue_class*.
    """

    class DirectoriesCreated(queue_class):
        def __init__(self, path, *args, **kwargs):
            dirname = os.path.dirname(path)
            # A bare file name has an empty dirname: os.makedirs("") raises
            # FileNotFoundError, and the current directory needs no creating.
            if dirname and not os.path.exists(dirname):
                # exist_ok tolerates the directory appearing between the
                # existence check above and this call.
                os.makedirs(dirname, exist_ok=True)
            super().__init__(path, *args, **kwargs)

    return DirectoriesCreated
def _serializable_queue(queue_class, serialize, deserialize):
    """Build a *queue_class* subclass that stores objects in serialized form.

    ``serialize`` is applied to every object on its way into the queue and
    ``deserialize`` to every truthy value on its way out.
    """

    class SerializableQueue(queue_class):
        def push(self, obj):
            """Serialize *obj* and hand the result to the parent queue."""
            super().push(serialize(obj))

        def pop(self):
            """Pop one raw value from the parent queue and deserialize it.

            Returns ``None`` when the parent yields a falsy value.
            """
            raw = super().pop()
            return deserialize(raw) if raw else None

    return SerializableQueue
def _scrapy_serialization_queue(queue_class):
    """Wrap *queue_class* so it stores Scrapy requests as dictionaries.

    Requests are converted with ``request_to_dict`` before being pushed to
    the parent queue and rebuilt with ``request_from_dict`` when popped, both
    using the crawler's spider.
    """

    class ScrapyRequestQueue(queue_class):
        def __init__(self, crawler, key):
            # The spider is handed to both conversion helpers below.
            self.spider = crawler.spider
            super().__init__(key)

        @classmethod
        def from_crawler(cls, crawler, key, *args, **kwargs):
            """Create a queue for *crawler*; extra arguments are ignored."""
            return cls(crawler, key)

        def push(self, request):
            """Convert *request* to a dict and push it to the parent queue."""
            as_dict = request_to_dict(request, self.spider)
            return super().push(as_dict)

        def pop(self):
            """Pop a stored dict and rebuild the request, or return ``None``."""
            stored = super().pop()
            if stored:
                return request_from_dict(stored, self.spider)
            return None

    return ScrapyRequestQueue
def _scrapy_non_serialization_queue(queue_class):
    """Give *queue_class* a ``from_crawler`` alternate constructor.

    The generated classmethod accepts the crawler and any further arguments
    but discards them, building the queue with no arguments at all.
    """

    class ScrapyRequestQueue(queue_class):
        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            # Nothing from the crawler is used to build this queue.
            instance = cls()
            return instance

    return ScrapyRequestQueue
def _pickle_serialize(obj):
    """Pickle *obj* with protocol 4, reporting failures as ``ValueError``.

    Raises:
        ValueError: if pickling fails with ``pickle.PicklingError``,
            ``AttributeError`` or ``TypeError``; the original exception is
            chained as the cause.
    """
    # pickle.dumps may raise PicklingError or AttributeError; TypeError is
    # raised from parsel.Selector.
    unpicklable = (pickle.PicklingError, AttributeError, TypeError)
    try:
        payload = pickle.dumps(obj, protocol=4)
    except unpicklable as err:
        raise ValueError(str(err)) from err
    return payload
# Disk queues for arbitrary (non-Request) objects: each combines a queuelib
# disk queue wrapped by _with_mkdir with a serializer/deserializer pair.
# NOTE(review): pickle.loads can execute arbitrary code when fed crafted
# data, so the pickle-based queue files should only be read from a trusted
# location.
PickleFifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.FifoDiskQueue),
    serialize=_pickle_serialize,
    deserialize=pickle.loads,
)
PickleLifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.LifoDiskQueue),
    serialize=_pickle_serialize,
    deserialize=pickle.loads,
)
MarshalFifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.FifoDiskQueue),
    serialize=marshal.dumps,
    deserialize=marshal.loads,
)
MarshalLifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.LifoDiskQueue),
    serialize=marshal.dumps,
    deserialize=marshal.loads,
)
# Request-aware disk queues: the *NonRequest serializing queues wrapped so
# that requests are converted to and from dictionaries around storage.
PickleFifoDiskQueue = _scrapy_serialization_queue(
    queue_class=PickleFifoDiskQueueNonRequest,
)
PickleLifoDiskQueue = _scrapy_serialization_queue(
    queue_class=PickleLifoDiskQueueNonRequest,
)
MarshalFifoDiskQueue = _scrapy_serialization_queue(
    queue_class=MarshalFifoDiskQueueNonRequest,
)
MarshalLifoDiskQueue = _scrapy_serialization_queue(
    queue_class=MarshalLifoDiskQueueNonRequest,
)

# In-memory queues: queuelib memory queues given a from_crawler constructor;
# no serialization wrapper is applied to them.
FifoMemoryQueue = _scrapy_non_serialization_queue(
    queue_class=queue.FifoMemoryQueue,
)
LifoMemoryQueue = _scrapy_non_serialization_queue(
    queue_class=queue.LifoMemoryQueue,
)