125 lines
3.1 KiB
Python
125 lines
3.1 KiB
Python
"""
Scheduler queues
"""
|
|
|
|
import marshal
|
|
import os
|
|
import pickle
|
|
|
|
from queuelib import queue
|
|
|
|
from scrapy.utils.reqser import request_to_dict, request_from_dict
|
|
|
|
|
|
def _with_mkdir(queue_class):
    """Return a subclass of *queue_class* that creates the parent directory.

    The returned class takes the same constructor arguments as
    *queue_class*; its first positional argument, ``path``, is inspected
    before being forwarded unchanged to the parent constructor.

    Args:
        queue_class: Queue class whose constructor takes a filesystem
            path as its first argument.

    Returns:
        A ``DirectoriesCreated`` subclass of *queue_class*.
    """

    class DirectoriesCreated(queue_class):
        """*queue_class* variant that makes sure ``dirname(path)`` exists."""

        def __init__(self, path, *args, **kwargs):
            dirname = os.path.dirname(path)
            # A bare file name has no directory component: dirname is ""
            # and os.makedirs("") would raise FileNotFoundError, so only
            # create directories when there is actually one to create.
            if dirname and not os.path.exists(dirname):
                # exist_ok=True tolerates another process creating the
                # directory between the check above and this call.
                os.makedirs(dirname, exist_ok=True)

            super().__init__(path, *args, **kwargs)

    return DirectoriesCreated
|
|
|
|
|
|
def _serializable_queue(queue_class, serialize, deserialize):
    """Build a subclass of *queue_class* that stores serialized objects.

    Args:
        queue_class: Base queue class providing ``push`` and ``pop``.
        serialize: Callable applied to every object before it is pushed.
        deserialize: Callable applied to every truthy value popped from
            the underlying queue.

    Returns:
        A ``SerializableQueue`` subclass of *queue_class*.
    """

    class SerializableQueue(queue_class):
        """Queue that serializes on ``push`` and deserializes on ``pop``."""

        def push(self, obj):
            # Hand the serialized form, not the object, to the parent queue.
            super().push(serialize(obj))

        def pop(self):
            raw = super().pop()
            if not raw:
                # Nothing usable came back from the parent queue.
                return None
            return deserialize(raw)

    return SerializableQueue
|
|
|
|
|
|
def _scrapy_serialization_queue(queue_class):
    """Build a request queue on top of *queue_class*.

    Requests are passed through ``request_to_dict`` before being pushed
    to the parent queue and through ``request_from_dict`` after being
    popped, both using the crawler's spider.

    Args:
        queue_class: Base queue class whose constructor takes a single
            ``key`` argument.

    Returns:
        A ``ScrapyRequestQueue`` subclass of *queue_class*.
    """

    class ScrapyRequestQueue(queue_class):
        """Queue of requests bound to ``crawler.spider``."""

        def __init__(self, crawler, key):
            # The spider is needed by both conversion helpers.
            self.spider = crawler.spider
            super().__init__(key)

        @classmethod
        def from_crawler(cls, crawler, key, *args, **kwargs):
            # Extra positional/keyword arguments are accepted but unused.
            return cls(crawler, key)

        def push(self, request):
            return super().push(request_to_dict(request, self.spider))

        def pop(self):
            stored = super().pop()
            # A falsy result from the parent queue is reported as None.
            return request_from_dict(stored, self.spider) if stored else None

    return ScrapyRequestQueue
|
|
|
|
|
|
def _scrapy_non_serialization_queue(queue_class):
    """Build a subclass of *queue_class* constructible via ``from_crawler``.

    Args:
        queue_class: Base queue class whose constructor needs no arguments.

    Returns:
        A ``ScrapyRequestQueue`` subclass of *queue_class*.
    """

    class ScrapyRequestQueue(queue_class):
        """*queue_class* with a ``from_crawler`` alternate constructor."""

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            # The crawler and any extra arguments are accepted but unused;
            # the queue is always created with no arguments.
            instance = cls()
            return instance

    return ScrapyRequestQueue
|
|
|
|
|
|
def _pickle_serialize(obj):
    """Pickle *obj* with protocol 4.

    Args:
        obj: Object to serialize.

    Returns:
        The pickled bytes.

    Raises:
        ValueError: If pickling fails with ``pickle.PicklingError``,
            ``AttributeError`` or ``TypeError``; the original exception
            is chained as the cause.
    """
    try:
        payload = pickle.dumps(obj, protocol=4)
    # pickle.dump(s) can raise both pickle.PicklingError and AttributeError;
    # TypeError is raised from parsel.Selector.
    except (pickle.PicklingError, AttributeError, TypeError) as exc:
        raise ValueError(str(exc)) from exc
    return payload
|
|
|
|
|
|
# Disk queues for arbitrary objects: the parent directory of the queue path
# is created on demand and items are serialized with pickle or marshal.
PickleFifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.FifoDiskQueue),
    serialize=_pickle_serialize,
    deserialize=pickle.loads,
)
PickleLifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.LifoDiskQueue),
    serialize=_pickle_serialize,
    deserialize=pickle.loads,
)
MarshalFifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.FifoDiskQueue),
    serialize=marshal.dumps,
    deserialize=marshal.loads,
)
MarshalLifoDiskQueueNonRequest = _serializable_queue(
    queue_class=_with_mkdir(queue.LifoDiskQueue),
    serialize=marshal.dumps,
    deserialize=marshal.loads,
)

# Request-aware disk queues layered on the serializing queues above.
PickleFifoDiskQueue = _scrapy_serialization_queue(PickleFifoDiskQueueNonRequest)
PickleLifoDiskQueue = _scrapy_serialization_queue(PickleLifoDiskQueueNonRequest)
MarshalFifoDiskQueue = _scrapy_serialization_queue(MarshalFifoDiskQueueNonRequest)
MarshalLifoDiskQueue = _scrapy_serialization_queue(MarshalLifoDiskQueueNonRequest)

# In-memory queues: only the ``from_crawler`` constructor is added.
FifoMemoryQueue = _scrapy_non_serialization_queue(queue.FifoMemoryQueue)
LifoMemoryQueue = _scrapy_non_serialization_queue(queue.LifoMemoryQueue)
|