Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
|
|
@ -0,0 +1,40 @@
|
|||
import os
|
||||
import pickle
|
||||
|
||||
from scrapy import signals
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.utils.job import job_dir
|
||||
|
||||
|
||||
class SpiderState:
    """Save and restore a spider's ``state`` attribute across runs of a job.

    When the spider opens, ``spider.state`` is loaded from the pickle file
    ``<jobdir>/spider.state`` if that file exists; otherwise it is set to an
    empty dict. When the spider closes, ``spider.state`` is pickled back to
    the same file.
    """

    def __init__(self, jobdir=None):
        """Remember the job directory; a falsy value disables persistence."""
        self.jobdir = jobdir

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from ``crawler`` and hook it to spider signals.

        Raises:
            NotConfigured: if ``job_dir(crawler.settings)`` returns a falsy
                value.
        """
        directory = job_dir(crawler.settings)
        if not directory:
            raise NotConfigured

        extension = cls(directory)
        # Same connection order as before: spider_closed, then spider_opened.
        for handler, sig in (
            (extension.spider_closed, signals.spider_closed),
            (extension.spider_opened, signals.spider_opened),
        ):
            crawler.signals.connect(handler, signal=sig)
        return extension

    def spider_closed(self, spider):
        """Pickle ``spider.state`` into the state file (no-op without jobdir)."""
        if not self.jobdir:
            return
        with open(self.statefn, 'wb') as state_file:
            pickle.dump(spider.state, state_file, protocol=4)

    def spider_opened(self, spider):
        """Set ``spider.state`` from the state file, or to ``{}`` if absent."""
        # ``statefn`` is only evaluated once jobdir is known to be truthy.
        if not self.jobdir or not os.path.exists(self.statefn):
            spider.state = {}
            return
        # NOTE(review): unpickling executes arbitrary code from the file, so
        # the job directory is presumably expected to be trusted — confirm.
        with open(self.statefn, 'rb') as state_file:
            spider.state = pickle.load(state_file)

    @property
    def statefn(self):
        """Path of the state file: ``spider.state`` inside ``jobdir``."""
        return os.path.join(self.jobdir, 'spider.state')
|
||||
Loading…
Add table
Add a link
Reference in a new issue