Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
|
|
@ -0,0 +1,52 @@
|
|||
import logging
|
||||
|
||||
from twisted.internet import task
|
||||
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy import signals
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LogStats:
    """Log basic scraping stats periodically.

    On every tick the extension reads the ``item_scraped_count`` and
    ``response_received_count`` values from the stats collector and logs the
    running totals together with per-minute rates derived from the change
    since the previous tick.
    """

    def __init__(self, stats, interval=60.0):
        """Store the stats collector and the reporting period.

        Args:
            stats: Object exposing ``get_value(key, default)``.
            interval: Seconds between two reports. Must be non-zero, since
                it is used as a divisor below.
        """
        self.stats = stats
        self.interval = interval
        # Scales a per-interval delta up (or down) to a per-minute rate.
        self.multiplier = 60.0 / self.interval
        self.task = None
        # Initialised here as well as in spider_opened() so that log() can be
        # called on a freshly built instance without raising AttributeError.
        self.pagesprev = 0
        self.itemsprev = 0

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from a crawler and hook it to spider signals.

        Args:
            crawler: Object providing ``settings``, ``stats`` and ``signals``.

        Returns:
            The new instance, connected to the ``spider_opened`` and
            ``spider_closed`` signals.

        Raises:
            NotConfigured: If the ``LOGSTATS_INTERVAL`` setting is falsy
                (for example ``0``).
        """
        interval = crawler.settings.getfloat('LOGSTATS_INTERVAL')
        if not interval:
            raise NotConfigured
        o = cls(crawler.stats, interval)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o

    def spider_opened(self, spider):
        """Reset the previous-tick counters and start the periodic task."""
        self.pagesprev = 0
        self.itemsprev = 0

        self.task = task.LoopingCall(self.log, spider)
        self.task.start(self.interval)

    def log(self, spider):
        """Log current page/item totals and their per-minute rates.

        Args:
            spider: Passed to the logger through ``extra={'spider': spider}``.
        """
        items = self.stats.get_value('item_scraped_count', 0)
        pages = self.stats.get_value('response_received_count', 0)
        # Rate = change since the previous tick, scaled to one minute.
        irate = (items - self.itemsprev) * self.multiplier
        prate = (pages - self.pagesprev) * self.multiplier
        # Remember the totals so the next tick measures only its own delta.
        self.pagesprev, self.itemsprev = pages, items

        msg = ("Crawled %(pages)d pages (at %(pagerate)d pages/min), "
               "scraped %(items)d items (at %(itemrate)d items/min)")
        log_args = {'pages': pages, 'pagerate': prate,
                    'items': items, 'itemrate': irate}
        logger.info(msg, log_args, extra={'spider': spider})

    def spider_closed(self, spider, reason):
        """Stop the periodic task if one was started and is still running."""
        if self.task and self.task.running:
            self.task.stop()
|
||||
Loading…
Add table
Add a link
Reference in a new issue