"""
This module contains the default values for all settings used by Scrapy.
For more information about these settings you can read the settings
documentation in docs/topics/settings.rst
Scrapy developers, if you add a setting here remember to:
* add it in alphabetical order
* group similar settings without leaving blank lines
* add its documentation to the available settings documentation
(docs/topics/settings.rst)
"""
import sys
from importlib import import_module
from os.path import join, abspath, dirname
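# Illustrative sketch (not part of Scrapy's defaults): every name defined below
# is only a default value; a project normally overrides it from its own
# settings module, e.g.
#
#     # myproject/settings.py  (hypothetical project module)
#     BOT_NAME = 'mybot'
#     CONCURRENT_REQUESTS = 32
#     ROBOTSTXT_OBEY = True
#
# Project settings take precedence over the defaults in this file.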
AJAXCRAWL_ENABLED = False
ASYNCIO_EVENT_LOOP = None
AUTOTHROTTLE_ENABLED = False
AUTOTHROTTLE_DEBUG = False
AUTOTHROTTLE_MAX_DELAY = 60.0
AUTOTHROTTLE_START_DELAY = 5.0
AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
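# Illustrative sketch: AutoThrottle is off by default; a project that wants
# polite, adaptive throttling typically sets something like
#
#     AUTOTHROTTLE_ENABLED = True
#     AUTOTHROTTLE_START_DELAY = 5.0
#     AUTOTHROTTLE_TARGET_CONCURRENCY = 2.0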
BOT_NAME = 'scrapybot'
CLOSESPIDER_TIMEOUT = 0
CLOSESPIDER_PAGECOUNT = 0
CLOSESPIDER_ITEMCOUNT = 0
CLOSESPIDER_ERRORCOUNT = 0
COMMANDS_MODULE = ''
COMPRESSION_ENABLED = True
CONCURRENT_ITEMS = 100
CONCURRENT_REQUESTS = 16
CONCURRENT_REQUESTS_PER_DOMAIN = 8
CONCURRENT_REQUESTS_PER_IP = 0
COOKIES_ENABLED = True
COOKIES_DEBUG = False
DEFAULT_ITEM_CLASS = 'scrapy.item.Item'
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}
DEPTH_LIMIT = 0
DEPTH_STATS_VERBOSE = False
DEPTH_PRIORITY = 0
DNSCACHE_ENABLED = True
DNSCACHE_SIZE = 10000
DNS_RESOLVER = 'scrapy.resolver.CachingThreadedResolver'
DNS_TIMEOUT = 60
DOWNLOAD_DELAY = 0
DOWNLOAD_HANDLERS = {}
DOWNLOAD_HANDLERS_BASE = {
    'data': 'scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler',
    'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
    'http': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    'https': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
    'ftp': 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler',
}
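# Illustrative sketch: the project-level DOWNLOAD_HANDLERS dict is merged with
# DOWNLOAD_HANDLERS_BASE, and mapping a scheme to None disables it, e.g.
#
#     DOWNLOAD_HANDLERS = {
#         'ftp': None,  # drop FTP support entirely
#     }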
DOWNLOAD_TIMEOUT = 180 # 3mins
DOWNLOAD_MAXSIZE = 1024 * 1024 * 1024 # 1024m
DOWNLOAD_WARNSIZE = 32 * 1024 * 1024 # 32m
DOWNLOAD_FAIL_ON_DATALOSS = True
DOWNLOADER = 'scrapy.core.downloader.Downloader'
DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
DOWNLOADER_CLIENT_TLS_CIPHERS = 'DEFAULT'
# Use highest TLS/SSL protocol version supported by the platform, also allowing negotiation:
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS'
DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING = False
DOWNLOADER_MIDDLEWARES = {}
DOWNLOADER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware': 100,
    'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware': 300,
    'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware': 350,
    'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware': 400,
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 500,
    'scrapy.downloadermiddlewares.retry.RetryMiddleware': 550,
    'scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware': 560,
    'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware': 580,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 590,
    'scrapy.downloadermiddlewares.redirect.RedirectMiddleware': 600,
    'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': 700,
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 750,
    'scrapy.downloadermiddlewares.stats.DownloaderStats': 850,
    'scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware': 900,
    # Downloader side
}
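# Illustrative sketch: DOWNLOADER_MIDDLEWARES (empty above) is merged with
# DOWNLOADER_MIDDLEWARES_BASE; lower orders sit closer to the engine, higher
# orders closer to the downloader, and None disables a base entry, e.g.
#
#     DOWNLOADER_MIDDLEWARES = {
#         'myproject.middlewares.ProxyRotationMiddleware': 745,  # hypothetical class
#         'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
#     }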
DOWNLOADER_STATS = True
DUPEFILTER_CLASS = 'scrapy.dupefilters.RFPDupeFilter'
EDITOR = 'vi'
if sys.platform == 'win32':
    EDITOR = '%s -m idlelib.idle'
EXTENSIONS = {}
EXTENSIONS_BASE = {
    'scrapy.extensions.corestats.CoreStats': 0,
    'scrapy.extensions.telnet.TelnetConsole': 0,
    'scrapy.extensions.memusage.MemoryUsage': 0,
    'scrapy.extensions.memdebug.MemoryDebugger': 0,
    'scrapy.extensions.closespider.CloseSpider': 0,
    'scrapy.extensions.feedexport.FeedExporter': 0,
    'scrapy.extensions.logstats.LogStats': 0,
    'scrapy.extensions.spiderstate.SpiderState': 0,
    'scrapy.extensions.throttle.AutoThrottle': 0,
}
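# Illustrative sketch: the orders are all 0 because built-in extensions do not
# depend on each other; projects add or disable extensions via EXTENSIONS, e.g.
#
#     EXTENSIONS = {
#         'scrapy.extensions.telnet.TelnetConsole': None,  # turn off telnet
#     }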
FEED_TEMPDIR = None
FEEDS = {}
FEED_URI_PARAMS = None # a function to extend uri arguments
FEED_STORE_EMPTY = False
FEED_EXPORT_ENCODING = None
FEED_EXPORT_FIELDS = None
FEED_STORAGES = {}
FEED_STORAGES_BASE = {
    '': 'scrapy.extensions.feedexport.FileFeedStorage',
    'file': 'scrapy.extensions.feedexport.FileFeedStorage',
    'ftp': 'scrapy.extensions.feedexport.FTPFeedStorage',
    'gs': 'scrapy.extensions.feedexport.GCSFeedStorage',
    's3': 'scrapy.extensions.feedexport.S3FeedStorage',
    'stdout': 'scrapy.extensions.feedexport.StdoutFeedStorage',
}
FEED_EXPORT_BATCH_ITEM_COUNT = 0
FEED_EXPORTERS = {}
FEED_EXPORTERS_BASE = {
    'json': 'scrapy.exporters.JsonItemExporter',
    'jsonlines': 'scrapy.exporters.JsonLinesItemExporter',
    'jl': 'scrapy.exporters.JsonLinesItemExporter',
    'csv': 'scrapy.exporters.CsvItemExporter',
    'xml': 'scrapy.exporters.XmlItemExporter',
    'marshal': 'scrapy.exporters.MarshalItemExporter',
    'pickle': 'scrapy.exporters.PickleItemExporter',
}
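# Illustrative sketch: feed exports are configured through FEEDS, keyed by
# output URI (resolved against FEED_STORAGES_BASE) with a 'format' taken from
# FEED_EXPORTERS_BASE, e.g.
#
#     FEEDS = {
#         'items.jl': {'format': 'jsonlines', 'encoding': 'utf8'},
#         's3://my-bucket/items.csv': {'format': 'csv'},  # hypothetical bucket
#     }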
FEED_EXPORT_INDENT = 0
FEED_STORAGE_FTP_ACTIVE = False
FEED_STORAGE_GCS_ACL = ''
FEED_STORAGE_S3_ACL = ''
FILES_STORE_S3_ACL = 'private'
FILES_STORE_GCS_ACL = ''
FTP_USER = 'anonymous'
FTP_PASSWORD = 'guest'
FTP_PASSIVE_MODE = True
GCS_PROJECT_ID = None
HTTPCACHE_ENABLED = False
HTTPCACHE_DIR = 'httpcache'
HTTPCACHE_IGNORE_MISSING = False
HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_ALWAYS_STORE = False
HTTPCACHE_IGNORE_HTTP_CODES = []
HTTPCACHE_IGNORE_SCHEMES = ['file']
HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS = []
HTTPCACHE_DBM_MODULE = 'dbm'
HTTPCACHE_POLICY = 'scrapy.extensions.httpcache.DummyPolicy'
HTTPCACHE_GZIP = False
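# Illustrative sketch: turning the HTTP cache on for local development usually
# only needs HTTPCACHE_ENABLED plus an optional expiration, e.g.
#
#     HTTPCACHE_ENABLED = True
#     HTTPCACHE_EXPIRATION_SECS = 3600  # re-download entries older than an hour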
HTTPPROXY_ENABLED = True
HTTPPROXY_AUTH_ENCODING = 'latin-1'
IMAGES_STORE_S3_ACL = 'private'
IMAGES_STORE_GCS_ACL = ''
ITEM_PROCESSOR = 'scrapy.pipelines.ItemPipelineManager'
ITEM_PIPELINES = {}
ITEM_PIPELINES_BASE = {}
LOG_ENABLED = True
LOG_ENCODING = 'utf-8'
LOG_FORMATTER = 'scrapy.logformatter.LogFormatter'
LOG_FORMAT = '%(asctime)s [%(name)s] %(levelname)s: %(message)s'
LOG_DATEFORMAT = '%Y-%m-%d %H:%M:%S'
LOG_STDOUT = False
LOG_LEVEL = 'DEBUG'
LOG_FILE = None
LOG_SHORT_NAMES = False
SCHEDULER_DEBUG = False
LOGSTATS_INTERVAL = 60.0
MAIL_HOST = 'localhost'
MAIL_PORT = 25
MAIL_FROM = 'scrapy@localhost'
MAIL_PASS = None
MAIL_USER = None
MEMDEBUG_ENABLED = False # enable memory debugging
MEMDEBUG_NOTIFY = [] # send memory debugging report by mail at engine shutdown
MEMUSAGE_CHECK_INTERVAL_SECONDS = 60.0
MEMUSAGE_ENABLED = True
MEMUSAGE_LIMIT_MB = 0
MEMUSAGE_NOTIFY_MAIL = []
MEMUSAGE_WARNING_MB = 0
METAREFRESH_ENABLED = True
METAREFRESH_IGNORE_TAGS = []
METAREFRESH_MAXDELAY = 100
NEWSPIDER_MODULE = ''
RANDOMIZE_DOWNLOAD_DELAY = True
REACTOR_THREADPOOL_MAXSIZE = 10
REDIRECT_ENABLED = True
REDIRECT_MAX_TIMES = 20 # uses Firefox default setting
REDIRECT_PRIORITY_ADJUST = +2
REFERER_ENABLED = True
REFERRER_POLICY = 'scrapy.spidermiddlewares.referer.DefaultReferrerPolicy'
RETRY_ENABLED = True
RETRY_TIMES = 2 # initial response + 2 retries = 3 requests
RETRY_HTTP_CODES = [500, 502, 503, 504, 522, 524, 408, 429]
RETRY_PRIORITY_ADJUST = -1
ROBOTSTXT_OBEY = False
ROBOTSTXT_PARSER = 'scrapy.robotstxt.ProtegoRobotParser'
ROBOTSTXT_USER_AGENT = None
SCHEDULER = 'scrapy.core.scheduler.Scheduler'
SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue'
SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
SCHEDULER_PRIORITY_QUEUE = 'scrapy.pqueues.ScrapyPriorityQueue'
SCRAPER_SLOT_MAX_ACTIVE_SIZE = 5000000
SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'
SPIDER_LOADER_WARN_ONLY = False
SPIDER_MIDDLEWARES = {}
SPIDER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.spidermiddlewares.httperror.HttpErrorMiddleware': 50,
    'scrapy.spidermiddlewares.offsite.OffsiteMiddleware': 500,
    'scrapy.spidermiddlewares.referer.RefererMiddleware': 700,
    'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware': 800,
    'scrapy.spidermiddlewares.depth.DepthMiddleware': 900,
    # Spider side
}
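# Illustrative sketch: SPIDER_MIDDLEWARES is merged with SPIDER_MIDDLEWARES_BASE
# just like the downloader middlewares; for example, to disable offsite
# filtering:
#
#     SPIDER_MIDDLEWARES = {
#         'scrapy.spidermiddlewares.offsite.OffsiteMiddleware': None,
#     }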
SPIDER_MODULES = []
STATS_CLASS = 'scrapy.statscollectors.MemoryStatsCollector'
STATS_DUMP = True
STATSMAILER_RCPTS = []
TEMPLATES_DIR = abspath(join(dirname(__file__), '..', 'templates'))
URLLENGTH_LIMIT = 2083
USER_AGENT = f'Scrapy/{import_module("scrapy").__version__} (+https://scrapy.org)'
TELNETCONSOLE_ENABLED = 1
TELNETCONSOLE_PORT = [6023, 6073]
TELNETCONSOLE_HOST = '127.0.0.1'
TELNETCONSOLE_USERNAME = 'scrapy'
TELNETCONSOLE_PASSWORD = None
TWISTED_REACTOR = None
SPIDER_CONTRACTS = {}
SPIDER_CONTRACTS_BASE = {
    'scrapy.contracts.default.UrlContract': 1,
    'scrapy.contracts.default.CallbackKeywordArgumentsContract': 1,
    'scrapy.contracts.default.ReturnsContract': 2,
    'scrapy.contracts.default.ScrapesContract': 3,
}
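# Illustrative sketch: these contracts are exercised by the `scrapy check`
# command from tags in a callback docstring, e.g. (hypothetical spider method):
#
#     def parse(self, response):
#         """Parse a product page.
#
#         @url http://example.com/some-product
#         @returns items 1 1
#         @scrapes name price
#         """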