Ausgabe der neuen DB Einträge
This commit is contained in:
parent
bad48e1627
commit
cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
465
venv/lib/python3.9/site-packages/scrapy/settings/__init__.py
Normal file
465
venv/lib/python3.9/site-packages/scrapy/settings/__init__.py
Normal file
|
|
@ -0,0 +1,465 @@
|
|||
import json
|
||||
import copy
|
||||
from collections.abc import MutableMapping
|
||||
from importlib import import_module
|
||||
from pprint import pformat
|
||||
|
||||
from scrapy.settings import default_settings
|
||||
|
||||
|
||||
# Named priority levels and their numeric rank. A setting stored at a given
# rank can only be overwritten by a write of equal or higher rank (see
# ``SettingsAttribute.set``).
SETTINGS_PRIORITIES = {
    "default": 0,
    "command": 10,
    "project": 20,
    "spider": 30,
    "cmdline": 40,
}
|
||||
|
||||
|
||||
def get_settings_priority(priority):
    """
    Resolve ``priority`` to its numerical value.

    A string is treated as a key of
    :attr:`~scrapy.settings.SETTINGS_PRIORITIES` (an unknown name raises
    ``KeyError``); any other value is assumed to already be numerical and is
    returned unchanged.
    """
    return SETTINGS_PRIORITIES[priority] if isinstance(priority, str) else priority
|
||||
|
||||
|
||||
class SettingsAttribute:
    """Value/priority pair backing a single settings entry.

    This is an internal helper; configure settings through the Settings
    class rather than instantiating this one directly.
    """

    def __init__(self, value, priority):
        self.value = value
        # A nested BaseSettings may already hold entries ranked above the
        # requested priority; the attribute then takes the higher of the two.
        holds_settings = isinstance(value, BaseSettings)
        self.priority = max(value.maxpriority(), priority) if holds_settings else priority

    def set(self, value, priority):
        """Replace the stored value unless ``priority`` is lower than the current one."""
        if priority >= self.priority:
            # Keep the attribute a BaseSettings once it is one: the incoming
            # value is wrapped at the new priority before being stored.
            self.value = (
                BaseSettings(value, priority=priority)
                if isinstance(self.value, BaseSettings)
                else value
            )
            self.priority = priority

    def __repr__(self):
        return f"<SettingsAttribute value={self.value!r} priority={self.priority}>"

    __str__ = __repr__
|
||||
|
||||
|
||||
class BaseSettings(MutableMapping):
    """
    Instances of this class behave like dictionaries, but store priorities
    along with their ``(key, value)`` pairs, and can be frozen (i.e. marked
    immutable).

    Key-value entries can be passed on initialization with the ``values``
    argument, and they would take the ``priority`` level (unless ``values`` is
    already an instance of :class:`~scrapy.settings.BaseSettings`, in which
    case the existing priority levels will be kept). If the ``priority``
    argument is a string, the priority name will be looked up in
    :attr:`~scrapy.settings.SETTINGS_PRIORITIES`. Otherwise, a specific integer
    should be provided.

    Once the object is created, new settings can be loaded or updated with the
    :meth:`~scrapy.settings.BaseSettings.set` method, and can be accessed with
    the square bracket notation of dictionaries, or with the
    :meth:`~scrapy.settings.BaseSettings.get` method of the instance and its
    value conversion variants. When requesting a stored key, the value with the
    highest priority will be retrieved.
    """

    def __init__(self, values=None, priority='project'):
        self.frozen = False
        # Maps setting name -> SettingsAttribute (value plus priority)
        self.attributes = {}
        if values:
            self.update(values, priority)

    def __getitem__(self, opt_name):
        # Unlike a plain dict, a missing key yields ``None`` instead of
        # raising ``KeyError``.
        if opt_name not in self:
            return None
        return self.attributes[opt_name].value

    def __contains__(self, name):
        return name in self.attributes

    def get(self, name, default=None):
        """
        Get a setting value without affecting its original type.

        A setting that is missing, or stored as ``None``, yields ``default``.

        :param name: the setting name
        :type name: str

        :param default: the value to return if no setting is found
        :type default: object
        """
        value = self[name]
        return value if value is not None else default

    def getbool(self, name, default=False):
        """
        Get a setting value as a boolean.

        ``1``, ``'1'``, ``True``, ``'True'`` and ``'true'`` return ``True``,
        while ``0``, ``'0'``, ``False``, ``'False'``, ``'false'`` and ``None``
        return ``False``.

        For example, settings populated through environment variables set to
        ``'0'`` will return ``False`` when using this method.

        :param name: the setting name
        :type name: str

        :param default: the value to return if no setting is found
        :type default: object

        :raises ValueError: if the value is a string that is neither an
            integer literal nor one of the accepted boolean spellings
        """
        got = self.get(name, default)
        try:
            return bool(int(got))
        except ValueError:
            if got in ("True", "true"):
                return True
            if got in ("False", "false"):
                return False
            raise ValueError("Supported values for boolean settings "
                             "are 0/1, True/False, '0'/'1', "
                             "'True'/'False' and 'true'/'false'")

    def getint(self, name, default=0):
        """
        Get a setting value as an int.

        :param name: the setting name
        :type name: str

        :param default: the value to return if no setting is found
        :type default: object
        """
        return int(self.get(name, default))

    def getfloat(self, name, default=0.0):
        """
        Get a setting value as a float.

        :param name: the setting name
        :type name: str

        :param default: the value to return if no setting is found
        :type default: object
        """
        return float(self.get(name, default))

    def getlist(self, name, default=None):
        """
        Get a setting value as a list. If the setting original type is a list, a
        copy of it will be returned. If it's a string it will be split by ",".

        For example, settings populated through environment variables set to
        ``'one,two'`` will return a list ['one', 'two'] when using this method.

        :param name: the setting name
        :type name: str

        :param default: the value to return if no setting is found
        :type default: object
        """
        value = self.get(name, default or [])
        if isinstance(value, str):
            value = value.split(',')
        return list(value)

    def getdict(self, name, default=None):
        """
        Get a setting value as a dictionary. If the setting original type is a
        dictionary, a copy of it will be returned. If it is a string it will be
        evaluated as a JSON dictionary. In the case that it is a
        :class:`~scrapy.settings.BaseSettings` instance itself, it will be
        converted to a dictionary, containing all its current settings values
        as they would be returned by :meth:`~scrapy.settings.BaseSettings.get`,
        and losing all information about priority and mutability.

        :param name: the setting name
        :type name: str

        :param default: the value to return if no setting is found
        :type default: object
        """
        value = self.get(name, default or {})
        if isinstance(value, str):
            value = json.loads(value)
        return dict(value)

    def getwithbase(self, name):
        """Get a composition of a dictionary-like setting and its `_BASE`
        counterpart.

        Entries of ``name`` are applied after those of ``name + '_BASE'``, so
        they override the base entries subject to the usual priority rules.

        :param name: name of the dictionary-like setting
        :type name: str
        """
        compbs = BaseSettings()
        compbs.update(self[name + '_BASE'])
        compbs.update(self[name])
        return compbs

    def getpriority(self, name):
        """
        Return the current numerical priority value of a setting, or ``None`` if
        the given ``name`` does not exist.

        :param name: the setting name
        :type name: str
        """
        if name not in self:
            return None
        return self.attributes[name].priority

    def maxpriority(self):
        """
        Return the numerical value of the highest priority present throughout
        all settings, or the numerical value for ``default`` from
        :attr:`~scrapy.settings.SETTINGS_PRIORITIES` if there are no settings
        stored.
        """
        if len(self) > 0:
            return max(self.getpriority(name) for name in self)
        return get_settings_priority('default')

    def __setitem__(self, name, value):
        self.set(name, value)

    def set(self, name, value, priority='project'):
        """
        Store a key/value attribute with a given priority.

        Settings should be populated *before* configuring the Crawler object
        (through the :meth:`~scrapy.crawler.Crawler.configure` method),
        otherwise they won't have any effect.

        If ``name`` is not stored yet and ``value`` is already a
        ``SettingsAttribute``, it is stored as-is and ``priority`` is ignored.

        :param name: the setting name
        :type name: str

        :param value: the value to associate with the setting
        :type value: object

        :param priority: the priority of the setting. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: str or int

        :raises TypeError: if the settings object is frozen
        """
        self._assert_mutability()
        priority = get_settings_priority(priority)
        if name not in self:
            if isinstance(value, SettingsAttribute):
                self.attributes[name] = value
            else:
                self.attributes[name] = SettingsAttribute(value, priority)
        else:
            # Existing entry: the attribute itself decides whether the new
            # priority is high enough to replace the value.
            self.attributes[name].set(value, priority)

    def setdict(self, values, priority='project'):
        """Alias of :meth:`update`."""
        self.update(values, priority)

    def setmodule(self, module, priority='project'):
        """
        Store settings from a module with a given priority.

        This is a helper function that calls
        :meth:`~scrapy.settings.BaseSettings.set` for every globally declared
        uppercase variable of ``module`` with the provided ``priority``.

        :param module: the module or the path of the module
        :type module: types.ModuleType or str

        :param priority: the priority of the settings. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: str or int

        :raises TypeError: if the settings object is frozen
        """
        self._assert_mutability()
        if isinstance(module, str):
            module = import_module(module)
        for key in dir(module):
            if key.isupper():
                self.set(key, getattr(module, key), priority)

    def update(self, values, priority='project'):
        """
        Store key/value pairs with a given priority.

        This is a helper function that calls
        :meth:`~scrapy.settings.BaseSettings.set` for every item of ``values``
        with the provided ``priority``.

        If ``values`` is a string, it is assumed to be JSON-encoded and parsed
        into a dict with ``json.loads()`` first. If it is a
        :class:`~scrapy.settings.BaseSettings` instance, the per-key priorities
        will be used and the ``priority`` parameter ignored. This allows
        inserting/updating settings with different priorities with a single
        command. ``None`` is accepted and stores nothing.

        :param values: the settings names and values
        :type values: dict or string or :class:`~scrapy.settings.BaseSettings`

        :param priority: the priority of the settings. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: str or int

        :raises TypeError: if the settings object is frozen
        """
        self._assert_mutability()
        if isinstance(values, str):
            values = json.loads(values)
        if values is not None:
            if isinstance(values, BaseSettings):
                for name, value in values.items():
                    self.set(name, value, values.getpriority(name))
            else:
                for name, value in values.items():
                    self.set(name, value, priority)

    def delete(self, name, priority='project'):
        """
        Remove a setting if ``priority`` is at least its stored priority.

        A setting stored with a higher priority than ``priority`` is left in
        place silently.

        :param name: the setting name
        :type name: str

        :param priority: the priority of the deletion. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: str or int

        :raises TypeError: if the settings object is frozen
        :raises KeyError: if ``name`` is not stored
        """
        self._assert_mutability()
        # Without this guard ``getpriority`` returns ``None`` for a missing
        # name and the comparison below fails with an unrelated TypeError;
        # raise KeyError instead, as ``__delitem__`` does.
        if name not in self:
            raise KeyError(name)
        priority = get_settings_priority(priority)
        if priority >= self.getpriority(name):
            del self.attributes[name]

    def __delitem__(self, name):
        # Unconditional removal: priorities are not consulted here.
        self._assert_mutability()
        del self.attributes[name]

    def _assert_mutability(self):
        """Raise ``TypeError`` if this object has been frozen."""
        if self.frozen:
            raise TypeError("Trying to modify an immutable Settings object")

    def copy(self):
        """
        Make a deep copy of current settings.

        This method returns a new instance of the same class as the current
        object, populated with the same values and their priorities.

        Modifications to the new object won't be reflected on the original
        settings.
        """
        return copy.deepcopy(self)

    def freeze(self):
        """
        Disable further changes to the current settings.

        After calling this method, the present state of the settings will become
        immutable. Trying to change values through the :meth:`~set` method and
        its variants won't be possible and will be alerted.
        """
        self.frozen = True

    def frozencopy(self):
        """
        Return an immutable copy of the current settings.

        Alias for a :meth:`~freeze` call in the object returned by :meth:`copy`.
        """
        # Named ``frozen`` rather than ``copy`` to avoid shadowing the module.
        frozen = self.copy()
        frozen.freeze()
        return frozen

    def __iter__(self):
        return iter(self.attributes)

    def __len__(self):
        return len(self.attributes)

    def _to_dict(self):
        """Return a plain dict of values, converting nested BaseSettings recursively."""
        return {k: (v._to_dict() if isinstance(v, BaseSettings) else v)
                for k, v in self.items()}

    def copy_to_dict(self):
        """
        Make a copy of current settings and convert to a dict.

        This method returns a new dict populated with the same values as the
        current settings; priority information is not included.

        Modifications to the returned dict won't be reflected on the original
        settings.

        This method can be useful for example for printing settings
        in Scrapy shell.
        """
        settings = self.copy()
        return settings._to_dict()

    def _repr_pretty_(self, p, cycle):
        # Pretty-printer hook (presumably IPython's ``_repr_pretty_`` protocol
        # - confirm): emit the plain-dict view, or the bare repr on a cycle.
        if cycle:
            p.text(repr(self))
        else:
            p.text(pformat(self.copy_to_dict()))
|
||||
|
||||
|
||||
class _DictProxy(MutableMapping):
    """Dict-like recorder that forwards every assignment to a settings object.

    Each ``proxy[k] = v`` calls ``settings.set(k, v, priority=priority)`` and
    also remembers the pair locally; reads, length and iteration are served
    from that local record only.
    """

    def __init__(self, settings, priority):
        self.o = {}  # local record of the pairs assigned through this proxy
        self.settings = settings
        self.priority = priority

    def __len__(self):
        return len(self.o)

    def __getitem__(self, k):
        return self.o[k]

    def __setitem__(self, k, v):
        self.settings.set(k, v, priority=self.priority)
        self.o[k] = v

    def __delitem__(self, k):
        # Only the local record is affected; the wrapped settings object
        # is not touched.
        del self.o[k]

    def __iter__(self):
        # The iterator protocol calls ``__iter__`` with no arguments. The
        # previous ``(self, k, v)`` signature made ``iter(proxy)`` - and every
        # MutableMapping mixin built on it (keys, items, ``in``, ...) - raise
        # TypeError.
        return iter(self.o)
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """
    Container for Scrapy settings used to configure internal components and
    available for any further customization.

    It is a direct subclass of :class:`~scrapy.settings.BaseSettings` and
    supports all of its methods. Additionally, a freshly created instance is
    already populated with the global default settings described on
    :ref:`topics-settings-ref`.
    """

    def __init__(self, values=None, priority='project'):
        # ``values`` is intentionally not forwarded to the parent
        # constructor: user-defined dicts must not be promoted, and default
        # dicts must be updated by the user's values rather than replaced.
        super().__init__()
        self.setmodule(default_settings, 'default')

        # Wrap every default that is a plain dict in a BaseSettings so its
        # keys carry individual priorities.
        dict_defaults = [
            (key, current) for key, current in self.items()
            if isinstance(current, dict)
        ]
        for key, current in dict_defaults:
            self.set(key, BaseSettings(current, 'default'), 'default')

        self.update(values, priority)
|
||||
|
||||
|
||||
def iter_default_settings():
    """Yield a ``(name, value)`` tuple for each uppercase attribute of ``default_settings``."""
    yield from (
        (attr, getattr(default_settings, attr))
        for attr in dir(default_settings)
        if attr.isupper()
    )
|
||||
|
||||
|
||||
def overridden_settings(settings):
    """Yield ``(name, value)`` for each setting whose value differs from its default.

    This is a generator, not a dict; wrap the call in ``dict(...)`` to get
    one. Settings whose default is a dict are never reported.
    """
    for name, default_value in iter_default_settings():
        current = settings[name]
        if isinstance(default_value, dict):
            continue
        if current != default_value:
            yield name, current
|
||||
|
|
@ -0,0 +1,306 @@
|
|||
"""
This module contains the default values for all settings used by Scrapy.

For more information about these settings you can read the settings
documentation in docs/topics/settings.rst

Only the uppercase module-level names are treated as settings:
``scrapy.settings.BaseSettings.setmodule`` and
``scrapy.settings.iter_default_settings`` both walk ``dir()`` of this module
and keep the names for which ``str.isupper()`` is true, so the lowercase
helper imports below are never exported as settings.

Scrapy developers, if you add a setting here remember to:

* add it in alphabetical order
* group similar settings without leaving blank lines
* add its documentation to the available settings documentation
  (docs/topics/settings.rst)

"""

import sys
from importlib import import_module
from os.path import join, abspath, dirname

AJAXCRAWL_ENABLED = False

ASYNCIO_EVENT_LOOP = None

AUTOTHROTTLE_ENABLED = False
AUTOTHROTTLE_DEBUG = False
AUTOTHROTTLE_MAX_DELAY = 60.0
AUTOTHROTTLE_START_DELAY = 5.0
AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0

BOT_NAME = 'scrapybot'

CLOSESPIDER_TIMEOUT = 0
CLOSESPIDER_PAGECOUNT = 0
CLOSESPIDER_ITEMCOUNT = 0
CLOSESPIDER_ERRORCOUNT = 0

COMMANDS_MODULE = ''

COMPRESSION_ENABLED = True

CONCURRENT_ITEMS = 100

CONCURRENT_REQUESTS = 16
CONCURRENT_REQUESTS_PER_DOMAIN = 8
CONCURRENT_REQUESTS_PER_IP = 0

COOKIES_ENABLED = True
COOKIES_DEBUG = False

DEFAULT_ITEM_CLASS = 'scrapy.item.Item'

DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}

DEPTH_LIMIT = 0
DEPTH_STATS_VERBOSE = False
DEPTH_PRIORITY = 0

DNSCACHE_ENABLED = True
DNSCACHE_SIZE = 10000
DNS_RESOLVER = 'scrapy.resolver.CachingThreadedResolver'
DNS_TIMEOUT = 60

DOWNLOAD_DELAY = 0

# X / X_BASE pairs: ``BaseSettings.getwithbase('X')`` layers the entries of X
# on top of those of X_BASE, so the empty X dict is where overrides go.
DOWNLOAD_HANDLERS = {}
DOWNLOAD_HANDLERS_BASE = {
    'data': 'scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler',
    'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
    'http': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    'https': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
    'ftp': 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler',
}

DOWNLOAD_TIMEOUT = 180      # 3mins

DOWNLOAD_MAXSIZE = 1024 * 1024 * 1024   # 1024m
DOWNLOAD_WARNSIZE = 32 * 1024 * 1024    # 32m

DOWNLOAD_FAIL_ON_DATALOSS = True

DOWNLOADER = 'scrapy.core.downloader.Downloader'

DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
DOWNLOADER_CLIENT_TLS_CIPHERS = 'DEFAULT'
# Use highest TLS/SSL protocol version supported by the platform, also allowing negotiation:
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS'
DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING = False

DOWNLOADER_MIDDLEWARES = {}

DOWNLOADER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware': 100,
    'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware': 300,
    'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware': 350,
    'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware': 400,
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 500,
    'scrapy.downloadermiddlewares.retry.RetryMiddleware': 550,
    'scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware': 560,
    'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware': 580,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 590,
    'scrapy.downloadermiddlewares.redirect.RedirectMiddleware': 600,
    'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': 700,
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 750,
    'scrapy.downloadermiddlewares.stats.DownloaderStats': 850,
    'scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware': 900,
    # Downloader side
}

DOWNLOADER_STATS = True

DUPEFILTER_CLASS = 'scrapy.dupefilters.RFPDupeFilter'

EDITOR = 'vi'
# Windows override. The '%s' placeholder is left unfilled here; presumably the
# consumer substitutes the Python executable - confirm against the code that
# reads EDITOR.
if sys.platform == 'win32':
    EDITOR = '%s -m idlelib.idle'

EXTENSIONS = {}

EXTENSIONS_BASE = {
    'scrapy.extensions.corestats.CoreStats': 0,
    'scrapy.extensions.telnet.TelnetConsole': 0,
    'scrapy.extensions.memusage.MemoryUsage': 0,
    'scrapy.extensions.memdebug.MemoryDebugger': 0,
    'scrapy.extensions.closespider.CloseSpider': 0,
    'scrapy.extensions.feedexport.FeedExporter': 0,
    'scrapy.extensions.logstats.LogStats': 0,
    'scrapy.extensions.spiderstate.SpiderState': 0,
    'scrapy.extensions.throttle.AutoThrottle': 0,
}

FEED_TEMPDIR = None
FEEDS = {}
FEED_URI_PARAMS = None  # a function to extend uri arguments
FEED_STORE_EMPTY = False
FEED_EXPORT_ENCODING = None
FEED_EXPORT_FIELDS = None
FEED_STORAGES = {}
FEED_STORAGES_BASE = {
    '': 'scrapy.extensions.feedexport.FileFeedStorage',
    'file': 'scrapy.extensions.feedexport.FileFeedStorage',
    'ftp': 'scrapy.extensions.feedexport.FTPFeedStorage',
    'gs': 'scrapy.extensions.feedexport.GCSFeedStorage',
    's3': 'scrapy.extensions.feedexport.S3FeedStorage',
    'stdout': 'scrapy.extensions.feedexport.StdoutFeedStorage',
}
FEED_EXPORT_BATCH_ITEM_COUNT = 0
FEED_EXPORTERS = {}
FEED_EXPORTERS_BASE = {
    'json': 'scrapy.exporters.JsonItemExporter',
    'jsonlines': 'scrapy.exporters.JsonLinesItemExporter',
    'jl': 'scrapy.exporters.JsonLinesItemExporter',
    'csv': 'scrapy.exporters.CsvItemExporter',
    'xml': 'scrapy.exporters.XmlItemExporter',
    'marshal': 'scrapy.exporters.MarshalItemExporter',
    'pickle': 'scrapy.exporters.PickleItemExporter',
}
FEED_EXPORT_INDENT = 0

FEED_STORAGE_FTP_ACTIVE = False
FEED_STORAGE_GCS_ACL = ''
FEED_STORAGE_S3_ACL = ''

FILES_STORE_S3_ACL = 'private'
FILES_STORE_GCS_ACL = ''

FTP_USER = 'anonymous'
FTP_PASSWORD = 'guest'
FTP_PASSIVE_MODE = True

GCS_PROJECT_ID = None

HTTPCACHE_ENABLED = False
HTTPCACHE_DIR = 'httpcache'
HTTPCACHE_IGNORE_MISSING = False
HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_ALWAYS_STORE = False
HTTPCACHE_IGNORE_HTTP_CODES = []
HTTPCACHE_IGNORE_SCHEMES = ['file']
HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS = []
HTTPCACHE_DBM_MODULE = 'dbm'
HTTPCACHE_POLICY = 'scrapy.extensions.httpcache.DummyPolicy'
HTTPCACHE_GZIP = False

HTTPPROXY_ENABLED = True
HTTPPROXY_AUTH_ENCODING = 'latin-1'

IMAGES_STORE_S3_ACL = 'private'
IMAGES_STORE_GCS_ACL = ''

ITEM_PROCESSOR = 'scrapy.pipelines.ItemPipelineManager'

ITEM_PIPELINES = {}
ITEM_PIPELINES_BASE = {}

LOG_ENABLED = True
LOG_ENCODING = 'utf-8'
LOG_FORMATTER = 'scrapy.logformatter.LogFormatter'
LOG_FORMAT = '%(asctime)s [%(name)s] %(levelname)s: %(message)s'
LOG_DATEFORMAT = '%Y-%m-%d %H:%M:%S'
LOG_STDOUT = False
LOG_LEVEL = 'DEBUG'
LOG_FILE = None
LOG_SHORT_NAMES = False

# NOTE(review): placed between the LOG_* and LOGSTATS_* groups, i.e. out of
# the alphabetical order requested in the module docstring.
SCHEDULER_DEBUG = False

LOGSTATS_INTERVAL = 60.0

MAIL_HOST = 'localhost'
MAIL_PORT = 25
MAIL_FROM = 'scrapy@localhost'
MAIL_PASS = None
MAIL_USER = None

MEMDEBUG_ENABLED = False        # enable memory debugging
MEMDEBUG_NOTIFY = []            # send memory debugging report by mail at engine shutdown

MEMUSAGE_CHECK_INTERVAL_SECONDS = 60.0
MEMUSAGE_ENABLED = True
MEMUSAGE_LIMIT_MB = 0
MEMUSAGE_NOTIFY_MAIL = []
MEMUSAGE_WARNING_MB = 0

METAREFRESH_ENABLED = True
METAREFRESH_IGNORE_TAGS = []
METAREFRESH_MAXDELAY = 100

NEWSPIDER_MODULE = ''

RANDOMIZE_DOWNLOAD_DELAY = True

REACTOR_THREADPOOL_MAXSIZE = 10

REDIRECT_ENABLED = True
REDIRECT_MAX_TIMES = 20  # uses Firefox default setting
REDIRECT_PRIORITY_ADJUST = +2

REFERER_ENABLED = True
REFERRER_POLICY = 'scrapy.spidermiddlewares.referer.DefaultReferrerPolicy'

RETRY_ENABLED = True
RETRY_TIMES = 2  # initial response + 2 retries = 3 requests
RETRY_HTTP_CODES = [500, 502, 503, 504, 522, 524, 408, 429]
RETRY_PRIORITY_ADJUST = -1

ROBOTSTXT_OBEY = False
ROBOTSTXT_PARSER = 'scrapy.robotstxt.ProtegoRobotParser'
ROBOTSTXT_USER_AGENT = None

SCHEDULER = 'scrapy.core.scheduler.Scheduler'
SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue'
SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
SCHEDULER_PRIORITY_QUEUE = 'scrapy.pqueues.ScrapyPriorityQueue'

SCRAPER_SLOT_MAX_ACTIVE_SIZE = 5000000

SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'
SPIDER_LOADER_WARN_ONLY = False

SPIDER_MIDDLEWARES = {}

SPIDER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.spidermiddlewares.httperror.HttpErrorMiddleware': 50,
    'scrapy.spidermiddlewares.offsite.OffsiteMiddleware': 500,
    'scrapy.spidermiddlewares.referer.RefererMiddleware': 700,
    'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware': 800,
    'scrapy.spidermiddlewares.depth.DepthMiddleware': 900,
    # Spider side
}

SPIDER_MODULES = []

STATS_CLASS = 'scrapy.statscollectors.MemoryStatsCollector'
STATS_DUMP = True

STATSMAILER_RCPTS = []

# Absolute path of the ``templates`` directory that sits one level above the
# directory containing this file.
TEMPLATES_DIR = abspath(join(dirname(__file__), '..', 'templates'))

URLLENGTH_LIMIT = 2083

# Evaluated once at import time from the installed scrapy package's version.
USER_AGENT = f'Scrapy/{import_module("scrapy").__version__} (+https://scrapy.org)'

# NOTE(review): an int rather than a bool; ``BaseSettings.getbool`` treats 1 as True.
TELNETCONSOLE_ENABLED = 1
TELNETCONSOLE_PORT = [6023, 6073]  # presumably a [low, high] port range - confirm
TELNETCONSOLE_HOST = '127.0.0.1'
TELNETCONSOLE_USERNAME = 'scrapy'
TELNETCONSOLE_PASSWORD = None

TWISTED_REACTOR = None

SPIDER_CONTRACTS = {}
SPIDER_CONTRACTS_BASE = {
    'scrapy.contracts.default.UrlContract': 1,
    'scrapy.contracts.default.CallbackKeywordArgumentsContract': 1,
    'scrapy.contracts.default.ReturnsContract': 2,
    'scrapy.contracts.default.ScrapesContract': 3,
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue