Output of the new DB entries
This commit is contained in:
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions
195  venv/lib/python3.9/site-packages/scrapy/utils/conf.py  Normal file
@@ -0,0 +1,195 @@
import numbers
import os
import sys
import warnings
from configparser import ConfigParser
from operator import itemgetter

from scrapy.exceptions import ScrapyDeprecationWarning, UsageError

from scrapy.settings import BaseSettings
from scrapy.utils.deprecate import update_classpath
from scrapy.utils.python import without_none_values

def build_component_list(compdict, custom=None, convert=update_classpath):
    """Compose a component list from a { class: order } dictionary."""

    def _check_components(complist):
        if len({convert(c) for c in complist}) != len(complist):
            raise ValueError(f'Some paths in {complist!r} convert to the same object, '
                             'please update your settings')

    def _map_keys(compdict):
        if isinstance(compdict, BaseSettings):
            compbs = BaseSettings()
            for k, v in compdict.items():
                prio = compdict.getpriority(k)
                if compbs.getpriority(convert(k)) == prio:
                    raise ValueError(f'Some paths in {list(compdict.keys())!r} '
                                     'convert to the same '
                                     'object, please update your settings'
                                     )
                else:
                    compbs.set(convert(k), v, priority=prio)
            return compbs
        else:
            _check_components(compdict)
            return {convert(k): v for k, v in compdict.items()}

    def _validate_values(compdict):
        """Fail if a value in the components dict is not a real number or None."""
        for name, value in compdict.items():
            if value is not None and not isinstance(value, numbers.Real):
                raise ValueError(f'Invalid value {value} for component {name}, '
                                 'please provide a real number or None instead')

    # BEGIN Backward compatibility for old (base, custom) call signature
    if isinstance(custom, (list, tuple)):
        _check_components(custom)
        return type(custom)(convert(c) for c in custom)

    if custom is not None:
        compdict.update(custom)
    # END Backward compatibility

    _validate_values(compdict)
    compdict = without_none_values(_map_keys(compdict))
    return [k for k, v in sorted(compdict.items(), key=itemgetter(1))]

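# --- Editor's sketch (not part of the committed Scrapy file) -----------------
# Illustrates how build_component_list() orders components: entries are sorted
# by their numeric value and None values are dropped. The module paths below
# are invented purely for the example.
def _demo_build_component_list():
    components = {
        'myproject.middlewares.SlowMiddleware': 543,
        'myproject.middlewares.FastMiddleware': 100,
        'myproject.middlewares.DisabledMiddleware': None,
    }
    # Returns ['myproject.middlewares.FastMiddleware',
    #          'myproject.middlewares.SlowMiddleware']
    return build_component_list(components)
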
def arglist_to_dict(arglist):
    """Convert a list of arguments like ['arg1=val1', 'arg2=val2', ...] to a
    dict
    """
    return dict(x.split('=', 1) for x in arglist)

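# --- Editor's sketch (not part of the committed Scrapy file) -----------------
# arglist_to_dict() turns repeated key=value arguments (e.g. from the -a spider
# option) into a dict; splitting on the first '=' keeps any later '=' characters
# inside the value.
def _demo_arglist_to_dict():
    # Returns {'category': 'books', 'url': 'https://example.com/?page=1'}
    return arglist_to_dict(['category=books', 'url=https://example.com/?page=1'])
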
def closest_scrapy_cfg(path='.', prevpath=None):
    """Return the path to the closest scrapy.cfg file by traversing the current
    directory and its parents
    """
    if path == prevpath:
        return ''
    path = os.path.abspath(path)
    cfgfile = os.path.join(path, 'scrapy.cfg')
    if os.path.exists(cfgfile):
        return cfgfile
    return closest_scrapy_cfg(os.path.dirname(path), path)

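# --- Editor's sketch (not part of the committed Scrapy file) -----------------
# closest_scrapy_cfg() walks upward from the given directory until it finds a
# scrapy.cfg or reaches the filesystem root, where dirname(path) == path and the
# path == prevpath guard ends the recursion with ''.
def _demo_closest_scrapy_cfg():
    cfg_path = closest_scrapy_cfg()           # search from the current directory
    return cfg_path or 'no scrapy.cfg found'  # '' means no project was detected
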
def init_env(project='default', set_syspath=True):
    """Initialize environment to use command-line tool from inside a project
    dir. This sets the Scrapy settings module and modifies the Python path to
    be able to locate the project module.
    """
    cfg = get_config()
    if cfg.has_option('settings', project):
        os.environ['SCRAPY_SETTINGS_MODULE'] = cfg.get('settings', project)
    closest = closest_scrapy_cfg()
    if closest:
        projdir = os.path.dirname(closest)
        if set_syspath and projdir not in sys.path:
            sys.path.append(projdir)

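# --- Editor's sketch (not part of the committed Scrapy file) -----------------
# init_env() works purely through side effects: it exports SCRAPY_SETTINGS_MODULE
# from the [settings] section of scrapy.cfg and appends the project directory to
# sys.path so the project package becomes importable.
def _demo_init_env():
    init_env('default')
    return os.environ.get('SCRAPY_SETTINGS_MODULE')  # e.g. 'myproject.settings'
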
def get_config(use_closest=True):
    """Get Scrapy config file as a ConfigParser"""
    sources = get_sources(use_closest)
    cfg = ConfigParser()
    cfg.read(sources)
    return cfg

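# --- Editor's sketch (not part of the committed Scrapy file) -----------------
# get_config() merges every location returned by get_sources() into a single
# ConfigParser; sources read later (the project-local scrapy.cfg found by
# closest_scrapy_cfg) override the system-wide ones read earlier.
def _demo_get_config():
    cfg = get_config()
    if cfg.has_section('settings'):
        return dict(cfg.items('settings'))  # e.g. {'default': 'myproject.settings'}
    return {}
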
def get_sources(use_closest=True):
    xdg_config_home = os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
    sources = [
        '/etc/scrapy.cfg',
        r'c:\scrapy\scrapy.cfg',
        xdg_config_home + '/scrapy.cfg',
        os.path.expanduser('~/.scrapy.cfg'),
    ]
    if use_closest:
        sources.append(closest_scrapy_cfg())
    return sources

def feed_complete_default_values_from_settings(feed, settings):
    out = feed.copy()
    out.setdefault("batch_item_count", settings.getint('FEED_EXPORT_BATCH_ITEM_COUNT'))
    out.setdefault("encoding", settings["FEED_EXPORT_ENCODING"])
    out.setdefault("fields", settings.getlist("FEED_EXPORT_FIELDS") or None)
    out.setdefault("store_empty", settings.getbool("FEED_STORE_EMPTY"))
    out.setdefault("uri_params", settings["FEED_URI_PARAMS"])
    out.setdefault("item_export_kwargs", dict())
    if settings["FEED_EXPORT_INDENT"] is None:
        out.setdefault("indent", None)
    else:
        out.setdefault("indent", settings.getint("FEED_EXPORT_INDENT"))
    return out

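# --- Editor's sketch (not part of the committed Scrapy file) -----------------
# feed_complete_default_values_from_settings() fills in any option missing from
# an individual FEEDS entry with the corresponding global FEED_EXPORT_* setting;
# keys set explicitly in the feed dict win, since setdefault never overwrites.
def _demo_feed_defaults():
    from scrapy.settings import Settings
    settings = Settings({'FEED_EXPORT_ENCODING': 'utf-8'})
    feed = {'format': 'json', 'encoding': 'latin-1'}
    completed = feed_complete_default_values_from_settings(feed, settings)
    # 'encoding' stays 'latin-1', while keys such as 'store_empty' and
    # 'uri_params' are copied from the settings defaults.
    return completed
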
def feed_process_params_from_cli(settings, output, output_format=None,
                                 overwrite_output=None):
    """
    Receives feed export params (from the 'crawl' or 'runspider' commands),
    checks for inconsistencies in their quantities and returns a dictionary
    suitable to be used as the FEEDS setting.
    """
    valid_output_formats = without_none_values(
        settings.getwithbase('FEED_EXPORTERS')
    ).keys()

    def check_valid_format(output_format):
        if output_format not in valid_output_formats:
            raise UsageError(
                f"Unrecognized output format '{output_format}'. "
                f"Set a supported one ({tuple(valid_output_formats)}) "
                "after a colon at the end of the output URI (i.e. -o/-O "
                "<URI>:<FORMAT>) or as a file extension."
            )

    overwrite = False
    if overwrite_output:
        if output:
            raise UsageError(
                "Please use only one of -o/--output and -O/--overwrite-output"
            )
        output = overwrite_output
        overwrite = True

    if output_format:
        if len(output) == 1:
            check_valid_format(output_format)
            message = (
                'The -t command line option is deprecated in favor of '
                'specifying the output format within the output URI. See the '
                'documentation of the -o and -O options for more information.'
            )
            warnings.warn(message, ScrapyDeprecationWarning, stacklevel=2)
            return {output[0]: {'format': output_format}}
        else:
            raise UsageError(
                'The -t command-line option cannot be used if multiple output '
                'URIs are specified'
            )

    result = {}
    for element in output:
        try:
            feed_uri, feed_format = element.rsplit(':', 1)
        except ValueError:
            feed_uri = element
            feed_format = os.path.splitext(element)[1].replace('.', '')
        else:
            if feed_uri == '-':
                feed_uri = 'stdout:'
        check_valid_format(feed_format)
        result[feed_uri] = {'format': feed_format}
        if overwrite:
            result[feed_uri]['overwrite'] = True

    # FEEDS setting should take precedence over the matching CLI options
    result.update(settings.getdict('FEEDS'))

    return result

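# --- Editor's sketch (not part of the committed Scrapy file) -----------------
# feed_process_params_from_cli() translates -o/-O command-line arguments into a
# FEEDS-style dict: the format is taken from the part after the last ':' in the
# URI, or from the file extension as a fallback.
def _demo_feed_process_params_from_cli():
    from scrapy.settings import Settings
    settings = Settings()
    # -o items.json             -> {'items.json': {'format': 'json'}}
    # -O export.csv (overwrite) -> {'export.csv': {'format': 'csv', 'overwrite': True}}
    plain = feed_process_params_from_cli(settings, ['items.json'])
    overwritten = feed_process_params_from_cli(settings, [], overwrite_output=['export.csv'])
    return plain, overwritten
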