HubobelsPython/venv/lib/python3.9/site-packages/scrapy/cmdline.py
2022-01-02 21:50:48 +01:00

173 lines
5.2 KiB
Python

import sys
import os
import optparse
import cProfile
import inspect
import pkg_resources
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.commands import ScrapyCommand
from scrapy.exceptions import UsageError
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import inside_project, get_project_settings
from scrapy.utils.python import garbage_collect
def _iter_command_classes(module_name):
# TODO: add `name` attribute to commands and and merge this function with
# scrapy.utils.spider.iter_spider_classes
for module in walk_modules(module_name):
for obj in vars(module).values():
if (
inspect.isclass(obj)
and issubclass(obj, ScrapyCommand)
and obj.__module__ == module.__name__
and not obj == ScrapyCommand
):
yield obj
def _get_commands_from_module(module, inproject):
d = {}
for cmd in _iter_command_classes(module):
if inproject or not cmd.requires_project:
cmdname = cmd.__module__.split('.')[-1]
d[cmdname] = cmd()
return d
def _get_commands_from_entry_points(inproject, group='scrapy.commands'):
cmds = {}
for entry_point in pkg_resources.iter_entry_points(group):
obj = entry_point.load()
if inspect.isclass(obj):
cmds[entry_point.name] = obj()
else:
raise Exception(f"Invalid entry point {entry_point.name}")
return cmds
def _get_commands_dict(settings, inproject):
cmds = _get_commands_from_module('scrapy.commands', inproject)
cmds.update(_get_commands_from_entry_points(inproject))
cmds_module = settings['COMMANDS_MODULE']
if cmds_module:
cmds.update(_get_commands_from_module(cmds_module, inproject))
return cmds
def _pop_command_name(argv):
i = 0
for arg in argv[1:]:
if not arg.startswith('-'):
del argv[i]
return arg
i += 1
def _print_header(settings, inproject):
version = scrapy.__version__
if inproject:
print(f"Scrapy {version} - project: {settings['BOT_NAME']}\n")
else:
print(f"Scrapy {version} - no active project\n")
def _print_commands(settings, inproject):
_print_header(settings, inproject)
print("Usage:")
print(" scrapy <command> [options] [args]\n")
print("Available commands:")
cmds = _get_commands_dict(settings, inproject)
for cmdname, cmdclass in sorted(cmds.items()):
print(f" {cmdname:<13} {cmdclass.short_desc()}")
if not inproject:
print()
print(" [ more ] More commands available when run from project directory")
print()
print('Use "scrapy <command> -h" to see more info about a command')
def _print_unknown_command(settings, cmdname, inproject):
_print_header(settings, inproject)
print(f"Unknown command: {cmdname}\n")
print('Use "scrapy" to see available commands')
def _run_print_help(parser, func, *a, **kw):
try:
func(*a, **kw)
except UsageError as e:
if str(e):
parser.error(str(e))
if e.print_help:
parser.print_help()
sys.exit(2)
def execute(argv=None, settings=None):
if argv is None:
argv = sys.argv
if settings is None:
settings = get_project_settings()
# set EDITOR from environment if available
try:
editor = os.environ['EDITOR']
except KeyError:
pass
else:
settings['EDITOR'] = editor
inproject = inside_project()
cmds = _get_commands_dict(settings, inproject)
cmdname = _pop_command_name(argv)
parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(),
conflict_handler='resolve')
if not cmdname:
_print_commands(settings, inproject)
sys.exit(0)
elif cmdname not in cmds:
_print_unknown_command(settings, cmdname, inproject)
sys.exit(2)
cmd = cmds[cmdname]
parser.usage = f"scrapy {cmdname} {cmd.syntax()}"
parser.description = cmd.long_desc()
settings.setdict(cmd.default_settings, priority='command')
cmd.settings = settings
cmd.add_options(parser)
opts, args = parser.parse_args(args=argv[1:])
_run_print_help(parser, cmd.process_options, args, opts)
cmd.crawler_process = CrawlerProcess(settings)
_run_print_help(parser, _run_command, cmd, args, opts)
sys.exit(cmd.exitcode)
def _run_command(cmd, args, opts):
if opts.profile:
_run_command_profiled(cmd, args, opts)
else:
cmd.run(args, opts)
def _run_command_profiled(cmd, args, opts):
if opts.profile:
sys.stderr.write(f"scrapy: writing cProfile stats to {opts.profile!r}\n")
loc = locals()
p = cProfile.Profile()
p.runctx('cmd.run(args, opts)', globals(), loc)
if opts.profile:
p.dump_stats(opts.profile)
if __name__ == '__main__':
try:
execute()
finally:
# Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect() on exit:
# http://doc.pypy.org/en/latest/cpython_differences.html
# ?highlight=gc.collect#differences-related-to-garbage-collection-strategies
garbage_collect()