Output of the new DB entries

hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions

View file

@@ -0,0 +1,137 @@
"""
Base class for Scrapy commands
"""
import os
from optparse import OptionGroup

from twisted.python import failure

from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
from scrapy.exceptions import UsageError


class ScrapyCommand:

    requires_project = False
    crawler_process = None

    # default settings to be used for this command instead of global defaults
    default_settings = {}

    exitcode = 0

    def __init__(self):
        self.settings = None  # set in scrapy.cmdline

    def set_crawler(self, crawler):
        if hasattr(self, '_crawler'):
            raise RuntimeError("crawler already set")
        self._crawler = crawler

    def syntax(self):
        """
        Command syntax (preferably one-line). Do not include command name.
        """
        return ""

    def short_desc(self):
        """
        A short description of the command
        """
        return ""

    def long_desc(self):
        """A long description of the command. Return short description when not
        available. It cannot contain newlines, since contents will be formatted
        by optparse which removes newlines and wraps text.
        """
        return self.short_desc()

    def help(self):
        """An extensive help for the command. It will be shown when using the
        "help" command. It can contain newlines, since no post-formatting will
        be applied to its contents.
        """
        return self.long_desc()

    def add_options(self, parser):
        """
        Populate the option parser with the options available for this command
        """
        group = OptionGroup(parser, "Global Options")
        group.add_option("--logfile", metavar="FILE",
                         help="log file. if omitted stderr will be used")
        group.add_option("-L", "--loglevel", metavar="LEVEL", default=None,
                         help=f"log level (default: {self.settings['LOG_LEVEL']})")
        group.add_option("--nolog", action="store_true",
                         help="disable logging completely")
        group.add_option("--profile", metavar="FILE", default=None,
                         help="write python cProfile stats to FILE")
        group.add_option("--pidfile", metavar="FILE",
                         help="write process ID to FILE")
        group.add_option("-s", "--set", action="append", default=[], metavar="NAME=VALUE",
                         help="set/override setting (may be repeated)")
        group.add_option("--pdb", action="store_true", help="enable pdb on failure")

        parser.add_option_group(group)

    def process_options(self, args, opts):
        try:
            self.settings.setdict(arglist_to_dict(opts.set),
                                  priority='cmdline')
        except ValueError:
            raise UsageError("Invalid -s value, use -s NAME=VALUE", print_help=False)

        if opts.logfile:
            self.settings.set('LOG_ENABLED', True, priority='cmdline')
            self.settings.set('LOG_FILE', opts.logfile, priority='cmdline')

        if opts.loglevel:
            self.settings.set('LOG_ENABLED', True, priority='cmdline')
            self.settings.set('LOG_LEVEL', opts.loglevel, priority='cmdline')

        if opts.nolog:
            self.settings.set('LOG_ENABLED', False, priority='cmdline')

        if opts.pidfile:
            with open(opts.pidfile, "w") as f:
                f.write(str(os.getpid()) + os.linesep)

        if opts.pdb:
            failure.startDebugMode()

    def run(self, args, opts):
        """
        Entry point for running commands
        """
        raise NotImplementedError


class BaseRunSpiderCommand(ScrapyCommand):
    """
    Common class used to share functionality between the crawl, parse and runspider commands
    """
    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE",
                          help="set spider argument (may be repeated)")
        parser.add_option("-o", "--output", metavar="FILE", action="append",
                          help="append scraped items to the end of FILE (use - for stdout)")
        parser.add_option("-O", "--overwrite-output", metavar="FILE", action="append",
                          help="dump scraped items into FILE, overwriting any existing file")
        parser.add_option("-t", "--output-format", metavar="FORMAT",
                          help="format to use for dumping items")

    def process_options(self, args, opts):
        ScrapyCommand.process_options(self, args, opts)
        try:
            opts.spargs = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
        if opts.output or opts.overwrite_output:
            feeds = feed_process_params_from_cli(
                self.settings,
                opts.output,
                opts.output_format,
                opts.overwrite_output,
            )
            self.settings.set('FEEDS', feeds, priority='cmdline')
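
A quick illustration of how this base class is meant to be used (not part of this commit): a custom command only needs to subclass ScrapyCommand, describe itself, and implement run(). The module name below is a placeholder; such a module would typically live in a package registered through the COMMANDS_MODULE setting, and Scrapy derives the command name from the module name, so botname.py would be invoked as "scrapy botname".

# botname.py -- hypothetical custom command, shown only as a sketch
from scrapy.commands import ScrapyCommand


class Command(ScrapyCommand):

    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options]"

    def short_desc(self):
        return "Print the configured bot name"

    def run(self, args, opts):
        # self.settings is filled in by scrapy.cmdline before run() is called
        print(self.settings.get('BOT_NAME'))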

View file

@@ -0,0 +1,58 @@
import sys
import time
import subprocess
from urllib.parse import urlencode

import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.linkextractors import LinkExtractor


class Command(ScrapyCommand):

    default_settings = {
        'LOG_LEVEL': 'INFO',
        'LOGSTATS_INTERVAL': 1,
        'CLOSESPIDER_TIMEOUT': 10,
    }

    def short_desc(self):
        return "Run quick benchmark test"

    def run(self, args, opts):
        with _BenchServer():
            self.crawler_process.crawl(_BenchSpider, total=100000)
            self.crawler_process.start()


class _BenchServer:

    def __enter__(self):
        from scrapy.utils.test import get_testenv
        pargs = [sys.executable, '-u', '-m', 'scrapy.utils.benchserver']
        self.proc = subprocess.Popen(pargs, stdout=subprocess.PIPE,
                                     env=get_testenv())
        self.proc.stdout.readline()

    def __exit__(self, exc_type, exc_value, traceback):
        self.proc.kill()
        self.proc.wait()
        time.sleep(0.2)


class _BenchSpider(scrapy.Spider):
    """A spider that follows all links"""
    name = 'follow'
    total = 10000
    show = 20
    baseurl = 'http://localhost:8998'
    link_extractor = LinkExtractor()

    def start_requests(self):
        qargs = {'total': self.total, 'show': self.show}
        url = f'{self.baseurl}?{urlencode(qargs, doseq=1)}'
        return [scrapy.Request(url, dont_filter=True)]

    def parse(self, response):
        for link in self.link_extractor.extract_links(response):
            yield scrapy.Request(link.url, callback=self.parse)

View file

@@ -0,0 +1,96 @@
import time
from collections import defaultdict
from unittest import TextTestRunner, TextTestResult as _TextTestResult

from scrapy.commands import ScrapyCommand
from scrapy.contracts import ContractsManager
from scrapy.utils.misc import load_object, set_environ
from scrapy.utils.conf import build_component_list


class TextTestResult(_TextTestResult):
    def printSummary(self, start, stop):
        write = self.stream.write
        writeln = self.stream.writeln

        run = self.testsRun
        plural = "s" if run != 1 else ""

        writeln(self.separator2)
        writeln(f"Ran {run} contract{plural} in {stop - start:.3f}s")
        writeln()

        infos = []
        if not self.wasSuccessful():
            write("FAILED")
            failed, errored = map(len, (self.failures, self.errors))
            if failed:
                infos.append(f"failures={failed}")
            if errored:
                infos.append(f"errors={errored}")
        else:
            write("OK")

        if infos:
            writeln(f" ({', '.join(infos)})")
        else:
            write("\n")


class Command(ScrapyCommand):
    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check spider contracts"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
                          help="only list contracts, without checking them")
        parser.add_option("-v", "--verbose", dest="verbose", default=False, action='store_true',
                          help="print contract tests for all spiders")

    def run(self, args, opts):
        # load contracts
        contracts = build_component_list(self.settings.getwithbase('SPIDER_CONTRACTS'))
        conman = ContractsManager(load_object(c) for c in contracts)
        runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
        result = TextTestResult(runner.stream, runner.descriptions, runner.verbosity)

        # contract requests
        contract_reqs = defaultdict(list)

        spider_loader = self.crawler_process.spider_loader

        with set_environ(SCRAPY_CHECK='true'):
            for spidername in args or spider_loader.list():
                spidercls = spider_loader.load(spidername)
                spidercls.start_requests = lambda s: conman.from_spider(s, result)

                tested_methods = conman.tested_methods_from_spidercls(spidercls)
                if opts.list:
                    for method in tested_methods:
                        contract_reqs[spidercls.name].append(method)
                elif tested_methods:
                    self.crawler_process.crawl(spidercls)

            # start checks
            if opts.list:
                for spider, methods in sorted(contract_reqs.items()):
                    if not methods and not opts.verbose:
                        continue
                    print(spider)
                    for method in sorted(methods):
                        print(f' * {method}')
            else:
                start = time.time()
                self.crawler_process.start()
                stop = time.time()

                result.printErrors()
                result.printSummary(start, stop)
                self.exitcode = int(not result.wasSuccessful())
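
For orientation, the contracts this command evaluates are declared as annotations in a spider callback's docstring and resolved by ContractsManager. A hedged sketch of such a callback follows; the URL, method and field names are placeholders and not part of this commit.

# Illustrative spider callback: `scrapy check` reads the @url/@returns/@scrapes
# annotations from the docstring and runs the method against the fetched page.
def parse_item(self, response):
    """
    @url http://www.example.com/some-page
    @returns items 1 1
    @scrapes title
    """
    yield {'title': response.css('title::text').get()}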

View file

@@ -0,0 +1,33 @@
from scrapy.commands import BaseRunSpiderCommand
from scrapy.exceptions import UsageError


class Command(BaseRunSpiderCommand):

    requires_project = True

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Run a spider"

    def run(self, args, opts):
        if len(args) < 1:
            raise UsageError()
        elif len(args) > 1:
            raise UsageError("running 'scrapy crawl' with more than one spider is no longer supported")
        spname = args[0]

        crawl_defer = self.crawler_process.crawl(spname, **opts.spargs)

        if getattr(crawl_defer, 'result', None) is not None and issubclass(crawl_defer.result.type, Exception):
            self.exitcode = 1
        else:
            self.crawler_process.start()

            if (
                self.crawler_process.bootstrap_failed
                or hasattr(self.crawler_process, 'has_exception') and self.crawler_process.has_exception
            ):
                self.exitcode = 1

View file

@@ -0,0 +1,39 @@
import sys
import os

from scrapy.commands import ScrapyCommand
from scrapy.exceptions import UsageError


class Command(ScrapyCommand):

    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "<spider>"

    def short_desc(self):
        return "Edit spider"

    def long_desc(self):
        return ("Edit a spider using the editor defined in the EDITOR environment"
                " variable or else the EDITOR setting")

    def _err(self, msg):
        sys.stderr.write(msg + os.linesep)
        self.exitcode = 1

    def run(self, args, opts):
        if len(args) != 1:
            raise UsageError()

        editor = self.settings['EDITOR']
        try:
            spidercls = self.crawler_process.spider_loader.load(args[0])
        except KeyError:
            return self._err(f"Spider not found: {args[0]}")

        sfile = sys.modules[spidercls.__module__].__file__
        sfile = sfile.replace('.pyc', '.py')
        self.exitcode = os.system(f'{editor} "{sfile}"')

View file

@@ -0,0 +1,70 @@
import sys

from w3lib.url import is_url

from scrapy.commands import ScrapyCommand
from scrapy.http import Request
from scrapy.exceptions import UsageError
from scrapy.utils.datatypes import SequenceExclude
from scrapy.utils.spider import spidercls_for_request, DefaultSpider


class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "[options] <url>"

    def short_desc(self):
        return "Fetch a URL using the Scrapy downloader"

    def long_desc(self):
        return (
            "Fetch a URL using the Scrapy downloader and print its content"
            " to stdout. You may want to use --nolog to disable logging"
        )

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("--spider", dest="spider", help="use this spider")
        parser.add_option("--headers", dest="headers", action="store_true",
                          help="print response HTTP headers instead of body")
        parser.add_option("--no-redirect", dest="no_redirect", action="store_true", default=False,
                          help="do not handle HTTP 3xx status codes and print response as-is")

    def _print_headers(self, headers, prefix):
        for key, values in headers.items():
            for value in values:
                self._print_bytes(prefix + b' ' + key + b': ' + value)

    def _print_response(self, response, opts):
        if opts.headers:
            self._print_headers(response.request.headers, b'>')
            print('>')
            self._print_headers(response.headers, b'<')
        else:
            self._print_bytes(response.body)

    def _print_bytes(self, bytes_):
        sys.stdout.buffer.write(bytes_ + b'\n')

    def run(self, args, opts):
        if len(args) != 1 or not is_url(args[0]):
            raise UsageError()
        request = Request(args[0], callback=self._print_response,
                          cb_kwargs={"opts": opts}, dont_filter=True)
        # by default, let the framework handle redirects,
        # i.e. the command handles all status codes except 3xx
        if not opts.no_redirect:
            request.meta['handle_httpstatus_list'] = SequenceExclude(range(300, 400))
        else:
            request.meta['handle_httpstatus_all'] = True

        spidercls = DefaultSpider
        spider_loader = self.crawler_process.spider_loader
        if opts.spider:
            spidercls = spider_loader.load(opts.spider)
        else:
            spidercls = spidercls_for_request(spider_loader, request, spidercls)
        self.crawler_process.crawl(spidercls, start_requests=lambda: [request])
        self.crawler_process.start()

View file

@@ -0,0 +1,149 @@
import os
import shutil
import string
from importlib import import_module
from os.path import join, dirname, abspath, exists, splitext

import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.exceptions import UsageError


def sanitize_module_name(module_name):
    """Sanitize the given module name, by replacing dashes and dots
    with underscores and prefixing it with a letter if it doesn't start
    with one
    """
    module_name = module_name.replace('-', '_').replace('.', '_')
    if module_name[0] not in string.ascii_letters:
        module_name = "a" + module_name
    return module_name


class Command(ScrapyCommand):

    requires_project = False
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <name> <domain>"

    def short_desc(self):
        return "Generate new spider using pre-defined templates"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
                          help="List available templates")
        parser.add_option("-e", "--edit", dest="edit", action="store_true",
                          help="Edit spider after creating it")
        parser.add_option("-d", "--dump", dest="dump", metavar="TEMPLATE",
                          help="Dump template to standard output")
        parser.add_option("-t", "--template", dest="template", default="basic",
                          help="Uses a custom template.")
        parser.add_option("--force", dest="force", action="store_true",
                          help="If the spider already exists, overwrite it with the template")

    def run(self, args, opts):
        if opts.list:
            self._list_templates()
            return
        if opts.dump:
            template_file = self._find_template(opts.dump)
            if template_file:
                with open(template_file, "r") as f:
                    print(f.read())
            return
        if len(args) != 2:
            raise UsageError()

        name, domain = args[0:2]
        module = sanitize_module_name(name)

        if self.settings.get('BOT_NAME') == module:
            print("Cannot create a spider with the same name as your project")
            return

        if not opts.force and self._spider_exists(name):
            return

        template_file = self._find_template(opts.template)
        if template_file:
            self._genspider(module, name, domain, opts.template, template_file)
            if opts.edit:
                self.exitcode = os.system(f'scrapy edit "{name}"')

    def _genspider(self, module, name, domain, template_name, template_file):
        """Generate the spider module, based on the given template"""
        capitalized_module = ''.join(s.capitalize() for s in module.split('_'))
        tvars = {
            'project_name': self.settings.get('BOT_NAME'),
            'ProjectName': string_camelcase(self.settings.get('BOT_NAME')),
            'module': module,
            'name': name,
            'domain': domain,
            'classname': f'{capitalized_module}Spider'
        }
        if self.settings.get('NEWSPIDER_MODULE'):
            spiders_module = import_module(self.settings['NEWSPIDER_MODULE'])
            spiders_dir = abspath(dirname(spiders_module.__file__))
        else:
            spiders_module = None
            spiders_dir = "."
        spider_file = f"{join(spiders_dir, module)}.py"
        shutil.copyfile(template_file, spider_file)
        render_templatefile(spider_file, **tvars)
        print(f"Created spider {name!r} using template {template_name!r} ",
              end=('' if spiders_module else '\n'))
        if spiders_module:
            print(f"in module:\n {spiders_module.__name__}.{module}")

    def _find_template(self, template):
        template_file = join(self.templates_dir, f'{template}.tmpl')
        if exists(template_file):
            return template_file
        print(f"Unable to find template: {template}\n")
        print('Use "scrapy genspider --list" to see all available templates.')

    def _list_templates(self):
        print("Available templates:")
        for filename in sorted(os.listdir(self.templates_dir)):
            if filename.endswith('.tmpl'):
                print(f" {splitext(filename)[0]}")

    def _spider_exists(self, name):
        if not self.settings.get('NEWSPIDER_MODULE'):
            # if run as a standalone command and a file with the same name already exists
            if exists(name + ".py"):
                print(f"{abspath(name + '.py')} already exists")
                return True
            return False

        try:
            spidercls = self.crawler_process.spider_loader.load(name)
        except KeyError:
            pass
        else:
            # if a spider with the same name exists
            print(f"Spider {name!r} already exists in module:")
            print(f" {spidercls.__module__}")
            return True

        # a file with the same name exists in the target directory
        spiders_module = import_module(self.settings['NEWSPIDER_MODULE'])
        spiders_dir = dirname(spiders_module.__file__)
        spiders_dir_abs = abspath(spiders_dir)
        if exists(join(spiders_dir_abs, name + ".py")):
            print(f"{join(spiders_dir_abs, (name + '.py'))} already exists")
            return True

        return False

    @property
    def templates_dir(self):
        return join(
            self.settings['TEMPLATES_DIR'] or join(scrapy.__path__[0], 'templates'),
            'spiders'
        )

View file

@@ -0,0 +1,14 @@
from scrapy.commands import ScrapyCommand


class Command(ScrapyCommand):
    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def short_desc(self):
        return "List available spiders"

    def run(self, args, opts):
        for s in sorted(self.crawler_process.spider_loader.list()):
            print(s)

View file

@@ -0,0 +1,256 @@
import json
import logging

from itemadapter import is_item, ItemAdapter
from w3lib.url import is_url

from scrapy.commands import BaseRunSpiderCommand
from scrapy.http import Request
from scrapy.utils import display
from scrapy.utils.spider import iterate_spider_output, spidercls_for_request
from scrapy.exceptions import UsageError

logger = logging.getLogger(__name__)


class Command(BaseRunSpiderCommand):

    requires_project = True

    spider = None
    items = {}
    requests = {}

    first_response = None

    def syntax(self):
        return "[options] <url>"

    def short_desc(self):
        return "Parse URL (using its spider) and print the results"

    def add_options(self, parser):
        BaseRunSpiderCommand.add_options(self, parser)
        parser.add_option("--spider", dest="spider", default=None,
                          help="use this spider without looking for one")
        parser.add_option("--pipelines", action="store_true",
                          help="process items through pipelines")
        parser.add_option("--nolinks", dest="nolinks", action="store_true",
                          help="don't show links to follow (extracted requests)")
        parser.add_option("--noitems", dest="noitems", action="store_true",
                          help="don't show scraped items")
        parser.add_option("--nocolour", dest="nocolour", action="store_true",
                          help="avoid using pygments to colorize the output")
        parser.add_option("-r", "--rules", dest="rules", action="store_true",
                          help="use CrawlSpider rules to discover the callback")
        parser.add_option("-c", "--callback", dest="callback",
                          help="use this callback for parsing, instead of looking for a callback")
        parser.add_option("-m", "--meta", dest="meta",
                          help="inject extra meta into the Request, it must be a valid raw json string")
        parser.add_option("--cbkwargs", dest="cbkwargs",
                          help="inject extra callback kwargs into the Request, it must be a valid raw json string")
        parser.add_option("-d", "--depth", dest="depth", type="int", default=1,
                          help="maximum depth for parsing requests [default: %default]")
        parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                          help="print each depth level one by one")

    @property
    def max_level(self):
        max_items, max_requests = 0, 0
        if self.items:
            max_items = max(self.items)
        if self.requests:
            max_requests = max(self.requests)
        return max(max_items, max_requests)

    def add_items(self, lvl, new_items):
        old_items = self.items.get(lvl, [])
        self.items[lvl] = old_items + new_items

    def add_requests(self, lvl, new_reqs):
        old_reqs = self.requests.get(lvl, [])
        self.requests[lvl] = old_reqs + new_reqs

    def print_items(self, lvl=None, colour=True):
        if lvl is None:
            items = [item for lst in self.items.values() for item in lst]
        else:
            items = self.items.get(lvl, [])

        print("# Scraped Items ", "-" * 60)
        display.pprint([ItemAdapter(x).asdict() for x in items], colorize=colour)

    def print_requests(self, lvl=None, colour=True):
        if lvl is None:
            if self.requests:
                requests = self.requests[max(self.requests)]
            else:
                requests = []
        else:
            requests = self.requests.get(lvl, [])

        print("# Requests ", "-" * 65)
        display.pprint(requests, colorize=colour)

    def print_results(self, opts):
        colour = not opts.nocolour

        if opts.verbose:
            for level in range(1, self.max_level + 1):
                print(f'\n>>> DEPTH LEVEL: {level} <<<')
                if not opts.noitems:
                    self.print_items(level, colour)
                if not opts.nolinks:
                    self.print_requests(level, colour)
        else:
            print(f'\n>>> STATUS DEPTH LEVEL {self.max_level} <<<')
            if not opts.noitems:
                self.print_items(colour=colour)
            if not opts.nolinks:
                self.print_requests(colour=colour)

    def run_callback(self, response, callback, cb_kwargs=None):
        cb_kwargs = cb_kwargs or {}
        items, requests = [], []

        for x in iterate_spider_output(callback(response, **cb_kwargs)):
            if is_item(x):
                items.append(x)
            elif isinstance(x, Request):
                requests.append(x)
        return items, requests

    def get_callback_from_rules(self, spider, response):
        if getattr(spider, 'rules', None):
            for rule in spider.rules:
                if rule.link_extractor.matches(response.url):
                    return rule.callback or "parse"
        else:
            logger.error('No CrawlSpider rules found in spider %(spider)r, '
                         'please specify a callback to use for parsing',
                         {'spider': spider.name})

    def set_spidercls(self, url, opts):
        spider_loader = self.crawler_process.spider_loader
        if opts.spider:
            try:
                self.spidercls = spider_loader.load(opts.spider)
            except KeyError:
                logger.error('Unable to find spider: %(spider)s',
                             {'spider': opts.spider})
        else:
            self.spidercls = spidercls_for_request(spider_loader, Request(url))
            if not self.spidercls:
                logger.error('Unable to find spider for: %(url)s', {'url': url})

        def _start_requests(spider):
            yield self.prepare_request(spider, Request(url), opts)
        self.spidercls.start_requests = _start_requests

    def start_parsing(self, url, opts):
        self.crawler_process.crawl(self.spidercls, **opts.spargs)
        self.pcrawler = list(self.crawler_process.crawlers)[0]
        self.crawler_process.start()

        if not self.first_response:
            logger.error('No response downloaded for: %(url)s',
                         {'url': url})

    def prepare_request(self, spider, request, opts):
        def callback(response, **cb_kwargs):
            # memorize first response
            if not self.first_response:
                self.first_response = response

            # determine real callback
            cb = response.meta['_callback']
            if not cb:
                if opts.callback:
                    cb = opts.callback
                elif opts.rules and self.first_response == response:
                    cb = self.get_callback_from_rules(spider, response)

                    if not cb:
                        logger.error('Cannot find a rule that matches %(url)r in spider: %(spider)s',
                                     {'url': response.url, 'spider': spider.name})
                        return
                else:
                    cb = 'parse'

            if not callable(cb):
                cb_method = getattr(spider, cb, None)
                if callable(cb_method):
                    cb = cb_method
                else:
                    logger.error('Cannot find callback %(callback)r in spider: %(spider)s',
                                 {'callback': cb, 'spider': spider.name})
                    return

            # parse items and requests
            depth = response.meta['_depth']

            items, requests = self.run_callback(response, cb, cb_kwargs)
            if opts.pipelines:
                itemproc = self.pcrawler.engine.scraper.itemproc
                for item in items:
                    itemproc.process_item(item, spider)
            self.add_items(depth, items)
            self.add_requests(depth, requests)

            scraped_data = items if opts.output else []
            if depth < opts.depth:
                for req in requests:
                    req.meta['_depth'] = depth + 1
                    req.meta['_callback'] = req.callback
                    req.callback = callback
                scraped_data += requests

            return scraped_data

        # update request meta if any extra meta was passed through the --meta/-m opts.
        if opts.meta:
            request.meta.update(opts.meta)

        # update cb_kwargs if any extra values were passed through the --cbkwargs option.
        if opts.cbkwargs:
            request.cb_kwargs.update(opts.cbkwargs)

        request.meta['_depth'] = 1
        request.meta['_callback'] = request.callback
        request.callback = callback
        return request

    def process_options(self, args, opts):
        BaseRunSpiderCommand.process_options(self, args, opts)

        self.process_request_meta(opts)
        self.process_request_cb_kwargs(opts)

    def process_request_meta(self, opts):
        if opts.meta:
            try:
                opts.meta = json.loads(opts.meta)
            except ValueError:
                raise UsageError("Invalid -m/--meta value, pass a valid json string to -m or --meta. "
                                 "Example: --meta='{\"foo\" : \"bar\"}'", print_help=False)

    def process_request_cb_kwargs(self, opts):
        if opts.cbkwargs:
            try:
                opts.cbkwargs = json.loads(opts.cbkwargs)
            except ValueError:
                raise UsageError("Invalid --cbkwargs value, pass a valid json string to --cbkwargs. "
                                 "Example: --cbkwargs='{\"foo\" : \"bar\"}'", print_help=False)

    def run(self, args, opts):
        # parse arguments
        if not len(args) == 1 or not is_url(args[0]):
            raise UsageError()
        else:
            url = args[0]

        # prepare spidercls
        self.set_spidercls(url, opts)

        if self.spidercls and opts.depth > 0:
            self.start_parsing(url, opts)
            self.print_results(opts)

View file

@@ -0,0 +1,59 @@
import sys
import os
from importlib import import_module

from scrapy.utils.spider import iter_spider_classes
from scrapy.exceptions import UsageError
from scrapy.commands import BaseRunSpiderCommand


def _import_file(filepath):
    abspath = os.path.abspath(filepath)
    dirname, file = os.path.split(abspath)
    fname, fext = os.path.splitext(file)
    if fext not in ('.py', '.pyw'):
        raise ValueError(f"Not a Python source file: {abspath}")
    if dirname:
        sys.path = [dirname] + sys.path
    try:
        module = import_module(fname)
    finally:
        if dirname:
            sys.path.pop(0)
    return module


class Command(BaseRunSpiderCommand):

    requires_project = False
    default_settings = {'SPIDER_LOADER_WARN_ONLY': True}

    def syntax(self):
        return "[options] <spider_file>"

    def short_desc(self):
        return "Run a self-contained spider (without creating a project)"

    def long_desc(self):
        return "Run the spider defined in the given file"

    def run(self, args, opts):
        if len(args) != 1:
            raise UsageError()
        filename = args[0]
        if not os.path.exists(filename):
            raise UsageError(f"File not found: {filename}\n")
        try:
            module = _import_file(filename)
        except (ImportError, ValueError) as e:
            raise UsageError(f"Unable to load {filename!r}: {e}\n")
        spclasses = list(iter_spider_classes(module))
        if not spclasses:
            raise UsageError(f"No spider found in file: {filename}\n")
        spidercls = spclasses.pop()

        self.crawler_process.crawl(spidercls, **opts.spargs)
        self.crawler_process.start()

        if self.crawler_process.bootstrap_failed:
            self.exitcode = 1
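
As a reference point, the kind of self-contained spider file this command loads is sketched below; the filename, spider name and URL are placeholders, not part of this commit.

# quotes_spider.py -- run with: scrapy runspider quotes_spider.py
import scrapy


class QuotesSpider(scrapy.Spider):
    name = 'quotes'
    start_urls = ['http://quotes.toscrape.com/']

    def parse(self, response):
        # iter_spider_classes() above discovers this Spider subclass in the module
        for quote in response.css('div.quote'):
            yield {'text': quote.css('span.text::text').get()}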

View file

@@ -0,0 +1,47 @@
import json

from scrapy.commands import ScrapyCommand
from scrapy.settings import BaseSettings


class Command(ScrapyCommand):

    requires_project = False
    default_settings = {'LOG_ENABLED': False,
                        'SPIDER_LOADER_WARN_ONLY': True}

    def syntax(self):
        return "[options]"

    def short_desc(self):
        return "Get settings values"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("--get", dest="get", metavar="SETTING",
                          help="print raw setting value")
        parser.add_option("--getbool", dest="getbool", metavar="SETTING",
                          help="print setting value, interpreted as a boolean")
        parser.add_option("--getint", dest="getint", metavar="SETTING",
                          help="print setting value, interpreted as an integer")
        parser.add_option("--getfloat", dest="getfloat", metavar="SETTING",
                          help="print setting value, interpreted as a float")
        parser.add_option("--getlist", dest="getlist", metavar="SETTING",
                          help="print setting value, interpreted as a list")

    def run(self, args, opts):
        settings = self.crawler_process.settings
        if opts.get:
            s = settings.get(opts.get)
            if isinstance(s, BaseSettings):
                print(json.dumps(s.copy_to_dict()))
            else:
                print(s)
        elif opts.getbool:
            print(settings.getbool(opts.getbool))
        elif opts.getint:
            print(settings.getint(opts.getint))
        elif opts.getfloat:
            print(settings.getfloat(opts.getfloat))
        elif opts.getlist:
            print(settings.getlist(opts.getlist))

View file

@@ -0,0 +1,80 @@
"""
Scrapy Shell

See documentation in docs/topics/shell.rst
"""
from threading import Thread

from scrapy.commands import ScrapyCommand
from scrapy.http import Request
from scrapy.shell import Shell
from scrapy.utils.spider import spidercls_for_request, DefaultSpider
from scrapy.utils.url import guess_scheme


class Command(ScrapyCommand):

    requires_project = False
    default_settings = {
        'KEEP_ALIVE': True,
        'LOGSTATS_INTERVAL': 0,
        'DUPEFILTER_CLASS': 'scrapy.dupefilters.BaseDupeFilter',
    }

    def syntax(self):
        return "[url|file]"

    def short_desc(self):
        return "Interactive scraping console"

    def long_desc(self):
        return ("Interactive console for scraping the given url or file. "
                "Use ./file.html syntax or full path for local file.")

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-c", dest="code",
                          help="evaluate the code in the shell, print the result and exit")
        parser.add_option("--spider", dest="spider",
                          help="use this spider")
        parser.add_option("--no-redirect", dest="no_redirect", action="store_true", default=False,
                          help="do not handle HTTP 3xx status codes and print response as-is")

    def update_vars(self, vars):
        """You can use this function to update the Scrapy objects that will be
        available in the shell
        """
        pass

    def run(self, args, opts):
        url = args[0] if args else None
        if url:
            # first argument may be a local file
            url = guess_scheme(url)

        spider_loader = self.crawler_process.spider_loader

        spidercls = DefaultSpider
        if opts.spider:
            spidercls = spider_loader.load(opts.spider)
        elif url:
            spidercls = spidercls_for_request(spider_loader, Request(url),
                                              spidercls, log_multiple=True)

        # The crawler is created this way since the Shell manually handles the
        # crawling engine, so the set up in the crawl method won't work
        crawler = self.crawler_process._create_crawler(spidercls)
        # The Shell class needs a persistent engine in the crawler
        crawler.engine = crawler._create_engine()
        crawler.engine.start()

        self._start_crawler_thread()

        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
        shell.start(url=url, redirect=not opts.no_redirect)

    def _start_crawler_thread(self):
        t = Thread(target=self.crawler_process.start,
                   kwargs={'stop_after_crawl': False})
        t.daemon = True
        t.start()
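
The update_vars() hook above is the intended extension point for exposing extra objects in the shell namespace; a minimal sketch of overriding it follows (the injected helper is purely illustrative, not part of this commit).

# Illustrative only: a subclass of the shell command that adds an object to
# the namespace dict passed to update_vars().
import json


class JsonShellCommand(Command):

    def update_vars(self, vars):
        # `vars` holds the objects that will be available inside the shell
        vars['json_loads'] = json.loads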

View file

@@ -0,0 +1,128 @@
import re
import os
import string
from importlib import import_module
from os.path import join, exists, abspath
from shutil import ignore_patterns, move, copy2, copystat
from stat import S_IWUSR as OWNER_WRITE_PERMISSION

import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.exceptions import UsageError


TEMPLATES_TO_RENDER = (
    ('scrapy.cfg',),
    ('${project_name}', 'settings.py.tmpl'),
    ('${project_name}', 'items.py.tmpl'),
    ('${project_name}', 'pipelines.py.tmpl'),
    ('${project_name}', 'middlewares.py.tmpl'),
)

IGNORE = ignore_patterns('*.pyc', '__pycache__', '.svn')


def _make_writable(path):
    current_permissions = os.stat(path).st_mode
    os.chmod(path, current_permissions | OWNER_WRITE_PERMISSION)


class Command(ScrapyCommand):

    requires_project = False
    default_settings = {'LOG_ENABLED': False,
                        'SPIDER_LOADER_WARN_ONLY': True}

    def syntax(self):
        return "<project_name> [project_dir]"

    def short_desc(self):
        return "Create new project"

    def _is_valid_name(self, project_name):
        def _module_exists(module_name):
            try:
                import_module(module_name)
                return True
            except ImportError:
                return False

        if not re.search(r'^[_a-zA-Z]\w*$', project_name):
            print('Error: Project names must begin with a letter and contain'
                  ' only\nletters, numbers and underscores')
        elif _module_exists(project_name):
            print(f'Error: Module {project_name!r} already exists')
        else:
            return True
        return False

    def _copytree(self, src, dst):
        """
        Since the original function always creates the directory, to resolve
        the issue a new function had to be created. It's a simple copy and
        was reduced for this case.
        More info at:
        https://github.com/scrapy/scrapy/pull/2005
        """
        ignore = IGNORE
        names = os.listdir(src)
        ignored_names = ignore(src, names)

        if not os.path.exists(dst):
            os.makedirs(dst)

        for name in names:
            if name in ignored_names:
                continue

            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            if os.path.isdir(srcname):
                self._copytree(srcname, dstname)
            else:
                copy2(srcname, dstname)
                _make_writable(dstname)

        copystat(src, dst)
        _make_writable(dst)

    def run(self, args, opts):
        if len(args) not in (1, 2):
            raise UsageError()

        project_name = args[0]
        project_dir = args[0]

        if len(args) == 2:
            project_dir = args[1]

        if exists(join(project_dir, 'scrapy.cfg')):
            self.exitcode = 1
            print(f'Error: scrapy.cfg already exists in {abspath(project_dir)}')
            return

        if not self._is_valid_name(project_name):
            self.exitcode = 1
            return

        self._copytree(self.templates_dir, abspath(project_dir))
        move(join(project_dir, 'module'), join(project_dir, project_name))
        for paths in TEMPLATES_TO_RENDER:
            path = join(*paths)
            tplfile = join(project_dir, string.Template(path).substitute(project_name=project_name))
            render_templatefile(tplfile, project_name=project_name, ProjectName=string_camelcase(project_name))
        print(f"New Scrapy project '{project_name}', using template directory "
              f"'{self.templates_dir}', created in:")
        print(f" {abspath(project_dir)}\n")
        print("You can start your first spider with:")
        print(f" cd {project_dir}")
        print(" scrapy genspider example example.com")

    @property
    def templates_dir(self):
        return join(
            self.settings['TEMPLATES_DIR'] or join(scrapy.__path__[0], 'templates'),
            'project'
        )

View file

@@ -0,0 +1,29 @@
import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.utils.versions import scrapy_components_versions


class Command(ScrapyCommand):

    default_settings = {'LOG_ENABLED': False,
                        'SPIDER_LOADER_WARN_ONLY': True}

    def syntax(self):
        return "[-v]"

    def short_desc(self):
        return "Print Scrapy version"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("--verbose", "-v", dest="verbose", action="store_true",
                          help="also display twisted/python/platform info (useful for bug reports)")

    def run(self, args, opts):
        if opts.verbose:
            versions = scrapy_components_versions()
            width = max(len(n) for (n, _) in versions)
            for name, version in versions:
                print(f"{name:<{width}} : {version}")
        else:
            print(f"Scrapy {scrapy.__version__}")

View file

@@ -0,0 +1,18 @@
from scrapy.commands import fetch
from scrapy.utils.response import open_in_browser


class Command(fetch.Command):

    def short_desc(self):
        return "Open URL in browser, as seen by Scrapy"

    def long_desc(self):
        return "Fetch a URL using the Scrapy downloader and show its contents in a browser"

    def add_options(self, parser):
        super().add_options(parser)
        parser.remove_option("--headers")

    def _print_response(self, response, opts):
        open_in_browser(response)