Output of the new DB entries

This commit is contained in:
hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions


@ -0,0 +1,44 @@
import random
from urllib.parse import urlencode
from twisted.web.server import Site
from twisted.web.resource import Resource
class Root(Resource):
isLeaf = True
def getChild(self, name, request):
return self
def render(self, request):
total = _getarg(request, b'total', 100, int)
show = _getarg(request, b'show', 10, int)
nlist = [random.randint(1, total) for _ in range(show)]
request.write(b"<html><head></head><body>")
args = request.args.copy()
for nl in nlist:
args['n'] = nl
argstr = urlencode(args, doseq=True)
request.write(f"<a href='/follow?{argstr}'>follow {nl}</a><br>"
.encode('utf8'))
request.write(b"</body></html>")
return b''
def _getarg(request, name, default=None, type=str):
return type(request.args[name][0]) if name in request.args else default
if __name__ == '__main__':
from twisted.internet import reactor
root = Root()
factory = Site(root)
httpPort = reactor.listenTCP(8998, factory)
def _print_listening():
httpHost = httpPort.getHost()
print(f"Bench server at http://{httpHost.host}:{httpHost.port}")
reactor.callWhenRunning(_print_listening)
reactor.run()
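
Assuming the server above is running locally on port 8998, a minimal smoke test could look like the following sketch; urllib is used purely for illustration, and the query parameters match the handler's `total` and `show` arguments.

    import urllib.request

    # Request a page with 5 links whose numbers are drawn from 1..50.
    with urllib.request.urlopen("http://localhost:8998/?total=50&show=5") as resp:
        print(resp.read().decode("utf8"))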


@ -0,0 +1,32 @@
"""Boto/botocore helpers"""
import warnings
from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
def is_botocore():
""" Returns True if botocore is available, otherwise raises NotConfigured. Never returns False.
Previously, when boto was supported in addition to botocore, this returned False if boto was available
but botocore wasn't.
"""
message = (
'is_botocore() is deprecated and always returns True or raises an Exception, '
'so it cannot be used for checking if boto is available instead of botocore. '
'You can use scrapy.utils.boto.is_botocore_available() to check if botocore '
'is available.'
)
warnings.warn(message, ScrapyDeprecationWarning, stacklevel=2)
try:
import botocore # noqa: F401
return True
except ImportError:
raise NotConfigured('missing botocore library')
def is_botocore_available():
try:
import botocore # noqa: F401
return True
except ImportError:
return False
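
A minimal sketch of how a caller might use the non-deprecated helper to guard an S3-dependent code path (the error message is illustrative):

    from scrapy.exceptions import NotConfigured
    from scrapy.utils.boto import is_botocore_available

    if not is_botocore_available():
        raise NotConfigured("botocore is required for S3 support")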


@ -0,0 +1,195 @@
import numbers
import os
import sys
import warnings
from configparser import ConfigParser
from operator import itemgetter
from scrapy.exceptions import ScrapyDeprecationWarning, UsageError
from scrapy.settings import BaseSettings
from scrapy.utils.deprecate import update_classpath
from scrapy.utils.python import without_none_values
def build_component_list(compdict, custom=None, convert=update_classpath):
"""Compose a component list from a { class: order } dictionary."""
def _check_components(complist):
if len({convert(c) for c in complist}) != len(complist):
raise ValueError(f'Some paths in {complist!r} convert to the same object, '
'please update your settings')
def _map_keys(compdict):
if isinstance(compdict, BaseSettings):
compbs = BaseSettings()
for k, v in compdict.items():
prio = compdict.getpriority(k)
if compbs.getpriority(convert(k)) == prio:
raise ValueError(f'Some paths in {list(compdict.keys())!r} '
'convert to the same '
'object, please update your settings'
)
else:
compbs.set(convert(k), v, priority=prio)
return compbs
else:
_check_components(compdict)
return {convert(k): v for k, v in compdict.items()}
def _validate_values(compdict):
"""Fail if a value in the components dict is not a real number or None."""
for name, value in compdict.items():
if value is not None and not isinstance(value, numbers.Real):
raise ValueError(f'Invalid value {value} for component {name}, '
'please provide a real number or None instead')
# BEGIN Backward compatibility for old (base, custom) call signature
if isinstance(custom, (list, tuple)):
_check_components(custom)
return type(custom)(convert(c) for c in custom)
if custom is not None:
compdict.update(custom)
# END Backward compatibility
_validate_values(compdict)
compdict = without_none_values(_map_keys(compdict))
return [k for k, v in sorted(compdict.items(), key=itemgetter(1))]
def arglist_to_dict(arglist):
"""Convert a list of arguments like ['arg1=val1', 'arg2=val2', ...] to a
dict
"""
return dict(x.split('=', 1) for x in arglist)
def closest_scrapy_cfg(path='.', prevpath=None):
"""Return the path to the closest scrapy.cfg file by traversing the current
directory and its parents
"""
if path == prevpath:
return ''
path = os.path.abspath(path)
cfgfile = os.path.join(path, 'scrapy.cfg')
if os.path.exists(cfgfile):
return cfgfile
return closest_scrapy_cfg(os.path.dirname(path), path)
def init_env(project='default', set_syspath=True):
"""Initialize environment to use command-line tool from inside a project
dir. This sets the Scrapy settings module and modifies the Python path to
be able to locate the project module.
"""
cfg = get_config()
if cfg.has_option('settings', project):
os.environ['SCRAPY_SETTINGS_MODULE'] = cfg.get('settings', project)
closest = closest_scrapy_cfg()
if closest:
projdir = os.path.dirname(closest)
if set_syspath and projdir not in sys.path:
sys.path.append(projdir)
def get_config(use_closest=True):
"""Get Scrapy config file as a ConfigParser"""
sources = get_sources(use_closest)
cfg = ConfigParser()
cfg.read(sources)
return cfg
def get_sources(use_closest=True):
xdg_config_home = os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
sources = [
'/etc/scrapy.cfg',
r'c:\scrapy\scrapy.cfg',
xdg_config_home + '/scrapy.cfg',
os.path.expanduser('~/.scrapy.cfg'),
]
if use_closest:
sources.append(closest_scrapy_cfg())
return sources
def feed_complete_default_values_from_settings(feed, settings):
out = feed.copy()
out.setdefault("batch_item_count", settings.getint('FEED_EXPORT_BATCH_ITEM_COUNT'))
out.setdefault("encoding", settings["FEED_EXPORT_ENCODING"])
out.setdefault("fields", settings.getlist("FEED_EXPORT_FIELDS") or None)
out.setdefault("store_empty", settings.getbool("FEED_STORE_EMPTY"))
out.setdefault("uri_params", settings["FEED_URI_PARAMS"])
out.setdefault("item_export_kwargs", dict())
if settings["FEED_EXPORT_INDENT"] is None:
out.setdefault("indent", None)
else:
out.setdefault("indent", settings.getint("FEED_EXPORT_INDENT"))
return out
def feed_process_params_from_cli(settings, output, output_format=None,
overwrite_output=None):
"""
Receives feed export params (from the 'crawl' or 'runspider' commands),
checks for inconsistencies in their quantities and returns a dictionary
suitable to be used as the FEEDS setting.
"""
valid_output_formats = without_none_values(
settings.getwithbase('FEED_EXPORTERS')
).keys()
def check_valid_format(output_format):
if output_format not in valid_output_formats:
raise UsageError(
f"Unrecognized output format '{output_format}'. "
f"Set a supported one ({tuple(valid_output_formats)}) "
"after a colon at the end of the output URI (i.e. -o/-O "
"<URI>:<FORMAT>) or as a file extension."
)
overwrite = False
if overwrite_output:
if output:
raise UsageError(
"Please use only one of -o/--output and -O/--overwrite-output"
)
output = overwrite_output
overwrite = True
if output_format:
if len(output) == 1:
check_valid_format(output_format)
message = (
'The -t command line option is deprecated in favor of '
'specifying the output format within the output URI. See the '
'documentation of the -o and -O options for more information.'
)
warnings.warn(message, ScrapyDeprecationWarning, stacklevel=2)
return {output[0]: {'format': output_format}}
else:
raise UsageError(
'The -t command-line option cannot be used if multiple output '
'URIs are specified'
)
result = {}
for element in output:
try:
feed_uri, feed_format = element.rsplit(':', 1)
except ValueError:
feed_uri = element
feed_format = os.path.splitext(element)[1].replace('.', '')
else:
if feed_uri == '-':
feed_uri = 'stdout:'
check_valid_format(feed_format)
result[feed_uri] = {'format': feed_format}
if overwrite:
result[feed_uri]['overwrite'] = True
# FEEDS setting should take precedence over the matching CLI options
result.update(settings.getdict('FEEDS'))
return result
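
As a quick illustration of build_component_list (assuming the usual scrapy.utils.conf import path; the component paths below are made up): entries are sorted by their numeric order, and a value of None disables a component.

    from scrapy.utils.conf import build_component_list

    components = {
        'myproject.middlewares.Bar': 200,
        'myproject.middlewares.Foo': 100,
        'myproject.middlewares.Disabled': None,  # None drops the component
    }
    print(build_component_list(components))
    # ['myproject.middlewares.Foo', 'myproject.middlewares.Bar']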


@ -0,0 +1,104 @@
from functools import wraps
from collections import OrderedDict
def _embed_ipython_shell(namespace={}, banner=''):
"""Start an IPython Shell"""
try:
from IPython.terminal.embed import InteractiveShellEmbed
from IPython.terminal.ipapp import load_default_config
except ImportError:
from IPython.frontend.terminal.embed import InteractiveShellEmbed
from IPython.frontend.terminal.ipapp import load_default_config
@wraps(_embed_ipython_shell)
def wrapper(namespace=namespace, banner=''):
config = load_default_config()
# Always use .instance() to ensure _instance propagation to all parents;
# this is needed for <TAB> completion to work well for new imports.
# Also clear the instance to always have a fresh environment
# on repeated breaks, as with inspect_response()
InteractiveShellEmbed.clear_instance()
shell = InteractiveShellEmbed.instance(
banner1=banner, user_ns=namespace, config=config)
shell()
return wrapper
def _embed_bpython_shell(namespace={}, banner=''):
"""Start a bpython shell"""
import bpython
@wraps(_embed_bpython_shell)
def wrapper(namespace=namespace, banner=''):
bpython.embed(locals_=namespace, banner=banner)
return wrapper
def _embed_ptpython_shell(namespace={}, banner=''):
"""Start a ptpython shell"""
import ptpython.repl
@wraps(_embed_ptpython_shell)
def wrapper(namespace=namespace, banner=''):
print(banner)
ptpython.repl.embed(locals=namespace)
return wrapper
def _embed_standard_shell(namespace={}, banner=''):
"""Start a standard python shell"""
import code
try: # readline module is only available on unix systems
import readline
except ImportError:
pass
else:
import rlcompleter # noqa: F401
readline.parse_and_bind("tab:complete")
@wraps(_embed_standard_shell)
def wrapper(namespace=namespace, banner=''):
code.interact(banner=banner, local=namespace)
return wrapper
DEFAULT_PYTHON_SHELLS = OrderedDict([
('ptpython', _embed_ptpython_shell),
('ipython', _embed_ipython_shell),
('bpython', _embed_bpython_shell),
('python', _embed_standard_shell),
])
def get_shell_embed_func(shells=None, known_shells=None):
"""Return the first acceptable shell-embed function
from a given list of shell names.
"""
if shells is None: # list, preference order of shells
shells = DEFAULT_PYTHON_SHELLS.keys()
if known_shells is None: # available embeddable shells
known_shells = DEFAULT_PYTHON_SHELLS.copy()
for shell in shells:
if shell in known_shells:
try:
# function test: run all setup code (imports),
# but don't fall into the shell
return known_shells[shell]()
except ImportError:
continue
def start_python_console(namespace=None, banner='', shells=None):
"""Start Python console bound to the given namespace.
Readline support and tab completion will be used on Unix, if available.
"""
if namespace is None:
namespace = {}
try:
shell = get_shell_embed_func(shells)
if shell is not None:
shell(namespace=namespace, banner=banner)
except SystemExit: # raised when using exit() in python code.interact
pass
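
For example, a debugging hook could drop into whichever embeddable shell is installed; the namespace contents here are arbitrary:

    from scrapy.utils.console import start_python_console

    start_python_console(namespace={'answer': 42},
                         banner='inspect `answer`, then exit() to continue')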


@ -0,0 +1,100 @@
import argparse
import warnings
from shlex import split
from http.cookies import SimpleCookie
from urllib.parse import urlparse
from w3lib.http import basic_auth_header
class CurlParser(argparse.ArgumentParser):
def error(self, message):
error_msg = f'There was an error parsing the curl command: {message}'
raise ValueError(error_msg)
curl_parser = CurlParser()
curl_parser.add_argument('url')
curl_parser.add_argument('-H', '--header', dest='headers', action='append')
curl_parser.add_argument('-X', '--request', dest='method')
curl_parser.add_argument('-d', '--data', '--data-raw', dest='data')
curl_parser.add_argument('-u', '--user', dest='auth')
safe_to_ignore_arguments = [
['--compressed'],
# `--compressed` argument is not safe to ignore, but it's included here
# because the `HttpCompressionMiddleware` is enabled by default
['-s', '--silent'],
['-v', '--verbose'],
['-#', '--progress-bar']
]
for argument in safe_to_ignore_arguments:
curl_parser.add_argument(*argument, action='store_true')
def curl_to_request_kwargs(curl_command, ignore_unknown_options=True):
"""Convert a cURL command syntax to Request kwargs.
:param str curl_command: string containing the curl command
:param bool ignore_unknown_options: If true, only a warning is emitted when
cURL options are unknown. Otherwise
raises an error. (default: True)
:return: dictionary of Request kwargs
"""
curl_args = split(curl_command)
if curl_args[0] != 'curl':
raise ValueError('A curl command must start with "curl"')
parsed_args, argv = curl_parser.parse_known_args(curl_args[1:])
if argv:
msg = f'Unrecognized options: {", ".join(argv)}'
if ignore_unknown_options:
warnings.warn(msg)
else:
raise ValueError(msg)
url = parsed_args.url
# curl automatically prepends 'http' if the scheme is missing, but Request
# needs the scheme to work
parsed_url = urlparse(url)
if not parsed_url.scheme:
url = 'http://' + url
method = parsed_args.method or 'GET'
result = {'method': method.upper(), 'url': url}
headers = []
cookies = {}
for header in parsed_args.headers or ():
name, val = header.split(':', 1)
name = name.strip()
val = val.strip()
if name.title() == 'Cookie':
for name, morsel in SimpleCookie(val).items():
cookies[name] = morsel.value
else:
headers.append((name, val))
if parsed_args.auth:
user, password = parsed_args.auth.split(':', 1)
headers.append(('Authorization', basic_auth_header(user, password)))
if headers:
result['headers'] = headers
if cookies:
result['cookies'] = cookies
if parsed_args.data:
result['body'] = parsed_args.data
if not parsed_args.method:
# if the "data" is specified but the "method" is not specified,
# the default method is 'POST'
result['method'] = 'POST'
return result
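
A short conversion example (the URL, header and payload are made up):

    from scrapy.utils.curl import curl_to_request_kwargs

    kwargs = curl_to_request_kwargs(
        "curl -X POST -H 'X-Token: abc' -d 'a=1' https://example.com/api")
    # {'method': 'POST', 'url': 'https://example.com/api',
    #  'headers': [('X-Token', 'abc')], 'body': 'a=1'}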


@ -0,0 +1,119 @@
"""
This module contains data types used by Scrapy which are not included in the
Python Standard Library.
This module must not depend on any module outside the Standard Library.
"""
import collections
import weakref
from collections.abc import Mapping
class CaselessDict(dict):
__slots__ = ()
def __init__(self, seq=None):
super().__init__()
if seq:
self.update(seq)
def __getitem__(self, key):
return dict.__getitem__(self, self.normkey(key))
def __setitem__(self, key, value):
dict.__setitem__(self, self.normkey(key), self.normvalue(value))
def __delitem__(self, key):
dict.__delitem__(self, self.normkey(key))
def __contains__(self, key):
return dict.__contains__(self, self.normkey(key))
has_key = __contains__
def __copy__(self):
return self.__class__(self)
copy = __copy__
def normkey(self, key):
"""Method to normalize dictionary key access"""
return key.lower()
def normvalue(self, value):
"""Method to normalize values prior to be setted"""
return value
def get(self, key, def_val=None):
return dict.get(self, self.normkey(key), self.normvalue(def_val))
def setdefault(self, key, def_val=None):
return dict.setdefault(self, self.normkey(key), self.normvalue(def_val))
def update(self, seq):
seq = seq.items() if isinstance(seq, Mapping) else seq
iseq = ((self.normkey(k), self.normvalue(v)) for k, v in seq)
super().update(iseq)
@classmethod
def fromkeys(cls, keys, value=None):
return cls((k, value) for k in keys)
def pop(self, key, *args):
return dict.pop(self, self.normkey(key), *args)
class LocalCache(collections.OrderedDict):
"""Dictionary with a finite number of keys.
Older items expire first.
"""
def __init__(self, limit=None):
super().__init__()
self.limit = limit
def __setitem__(self, key, value):
if self.limit:
while len(self) >= self.limit:
self.popitem(last=False)
super().__setitem__(key, value)
class LocalWeakReferencedCache(weakref.WeakKeyDictionary):
"""
A weakref.WeakKeyDictionary implementation that uses LocalCache as its
underlying data structure, making it ordered and capable of being size-limited.
Useful for memoization, while avoiding keeping received
arguments in memory only because of the cached references.
Note: like LocalCache and unlike weakref.WeakKeyDictionary,
it cannot be instantiated with an initial dictionary.
"""
def __init__(self, limit=None):
super().__init__()
self.data = LocalCache(limit=limit)
def __setitem__(self, key, value):
try:
super().__setitem__(key, value)
except TypeError:
pass # key is not weak-referenceable, skip caching
def __getitem__(self, key):
try:
return super().__getitem__(key)
except (TypeError, KeyError):
return None # key is either not weak-referenceable or not cached
class SequenceExclude:
"""Object to test if an item is NOT within some sequence."""
def __init__(self, seq):
self.seq = seq
def __contains__(self, item):
return item not in self.seq
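
Two quick illustrations of these types, assuming the usual scrapy.utils.datatypes import path:

    from scrapy.utils.datatypes import CaselessDict, SequenceExclude

    headers = CaselessDict({'Content-Type': 'text/html'})
    print(headers['content-type'])           # text/html

    ok_codes = SequenceExclude(range(400, 600))
    print(200 in ok_codes, 404 in ok_codes)  # True False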


@ -0,0 +1,45 @@
import warnings
from functools import wraps
from twisted.internet import defer, threads
from scrapy.exceptions import ScrapyDeprecationWarning
def deprecated(use_instead=None):
"""This is a decorator which can be used to mark functions
as deprecated. It will result in a warning being emitted
when the function is used."""
def deco(func):
@wraps(func)
def wrapped(*args, **kwargs):
message = f"Call to deprecated function {func.__name__}."
if use_instead:
message += f" Use {use_instead} instead."
warnings.warn(message, category=ScrapyDeprecationWarning, stacklevel=2)
return func(*args, **kwargs)
return wrapped
if callable(use_instead):
deco = deco(use_instead)
use_instead = None
return deco
def defers(func):
"""Decorator to make sure a function always returns a deferred"""
@wraps(func)
def wrapped(*a, **kw):
return defer.maybeDeferred(func, *a, **kw)
return wrapped
def inthread(func):
"""Decorator to call a function in a thread and return a deferred with the
result
"""
@wraps(func)
def wrapped(*a, **kw):
return threads.deferToThread(func, *a, **kw)
return wrapped
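
For instance (the function names below are placeholders):

    from scrapy.utils.decorators import deprecated

    @deprecated('new_helper')
    def old_helper():
        return 42

    old_helper()  # emits ScrapyDeprecationWarning pointing at new_helper, then returns 42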


@ -0,0 +1,168 @@
"""
Helper functions for dealing with Twisted deferreds
"""
import asyncio
import inspect
from functools import wraps
from twisted.internet import defer, task
from twisted.python import failure
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.reactor import is_asyncio_reactor_installed
def defer_fail(_failure):
"""Same as twisted.internet.defer.fail but delay calling errback until
next reactor loop
It delays by 100ms so reactor has a chance to go through readers and writers
before attending pending delayed calls, so do not set delay to zero.
"""
from twisted.internet import reactor
d = defer.Deferred()
reactor.callLater(0.1, d.errback, _failure)
return d
def defer_succeed(result):
"""Same as twisted.internet.defer.succeed but delay calling callback until
next reactor loop
It delays by 100ms so reactor has a chance to go through readers and writers
before attending pending delayed calls, so do not set delay to zero.
"""
from twisted.internet import reactor
d = defer.Deferred()
reactor.callLater(0.1, d.callback, result)
return d
def defer_result(result):
if isinstance(result, defer.Deferred):
return result
elif isinstance(result, failure.Failure):
return defer_fail(result)
else:
return defer_succeed(result)
def mustbe_deferred(f, *args, **kw):
"""Same as twisted.internet.defer.maybeDeferred, but delay calling
callback/errback to next reactor loop
"""
try:
result = f(*args, **kw)
# FIXME: Hack to avoid introspecting tracebacks. This to speed up
# processing of IgnoreRequest errors which are, by far, the most common
# exception in Scrapy - see #125
except IgnoreRequest as e:
return defer_fail(failure.Failure(e))
except Exception:
return defer_fail(failure.Failure())
else:
return defer_result(result)
def parallel(iterable, count, callable, *args, **named):
"""Execute a callable over the objects in the given iterable, in parallel,
using no more than ``count`` concurrent calls.
Taken from: https://jcalderone.livejournal.com/24285.html
"""
coop = task.Cooperator()
work = (callable(elem, *args, **named) for elem in iterable)
return defer.DeferredList([coop.coiterate(work) for _ in range(count)])
def process_chain(callbacks, input, *a, **kw):
"""Return a Deferred built by chaining the given callbacks"""
d = defer.Deferred()
for x in callbacks:
d.addCallback(x, *a, **kw)
d.callback(input)
return d
def process_chain_both(callbacks, errbacks, input, *a, **kw):
"""Return a Deferred built by chaining the given callbacks and errbacks"""
d = defer.Deferred()
for cb, eb in zip(callbacks, errbacks):
d.addCallbacks(
callback=cb, errback=eb,
callbackArgs=a, callbackKeywords=kw,
errbackArgs=a, errbackKeywords=kw,
)
if isinstance(input, failure.Failure):
d.errback(input)
else:
d.callback(input)
return d
def process_parallel(callbacks, input, *a, **kw):
"""Return a Deferred with the output of all successful calls to the given
callbacks
"""
dfds = [defer.succeed(input).addCallback(x, *a, **kw) for x in callbacks]
d = defer.DeferredList(dfds, fireOnOneErrback=1, consumeErrors=1)
d.addCallbacks(lambda r: [x[1] for x in r], lambda f: f.value.subFailure)
return d
def iter_errback(iterable, errback, *a, **kw):
"""Wraps an iterable calling an errback if an error is caught while
iterating it.
"""
it = iter(iterable)
while True:
try:
yield next(it)
except StopIteration:
break
except Exception:
errback(failure.Failure(), *a, **kw)
def deferred_from_coro(o):
"""Converts a coroutine into a Deferred, or returns the object as is if it isn't a coroutine"""
if isinstance(o, defer.Deferred):
return o
if asyncio.isfuture(o) or inspect.isawaitable(o):
if not is_asyncio_reactor_installed():
# wrapping the coroutine directly into a Deferred, this doesn't work correctly with coroutines
# that use asyncio, e.g. "await asyncio.sleep(1)"
return defer.ensureDeferred(o)
else:
# wrapping the coroutine into a Future and then into a Deferred, this requires AsyncioSelectorReactor
return defer.Deferred.fromFuture(asyncio.ensure_future(o))
return o
def deferred_f_from_coro_f(coro_f):
""" Converts a coroutine function into a function that returns a Deferred.
The coroutine function will be called at the time when the wrapper is called. Wrapper args will be passed to it.
This is useful for callback chains, as callback functions are called with the previous callback result.
"""
@wraps(coro_f)
def f(*coro_args, **coro_kwargs):
return deferred_from_coro(coro_f(*coro_args, **coro_kwargs))
return f
def maybeDeferred_coro(f, *args, **kw):
""" Copy of defer.maybeDeferred that also converts coroutines to Deferreds. """
try:
result = f(*args, **kw)
except: # noqa: E722
return defer.fail(failure.Failure(captureVars=defer.Deferred.debug))
if isinstance(result, defer.Deferred):
return result
elif asyncio.isfuture(result) or inspect.isawaitable(result):
return deferred_from_coro(result)
elif isinstance(result, failure.Failure):
return defer.fail(result)
else:
return defer.succeed(result)
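
A small, reactor-free illustration of process_chain with synchronous callbacks:

    from scrapy.utils.defer import process_chain

    d = process_chain([lambda x: x + 1, lambda x: x * 2], 10)
    d.addCallback(print)  # prints 22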


@ -0,0 +1,174 @@
"""Some helpers for deprecation messages"""
import warnings
import inspect
from scrapy.exceptions import ScrapyDeprecationWarning
def attribute(obj, oldattr, newattr, version='0.12'):
cname = obj.__class__.__name__
warnings.warn(
f"{cname}.{oldattr} attribute is deprecated and will be no longer supported "
f"in Scrapy {version}, use {cname}.{newattr} attribute instead",
ScrapyDeprecationWarning,
stacklevel=3)
def create_deprecated_class(
name,
new_class,
clsdict=None,
warn_category=ScrapyDeprecationWarning,
warn_once=True,
old_class_path=None,
new_class_path=None,
subclass_warn_message="{cls} inherits from deprecated class {old}, please inherit from {new}.",
instance_warn_message="{cls} is deprecated, instantiate {new} instead."
):
"""
Return a "deprecated" class that causes its subclasses to issue a warning.
Subclasses of ``new_class`` are considered subclasses of this class.
It also warns when the deprecated class is instantiated, but not when
its subclasses are instantiated.
It can be used to rename a base class in a library. For example, if we
have
class OldName(SomeClass):
# ...
and we want to rename it to NewName, we can do the following::
class NewName(SomeClass):
# ...
OldName = create_deprecated_class('OldName', NewName)
Then, if a user class inherits from OldName, a warning is issued. Also, if
some code uses ``issubclass(sub, OldName)`` or ``isinstance(sub(), OldName)``
checks they'll still return True if sub is a subclass of NewName instead of
OldName.
"""
class DeprecatedClass(new_class.__class__):
deprecated_class = None
warned_on_subclass = False
def __new__(metacls, name, bases, clsdict_):
cls = super().__new__(metacls, name, bases, clsdict_)
if metacls.deprecated_class is None:
metacls.deprecated_class = cls
return cls
def __init__(cls, name, bases, clsdict_):
meta = cls.__class__
old = meta.deprecated_class
if old in bases and not (warn_once and meta.warned_on_subclass):
meta.warned_on_subclass = True
msg = subclass_warn_message.format(cls=_clspath(cls),
old=_clspath(old, old_class_path),
new=_clspath(new_class, new_class_path))
if warn_once:
msg += ' (warning only on first subclass, there may be others)'
warnings.warn(msg, warn_category, stacklevel=2)
super().__init__(name, bases, clsdict_)
# see https://www.python.org/dev/peps/pep-3119/#overloading-isinstance-and-issubclass
# and https://docs.python.org/reference/datamodel.html#customizing-instance-and-subclass-checks
# for implementation details
def __instancecheck__(cls, inst):
return any(cls.__subclasscheck__(c)
for c in {type(inst), inst.__class__})
def __subclasscheck__(cls, sub):
if cls is not DeprecatedClass.deprecated_class:
# we should do the magic only if second `issubclass` argument
# is the deprecated class itself - subclasses of the
# deprecated class should not use custom `__subclasscheck__`
# method.
return super().__subclasscheck__(sub)
if not inspect.isclass(sub):
raise TypeError("issubclass() arg 1 must be a class")
mro = getattr(sub, '__mro__', ())
return any(c in {cls, new_class} for c in mro)
def __call__(cls, *args, **kwargs):
old = DeprecatedClass.deprecated_class
if cls is old:
msg = instance_warn_message.format(cls=_clspath(cls, old_class_path),
new=_clspath(new_class, new_class_path))
warnings.warn(msg, warn_category, stacklevel=2)
return super().__call__(*args, **kwargs)
deprecated_cls = DeprecatedClass(name, (new_class,), clsdict or {})
try:
frm = inspect.stack()[1]
parent_module = inspect.getmodule(frm[0])
if parent_module is not None:
deprecated_cls.__module__ = parent_module.__name__
except Exception as e:
# Sometimes inspect.stack() fails (e.g. when the first import of
# deprecated class is in jinja2 template). __module__ attribute is not
# important enough to raise an exception as users may be unable
# to fix inspect.stack() errors.
warnings.warn(f"Error detecting parent module: {e!r}")
return deprecated_cls
def _clspath(cls, forced=None):
if forced is not None:
return forced
return f'{cls.__module__}.{cls.__name__}'
DEPRECATION_RULES = [
('scrapy.telnet.', 'scrapy.extensions.telnet.'),
]
def update_classpath(path):
"""Update a deprecated path from an object with its new location"""
for prefix, replacement in DEPRECATION_RULES:
if isinstance(path, str) and path.startswith(prefix):
new_path = path.replace(prefix, replacement, 1)
warnings.warn(f"`{path}` class is deprecated, use `{new_path}` instead",
ScrapyDeprecationWarning)
return new_path
return path
def method_is_overridden(subclass, base_class, method_name):
"""
Return True if a method named ``method_name`` of a ``base_class``
is overridden in a ``subclass``.
>>> class Base:
... def foo(self):
... pass
>>> class Sub1(Base):
... pass
>>> class Sub2(Base):
... def foo(self):
... pass
>>> class Sub3(Sub1):
... def foo(self):
... pass
>>> class Sub4(Sub2):
... pass
>>> method_is_overridden(Sub1, Base, 'foo')
False
>>> method_is_overridden(Sub2, Base, 'foo')
True
>>> method_is_overridden(Sub3, Base, 'foo')
True
>>> method_is_overridden(Sub4, Base, 'foo')
True
"""
base_method = getattr(base_class, method_name)
sub_method = getattr(subclass, method_name)
return base_method.__code__ is not sub_method.__code__
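
A compact usage sketch of create_deprecated_class (class names are illustrative):

    from scrapy.utils.deprecate import create_deprecated_class

    class NewName:
        pass

    OldName = create_deprecated_class('OldName', NewName)

    class UserClass(OldName):  # triggers the subclass deprecation warning
        pass

    print(issubclass(UserClass, OldName), isinstance(NewName(), OldName))  # True True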


@ -0,0 +1,48 @@
"""
pprint and pformat wrappers with colorization support
"""
import ctypes
import platform
import sys
from distutils.version import LooseVersion as parse_version
from pprint import pformat as pformat_
def _enable_windows_terminal_processing():
# https://stackoverflow.com/a/36760881
kernel32 = ctypes.windll.kernel32
return bool(kernel32.SetConsoleMode(kernel32.GetStdHandle(-11), 7))
def _tty_supports_color():
if sys.platform != "win32":
return True
if parse_version(platform.version()) < parse_version("10.0.14393"):
return False
# Windows >= 10.0.14393 interprets ANSI escape sequences providing terminal
# processing is enabled.
return _enable_windows_terminal_processing()
def _colorize(text, colorize=True):
if not colorize or not sys.stdout.isatty() or not _tty_supports_color():
return text
try:
from pygments import highlight
except ImportError:
return text
else:
from pygments.formatters import TerminalFormatter
from pygments.lexers import PythonLexer
return highlight(text, PythonLexer(), TerminalFormatter())
def pformat(obj, *args, **kwargs):
return _colorize(pformat_(obj), kwargs.pop('colorize', True))
def pprint(obj, *args, **kwargs):
print(pformat(obj, *args, **kwargs))
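
For example, colorized output is only attempted on a capable TTY with pygments installed:

    from scrapy.utils.display import pprint

    pprint({'status': 200, 'reason': 'OK'}, colorize=True)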


@ -0,0 +1,48 @@
"""Some debugging functions for working with the Scrapy engine"""
# used in global tests code
from time import time # noqa: F401
def get_engine_status(engine):
"""Return a report of the current engine status"""
tests = [
"time()-engine.start_time",
"engine.has_capacity()",
"len(engine.downloader.active)",
"engine.scraper.is_idle()",
"engine.spider.name",
"engine.spider_is_idle(engine.spider)",
"engine.slot.closing",
"len(engine.slot.inprogress)",
"len(engine.slot.scheduler.dqs or [])",
"len(engine.slot.scheduler.mqs)",
"len(engine.scraper.slot.queue)",
"len(engine.scraper.slot.active)",
"engine.scraper.slot.active_size",
"engine.scraper.slot.itemproc_size",
"engine.scraper.slot.needs_backout()",
]
checks = []
for test in tests:
try:
checks += [(test, eval(test))]
except Exception as e:
checks += [(test, f"{type(e).__name__} (exception)")]
return checks
def format_engine_status(engine=None):
checks = get_engine_status(engine)
s = "Execution engine status\n\n"
for test, result in checks:
s += f"{test:<47} : {result}\n"
s += "\n"
return s
def print_engine_status(engine):
print(format_engine_status(engine))
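
These helpers expect a live execution engine, e.g. from inside an extension; the `crawler` reference below is a hypothetical running Crawler assumed to be in scope:

    from scrapy.utils.engine import format_engine_status

    print(format_engine_status(crawler.engine))  # `crawler` is assumed, not defined here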


@ -0,0 +1,37 @@
import posixpath
from ftplib import error_perm, FTP
from posixpath import dirname
def ftp_makedirs_cwd(ftp, path, first_call=True):
"""Set the current directory of the FTP connection given in the ``ftp``
argument (as a ftplib.FTP object), creating all parent directories if they
don't exist. The ftplib.FTP object must be already connected and logged in.
"""
try:
ftp.cwd(path)
except error_perm:
ftp_makedirs_cwd(ftp, dirname(path), False)
ftp.mkd(path)
if first_call:
ftp.cwd(path)
def ftp_store_file(
*, path, file, host, port,
username, password, use_active_mode=False, overwrite=True):
"""Opens a FTP connection with passed credentials,sets current directory
to the directory extracted from given path, then uploads the file to server
"""
with FTP() as ftp:
ftp.connect(host, port)
ftp.login(username, password)
if use_active_mode:
ftp.set_pasv(False)
file.seek(0)
dirname, filename = posixpath.split(path)
ftp_makedirs_cwd(ftp, dirname)
command = 'STOR' if overwrite else 'APPE'
ftp.storbinary(f'{command} {filename}', file)
file.close()
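
A minimal invocation sketch (host, port and credentials are placeholders):

    from io import BytesIO
    from scrapy.utils.ftp import ftp_store_file

    ftp_store_file(
        path='/uploads/items.json', file=BytesIO(b'[]'),
        host='ftp.example.com', port=21,
        username='anonymous', password='guest',
    )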


@ -0,0 +1,58 @@
from gzip import GzipFile
from io import BytesIO
import re
import struct
from scrapy.utils.decorators import deprecated
# - GzipFile's read() has issues returning leftover uncompressed data when
# input is corrupted
# - read1(), which fetches data before raising EOFError on next call
# works here
@deprecated('GzipFile.read1')
def read1(gzf, size=-1):
return gzf.read1(size)
def gunzip(data):
"""Gunzip the given data and return as much data as possible.
This is resilient to CRC checksum errors.
"""
f = GzipFile(fileobj=BytesIO(data))
output_list = []
chunk = b'.'
while chunk:
try:
chunk = f.read1(8196)
output_list.append(chunk)
except (IOError, EOFError, struct.error):
# complete only if there is some data, otherwise re-raise
# see issue 87 about catching struct.error
# some pages are quite small so output_list is empty and f.extrabuf
# contains the whole page content
if output_list or getattr(f, 'extrabuf', None):
try:
output_list.append(f.extrabuf[-f.extrasize:])
finally:
break
else:
raise
return b''.join(output_list)
_is_gzipped = re.compile(br'^application/(x-)?gzip\b', re.I).search
_is_octetstream = re.compile(br'^(application|binary)/octet-stream\b', re.I).search
@deprecated
def is_gzipped(response):
"""Return True if the response is gzipped, or False otherwise"""
ctype = response.headers.get('Content-Type', b'')
cenc = response.headers.get('Content-Encoding', b'').lower()
return _is_gzipped(ctype) or _is_octetstream(ctype) and cenc in (b'gzip', b'x-gzip')
def gzip_magic_number(response):
return response.body[:3] == b'\x1f\x8b\x08'
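
A round-trip example of gunzip:

    import gzip
    from scrapy.utils.gz import gunzip

    data = gzip.compress(b'<html>ok</html>')
    print(gunzip(data))  # b'<html>ok</html>'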


@ -0,0 +1,36 @@
"""
Transitional module for moving to the w3lib library.
For new code, always import from w3lib.http instead of this module
"""
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.decorators import deprecated
from w3lib.http import * # noqa: F401
warnings.warn("Module `scrapy.utils.http` is deprecated, "
"Please import from `w3lib.http` instead.",
ScrapyDeprecationWarning, stacklevel=2)
@deprecated
def decode_chunked_transfer(chunked_body):
"""Parsed body received with chunked transfer encoding, and return the
decoded body.
For more info see:
https://en.wikipedia.org/wiki/Chunked_transfer_encoding
"""
body, h, t = '', '', chunked_body
while t:
h, t = t.split('\r\n', 1)
if h == '0':
break
size = int(h, 16)
body += t[:size]
t = t[size + 2:]
return body


@ -0,0 +1,16 @@
"""Helper functions for scrapy.http objects (Request, Response)"""
import weakref
from urllib.parse import urlparse
_urlparse_cache = weakref.WeakKeyDictionary()
def urlparse_cached(request_or_response):
"""Return urlparse.urlparse caching the result, where the argument can be a
Request or Response object
"""
if request_or_response not in _urlparse_cache:
_urlparse_cache[request_or_response] = urlparse(request_or_response.url)
return _urlparse_cache[request_or_response]
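
For example:

    from scrapy.http import Request
    from scrapy.utils.httpobj import urlparse_cached

    request = Request('https://example.com/page?q=1')
    print(urlparse_cached(request).hostname)  # example.com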


@ -0,0 +1,162 @@
import csv
import logging
import re
from io import StringIO
from scrapy.http import TextResponse, Response
from scrapy.selector import Selector
from scrapy.utils.python import re_rsearch, to_unicode
logger = logging.getLogger(__name__)
def xmliter(obj, nodename):
"""Return a iterator of Selector's over all nodes of a XML document,
given the name of the node to iterate. Useful for parsing XML feeds.
obj can be:
- a Response object
- a unicode string
- a string encoded as utf-8
"""
nodename_patt = re.escape(nodename)
DOCUMENT_HEADER_RE = re.compile(r'<\?xml[^>]+>\s*', re.S)
HEADER_END_RE = re.compile(fr'<\s*/{nodename_patt}\s*>', re.S)
END_TAG_RE = re.compile(r'<\s*/([^\s>]+)\s*>', re.S)
NAMESPACE_RE = re.compile(r'((xmlns[:A-Za-z]*)=[^>\s]+)', re.S)
text = _body_or_str(obj)
document_header = re.search(DOCUMENT_HEADER_RE, text)
document_header = document_header.group().strip() if document_header else ''
header_end_idx = re_rsearch(HEADER_END_RE, text)
header_end = text[header_end_idx[1]:].strip() if header_end_idx else ''
namespaces = {}
if header_end:
for tagname in reversed(re.findall(END_TAG_RE, header_end)):
tag = re.search(fr'<\s*{tagname}.*?xmlns[:=][^>]*>', text[:header_end_idx[1]], re.S)
if tag:
namespaces.update(reversed(x) for x in re.findall(NAMESPACE_RE, tag.group()))
r = re.compile(fr'<{nodename_patt}[\s>].*?</{nodename_patt}>', re.DOTALL)
for match in r.finditer(text):
nodetext = (
document_header
+ match.group().replace(
nodename,
f'{nodename} {" ".join(namespaces.values())}',
1
)
+ header_end
)
yield Selector(text=nodetext, type='xml')
def xmliter_lxml(obj, nodename, namespace=None, prefix='x'):
from lxml import etree
reader = _StreamReader(obj)
tag = f'{{{namespace}}}{nodename}' if namespace else nodename
iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding)
selxpath = '//' + (f'{prefix}:{nodename}' if namespace else nodename)
for _, node in iterable:
nodetext = etree.tostring(node, encoding='unicode')
node.clear()
xs = Selector(text=nodetext, type='xml')
if namespace:
xs.register_namespace(prefix, namespace)
yield xs.xpath(selxpath)[0]
class _StreamReader:
def __init__(self, obj):
self._ptr = 0
if isinstance(obj, Response):
self._text, self.encoding = obj.body, obj.encoding
else:
self._text, self.encoding = obj, 'utf-8'
self._is_unicode = isinstance(self._text, str)
def read(self, n=65535):
self.read = self._read_unicode if self._is_unicode else self._read_string
return self.read(n).lstrip()
def _read_string(self, n=65535):
s, e = self._ptr, self._ptr + n
self._ptr = e
return self._text[s:e]
def _read_unicode(self, n=65535):
s, e = self._ptr, self._ptr + n
self._ptr = e
return self._text[s:e].encode('utf-8')
def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
""" Returns an iterator of dictionaries from the given csv object
obj can be:
- a Response object
- a unicode string
- a string encoded as utf-8
delimiter is the character used to separate fields on the given obj.
headers is an iterable that, when provided, supplies the keys
for the returned dictionaries; if not given, the first row is used.
quotechar is the character used to enclose fields on the given obj.
"""
encoding = obj.encoding if isinstance(obj, TextResponse) else encoding or 'utf-8'
def row_to_unicode(row_):
return [to_unicode(field, encoding) for field in row_]
lines = StringIO(_body_or_str(obj, unicode=True))
kwargs = {}
if delimiter:
kwargs["delimiter"] = delimiter
if quotechar:
kwargs["quotechar"] = quotechar
csv_r = csv.reader(lines, **kwargs)
if not headers:
try:
row = next(csv_r)
except StopIteration:
return
headers = row_to_unicode(row)
for row in csv_r:
row = row_to_unicode(row)
if len(row) != len(headers):
logger.warning("ignoring row %(csvlnum)d (length: %(csvrow)d, "
"should be: %(csvheader)d)",
{'csvlnum': csv_r.line_num, 'csvrow': len(row),
'csvheader': len(headers)})
continue
else:
yield dict(zip(headers, row))
def _body_or_str(obj, unicode=True):
expected_types = (Response, str, bytes)
if not isinstance(obj, expected_types):
expected_types_str = " or ".join(t.__name__ for t in expected_types)
raise TypeError(
f"Object {obj!r} must be {expected_types_str}, not {type(obj).__name__}"
)
if isinstance(obj, Response):
if not unicode:
return obj.body
elif isinstance(obj, TextResponse):
return obj.text
else:
return obj.body.decode('utf-8')
elif isinstance(obj, str):
return obj if unicode else obj.encode('utf-8')
else:
return obj.decode('utf-8') if unicode else obj
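
A quick csviter example over an in-memory document:

    from scrapy.utils.iterators import csviter

    rows = csviter("id,name\r\n1,foo\r\n2,bar\r\n")
    print(list(rows))  # [{'id': '1', 'name': 'foo'}, {'id': '2', 'name': 'bar'}]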


@ -0,0 +1,8 @@
import os
def job_dir(settings):
path = settings['JOBDIR']
if path and not os.path.exists(path):
os.makedirs(path)
return path


@ -0,0 +1,215 @@
import logging
import sys
import warnings
from logging.config import dictConfig
from twisted.python import log as twisted_log
from twisted.python.failure import Failure
import scrapy
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.settings import Settings
from scrapy.utils.versions import scrapy_components_versions
logger = logging.getLogger(__name__)
def failure_to_exc_info(failure):
"""Extract exc_info from Failure instances"""
if isinstance(failure, Failure):
return (failure.type, failure.value, failure.getTracebackObject())
class TopLevelFormatter(logging.Filter):
"""Keep only top level loggers's name (direct children from root) from
records.
This filter will replace Scrapy loggers' names with 'scrapy'. This mimics
the old Scrapy log behaviour and helps shortening long names.
Since it can't be set for just one logger (it won't propagate for its
children), it's going to be set in the root handler, with a parametrized
``loggers`` list where it should act.
"""
def __init__(self, loggers=None):
self.loggers = loggers or []
def filter(self, record):
if any(record.name.startswith(logger + '.') for logger in self.loggers):
record.name = record.name.split('.', 1)[0]
return True
DEFAULT_LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'loggers': {
'scrapy': {
'level': 'DEBUG',
},
'twisted': {
'level': 'ERROR',
},
}
}
def configure_logging(settings=None, install_root_handler=True):
"""
Initialize logging defaults for Scrapy.
:param settings: settings used to create and configure a handler for the
root logger (default: None).
:type settings: dict, :class:`~scrapy.settings.Settings` object or ``None``
:param install_root_handler: whether to install root logging handler
(default: True)
:type install_root_handler: bool
This function does:
- Route warnings and twisted logging through Python standard logging
- Assign DEBUG and ERROR level to Scrapy and Twisted loggers respectively
- Route stdout to log if LOG_STDOUT setting is True
When ``install_root_handler`` is True (default), this function also
creates a handler for the root logger according to given settings
(see :ref:`topics-logging-settings`). You can override default options
using ``settings`` argument. When ``settings`` is empty or None, defaults
are used.
"""
if not sys.warnoptions:
# Route warnings through python logging
logging.captureWarnings(True)
observer = twisted_log.PythonLoggingObserver('twisted')
observer.start()
dictConfig(DEFAULT_LOGGING)
if isinstance(settings, dict) or settings is None:
settings = Settings(settings)
if settings.getbool('LOG_STDOUT'):
sys.stdout = StreamLogger(logging.getLogger('stdout'))
if install_root_handler:
install_scrapy_root_handler(settings)
def install_scrapy_root_handler(settings):
global _scrapy_root_handler
if (_scrapy_root_handler is not None
and _scrapy_root_handler in logging.root.handlers):
logging.root.removeHandler(_scrapy_root_handler)
logging.root.setLevel(logging.NOTSET)
_scrapy_root_handler = _get_handler(settings)
logging.root.addHandler(_scrapy_root_handler)
def get_scrapy_root_handler():
return _scrapy_root_handler
_scrapy_root_handler = None
def _get_handler(settings):
""" Return a log handler object according to settings """
filename = settings.get('LOG_FILE')
if filename:
encoding = settings.get('LOG_ENCODING')
handler = logging.FileHandler(filename, encoding=encoding)
elif settings.getbool('LOG_ENABLED'):
handler = logging.StreamHandler()
else:
handler = logging.NullHandler()
formatter = logging.Formatter(
fmt=settings.get('LOG_FORMAT'),
datefmt=settings.get('LOG_DATEFORMAT')
)
handler.setFormatter(formatter)
handler.setLevel(settings.get('LOG_LEVEL'))
if settings.getbool('LOG_SHORT_NAMES'):
handler.addFilter(TopLevelFormatter(['scrapy']))
return handler
def log_scrapy_info(settings):
logger.info("Scrapy %(version)s started (bot: %(bot)s)",
{'version': scrapy.__version__, 'bot': settings['BOT_NAME']})
versions = [
f"{name} {version}"
for name, version in scrapy_components_versions()
if name != "Scrapy"
]
logger.info("Versions: %(versions)s", {'versions': ", ".join(versions)})
from twisted.internet import reactor
logger.debug("Using reactor: %s.%s", reactor.__module__, reactor.__class__.__name__)
from twisted.internet import asyncioreactor
if isinstance(reactor, asyncioreactor.AsyncioSelectorReactor):
logger.debug(
"Using asyncio event loop: %s.%s",
reactor._asyncioEventloop.__module__,
reactor._asyncioEventloop.__class__.__name__,
)
class StreamLogger:
"""Fake file-like stream object that redirects writes to a logger instance
Taken from:
https://www.electricmonk.nl/log/2011/08/14/redirect-stdout-and-stderr-to-a-logger-in-python/
"""
def __init__(self, logger, log_level=logging.INFO):
self.logger = logger
self.log_level = log_level
self.linebuf = ''
def write(self, buf):
for line in buf.rstrip().splitlines():
self.logger.log(self.log_level, line.rstrip())
def flush(self):
for h in self.logger.handlers:
h.flush()
class LogCounterHandler(logging.Handler):
"""Record log levels count into a crawler stats"""
def __init__(self, crawler, *args, **kwargs):
super().__init__(*args, **kwargs)
self.crawler = crawler
def emit(self, record):
sname = f'log_count/{record.levelname}'
self.crawler.stats.inc_value(sname)
def logformatter_adapter(logkws):
"""
Helper that takes the dictionary output from the methods in LogFormatter
and adapts it into a tuple of positional arguments for logger.log calls,
handling backward compatibility as well.
"""
if not {'level', 'msg', 'args'} <= set(logkws):
warnings.warn('Missing keys in LogFormatter method',
ScrapyDeprecationWarning)
if 'format' in logkws:
warnings.warn('`format` key in LogFormatter methods has been '
'deprecated, use `msg` instead',
ScrapyDeprecationWarning)
level = logkws.get('level', logging.INFO)
message = logkws.get('format', logkws.get('msg'))
# NOTE: This also handles 'args' being an empty dict, that case doesn't
# play well in logger.log calls
args = logkws if not logkws.get('args') else logkws['args']
return (level, message, args)
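
Outside a crawler process, logging can be configured directly; the settings values below are only an example:

    from scrapy.utils.log import configure_logging

    configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s', 'LOG_LEVEL': 'INFO'})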


@ -0,0 +1,14 @@
"""
Transitional module for moving to the w3lib library.
For new code, always import from w3lib.html instead of this module
"""
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
from w3lib.html import * # noqa: F401
warnings.warn("Module `scrapy.utils.markup` is deprecated. "
"Please import from `w3lib.html` instead.",
ScrapyDeprecationWarning, stacklevel=2)


@ -0,0 +1,253 @@
"""Helper functions which don't fit anywhere else"""
import ast
import inspect
import os
import re
import hashlib
import warnings
from collections import deque
from contextlib import contextmanager
from importlib import import_module
from pkgutil import iter_modules
from textwrap import dedent
from w3lib.html import replace_entities
from scrapy.utils.datatypes import LocalWeakReferencedCache
from scrapy.utils.python import flatten, to_unicode
from scrapy.item import _BaseItem
from scrapy.utils.deprecate import ScrapyDeprecationWarning
_ITERABLE_SINGLE_VALUES = dict, _BaseItem, str, bytes
def arg_to_iter(arg):
"""Convert an argument to an iterable. The argument can be a None, single
value, or an iterable.
Exception: if arg is a dict, [arg] will be returned
"""
if arg is None:
return []
elif not isinstance(arg, _ITERABLE_SINGLE_VALUES) and hasattr(arg, '__iter__'):
return arg
else:
return [arg]
def load_object(path):
"""Load an object given its absolute object path, and return it.
The object can be the import path of a class, function, variable or an
instance, e.g. 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware'.
If ``path`` is not a string, but is a callable object, such as a class or
a function, then return it as is.
"""
if not isinstance(path, str):
if callable(path):
return path
else:
raise TypeError("Unexpected argument type, expected string "
"or object, got: %s" % type(path))
try:
dot = path.rindex('.')
except ValueError:
raise ValueError(f"Error loading object '{path}': not a full path")
module, name = path[:dot], path[dot + 1:]
mod = import_module(module)
try:
obj = getattr(mod, name)
except AttributeError:
raise NameError(f"Module '{module}' doesn't define any object named '{name}'")
return obj
def walk_modules(path):
"""Loads a module and all its submodules from the given module path and
returns them. If *any* module raises an exception while importing, that
exception is propagated.
For example: walk_modules('scrapy.utils')
"""
mods = []
mod = import_module(path)
mods.append(mod)
if hasattr(mod, '__path__'):
for _, subpath, ispkg in iter_modules(mod.__path__):
fullpath = path + '.' + subpath
if ispkg:
mods += walk_modules(fullpath)
else:
submod = import_module(fullpath)
mods.append(submod)
return mods
def extract_regex(regex, text, encoding='utf-8'):
"""Extract a list of unicode strings from the given text/encoding using the following policies:
* if the regex contains a named group called "extract" that will be returned
* if the regex contains multiple numbered groups, all those will be returned (flattened)
* if the regex doesn't contain any group, the entire regex match is returned
"""
warnings.warn(
"scrapy.utils.misc.extract_regex has moved to parsel.utils.extract_regex.",
ScrapyDeprecationWarning,
stacklevel=2
)
if isinstance(regex, str):
regex = re.compile(regex, re.UNICODE)
try:
strings = [regex.search(text).group('extract')] # named group
except Exception:
strings = regex.findall(text) # full regex or numbered groups
strings = flatten(strings)
if isinstance(text, str):
return [replace_entities(s, keep=['lt', 'amp']) for s in strings]
else:
return [replace_entities(to_unicode(s, encoding), keep=['lt', 'amp'])
for s in strings]
def md5sum(file):
"""Calculate the md5 checksum of a file-like object without reading its
whole content in memory.
>>> from io import BytesIO
>>> md5sum(BytesIO(b'file content to hash'))
'784406af91dd5a54fbb9c84c2236595a'
"""
m = hashlib.md5()
while True:
d = file.read(8096)
if not d:
break
m.update(d)
return m.hexdigest()
def rel_has_nofollow(rel):
"""Return True if link rel attribute has nofollow type"""
return rel is not None and 'nofollow' in rel.split()
def create_instance(objcls, settings, crawler, *args, **kwargs):
"""Construct a class instance using its ``from_crawler`` or
``from_settings`` constructors, if available.
At least one of ``settings`` and ``crawler`` needs to be different from
``None``. If ``settings`` is ``None``, ``crawler.settings`` will be used.
If ``crawler`` is ``None``, only the ``from_settings`` constructor will be
tried.
``*args`` and ``**kwargs`` are forwarded to the constructors.
Raises ``ValueError`` if both ``settings`` and ``crawler`` are ``None``.
.. versionchanged:: 2.2
Raises ``TypeError`` if the resulting instance is ``None`` (e.g. if an
extension has not been implemented correctly).
"""
if settings is None:
if crawler is None:
raise ValueError("Specify at least one of settings and crawler.")
settings = crawler.settings
if crawler and hasattr(objcls, 'from_crawler'):
instance = objcls.from_crawler(crawler, *args, **kwargs)
method_name = 'from_crawler'
elif hasattr(objcls, 'from_settings'):
instance = objcls.from_settings(settings, *args, **kwargs)
method_name = 'from_settings'
else:
instance = objcls(*args, **kwargs)
method_name = '__new__'
if instance is None:
raise TypeError(f"{objcls.__qualname__}.{method_name} returned None")
return instance
@contextmanager
def set_environ(**kwargs):
"""Temporarily set environment variables inside the context manager and
fully restore previous environment afterwards
"""
original_env = {k: os.environ.get(k) for k in kwargs}
os.environ.update(kwargs)
try:
yield
finally:
for k, v in original_env.items():
if v is None:
del os.environ[k]
else:
os.environ[k] = v
def walk_callable(node):
"""Similar to ``ast.walk``, but walks only function body and skips nested
functions defined within the node.
"""
todo = deque([node])
walked_func_def = False
while todo:
node = todo.popleft()
if isinstance(node, ast.FunctionDef):
if walked_func_def:
continue
walked_func_def = True
todo.extend(ast.iter_child_nodes(node))
yield node
_generator_callbacks_cache = LocalWeakReferencedCache(limit=128)
def is_generator_with_return_value(callable):
"""
Returns True if a callable is a generator function which includes a
'return' statement with a value different than None, False otherwise
"""
if callable in _generator_callbacks_cache:
return _generator_callbacks_cache[callable]
def returns_none(return_node):
value = return_node.value
return value is None or isinstance(value, ast.NameConstant) and value.value is None
if inspect.isgeneratorfunction(callable):
tree = ast.parse(dedent(inspect.getsource(callable)))
for node in walk_callable(tree):
if isinstance(node, ast.Return) and not returns_none(node):
_generator_callbacks_cache[callable] = True
return _generator_callbacks_cache[callable]
_generator_callbacks_cache[callable] = False
return _generator_callbacks_cache[callable]
def warn_on_generator_with_return_value(spider, callable):
"""
Logs a warning if a callable is a generator function and includes
a 'return' statement with a value different than None
"""
if is_generator_with_return_value(callable):
warnings.warn(
f'The "{spider.__class__.__name__}.{callable.__name__}" method is '
'a generator and includes a "return" statement with a value '
'different than None. This could lead to unexpected behaviour. Please see '
'https://docs.python.org/3/reference/simple_stmts.html#the-return-statement '
'for details about the semantics of the "return" statement within generators',
stacklevel=2,
)
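
Two small examples of the loading helpers:

    from scrapy.utils.misc import arg_to_iter, load_object

    print(load_object('scrapy.utils.misc.arg_to_iter') is arg_to_iter)  # True
    print(list(arg_to_iter(None)), list(arg_to_iter('x')), list(arg_to_iter([1, 2])))
    # [] ['x'] [1, 2]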


@ -0,0 +1,15 @@
"""
Transitional module for moving to the w3lib library.
For new code, always import from w3lib.form instead of this module
"""
import warnings
from scrapy.exceptions import ScrapyDeprecationWarning
from w3lib.form import * # noqa: F401
warnings.warn("Module `scrapy.utils.multipart` is deprecated. "
"If you're using `encode_multipart` function, please use "
"`urllib3.filepost.encode_multipart_formdata` instead",
ScrapyDeprecationWarning, stacklevel=2)


@ -0,0 +1,25 @@
import signal
signal_names = {}
for signame in dir(signal):
if signame.startswith('SIG') and not signame.startswith('SIG_'):
signum = getattr(signal, signame)
if isinstance(signum, int):
signal_names[signum] = signame
def install_shutdown_handlers(function, override_sigint=True):
"""Install the given function as a signal handler for all common shutdown
signals (such as SIGINT, SIGTERM, etc). If override_sigint is ``False`` the
SIGINT handler won't be installed if there is already a handler in place
(e.g. Pdb)
"""
from twisted.internet import reactor
reactor._handleSignals()
signal.signal(signal.SIGTERM, function)
if signal.getsignal(signal.SIGINT) == signal.default_int_handler or override_sigint:
signal.signal(signal.SIGINT, function)
# Catch Ctrl-Break in windows
if hasattr(signal, 'SIGBREAK'):
signal.signal(signal.SIGBREAK, function)


@ -0,0 +1,98 @@
import os
import pickle
import warnings
from importlib import import_module
from os.path import join, dirname, abspath, isabs, exists
from scrapy.utils.conf import closest_scrapy_cfg, get_config, init_env
from scrapy.settings import Settings
from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
ENVVAR = 'SCRAPY_SETTINGS_MODULE'
DATADIR_CFG_SECTION = 'datadir'
def inside_project():
scrapy_module = os.environ.get('SCRAPY_SETTINGS_MODULE')
if scrapy_module is not None:
try:
import_module(scrapy_module)
except ImportError as exc:
warnings.warn(f"Cannot import scrapy settings module {scrapy_module}: {exc}")
else:
return True
return bool(closest_scrapy_cfg())
def project_data_dir(project='default'):
"""Return the current project data dir, creating it if it doesn't exist"""
if not inside_project():
raise NotConfigured("Not inside a project")
cfg = get_config()
if cfg.has_option(DATADIR_CFG_SECTION, project):
d = cfg.get(DATADIR_CFG_SECTION, project)
else:
scrapy_cfg = closest_scrapy_cfg()
if not scrapy_cfg:
raise NotConfigured("Unable to find scrapy.cfg file to infer project data dir")
d = abspath(join(dirname(scrapy_cfg), '.scrapy'))
if not exists(d):
os.makedirs(d)
return d
def data_path(path, createdir=False):
"""
Return the given path joined with the .scrapy data directory.
If given an absolute path, return it unmodified.
"""
if not isabs(path):
if inside_project():
path = join(project_data_dir(), path)
else:
path = join('.scrapy', path)
if createdir and not exists(path):
os.makedirs(path)
return path
def get_project_settings():
if ENVVAR not in os.environ:
project = os.environ.get('SCRAPY_PROJECT', 'default')
init_env(project)
settings = Settings()
settings_module_path = os.environ.get(ENVVAR)
if settings_module_path:
settings.setmodule(settings_module_path, priority='project')
pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
if pickled_settings:
warnings.warn("Use of environment variable "
"'SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE' "
"is deprecated.", ScrapyDeprecationWarning)
settings.setdict(pickle.loads(pickled_settings), priority='project')
scrapy_envvars = {k[7:]: v for k, v in os.environ.items() if
k.startswith('SCRAPY_')}
valid_envvars = {
'CHECK',
'PICKLED_SETTINGS_TO_OVERRIDE',
'PROJECT',
'PYTHON_SHELL',
'SETTINGS_MODULE',
}
setting_envvars = {k for k in scrapy_envvars if k not in valid_envvars}
if setting_envvars:
setting_envvar_list = ', '.join(sorted(setting_envvars))
warnings.warn(
'Use of environment variables prefixed with SCRAPY_ to override '
'settings is deprecated. The following environment variables are '
f'currently defined: {setting_envvar_list}',
ScrapyDeprecationWarning
)
settings.setdict(scrapy_envvars, priority='project')
return settings
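
Typical use from a standalone script:

    from scrapy.utils.project import get_project_settings

    settings = get_project_settings()
    print(settings.get('BOT_NAME'))  # the project's bot name, or the default outside a project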


@ -0,0 +1,10 @@
"""
Helpers using Python 3.6+ syntax (ignore SyntaxError on import).
"""
async def collect_asyncgen(result):
results = []
async for x in result:
results.append(x)
return results
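
A small illustration, assuming this helper is importable as scrapy.utils.py36.collect_asyncgen:

    import asyncio
    from scrapy.utils.py36 import collect_asyncgen

    async def numbers():
        yield 1
        yield 2

    print(asyncio.run(collect_asyncgen(numbers())))  # [1, 2]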


@ -0,0 +1,357 @@
"""
This module contains essential stuff that should've come with Python itself ;)
"""
import errno
import gc
import inspect
import re
import sys
import warnings
import weakref
from functools import partial, wraps
from itertools import chain
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.decorators import deprecated
def flatten(x):
"""flatten(sequence) -> list
Returns a single, flat list which contains all elements retrieved
from the sequence and all recursively contained sub-sequences
(iterables).
Examples:
>>> flatten([1, 2, [3,4], (5,6)])
[1, 2, 3, 4, 5, 6]
>>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, (8,9,10)])
[1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]
>>> flatten(["foo", "bar"])
['foo', 'bar']
>>> flatten(["foo", ["baz", 42], "bar"])
['foo', 'baz', 42, 'bar']
"""
return list(iflatten(x))
def iflatten(x):
"""iflatten(sequence) -> iterator
Similar to ``flatten()``, but returns an iterator instead"""
for el in x:
if is_listlike(el):
for el_ in iflatten(el):
yield el_
else:
yield el
def is_listlike(x):
"""
>>> is_listlike("foo")
False
>>> is_listlike(5)
False
>>> is_listlike(b"foo")
False
>>> is_listlike([b"foo"])
True
>>> is_listlike((b"foo",))
True
>>> is_listlike({})
True
>>> is_listlike(set())
True
>>> is_listlike((x for x in range(3)))
True
>>> is_listlike(range(5))
True
"""
return hasattr(x, "__iter__") and not isinstance(x, (str, bytes))
def unique(list_, key=lambda x: x):
"""efficient function to uniquify a list preserving item order"""
seen = set()
result = []
for item in list_:
seenkey = key(item)
if seenkey in seen:
continue
seen.add(seenkey)
result.append(item)
return result
def to_unicode(text, encoding=None, errors='strict'):
"""Return the unicode representation of a bytes object ``text``. If
``text`` is already an unicode object, return it as-is."""
if isinstance(text, str):
return text
if not isinstance(text, (bytes, str)):
raise TypeError('to_unicode must receive a bytes or str '
f'object, got {type(text).__name__}')
if encoding is None:
encoding = 'utf-8'
return text.decode(encoding, errors)
def to_bytes(text, encoding=None, errors='strict'):
"""Return the binary representation of ``text``. If ``text``
is already a bytes object, return it as-is."""
if isinstance(text, bytes):
return text
if not isinstance(text, str):
raise TypeError('to_bytes must receive a str or bytes '
f'object, got {type(text).__name__}')
if encoding is None:
encoding = 'utf-8'
return text.encode(encoding, errors)
@deprecated('to_unicode')
def to_native_str(text, encoding=None, errors='strict'):
""" Return str representation of ``text``. """
return to_unicode(text, encoding, errors)
def re_rsearch(pattern, text, chunk_size=1024):
"""
This function does a reverse search in a text using the regular expression
given in the 'pattern' argument.
Since the re module does not provide this functionality, the expression is
searched for in chunks of text extracted from the end (for the sake of efficiency).
At first, a chunk of 'chunk_size' kilobytes is extracted from the end, and searched for
the pattern. If the pattern is not found, another chunk is extracted, and another
search is performed.
This process continues until a match is found, or until the whole file is read.
If the pattern is not found, None is returned; otherwise, a tuple is returned
containing the start and end positions of the match relative to the entire text.
"""
def _chunk_iter():
offset = len(text)
while True:
offset -= (chunk_size * 1024)
if offset <= 0:
break
yield (text[offset:], offset)
yield (text, 0)
if isinstance(pattern, str):
pattern = re.compile(pattern)
for chunk, offset in _chunk_iter():
matches = [match for match in pattern.finditer(chunk)]
if matches:
start, end = matches[-1].span()
return offset + start, offset + end
return None
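# Usage sketch (assuming this module is importable as scrapy.utils.python):
# re_rsearch() returns the span of the *last* match, searching backwards in
# chunks so large texts don't need to be scanned from the start.
from scrapy.utils.python import re_rsearch

text = "<item>1</item><item>2</item><item>3</item>"
span = re_rsearch(r"<item>\d+</item>", text)
assert span is not None
start, end = span
assert text[start:end] == "<item>3</item>"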
def memoizemethod_noargs(method):
"""Decorator to cache the result of a method (without arguments) using a
weak reference to its object
"""
cache = weakref.WeakKeyDictionary()
@wraps(method)
def new_method(self, *args, **kwargs):
if self not in cache:
cache[self] = method(self, *args, **kwargs)
return cache[self]
return new_method
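# Usage sketch (assuming this module is importable as scrapy.utils.python):
# the decorated method runs once per instance; later calls return the cached
# value, and the cache entry disappears with the instance (WeakKeyDictionary).
from scrapy.utils.python import memoizemethod_noargs

class Page:
    def __init__(self, body):
        self.body = body

    @memoizemethod_noargs
    def word_count(self):
        print("computing...")         # printed only once per Page instance
        return len(self.body.split())

page = Page("a small example body")
assert page.word_count() == 4         # computed
assert page.word_count() == 4         # served from the cache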
_BINARYCHARS = {to_bytes(chr(i)) for i in range(32)} - {b"\0", b"\t", b"\n", b"\r"}
_BINARYCHARS |= {ord(ch) for ch in _BINARYCHARS}
def binary_is_text(data):
""" Returns ``True`` if the given ``data`` argument (a ``bytes`` object)
does not contain unprintable control characters.
"""
if not isinstance(data, bytes):
raise TypeError(f"data must be bytes, got '{type(data).__name__}'")
return all(c not in _BINARYCHARS for c in data)
def _getargspec_py23(func):
"""_getargspec_py23(function) -> named tuple ArgSpec(args, varargs, keywords,
defaults)
Behaves like inspect.getargspec() did in Python 2, but uses
inspect.getfullargspec() behind the scenes in Python 3 to avoid the
DeprecationWarning.
>>> def f(a, b=2, *ar, **kw):
... pass
>>> _getargspec_py23(f)
ArgSpec(args=['a', 'b'], varargs='ar', keywords='kw', defaults=(2,))
"""
return inspect.ArgSpec(*inspect.getfullargspec(func)[:4])
def get_func_args(func, stripself=False):
"""Return the argument name list of a callable"""
if inspect.isfunction(func):
spec = inspect.getfullargspec(func)
func_args = spec.args + spec.kwonlyargs
elif inspect.isclass(func):
return get_func_args(func.__init__, True)
elif inspect.ismethod(func):
return get_func_args(func.__func__, True)
elif inspect.ismethoddescriptor(func):
return []
elif isinstance(func, partial):
return [x for x in get_func_args(func.func)[len(func.args):]
if not (func.keywords and x in func.keywords)]
elif hasattr(func, '__call__'):
if inspect.isroutine(func):
return []
elif getattr(func, '__name__', None) == '__call__':
return []
else:
return get_func_args(func.__call__, True)
else:
raise TypeError(f'{type(func)} is not callable')
if stripself:
func_args.pop(0)
return func_args
def get_spec(func):
"""Returns (args, kwargs) tuple for a function
>>> import re
>>> get_spec(re.match)
(['pattern', 'string'], {'flags': 0})
>>> class Test:
... def __call__(self, val):
... pass
... def method(self, val, flags=0):
... pass
>>> get_spec(Test)
(['self', 'val'], {})
>>> get_spec(Test.method)
(['self', 'val'], {'flags': 0})
>>> get_spec(Test().method)
(['self', 'val'], {'flags': 0})
"""
if inspect.isfunction(func) or inspect.ismethod(func):
spec = _getargspec_py23(func)
elif hasattr(func, '__call__'):
spec = _getargspec_py23(func.__call__)
else:
raise TypeError(f'{type(func)} is not callable')
defaults = spec.defaults or []
firstdefault = len(spec.args) - len(defaults)
args = spec.args[:firstdefault]
kwargs = dict(zip(spec.args[firstdefault:], defaults))
return args, kwargs
def equal_attributes(obj1, obj2, attributes):
"""Compare two objects attributes"""
# not attributes given return False by default
if not attributes:
return False
temp1, temp2 = object(), object()
for attr in attributes:
# support callables like itemgetter
if callable(attr):
if attr(obj1) != attr(obj2):
return False
elif getattr(obj1, attr, temp1) != getattr(obj2, attr, temp2):
return False
# all attributes equal
return True
class WeakKeyCache:
def __init__(self, default_factory):
warnings.warn("The WeakKeyCache class is deprecated", category=ScrapyDeprecationWarning, stacklevel=2)
self.default_factory = default_factory
self._weakdict = weakref.WeakKeyDictionary()
def __getitem__(self, key):
if key not in self._weakdict:
self._weakdict[key] = self.default_factory(key)
return self._weakdict[key]
@deprecated
def retry_on_eintr(function, *args, **kw):
"""Run a function and retry it while getting EINTR errors"""
while True:
try:
return function(*args, **kw)
except IOError as e:
if e.errno != errno.EINTR:
raise
def without_none_values(iterable):
"""Return a copy of ``iterable`` with all ``None`` entries removed.
If ``iterable`` is a mapping, return a dictionary where all pairs that have
value ``None`` have been removed.
"""
try:
return {k: v for k, v in iterable.items() if v is not None}
except AttributeError:
return type(iterable)((v for v in iterable if v is not None))
def global_object_name(obj):
"""
Return full name of a global object.
>>> from scrapy import Request
>>> global_object_name(Request)
'scrapy.http.request.Request'
"""
return f"{obj.__module__}.{obj.__name__}"
if hasattr(sys, "pypy_version_info"):
def garbage_collect():
# Collecting weakreferences can take two collections on PyPy.
gc.collect()
gc.collect()
else:
def garbage_collect():
gc.collect()
class MutableChain:
"""
Thin wrapper around itertools.chain, allowing iterables to be added "in-place"
"""
def __init__(self, *args):
self.data = chain.from_iterable(args)
def extend(self, *iterables):
self.data = chain(self.data, chain.from_iterable(iterables))
def __iter__(self):
return self
def __next__(self):
return next(self.data)
@deprecated("scrapy.utils.python.MutableChain.__next__")
def next(self):
return self.__next__()
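# Usage sketch (assuming this module is importable as scrapy.utils.python):
# MutableChain behaves like itertools.chain but accepts more iterables after
# construction, so results can keep being appended while iterating.
from scrapy.utils.python import MutableChain

results = MutableChain([1, 2], (3,))
results.extend([4, 5], iter([6]))
assert list(results) == [1, 2, 3, 4, 5, 6]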

View file

@ -0,0 +1,90 @@
import asyncio
from contextlib import suppress
from twisted.internet import asyncioreactor, error
from scrapy.utils.misc import load_object
def listen_tcp(portrange, host, factory):
"""Like reactor.listenTCP but tries different ports in a range."""
from twisted.internet import reactor
if len(portrange) > 2:
raise ValueError(f"invalid portrange: {portrange}")
if not portrange:
return reactor.listenTCP(0, factory, interface=host)
if not hasattr(portrange, '__iter__'):
return reactor.listenTCP(portrange, factory, interface=host)
if len(portrange) == 1:
return reactor.listenTCP(portrange[0], factory, interface=host)
for x in range(portrange[0], portrange[1] + 1):
try:
return reactor.listenTCP(x, factory, interface=host)
except error.CannotListenError:
if x == portrange[1]:
raise
class CallLaterOnce:
"""Schedule a function to be called in the next reactor loop, but only if
it hasn't been already scheduled since the last time it ran.
"""
def __init__(self, func, *a, **kw):
self._func = func
self._a = a
self._kw = kw
self._call = None
def schedule(self, delay=0):
from twisted.internet import reactor
if self._call is None:
self._call = reactor.callLater(delay, self)
def cancel(self):
if self._call:
self._call.cancel()
def __call__(self):
self._call = None
return self._func(*self._a, **self._kw)
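# Usage sketch (assuming this module is importable as scrapy.utils.reactor):
# no matter how many times schedule() is called before the reactor spins,
# the wrapped function runs only once per scheduling round.
from twisted.internet import reactor

from scrapy.utils.reactor import CallLaterOnce

def work():
    print("ran once")

call = CallLaterOnce(work)
call.schedule()
call.schedule()          # coalesced with the first schedule()
call.schedule()
reactor.callLater(0.1, reactor.stop)
reactor.run()            # prints "ran once" a single time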
def install_reactor(reactor_path, event_loop_path=None):
"""Installs the :mod:`~twisted.internet.reactor` with the specified
import path. Also installs the asyncio event loop with the specified import
path if the asyncio reactor is enabled"""
reactor_class = load_object(reactor_path)
if reactor_class is asyncioreactor.AsyncioSelectorReactor:
with suppress(error.ReactorAlreadyInstalledError):
if event_loop_path is not None:
event_loop_class = load_object(event_loop_path)
event_loop = event_loop_class()
asyncio.set_event_loop(event_loop)
else:
event_loop = asyncio.get_event_loop()
asyncioreactor.install(eventloop=event_loop)
else:
*module, _ = reactor_path.split(".")
installer_path = module + ["install"]
installer = load_object(".".join(installer_path))
with suppress(error.ReactorAlreadyInstalledError):
installer()
def verify_installed_reactor(reactor_path):
"""Raises :exc:`Exception` if the installed
:mod:`~twisted.internet.reactor` does not match the specified import
path."""
from twisted.internet import reactor
reactor_class = load_object(reactor_path)
if not isinstance(reactor, reactor_class):
msg = ("The installed reactor "
f"({reactor.__module__}.{reactor.__class__.__name__}) does not "
f"match the requested one ({reactor_path})")
raise Exception(msg)
def is_asyncio_reactor_installed():
from twisted.internet import reactor
return isinstance(reactor, asyncioreactor.AsyncioSelectorReactor)
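# Usage sketch (assuming this module is importable as scrapy.utils.reactor):
# installing the asyncio reactor must happen before twisted.internet.reactor
# is imported anywhere else in the process.
from scrapy.utils.reactor import (
    install_reactor,
    is_asyncio_reactor_installed,
    verify_installed_reactor,
)

reactor_path = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
install_reactor(reactor_path)
verify_installed_reactor(reactor_path)   # raises if a different reactor is installed
assert is_asyncio_reactor_installed()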

View file

@ -0,0 +1,95 @@
"""
Helper functions for serializing (and deserializing) requests.
"""
import inspect
from scrapy.http import Request
from scrapy.utils.python import to_unicode
from scrapy.utils.misc import load_object
def request_to_dict(request, spider=None):
"""Convert Request object to a dict.
If a spider is given, it will try to find out the name of the spider method
used in the callback and store that as the callback.
"""
cb = request.callback
if callable(cb):
cb = _find_method(spider, cb)
eb = request.errback
if callable(eb):
eb = _find_method(spider, eb)
d = {
'url': to_unicode(request.url), # urls should be safe (safe_string_url)
'callback': cb,
'errback': eb,
'method': request.method,
'headers': dict(request.headers),
'body': request.body,
'cookies': request.cookies,
'meta': request.meta,
'_encoding': request._encoding,
'priority': request.priority,
'dont_filter': request.dont_filter,
'flags': request.flags,
'cb_kwargs': request.cb_kwargs,
}
if type(request) is not Request:
d['_class'] = request.__module__ + '.' + request.__class__.__name__
return d
def request_from_dict(d, spider=None):
"""Create Request object from a dict.
If a spider is given, it will try to resolve the callbacks by looking at the
spider for methods with the same name.
"""
cb = d['callback']
if cb and spider:
cb = _get_method(spider, cb)
eb = d['errback']
if eb and spider:
eb = _get_method(spider, eb)
request_cls = load_object(d['_class']) if '_class' in d else Request
return request_cls(
url=to_unicode(d['url']),
callback=cb,
errback=eb,
method=d['method'],
headers=d['headers'],
body=d['body'],
cookies=d['cookies'],
meta=d['meta'],
encoding=d['_encoding'],
priority=d['priority'],
dont_filter=d['dont_filter'],
flags=d.get('flags'),
cb_kwargs=d.get('cb_kwargs'),
)
def _find_method(obj, func):
# Only instance methods contain ``__func__``
if obj and hasattr(func, '__func__'):
members = inspect.getmembers(obj, predicate=inspect.ismethod)
for name, obj_func in members:
# We need to use __func__ to access the original
# function object because instance method objects
# are generated each time attribute is retrieved from
# instance.
#
# Reference: The standard type hierarchy
# https://docs.python.org/3/reference/datamodel.html
if obj_func.__func__ is func.__func__:
return name
raise ValueError(f"Function {func} is not an instance method in: {obj}")
def _get_method(obj, name):
name = str(name)
try:
return getattr(obj, name)
except AttributeError:
raise ValueError(f"Method {name!r} not found in: {obj}")

View file

@ -0,0 +1,100 @@
"""
This module provides some useful functions for working with
scrapy.http.Request objects
"""
import hashlib
import weakref
from urllib.parse import urlunparse
from w3lib.http import basic_auth_header
from w3lib.url import canonicalize_url
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_bytes, to_unicode
_fingerprint_cache = weakref.WeakKeyDictionary()
def request_fingerprint(request, include_headers=None, keep_fragments=False):
"""
Return the request fingerprint.
The request fingerprint is a hash that uniquely identifies the resource the
request points to. For example, take the following two urls:
http://www.example.com/query?id=111&cat=222
http://www.example.com/query?cat=222&id=111
Even though those are two different URLs, both point to the same resource
and are equivalent (i.e. they should return the same response).
Another example is cookies used to store session ids. Suppose the
following page is only accessible to authenticated users:
http://www.example.com/members/offers.html
Lots of sites use a cookie to store the session id, which adds a random
component to the HTTP Request and thus should be ignored when calculating
the fingerprint.
For this reason, request headers are ignored by default when calculating
the fingerprint. If you want to include specific headers use the
include_headers argument, which is a list of Request headers to include.
Also, servers usually ignore fragments in urls when handling requests,
so they are also ignored by default when calculating the fingerprint.
If you want to include them, set the keep_fragments argument to True
(for instance when handling requests with a headless browser).
"""
if include_headers:
include_headers = tuple(to_bytes(h.lower()) for h in sorted(include_headers))
cache = _fingerprint_cache.setdefault(request, {})
cache_key = (include_headers, keep_fragments)
if cache_key not in cache:
fp = hashlib.sha1()
fp.update(to_bytes(request.method))
fp.update(to_bytes(canonicalize_url(request.url, keep_fragments=keep_fragments)))
fp.update(request.body or b'')
if include_headers:
for hdr in include_headers:
if hdr in request.headers:
fp.update(hdr)
for v in request.headers.getlist(hdr):
fp.update(v)
cache[cache_key] = fp.hexdigest()
return cache[cache_key]
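# Usage sketch (assuming this module is importable as scrapy.utils.request):
# query-string order does not change the fingerprint, and headers only count
# when explicitly listed in include_headers.
from scrapy import Request

from scrapy.utils.request import request_fingerprint

r1 = Request("http://www.example.com/query?id=111&cat=222")
r2 = Request("http://www.example.com/query?cat=222&id=111")
assert request_fingerprint(r1) == request_fingerprint(r2)

r3 = Request("http://www.example.com/query?id=111&cat=222",
             headers={"X-Session": "abc"})
assert request_fingerprint(r1) == request_fingerprint(r3)
assert (request_fingerprint(r1, include_headers=["X-Session"])
        != request_fingerprint(r3, include_headers=["X-Session"]))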
def request_authenticate(request, username, password):
"""Autenticate the given request (in place) using the HTTP basic access
authentication mechanism (RFC 2617) and the given username and password
"""
request.headers['Authorization'] = basic_auth_header(username, password)
def request_httprepr(request):
"""Return the raw HTTP representation (as bytes) of the given request.
This is provided only for reference since it's not the actual stream of
bytes that will be sent when performing the request (that's controlled
by Twisted).
"""
parsed = urlparse_cached(request)
path = urlunparse(('', '', parsed.path or '/', parsed.params, parsed.query, ''))
s = to_bytes(request.method) + b" " + to_bytes(path) + b" HTTP/1.1\r\n"
s += b"Host: " + to_bytes(parsed.hostname or b'') + b"\r\n"
if request.headers:
s += request.headers.to_string() + b"\r\n"
s += b"\r\n"
s += request.body
return s
def referer_str(request):
""" Return Referer HTTP header suitable for logging. """
referrer = request.headers.get('Referer')
if referrer is None:
return referrer
return to_unicode(referrer, errors='replace')

View file

@ -0,0 +1,83 @@
"""
This module provides some useful functions for working with
scrapy.http.Response objects
"""
import os
import weakref
import webbrowser
import tempfile
from twisted.web import http
from scrapy.utils.python import to_bytes, to_unicode
from w3lib import html
_baseurl_cache = weakref.WeakKeyDictionary()
def get_base_url(response):
"""Return the base url of the given response, joined with the response url"""
if response not in _baseurl_cache:
text = response.text[0:4096]
_baseurl_cache[response] = html.get_base_url(text, response.url, response.encoding)
return _baseurl_cache[response]
_metaref_cache = weakref.WeakKeyDictionary()
def get_meta_refresh(response, ignore_tags=('script', 'noscript')):
"""Parse the http-equiv refrsh parameter from the given response"""
if response not in _metaref_cache:
text = response.text[0:4096]
_metaref_cache[response] = html.get_meta_refresh(
text, response.url, response.encoding, ignore_tags=ignore_tags)
return _metaref_cache[response]
def response_status_message(status):
"""Return status code plus status text descriptive message
"""
message = http.RESPONSES.get(int(status), "Unknown Status")
return f'{status} {to_unicode(message)}'
def response_httprepr(response):
"""Return raw HTTP representation (as bytes) of the given response. This
is provided only for reference, since it's not the exact stream of bytes
that was received (that's not exposed by Twisted).
"""
values = [
b"HTTP/1.1 ",
to_bytes(str(response.status)),
b" ",
to_bytes(http.RESPONSES.get(response.status, b'')),
b"\r\n",
]
if response.headers:
values.extend([response.headers.to_string(), b"\r\n"])
values.extend([b"\r\n", response.body])
return b"".join(values)
def open_in_browser(response, _openfunc=webbrowser.open):
"""Open the given response in a local web browser, populating the <base>
tag for external links to work
"""
from scrapy.http import HtmlResponse, TextResponse
# XXX: this implementation is a bit dirty and could be improved
body = response.body
if isinstance(response, HtmlResponse):
if b'<base' not in body:
repl = f'<head><base href="{response.url}">'
body = body.replace(b'<head>', to_bytes(repl))
ext = '.html'
elif isinstance(response, TextResponse):
ext = '.txt'
else:
raise TypeError("Unsupported response type: "
f"{response.__class__.__name__}")
fd, fname = tempfile.mkstemp(ext)
os.write(fd, body)
os.close(fd)
return _openfunc(f"file://{fname}")

View file

@ -0,0 +1,40 @@
import json
import datetime
import decimal
from itemadapter import is_item, ItemAdapter
from twisted.internet import defer
from scrapy.http import Request, Response
class ScrapyJSONEncoder(json.JSONEncoder):
DATE_FORMAT = "%Y-%m-%d"
TIME_FORMAT = "%H:%M:%S"
def default(self, o):
if isinstance(o, set):
return list(o)
elif isinstance(o, datetime.datetime):
return o.strftime(f"{self.DATE_FORMAT} {self.TIME_FORMAT}")
elif isinstance(o, datetime.date):
return o.strftime(self.DATE_FORMAT)
elif isinstance(o, datetime.time):
return o.strftime(self.TIME_FORMAT)
elif isinstance(o, decimal.Decimal):
return str(o)
elif isinstance(o, defer.Deferred):
return str(o)
elif is_item(o):
return ItemAdapter(o).asdict()
elif isinstance(o, Request):
return f"<{type(o).__name__} {o.method} {o.url}>"
elif isinstance(o, Response):
return f"<{type(o).__name__} {o.status} {o.url}>"
else:
return super().default(o)
class ScrapyJSONDecoder(json.JSONDecoder):
pass
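# Usage sketch (assuming this module is importable as scrapy.utils.serialize):
# the encoder handles the extra types listed above on top of the stdlib ones.
import datetime
import json
from decimal import Decimal

from scrapy.utils.serialize import ScrapyJSONEncoder

item = {
    "title": "example",
    "tags": {"a", "b"},                                   # sets become lists
    "price": Decimal("9.99"),                             # Decimals become strings
    "crawled": datetime.datetime(2022, 1, 2, 21, 50, 48),
}
print(json.dumps(item, cls=ScrapyJSONEncoder, sort_keys=True))
# e.g. {"crawled": "2022-01-02 21:50:48", "price": "9.99", "tags": ["a", "b"], ...}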

View file

@ -0,0 +1,80 @@
"""Helper functions for working with signals"""
import logging
from twisted.internet.defer import DeferredList, Deferred
from twisted.python.failure import Failure
from pydispatch.dispatcher import Anonymous, Any, disconnect, getAllReceivers, liveReceivers
from pydispatch.robustapply import robustApply
from scrapy.exceptions import StopDownload
from scrapy.utils.defer import maybeDeferred_coro
from scrapy.utils.log import failure_to_exc_info
logger = logging.getLogger(__name__)
class _IgnoredException(Exception):
pass
def send_catch_log(signal=Any, sender=Anonymous, *arguments, **named):
"""Like pydispatcher.robust.sendRobust but it also logs errors and returns
Failures instead of exceptions.
"""
dont_log = (named.pop('dont_log', _IgnoredException), StopDownload)
spider = named.get('spider', None)
responses = []
for receiver in liveReceivers(getAllReceivers(sender, signal)):
try:
response = robustApply(receiver, signal=signal, sender=sender, *arguments, **named)
if isinstance(response, Deferred):
logger.error("Cannot return deferreds from signal handler: %(receiver)s",
{'receiver': receiver}, extra={'spider': spider})
except dont_log:
result = Failure()
except Exception:
result = Failure()
logger.error("Error caught on signal handler: %(receiver)s",
{'receiver': receiver},
exc_info=True, extra={'spider': spider})
else:
result = response
responses.append((receiver, result))
return responses
def send_catch_log_deferred(signal=Any, sender=Anonymous, *arguments, **named):
"""Like send_catch_log but supports returning deferreds on signal handlers.
Returns a deferred that gets fired once all signal handlers deferreds were
fired.
"""
def logerror(failure, recv):
if dont_log is None or not isinstance(failure.value, dont_log):
logger.error("Error caught on signal handler: %(receiver)s",
{'receiver': recv},
exc_info=failure_to_exc_info(failure),
extra={'spider': spider})
return failure
dont_log = named.pop('dont_log', None)
spider = named.get('spider', None)
dfds = []
for receiver in liveReceivers(getAllReceivers(sender, signal)):
d = maybeDeferred_coro(robustApply, receiver, signal=signal, sender=sender, *arguments, **named)
d.addErrback(logerror, receiver)
d.addBoth(lambda result: (receiver, result))
dfds.append(d)
d = DeferredList(dfds)
d.addCallback(lambda out: [x[1] for x in out])
return d
def disconnect_all(signal=Any, sender=Any):
"""Disconnect all signal handlers. Useful for cleaning up after running
tests
"""
for receiver in liveReceivers(getAllReceivers(sender, signal)):
disconnect(receiver, signal=signal, sender=sender)
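# Usage sketch (assuming this module is importable as scrapy.utils.signal):
# any hashable object can serve as a signal; failing receivers are logged and
# returned as Failure instances instead of raising.
from pydispatch import dispatcher

from scrapy.utils.signal import send_catch_log

item_scraped = object()          # hypothetical ad-hoc signal

def ok_handler(item, **kwargs):
    return f"handled {item!r}"

def broken_handler(item, **kwargs):
    raise ValueError("boom")     # logged, returned as a Failure

dispatcher.connect(ok_handler, signal=item_scraped)
dispatcher.connect(broken_handler, signal=item_scraped)

for receiver, result in send_catch_log(signal=item_scraped, item={"name": "x"}):
    print(receiver.__name__, result)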

View file

@ -0,0 +1,47 @@
"""
Module for processing Sitemaps.
Note: The main purpose of this module is to provide support for the
SitemapSpider; its API is subject to change without notice.
"""
from urllib.parse import urljoin
import lxml.etree
class Sitemap:
"""Class to parse Sitemap (type=urlset) and Sitemap Index
(type=sitemapindex) files"""
def __init__(self, xmltext):
xmlp = lxml.etree.XMLParser(recover=True, remove_comments=True, resolve_entities=False)
self._root = lxml.etree.fromstring(xmltext, parser=xmlp)
rt = self._root.tag
self.type = self._root.tag.split('}', 1)[1] if '}' in rt else rt
def __iter__(self):
for elem in self._root.getchildren():
d = {}
for el in elem.getchildren():
tag = el.tag
name = tag.split('}', 1)[1] if '}' in tag else tag
if name == 'link':
if 'href' in el.attrib:
d.setdefault('alternate', []).append(el.get('href'))
else:
d[name] = el.text.strip() if el.text else ''
if 'loc' in d:
yield d
def sitemap_urls_from_robots(robots_text, base_url=None):
"""Return an iterator over all sitemap urls contained in the given
robots.txt file
"""
for line in robots_text.splitlines():
if line.lstrip().lower().startswith('sitemap:'):
url = line.split(':', 1)[1].strip()
yield urljoin(base_url, url)
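# Usage sketch: parsing a minimal urlset document and a robots.txt line
# (assuming this module is importable as scrapy.utils.sitemap).
from scrapy.utils.sitemap import Sitemap, sitemap_urls_from_robots

xml = b"""<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url><loc>http://example.com/</loc><lastmod>2022-01-02</lastmod></url>
  <url><loc>http://example.com/about</loc></url>
</urlset>"""

sitemap = Sitemap(xml)
assert sitemap.type == "urlset"
for entry in sitemap:
    print(entry)   # e.g. {'loc': 'http://example.com/', 'lastmod': '2022-01-02'}

robots = "User-agent: *\nSitemap: http://example.com/sitemap.xml"
assert list(sitemap_urls_from_robots(robots, base_url="http://example.com")) == [
    "http://example.com/sitemap.xml"]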

View file

@ -0,0 +1,74 @@
import inspect
import logging
from scrapy.spiders import Spider
from scrapy.utils.defer import deferred_from_coro
from scrapy.utils.misc import arg_to_iter
try:
from scrapy.utils.py36 import collect_asyncgen
except SyntaxError:
collect_asyncgen = None
logger = logging.getLogger(__name__)
def iterate_spider_output(result):
if collect_asyncgen and hasattr(inspect, 'isasyncgen') and inspect.isasyncgen(result):
d = deferred_from_coro(collect_asyncgen(result))
d.addCallback(iterate_spider_output)
return d
elif inspect.iscoroutine(result):
d = deferred_from_coro(result)
d.addCallback(iterate_spider_output)
return d
return arg_to_iter(result)
def iter_spider_classes(module):
"""Return an iterator over all spider classes defined in the given module
that can be instantiated (i.e. which have name)
"""
# this needs to be imported here until we get rid of the spider manager
# singleton in scrapy.spider.spiders
from scrapy.spiders import Spider
for obj in vars(module).values():
if (
inspect.isclass(obj)
and issubclass(obj, Spider)
and obj.__module__ == module.__name__
and getattr(obj, 'name', None)
):
yield obj
def spidercls_for_request(spider_loader, request, default_spidercls=None,
log_none=False, log_multiple=False):
"""Return a spider class that handles the given Request.
This will look for the spiders that can handle the given request (using
the spider loader) and return a Spider class if (and only if) there is
only one Spider able to handle the Request.
If multiple spiders (or no spider) are found, it will return the
default_spidercls passed. It can optionally log if multiple or no spiders
are found.
"""
snames = spider_loader.find_by_request(request)
if len(snames) == 1:
return spider_loader.load(snames[0])
if len(snames) > 1 and log_multiple:
logger.error('More than one spider can handle: %(request)s - %(snames)s',
{'request': request, 'snames': ', '.join(snames)})
if len(snames) == 0 and log_none:
logger.error('Unable to find spider that handles: %(request)s',
{'request': request})
return default_spidercls
class DefaultSpider(Spider):
name = 'default'

View file

@ -0,0 +1,61 @@
import OpenSSL
import OpenSSL._util as pyOpenSSLutil
from scrapy.utils.python import to_unicode
# The OpenSSL symbol has been present since OpenSSL 1.1.1, but it is not currently exposed by any version of pyOpenSSL.
# Using the binding directly, as this code does, requires cryptography 2.4.
SSL_OP_NO_TLSv1_3 = getattr(pyOpenSSLutil.lib, 'SSL_OP_NO_TLSv1_3', 0)
def ffi_buf_to_string(buf):
return to_unicode(pyOpenSSLutil.ffi.string(buf))
def x509name_to_string(x509name):
# from OpenSSL.crypto.X509Name.__repr__
result_buffer = pyOpenSSLutil.ffi.new("char[]", 512)
pyOpenSSLutil.lib.X509_NAME_oneline(x509name._name, result_buffer, len(result_buffer))
return ffi_buf_to_string(result_buffer)
def get_temp_key_info(ssl_object):
if not hasattr(pyOpenSSLutil.lib, 'SSL_get_server_tmp_key'): # requires OpenSSL 1.0.2
return None
# adapted from OpenSSL apps/s_cb.c::ssl_print_tmp_key()
temp_key_p = pyOpenSSLutil.ffi.new("EVP_PKEY **")
if not pyOpenSSLutil.lib.SSL_get_server_tmp_key(ssl_object, temp_key_p):
return None
temp_key = temp_key_p[0]
if temp_key == pyOpenSSLutil.ffi.NULL:
return None
temp_key = pyOpenSSLutil.ffi.gc(temp_key, pyOpenSSLutil.lib.EVP_PKEY_free)
key_info = []
key_type = pyOpenSSLutil.lib.EVP_PKEY_id(temp_key)
if key_type == pyOpenSSLutil.lib.EVP_PKEY_RSA:
key_info.append('RSA')
elif key_type == pyOpenSSLutil.lib.EVP_PKEY_DH:
key_info.append('DH')
elif key_type == pyOpenSSLutil.lib.EVP_PKEY_EC:
key_info.append('ECDH')
ec_key = pyOpenSSLutil.lib.EVP_PKEY_get1_EC_KEY(temp_key)
ec_key = pyOpenSSLutil.ffi.gc(ec_key, pyOpenSSLutil.lib.EC_KEY_free)
nid = pyOpenSSLutil.lib.EC_GROUP_get_curve_name(pyOpenSSLutil.lib.EC_KEY_get0_group(ec_key))
cname = pyOpenSSLutil.lib.EC_curve_nid2nist(nid)
if cname == pyOpenSSLutil.ffi.NULL:
cname = pyOpenSSLutil.lib.OBJ_nid2sn(nid)
key_info.append(ffi_buf_to_string(cname))
else:
key_info.append(ffi_buf_to_string(pyOpenSSLutil.lib.OBJ_nid2sn(key_type)))
key_info.append(f'{pyOpenSSLutil.lib.EVP_PKEY_bits(temp_key)} bits')
return ', '.join(key_info)
def get_openssl_version():
system_openssl = OpenSSL.SSL.SSLeay_version(
OpenSSL.SSL.SSLEAY_VERSION
).decode('ascii', errors='replace')
return f'{OpenSSL.version.__version__} ({system_openssl})'

View file

@ -0,0 +1,36 @@
"""Helper functions for working with templates"""
import os
import re
import string
def render_templatefile(path, **kwargs):
with open(path, 'rb') as fp:
raw = fp.read().decode('utf8')
content = string.Template(raw).substitute(**kwargs)
render_path = path[:-len('.tmpl')] if path.endswith('.tmpl') else path
if path.endswith('.tmpl'):
os.rename(path, render_path)
with open(render_path, 'wb') as fp:
fp.write(content.encode('utf8'))
CAMELCASE_INVALID_CHARS = re.compile(r'[^a-zA-Z\d]')
def string_camelcase(string):
""" Convert a word to its CamelCase version and remove invalid chars
>>> string_camelcase('lost-pound')
'LostPound'
>>> string_camelcase('missing_images')
'MissingImages'
"""
return CAMELCASE_INVALID_CHARS.sub('', string.title())
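# Usage sketch (assuming this module is importable as scrapy.utils.template):
# render_templatefile() substitutes $-style placeholders and drops the .tmpl
# suffix, renaming the template file in place.
import os
import tempfile

from scrapy.utils.template import render_templatefile, string_camelcase

with tempfile.TemporaryDirectory() as tmpdir:
    template_path = os.path.join(tmpdir, "spider.py.tmpl")
    with open(template_path, "w", encoding="utf8") as fp:
        fp.write("class ${classname}(Spider):\n    name = '${name}'\n")

    render_templatefile(template_path,
                        classname=string_camelcase("quotes-spider"),  # QuotesSpider
                        name="quotes-spider")

    with open(os.path.join(tmpdir, "spider.py"), encoding="utf8") as fp:
        print(fp.read())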

View file

@ -0,0 +1,112 @@
"""
This module contains some assorted functions used in tests
"""
import asyncio
import os
from posixpath import split
from unittest import mock
from importlib import import_module
from twisted.trial.unittest import SkipTest
from scrapy.utils.boto import is_botocore_available
def assert_gcs_environ():
if 'GCS_PROJECT_ID' not in os.environ:
raise SkipTest("GCS_PROJECT_ID not found")
def skip_if_no_boto():
if not is_botocore_available():
raise SkipTest('missing botocore library')
def get_gcs_content_and_delete(bucket, path):
from google.cloud import storage
client = storage.Client(project=os.environ.get('GCS_PROJECT_ID'))
bucket = client.get_bucket(bucket)
blob = bucket.get_blob(path)
content = blob.download_as_string()
acl = list(blob.acl)  # load the ACL before the blob is deleted
bucket.delete_blob(path)
return content, acl, blob
def get_ftp_content_and_delete(
path, host, port, username,
password, use_active_mode=False):
from ftplib import FTP
ftp = FTP()
ftp.connect(host, port)
ftp.login(username, password)
if use_active_mode:
ftp.set_pasv(False)
ftp_data = []
def buffer_data(data):
ftp_data.append(data)
ftp.retrbinary(f'RETR {path}', buffer_data)
dirname, filename = split(path)
ftp.cwd(dirname)
ftp.delete(filename)
return "".join(ftp_data)
def get_crawler(spidercls=None, settings_dict=None):
"""Return an unconfigured Crawler object. If settings_dict is given, it
will be used to populate the crawler settings with a project level
priority.
"""
from scrapy.crawler import CrawlerRunner
from scrapy.spiders import Spider
runner = CrawlerRunner(settings_dict)
return runner.create_crawler(spidercls or Spider)
def get_pythonpath():
"""Return a PYTHONPATH suitable to use in processes so that they find this
installation of Scrapy"""
scrapy_path = import_module('scrapy').__path__[0]
return os.path.dirname(scrapy_path) + os.pathsep + os.environ.get('PYTHONPATH', '')
def get_testenv():
"""Return a OS environment dict suitable to fork processes that need to import
this installation of Scrapy, instead of a system installed one.
"""
env = os.environ.copy()
env['PYTHONPATH'] = get_pythonpath()
return env
def assert_samelines(testcase, text1, text2, msg=None):
"""Asserts text1 and text2 have the same lines, ignoring differences in
line endings between platforms
"""
testcase.assertEqual(text1.splitlines(), text2.splitlines(), msg)
def get_from_asyncio_queue(value):
q = asyncio.Queue()
getter = q.get()
q.put_nowait(value)
return getter
def mock_google_cloud_storage():
"""Creates autospec mocks for google-cloud-storage Client, Bucket and Blob
classes and set their proper return values.
"""
from google.cloud.storage import Client, Bucket, Blob
client_mock = mock.create_autospec(Client)
bucket_mock = mock.create_autospec(Bucket)
client_mock.get_bucket.return_value = bucket_mock
blob_mock = mock.create_autospec(Blob)
bucket_mock.blob.return_value = blob_mock
return (client_mock, bucket_mock, blob_mock)

View file

@ -0,0 +1,50 @@
import sys
import os
from twisted.internet import defer, protocol
class ProcessTest:
command = None
prefix = [sys.executable, '-m', 'scrapy.cmdline']
cwd = os.getcwd() # trial chdirs to temp dir
def execute(self, args, check_code=True, settings=None):
from twisted.internet import reactor
env = os.environ.copy()
if settings is not None:
env['SCRAPY_SETTINGS_MODULE'] = settings
cmd = self.prefix + [self.command] + list(args)
pp = TestProcessProtocol()
pp.deferred.addBoth(self._process_finished, cmd, check_code)
reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
return pp.deferred
def _process_finished(self, pp, cmd, check_code):
if pp.exitcode and check_code:
msg = f"process {cmd} exit with code {pp.exitcode}"
msg += f"\n>>> stdout <<<\n{pp.out}"
msg += "\n"
msg += f"\n>>> stderr <<<\n{pp.err}"
raise RuntimeError(msg)
return pp.exitcode, pp.out, pp.err
class TestProcessProtocol(protocol.ProcessProtocol):
def __init__(self):
self.deferred = defer.Deferred()
self.out = b''
self.err = b''
self.exitcode = None
def outReceived(self, data):
self.out += data
def errReceived(self, data):
self.err += data
def processEnded(self, status):
self.exitcode = status.value.exitCode
self.deferred.callback(self)

View file

@ -0,0 +1,44 @@
from urllib.parse import urljoin
from twisted.web import server, resource, static, util
class SiteTest:
def setUp(self):
from twisted.internet import reactor
super().setUp()
self.site = reactor.listenTCP(0, test_site(), interface="127.0.0.1")
self.baseurl = f"http://localhost:{self.site.getHost().port}/"
def tearDown(self):
super().tearDown()
self.site.stopListening()
def url(self, path):
return urljoin(self.baseurl, path)
class NoMetaRefreshRedirect(util.Redirect):
def render(self, request):
content = util.Redirect.render(self, request)
return content.replace(b'http-equiv=\"refresh\"',
b'http-no-equiv=\"do-not-refresh-me\"')
def test_site():
r = resource.Resource()
r.putChild(b"text", static.Data(b"Works", "text/plain"))
r.putChild(b"html", static.Data(b"<body><p class='one'>Works</p><p class='two'>World</p></body>", "text/html"))
r.putChild(b"enc-gb18030", static.Data(b"<p>gb18030 encoding</p>", "text/html; charset=gb18030"))
r.putChild(b"redirect", util.Redirect(b"/redirected"))
r.putChild(b"redirect-no-meta-refresh", NoMetaRefreshRedirect(b"/redirected"))
r.putChild(b"redirected", static.Data(b"Redirected here", "text/plain"))
return server.Site(r)
if __name__ == '__main__':
from twisted.internet import reactor
port = reactor.listenTCP(0, test_site(), interface="127.0.0.1")
print(f"http://localhost:{port.getHost().port}/")
reactor.run()

View file

@ -0,0 +1,66 @@
"""This module provides some functions and classes to record and report
references to live object instances.
If you want live objects for a particular class to be tracked, you only have to
subclass from object_ref (instead of object).
About performance: This library has a minimal performance impact when enabled,
and no performance penalty at all when disabled (as object_ref becomes just an
alias to object in that case).
"""
import weakref
from time import time
from operator import itemgetter
from collections import defaultdict
NoneType = type(None)
live_refs = defaultdict(weakref.WeakKeyDictionary)
class object_ref:
"""Inherit from this class to a keep a record of live instances"""
__slots__ = ()
def __new__(cls, *args, **kwargs):
obj = object.__new__(cls)
live_refs[cls][obj] = time()
return obj
def format_live_refs(ignore=NoneType):
"""Return a tabular representation of tracked objects"""
s = "Live References\n\n"
now = time()
for cls, wdict in sorted(live_refs.items(),
key=lambda x: x[0].__name__):
if not wdict:
continue
if issubclass(cls, ignore):
continue
oldest = min(wdict.values())
s += f"{cls.__name__:<30} {len(wdict):6} oldest: {int(now - oldest)}s ago\n"
return s
def print_live_refs(*a, **kw):
"""Print tracked objects"""
print(format_live_refs(*a, **kw))
def get_oldest(class_name):
"""Get the oldest object for a specific class name"""
for cls, wdict in live_refs.items():
if cls.__name__ == class_name:
if not wdict:
break
return min(wdict.items(), key=itemgetter(1))[0]
def iter_all(class_name):
"""Iterate over all objects of the same class by its class name"""
for cls, wdict in live_refs.items():
if cls.__name__ == class_name:
return wdict.keys()
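# Usage sketch (assuming this module is importable as scrapy.utils.trackref):
# subclassing object_ref is enough for instances to show up in the report.
from scrapy.utils.trackref import get_oldest, iter_all, object_ref, print_live_refs

class Record(object_ref):
    def __init__(self, value):
        self.value = value

records = [Record(i) for i in range(3)]

print_live_refs()                          # e.g. "Record    3   oldest: 0s ago"
assert get_oldest("Record") is records[0]  # first instance created
assert len(list(iter_all("Record"))) == 3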

View file

@ -0,0 +1,164 @@
"""
This module contains general purpose URL functions not found in the standard
library.
Some of the functions that used to be imported from this module have been moved
to the w3lib.url module. Always import those from there instead.
"""
import posixpath
import re
from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse
# scrapy.utils.url was moved to w3lib.url and import * ensures this
# move doesn't break old code
from w3lib.url import *
from w3lib.url import _safe_chars, _unquotepath # noqa: F401
from scrapy.utils.python import to_unicode
def url_is_from_any_domain(url, domains):
"""Return True if the url belongs to any of the given domains"""
host = parse_url(url).netloc.lower()
if not host:
return False
domains = [d.lower() for d in domains]
return any((host == d) or (host.endswith(f'.{d}')) for d in domains)
def url_is_from_spider(url, spider):
"""Return True if the url belongs to the given spider"""
return url_is_from_any_domain(url, [spider.name] + list(getattr(spider, 'allowed_domains', [])))
def url_has_any_extension(url, extensions):
return posixpath.splitext(parse_url(url).path)[1].lower() in extensions
def parse_url(url, encoding=None):
"""Return urlparsed url from the given argument (which could be an already
parsed url)
"""
if isinstance(url, ParseResult):
return url
return urlparse(to_unicode(url, encoding))
def escape_ajax(url):
"""
Return the crawlable URL according to:
https://developers.google.com/webmasters/ajax-crawling/docs/getting-started
>>> escape_ajax("www.example.com/ajax.html#!key=value")
'www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue'
>>> escape_ajax("www.example.com/ajax.html?k1=v1&k2=v2#!key=value")
'www.example.com/ajax.html?k1=v1&k2=v2&_escaped_fragment_=key%3Dvalue'
>>> escape_ajax("www.example.com/ajax.html?#!key=value")
'www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue'
>>> escape_ajax("www.example.com/ajax.html#!")
'www.example.com/ajax.html?_escaped_fragment_='
URLs that are not "AJAX crawlable" (according to Google) are returned as-is:
>>> escape_ajax("www.example.com/ajax.html#key=value")
'www.example.com/ajax.html#key=value'
>>> escape_ajax("www.example.com/ajax.html#")
'www.example.com/ajax.html#'
>>> escape_ajax("www.example.com/ajax.html")
'www.example.com/ajax.html'
"""
defrag, frag = urldefrag(url)
if not frag.startswith('!'):
return url
return add_or_replace_parameter(defrag, '_escaped_fragment_', frag[1:])
def add_http_if_no_scheme(url):
"""Add http as the default scheme if it is missing from the url."""
match = re.match(r"^\w+://", url, flags=re.I)
if not match:
parts = urlparse(url)
scheme = "http:" if parts.netloc else "http://"
url = scheme + url
return url
def _is_posix_path(string):
return bool(
re.match(
r'''
^ # start with...
(
\. # ...a single dot,
(
\. | [^/\.]+ # optionally followed by
)? # either a second dot or some characters
|
~ # $HOME
)? # optional match of ".", ".." or ".blabla"
/ # at least one "/" for a file path,
. # and something after the "/"
''',
string,
flags=re.VERBOSE,
)
)
def _is_windows_path(string):
return bool(
re.match(
r'''
^
(
[a-z]:\\
| \\\\
)
''',
string,
flags=re.IGNORECASE | re.VERBOSE,
)
)
def _is_filesystem_path(string):
return _is_posix_path(string) or _is_windows_path(string)
def guess_scheme(url):
"""Add an URL scheme if missing: file:// for filepath-like input or
http:// otherwise."""
if _is_filesystem_path(url):
return any_to_uri(url)
return add_http_if_no_scheme(url)
def strip_url(url, strip_credentials=True, strip_default_port=True, origin_only=False, strip_fragment=True):
"""Strip URL string from some of its components:
- ``strip_credentials`` removes "user:password@"
- ``strip_default_port`` removes ":80" (resp. ":443", ":21")
from http:// (resp. https://, ftp://) URLs
- ``origin_only`` replaces path component with "/", also dropping
query and fragment components ; it also strips credentials
- ``strip_fragment`` drops any #fragment component
"""
parsed_url = urlparse(url)
netloc = parsed_url.netloc
if (strip_credentials or origin_only) and (parsed_url.username or parsed_url.password):
netloc = netloc.split('@')[-1]
if strip_default_port and parsed_url.port:
if (parsed_url.scheme, parsed_url.port) in (('http', 80),
('https', 443),
('ftp', 21)):
netloc = netloc.replace(f':{parsed_url.port}', '')
return urlunparse((
parsed_url.scheme,
netloc,
'/' if origin_only else parsed_url.path,
'' if origin_only else parsed_url.params,
'' if origin_only else parsed_url.query,
'' if strip_fragment else parsed_url.fragment
))
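# Usage sketch (assuming this module is importable as scrapy.utils.url):
from scrapy.utils.url import (
    add_http_if_no_scheme, guess_scheme, strip_url, url_is_from_any_domain)

assert url_is_from_any_domain("http://sub.example.com/page", ["example.com"])
assert add_http_if_no_scheme("www.example.com/path") == "http://www.example.com/path"
assert guess_scheme("./downloads/file.html").startswith("file://")
assert strip_url(
    "http://user:pass@www.example.com:80/index.html?q=1#top"
) == "http://www.example.com/index.html?q=1"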

View file

@ -0,0 +1,31 @@
import platform
import sys
import cryptography
import cssselect
import lxml.etree
import parsel
import twisted
import w3lib
import scrapy
from scrapy.utils.ssl import get_openssl_version
def scrapy_components_versions():
lxml_version = ".".join(map(str, lxml.etree.LXML_VERSION))
libxml2_version = ".".join(map(str, lxml.etree.LIBXML_VERSION))
return [
("Scrapy", scrapy.__version__),
("lxml", lxml_version),
("libxml2", libxml2_version),
("cssselect", cssselect.__version__),
("parsel", parsel.__version__),
("w3lib", w3lib.__version__),
("Twisted", twisted.version.short()),
("Python", sys.version.replace("\n", "- ")),
("pyOpenSSL", get_openssl_version()),
("cryptography", cryptography.__version__),
("Platform", platform.platform()),
]