# scrapy/utils/test.py
"""
This module contains some assorted functions used in tests
"""
import asyncio
import os
from posixpath import split
from unittest import mock
from importlib import import_module
from twisted.trial.unittest import SkipTest
from scrapy.utils.boto import is_botocore_available


def assert_gcs_environ():
    if 'GCS_PROJECT_ID' not in os.environ:
        raise SkipTest("GCS_PROJECT_ID not found")


def skip_if_no_boto():
    if not is_botocore_available():
        raise SkipTest('missing botocore library')
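
# Usage sketch (illustrative only; the test class name is hypothetical):
# these helpers raise SkipTest so a test is skipped, rather than failed,
# when its external dependency is unavailable.
#
#     class S3FeedStorageTest(unittest.TestCase):
#         def setUp(self):
#             skip_if_no_boto()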


def get_gcs_content_and_delete(bucket, path):
    from google.cloud import storage
    client = storage.Client(project=os.environ.get('GCS_PROJECT_ID'))
    bucket = client.get_bucket(bucket)
    blob = bucket.get_blob(path)
    # newer google-cloud-storage releases prefer download_as_bytes()
    content = blob.download_as_string()
    acl = list(blob.acl)  # read the ACL before the blob is deleted
    bucket.delete_blob(path)
    return content, acl, blob
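
# Usage sketch (requires live GCS credentials and a real bucket; the bucket
# and object names below are placeholders):
#
#     content, acl, blob = get_gcs_content_and_delete(
#         'my-test-bucket', 'export/items.json')
#     assert b'"name"' in content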


def get_ftp_content_and_delete(
        path, host, port, username,
        password, use_active_mode=False):
    from ftplib import FTP
    ftp = FTP()
    ftp.connect(host, port)
    ftp.login(username, password)
    if use_active_mode:
        ftp.set_pasv(False)
    ftp_data = []

    def buffer_data(data):
        ftp_data.append(data)

    ftp.retrbinary(f'RETR {path}', buffer_data)
    dirname, filename = split(path)
    ftp.cwd(dirname)
    ftp.delete(filename)
    # retrbinary delivers bytes chunks, so they must be joined as bytes
    return b"".join(ftp_data)


def get_crawler(spidercls=None, settings_dict=None):
    """Return an unconfigured Crawler object. If settings_dict is given, it
    will be used to populate the crawler settings with a project level
    priority.
    """
    from scrapy.crawler import CrawlerRunner
    from scrapy.spiders import Spider
    runner = CrawlerRunner(settings_dict)
    return runner.create_crawler(spidercls or Spider)
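
# Usage sketch: build a crawler around the default Spider with custom
# settings, then read them back (ROBOTSTXT_OBEY is a real Scrapy setting).
#
#     crawler = get_crawler(settings_dict={'ROBOTSTXT_OBEY': False})
#     assert crawler.settings.getbool('ROBOTSTXT_OBEY') is False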


def get_pythonpath():
    """Return a PYTHONPATH suitable to use in processes so that they find this
    installation of Scrapy"""
    scrapy_path = import_module('scrapy').__path__[0]
    return os.path.dirname(scrapy_path) + os.pathsep + os.environ.get('PYTHONPATH', '')


def get_testenv():
    """Return an OS environment dict suitable to fork processes that need to
    import this installation of Scrapy, instead of a system installed one.
    """
    env = os.environ.copy()
    env['PYTHONPATH'] = get_pythonpath()
    return env
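
# Usage sketch: run a subprocess that imports this checkout of Scrapy rather
# than any system-wide installation.
#
#     import subprocess, sys
#     out = subprocess.check_output(
#         [sys.executable, '-c', 'import scrapy; print(scrapy.__file__)'],
#         env=get_testenv())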


def assert_samelines(testcase, text1, text2, msg=None):
    """Asserts text1 and text2 have the same lines, ignoring differences in
    line endings between platforms
    """
    testcase.assertEqual(text1.splitlines(), text2.splitlines(), msg)


def get_from_asyncio_queue(value):
    """Put value on a fresh asyncio.Queue and return the getter coroutine
    that will yield it back when awaited.
    """
    q = asyncio.Queue()
    getter = q.get()
    q.put_nowait(value)
    return getter
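
# Usage sketch: the returned coroutine must be awaited from a running event
# loop, e.g. inside an async test.
#
#     async def test_roundtrip():
#         assert await get_from_asyncio_queue('spam') == 'spam'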


def mock_google_cloud_storage():
    """Creates autospec mocks for google-cloud-storage Client, Bucket and Blob
    classes and sets their proper return values.
    """
    from google.cloud.storage import Client, Bucket, Blob
    client_mock = mock.create_autospec(Client)
    bucket_mock = mock.create_autospec(Bucket)
    client_mock.get_bucket.return_value = bucket_mock
    blob_mock = mock.create_autospec(Blob)
    bucket_mock.blob.return_value = blob_mock
    return (client_mock, bucket_mock, blob_mock)
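
# Usage sketch: patch the storage client in the code under test so no network
# call is made (the patch target below is an assumption; patch wherever
# Client is actually looked up in your code).
#
#     client_mock, bucket_mock, blob_mock = mock_google_cloud_storage()
#     with mock.patch('google.cloud.storage.Client',
#                     return_value=client_mock):
#         ...  # exercise code that uploads to GCS
#     blob_mock.upload_from_string.assert_called_once()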