Ausgabe der neuen DB Einträge

This commit is contained in:
hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions

View file

@ -0,0 +1,5 @@
from .adapter import ItemAdapter # noqa: F401
from .utils import get_field_meta_from_class, is_item # noqa: F401
__version__ = "0.2.0"

View file

@ -0,0 +1,264 @@
from abc import abstractmethod, ABCMeta
from collections import deque
from collections.abc import KeysView, MutableMapping
from types import MappingProxyType
from typing import Any, Iterator
from itemadapter.utils import (
is_attrs_instance,
is_dataclass_instance,
is_item,
is_scrapy_item,
)
__all__ = [
"AdapterInterface",
"AttrsAdapter",
"DataclassAdapter",
"DictAdapter",
"ItemAdapter",
"ScrapyItemAdapter",
]
class AdapterInterface(MutableMapping, metaclass=ABCMeta):
"""
Abstract Base Class for adapters.
An adapter that handles a specific type of item should inherit from this
class and implement the abstract methods defined here, plus the
abtract methods inherited from the MutableMapping base class.
"""
def __init__(self, item: Any) -> None:
self.item = item
@classmethod
@abstractmethod
def is_item(cls, item: Any) -> bool:
"""
Return True if the adapter can handle the given item, False otherwise
"""
raise NotImplementedError()
def get_field_meta(self, field_name: str) -> MappingProxyType:
"""
Return metadata for the given field name, if available
"""
return MappingProxyType({})
def field_names(self) -> KeysView:
"""
Return a dynamic view of the item's field names
"""
return self.keys() # type: ignore
class _MixinAttrsDataclassAdapter:
_fields_dict: dict
item: Any
def get_field_meta(self, field_name: str) -> MappingProxyType:
return self._fields_dict[field_name].metadata # type: ignore
def field_names(self) -> KeysView:
return KeysView(self._fields_dict)
def __getitem__(self, field_name: str) -> Any:
if field_name in self._fields_dict:
return getattr(self.item, field_name)
raise KeyError(field_name)
def __setitem__(self, field_name: str, value: Any) -> None:
if field_name in self._fields_dict:
setattr(self.item, field_name, value)
else:
raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}")
def __delitem__(self, field_name: str) -> None:
if field_name in self._fields_dict:
try:
delattr(self.item, field_name)
except AttributeError:
raise KeyError(field_name)
else:
raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}")
def __iter__(self) -> Iterator:
return iter(attr for attr in self._fields_dict if hasattr(self.item, attr))
def __len__(self) -> int:
return len(list(iter(self)))
class AttrsAdapter(_MixinAttrsDataclassAdapter, AdapterInterface):
def __init__(self, item: Any) -> None:
super().__init__(item)
import attr
# store a reference to the item's fields to avoid O(n) lookups and O(n^2) traversals
self._fields_dict = attr.fields_dict(self.item.__class__)
@classmethod
def is_item(cls, item: Any) -> bool:
return is_attrs_instance(item)
class DataclassAdapter(_MixinAttrsDataclassAdapter, AdapterInterface):
def __init__(self, item: Any) -> None:
super().__init__(item)
import dataclasses
# store a reference to the item's fields to avoid O(n) lookups and O(n^2) traversals
self._fields_dict = {field.name: field for field in dataclasses.fields(self.item)}
@classmethod
def is_item(cls, item: Any) -> bool:
return is_dataclass_instance(item)
class _MixinDictScrapyItemAdapter:
_fields_dict: dict
item: Any
def __getitem__(self, field_name: str) -> Any:
return self.item[field_name]
def __setitem__(self, field_name: str, value: Any) -> None:
self.item[field_name] = value
def __delitem__(self, field_name: str) -> None:
del self.item[field_name]
def __iter__(self) -> Iterator:
return iter(self.item)
def __len__(self) -> int:
return len(self.item)
class DictAdapter(_MixinDictScrapyItemAdapter, AdapterInterface):
@classmethod
def is_item(cls, item: Any) -> bool:
return isinstance(item, dict)
def get_field_meta(self, field_name: str) -> MappingProxyType:
return MappingProxyType({})
def field_names(self) -> KeysView:
return KeysView(self.item)
class ScrapyItemAdapter(_MixinDictScrapyItemAdapter, AdapterInterface):
@classmethod
def is_item(cls, item: Any) -> bool:
return is_scrapy_item(item)
def get_field_meta(self, field_name: str) -> MappingProxyType:
return MappingProxyType(self.item.fields[field_name])
def field_names(self) -> KeysView:
return KeysView(self.item.fields)
class ItemAdapter(MutableMapping):
"""
Wrapper class to interact with data container objects. It provides a common interface
to extract and set data without having to take the object's type into account.
"""
ADAPTER_CLASSES = deque(
[
ScrapyItemAdapter,
DictAdapter,
DataclassAdapter,
AttrsAdapter,
]
)
def __init__(self, item: Any) -> None:
self.adapter_class = None
for cls in self.ADAPTER_CLASSES:
if cls.is_item(item):
self.adapter = cls(item) # type: ignore
break
else:
raise TypeError(f"No adapter found for objects of type: {type(item)} ({item})")
@classmethod
def is_item(self, item: Any) -> bool:
for cls in self.ADAPTER_CLASSES:
if cls.is_item(item):
return True
return False
@property
def item(self) -> Any:
return self.adapter.item
def __repr__(self) -> str:
values = ", ".join(["%s=%r" % (key, value) for key, value in self.items()])
return f"<ItemAdapter for {self.item.__class__.__name__}({values})>"
def __getitem__(self, field_name: str) -> Any:
return self.adapter.__getitem__(field_name)
def __setitem__(self, field_name: str, value: Any) -> None:
self.adapter.__setitem__(field_name, value)
def __delitem__(self, field_name: str) -> None:
self.adapter.__delitem__(field_name)
def __iter__(self) -> Iterator:
return self.adapter.__iter__()
def __len__(self) -> int:
return self.adapter.__len__()
def get_field_meta(self, field_name: str) -> MappingProxyType:
"""
Return a read-only mapping with metadata for the given field name. If there is no metadata
for the field, or the wrapped item does not support field metadata, an empty object is
returned.
Field metadata is taken from different sources, depending on the item type:
* scrapy.item.Item: corresponding scrapy.item.Field object
* dataclass items: "metadata" attribute for the corresponding field
* attrs items: "metadata" attribute for the corresponding field
The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view
of the original mapping, which gets automatically updated if the original mapping changes.
"""
return self.adapter.get_field_meta(field_name)
def field_names(self) -> KeysView:
"""
Return read-only key view with the names of all the defined fields for the item
"""
return self.adapter.field_names()
def asdict(self) -> dict:
"""
Return a dict object with the contents of the adapter. This works slightly different than
calling `dict(adapter)`: it's applied recursively to nested items (if there are any).
"""
return {key: _asdict(value) for key, value in self.items()} # type: ignore
def _asdict(obj: Any) -> Any:
"""
Helper for ItemAdapter.asdict
"""
if isinstance(obj, dict):
return {key: _asdict(value) for key, value in obj.items()}
elif isinstance(obj, (list, set, tuple)):
return obj.__class__(_asdict(x) for x in obj)
elif isinstance(obj, ItemAdapter):
return obj.asdict()
elif is_item(obj):
return ItemAdapter(obj).asdict()
else:
return obj

View file

@ -0,0 +1,114 @@
from types import MappingProxyType
from typing import Any
def _get_scrapy_item_classes() -> tuple:
try:
import scrapy
except ImportError:
return ()
else:
try:
_base_item_cls = getattr(scrapy.item, "_BaseItem", scrapy.item.BaseItem) # deprecated
return (scrapy.item.Item, _base_item_cls)
except AttributeError:
return (scrapy.item.Item,)
def _is_dataclass(obj: Any) -> bool:
try:
import dataclasses
except ImportError:
return False
return dataclasses.is_dataclass(obj)
def _is_attrs_class(obj: Any) -> bool:
try:
import attr
except ImportError:
return False
return attr.has(obj)
def is_dataclass_instance(obj: Any) -> bool:
"""
Return True if the given object is a dataclass object, False otherwise.
In py36, this function returns False if the "dataclasses" backport is not available.
Taken from https://docs.python.org/3/library/dataclasses.html#dataclasses.is_dataclass.
"""
return _is_dataclass(obj) and not isinstance(obj, type)
def is_attrs_instance(obj: Any) -> bool:
"""
Return True if the given object is a attrs-based object, False otherwise.
"""
return _is_attrs_class(obj) and not isinstance(obj, type)
def is_scrapy_item(obj: Any) -> bool:
"""
Return True if the given object is a Scrapy item, False otherwise.
"""
try:
import scrapy
except ImportError:
return False
if isinstance(obj, scrapy.item.Item):
return True
try:
# handle deprecated BaseItem
BaseItem = getattr(scrapy.item, "_BaseItem", scrapy.item.BaseItem)
return isinstance(obj, BaseItem)
except AttributeError:
return False
def is_item(obj: Any) -> bool:
"""
Return True if the given object belongs to one of the supported types, False otherwise.
Alias for ItemAdapter.is_item
"""
from itemadapter.adapter import ItemAdapter
return ItemAdapter.is_item(obj)
def get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType:
"""
Return a read-only mapping with metadata for the given field name, within the given item class.
If there is no metadata for the field, or the item class does not support field metadata,
an empty object is returned.
Field metadata is taken from different sources, depending on the item type:
* scrapy.item.Item: corresponding scrapy.item.Field object
* dataclass items: "metadata" attribute for the corresponding field
* attrs items: "metadata" attribute for the corresponding field
The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view
of the original mapping, which gets automatically updated if the original mapping changes.
"""
if issubclass(item_class, _get_scrapy_item_classes()):
return MappingProxyType(item_class.fields[field_name]) # type: ignore
elif _is_dataclass(item_class):
from dataclasses import fields
for field in fields(item_class):
if field.name == field_name:
return field.metadata # type: ignore
raise KeyError("%s does not support field: %s" % (item_class.__name__, field_name))
elif _is_attrs_class(item_class):
from attr import fields_dict
try:
return fields_dict(item_class)[field_name].metadata # type: ignore
except KeyError:
raise KeyError("%s does not support field: %s" % (item_class.__name__, field_name))
elif issubclass(item_class, dict):
return MappingProxyType({})
else:
raise TypeError("%s is not a valid item class" % (item_class,))