# Source code for bocoel.corpora.storages.interfaces
# Copyright (c) RenChu Wang - All Rights Reserved
import abc
import typing
from collections.abc import Collection, Mapping, Sequence
from typing import Any, Protocol
import typeguard
from bocoel import common
# [docs]
class Storage(Protocol):
"""
Storage is responsible for storing the data.
This can be thought of as a table.
"""
def __repr__(self) -> str:
name = common.remove_base_suffix(self, Storage)
return f"{name}({list(self.keys())}, {len(self)})"
@abc.abstractmethod
def __len__(self) -> int:
"""
Returns the number of rows in the storage.
"""
...
@typing.overload
def __getitem__(self, idx: int) -> Mapping[str, Any]: ...
@typing.overload
def __getitem__(self, idx: slice) -> Mapping[str, Sequence[Any]]: ...
def __getitem__(
self, idx: int | slice | Sequence[int]
) -> Mapping[str, Any] | Mapping[str, Sequence[Any]]:
if isinstance(idx, int):
return self._getitem(idx)
elif isinstance(idx, slice):
slice_range = range(*idx.indices(len(self)))
return self.collate([self._getitem(i) for i in slice_range])
elif isinstance(idx, Sequence):
typeguard.check_type(idx, Sequence[int])
return self.collate([self._getitem(i) for i in idx])
else:
raise TypeError(f"Index must be int or sequence, got {type(idx)}")
@abc.abstractmethod
def _getitem(self, idx: int) -> Mapping[str, Any]:
"""
Returns the row at the given index.
"""
...
@abc.abstractmethod
def keys(self) -> Collection[str]: ...
@staticmethod
def collate(mappings: Sequence[Mapping[str, Any]]) -> Mapping[str, Sequence[Any]]:
if len(mappings) == 0:
return {}
first = mappings[0]
keys = first.keys()
result = {}
for key in keys:
extracted = [item[key] for item in mappings]
result[key] = extracted
return result