Source code for bocoel.corpora.indices.interfaces.boundaries

import dataclasses as dcls

import numpy as np
from numpy.typing import NDArray


@dcls.dataclass(frozen=True)
class Boundary:
    """
    The boundary of embeddings in a corpus.
    The boundary is defined as a hyperrectangle in the embedding space.
    """

    # NOTE(review): the dataclass-generated `__eq__` / `__hash__` operate on the
    # `bounds` array itself. ndarrays are unhashable and compare elementwise, so
    # comparing or hashing `Boundary` instances likely does not behave like
    # value equality -- confirm before relying on it.

    bounds: NDArray
    """
    The boundary arrays of the corpus.
    Must be of shape `[dims, 2]`, where dims is the number of dimensions.
    The first column is the lower bound, the second column is the upper bound.
    """

    def __post_init__(self) -> None:
        """
        Validate the shape and ordering of `bounds`.

        Raises:
            ValueError: If `bounds` is not 2D, does not have exactly 2 columns,
                or any lower bound is not `<=` its upper bound (NaN included).
        """

        if self.bounds.ndim != 2:
            raise ValueError(f"Expected 2D bounds, got {self.bounds.ndim}D")

        if self.bounds.shape[1] != 2:
            raise ValueError(f"Expected 2 columns, got {self.bounds.shape[1]}")

        # Written as a negated `<=` rather than `>`: every comparison with NaN
        # is False, so `lower > upper` would silently accept NaN bounds.
        if not (self.lower <= self.upper).all():
            raise ValueError("Expected lower <= upper")

    def __len__(self) -> int:
        "The number of dimensions (same as `dims`)."

        return self.dims

    def __getitem__(self, idx: int, /) -> NDArray:
        "The `[lower, upper]` row of `bounds` at dimension `idx`."

        return self.bounds[idx]

    @property
    def dims(self) -> int:
        "The number of dimensions."

        return self.bounds.shape[0]

    @property
    def lower(self) -> NDArray:
        "The lower bounds. Must be of shape `[dims]`."

        return self.bounds[:, 0]

    @property
    def upper(self) -> NDArray:
        "The upper bounds. Must be of shape `[dims]`."

        return self.bounds[:, 1]

    @classmethod
    def fixed(cls, lower: float, upper: float, dims: int) -> "Boundary":
        """
        Create a fixed boundary for all dimensions.

        Parameters:
            lower: The lower bound.
            upper: The upper bound.
            dims: The number of dimensions. Must be at least 1.

        Returns:
            A `Boundary` instance.

        Raises:
            ValueError: If `dims < 1`, or if `lower <= upper` does not hold
                (which includes either bound being NaN).
        """

        # `[[lower, upper]] * dims` is an empty list for dims <= 0, which turns
        # into a 1D array and fails validation with a misleading "Expected 2D
        # bounds" message. Reject it up front with an accurate one instead.
        if dims < 1:
            raise ValueError(f"Expected dims >= 1, got {dims}")

        # Negated `<=` so that NaN bounds are rejected as well.
        if not lower <= upper:
            raise ValueError("Expected lower <= upper")

        return cls(bounds=np.array([[lower, upper]] * dims))