# Source code for bocoel.corpora.indices.interfaces.boundaries
# Copyright (c) RenChu Wang - All Rights Reserved
import dataclasses as dcls
import numpy as np
from numpy.typing import NDArray
# [docs]  (link marker left over from the rendered documentation page; not code)
@dcls.dataclass(frozen=True)
class Boundary:
    """
    The boundary of embeddings in a corpus.

    The boundary is defined as a hyperrectangle in the embedding space.
    """

    bounds: NDArray
    """
    The boundary arrays of the corpus.
    Must be of shape `[dims, 2]`, where dims is the number of dimensions.
    The first column is the lower bound, the second column is the upper bound.
    """

    def __post_init__(self) -> None:
        """
        Validate the shape and ordering of `bounds`.

        Raises:
            ValueError: If `bounds` is not 2D, does not have exactly 2 columns,
                or if any lower bound is greater than its upper bound.
        """

        if self.bounds.ndim != 2:
            raise ValueError(f"Expected 2D bounds, got {self.bounds.ndim}D")

        if self.bounds.shape[1] != 2:
            raise ValueError(f"Expected 2 columns, got {self.bounds.shape[1]}")

        if (self.lower > self.upper).any():
            raise ValueError("Expected lower <= upper")

    def __len__(self) -> int:
        "The number of dimensions (same as `dims`)."

        return self.dims

    def __getitem__(self, idx: int, /) -> NDArray:
        "The row of `bounds` at `idx`, i.e. `[lower, upper]` for that dimension."

        return self.bounds[idx]

    @property
    def dims(self) -> int:
        "The number of dimensions."

        return self.bounds.shape[0]

    @property
    def lower(self) -> NDArray:
        "The lower bounds. Must be of shape `[dims]`."

        return self.bounds[:, 0]

    @property
    def upper(self) -> NDArray:
        "The upper bounds. Must be of shape `[dims]`."

        return self.bounds[:, 1]

    @classmethod
    def fixed(cls, lower: float, upper: float, dims: int) -> "Boundary":
        """
        Create a fixed boundary for all dimensions.

        Parameters:
            lower: The lower bound.
            upper: The upper bound.
            dims: The number of dimensions. Must be non-negative;
                0 yields an empty boundary of shape `[0, 2]`.

        Returns:
            A `Boundary` instance.

        Raises:
            ValueError: If lower > upper, or if dims is negative.
        """

        if lower > upper:
            raise ValueError("Expected lower <= upper")

        if dims < 0:
            raise ValueError(f"Expected dims >= 0, got {dims}")

        # Tiling the `[lower, upper]` row always produces shape `[dims, 2]`.
        # Building from `[[lower, upper]] * dims` collapses to a 1D empty array
        # when dims == 0, which `__post_init__` would then reject.
        return cls(bounds=np.tile(np.array([lower, upper]), (dims, 1)))