# Source code for bocoel.models.lms.huggingface.sequences

from collections.abc import Sequence

import torch
from numpy.typing import NDArray

from bocoel.models.lms.interfaces import ClassifierModel

from .tokenizers import HuggingfaceTokenizer


class HuggingfaceSequenceLM(ClassifierModel):
    """
    The sequence classification model backed by huggingface's transformers library.
    """

    def __init__(
        self,
        model_path: str,
        device: str,
        choices: Sequence[str],
        add_sep_token: bool = False,
    ) -> None:
        """
        Load the tokenizer wrapper and the sequence classifier for ``model_path``.

        Parameters:
            model_path: Identifier handed to both ``HuggingfaceTokenizer`` and
                ``AutoModelForSequenceClassification.from_pretrained``.
            device: Device the tokenizer wrapper is created for and the
                classifier is moved to.
            choices: Stored as-is and exposed through the ``choices`` property.
                NOTE(review): presumably one entry per output logit; this is
                not validated here.
            add_sep_token: Forwarded unchanged to ``HuggingfaceTokenizer``.
        """

        # Optional dependency
        from transformers import AutoModelForSequenceClassification

        self._model_path = model_path
        self._tokenizer = HuggingfaceTokenizer(
            model_path=model_path, device=device, add_sep_token=add_sep_token
        )
        self._choices = choices

        classifier = AutoModelForSequenceClassification.from_pretrained(model_path)
        self._classifier = classifier.to(device)

        # Keep the model's padding id in sync with the tokenizer's.
        self._classifier.config.pad_token_id = self._tokenizer.pad_token_id

    def __repr__(self) -> str:
        """Return the class name followed by the model path and the choices."""

        return f"{type(self).__name__}({self._model_path}, {self._choices})"

    @property
    def choices(self) -> Sequence[str]:
        """The choices given at construction time."""

        return self._choices

    @torch.no_grad()
    def _classify(self, prompts: Sequence[str], /) -> NDArray:
        """
        Tokenize ``prompts``, run the classifier with gradient tracking disabled,
        and return its logits as a numpy array.

        Parameters:
            prompts: The texts to classify.

        Returns:
            The classifier's ``logits`` copied to the CPU and converted to numpy.
        """

        tokenized = self._tokenizer(prompts)
        output = self._classifier(**tokenized)
        return output.logits.cpu().numpy()

    def to(self, device: str, /) -> "HuggingfaceSequenceLM":
        """
        Move both the tokenizer wrapper and the classifier to ``device``.

        Returns:
            This instance, so that calls can be chained.
        """

        self._tokenizer.to(device)
        self._classifier.to(device)
        return self