from __future__ import annotations
import traceback
from typing import TYPE_CHECKING
import mlflow
if TYPE_CHECKING:
    from ..config import Config
    from logging import Logger
    from ..sparql import RequestHandler
from ..dataset import DatasetBuilder, create_dataset
from ..enums import DecisionQuery
from ..models import Model, get_model
from .evaluate import MultilabelEvaluation
from .base import BenchmarkBase
class BenchmarkWrapper(BenchmarkBase):
"""
This is the baseclass for benchmarking models.
The main objective of this class is brining multiple components togheter, these components all contain their own
custom behaviour for a certain part of the code.
These sub-components are:
* Dataset: How do we transform the data from sparql data into training/inference data?
* Model architecture (or type): A wrapper object around a model to allow abstract usage of methods that are universally implmented.
* Model base: Model weights to load in the predefined model Architecture
* Taxonomy: A specific taxonomy to use for predictions
The previously mentioned components are mostly declared in the config, only the model base is provided under the
model_ids parameter.
Typical usage:
>>> benchmark = BenchmarkWrapper(
config=Config(),
logger=logging.logger,
request_handler=RequestHandler(),
model_ids=["...", ...],
taxonomoy_reference="..."
)
>>> benchmark()
"""

    def __init__(
        self,
        config: Config,
        logger: Logger,
        request_handler: RequestHandler,
        model_ids: list[str] | str,
        taxonomy_reference: str,
        checkpoint_dir: str = "data",
        nested_mlflow_run: bool = False
    ) -> None:
        self.config = config
        self.logger = logger
        self.request_handler = request_handler
        self.model_ids = [model_ids] if isinstance(model_ids, str) else model_ids
        self.taxonomy_reference = taxonomy_reference
        self._create_dataset(checkpoint=checkpoint_dir)
        self.nested_mlflow_run = nested_mlflow_run
        self._default_mlflow_tags = dict()
        self._default_description = str()

    def _create_dataset(self, checkpoint: str | None) -> None:
        """
        Internal function that is responsible for the creation of the benchmarking dataset.
        When no checkpoint is provided, it will automatically start building the dataset by pulling all
        annotated information for the provided taxonomy over SPARQL; otherwise the dataset is restored from the
        checkpoint folder.
        :param checkpoint: folder or path where the benchmark data can be found
        :return: Nothing
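        Example usage (illustrative; the checkpoint value mirrors the checkpoint_dir default used in __init__):
        >>> benchmark._create_dataset(checkpoint="data")  # restore a previously built dataset from the data folder
        >>> benchmark._create_dataset(checkpoint=None)    # rebuild from SPARQL using the annotated-decision query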
"""
        if checkpoint is None:
            dataset_builder = DatasetBuilder.from_sparql(
                config=self.config,
                logger=self.logger,
                request_handler=self.request_handler,
                taxonomy_uri=self.taxonomy_reference,
                query_type=DecisionQuery.ANNOTATED
            )
        else:
            dataset_builder = DatasetBuilder.from_checkpoint(
                config=self.config,
                logger=self.logger,
                checkpoint_folder=checkpoint
            )
        taxonomy = dataset_builder.taxonomy
        self.train_ds = create_dataset(
            config=self.config,
            logger=self.logger,
            dataset=dataset_builder.train_dataset,
            taxonomy=taxonomy
        )

    def _create_model(self, model_id: str) -> Model:
        """
        Internal helper that wraps the given base model id in a Model object, using the taxonomy of the
        training dataset.
        :param model_id: the model id of the base model to load
        :return: a Model wrapper around the requested base model
        """
        return get_model(
            config=self.config,
            logger=self.logger,
            model_id=model_id,
            taxonomy=self.train_ds.taxonomy
        )

    def _create_run_name(self, model_id: str | None = None) -> str:
        """
        Internal function that generates custom run names; these are used for verbose naming in the mlflow
        tracking.
        :param model_id: the model id of the currently selected model
        :return: a custom string that represents the unique combination of components
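        Example usage (illustrative; assumes config.run.model.type == "bert" and config.run.dataset.type == "paragraph",
        the actual values depend on your config):
        >>> benchmark._create_run_name()
        'bert_paragraph'
        >>> benchmark._create_run_name(model_id="organisation/model-base")
        'bert_paragraph_model-base'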
"""
if model_id is None:
return f"{self.config.run.model.type}_{self.config.run.dataset.type}"
else:
return f"{self.config.run.model.type}_{self.config.run.dataset.type}_{model_id.split('/')[-1]}"

    def exec(self):
        """
        This function is responsible for the execution of the benchmark.
        It creates (nested) mlflow runs; these runs are based on the pre-defined config and the selected base models.
        For each combination, a (nested) run will appear in the mlflow interface containing all the artifacts created by
        a benchmark run. (More info about the artifacts can be found in the evaluate class.)
        Example usage:
        >>> benchmark = BenchmarkWrapper(...)
        >>> benchmark.exec()
        :return: Nothing at all
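        The resulting run hierarchy in mlflow looks roughly like this (names are generated by _create_run_name and
        depend on your config and model ids):
        * <model_type>_<dataset_type>                    parent run, tagged with dataset_type
          * <model_type>_<dataset_type>_<model_id>       one nested run per model id, tagged with model_id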
"""
with mlflow.start_run(
tags=self.default_mlflow_tags,
description=self.default_description,
nested=self.nested_mlflow_run,
run_name=self._create_run_name()
):
mlflow.set_tag(
key="dataset_type",
value=self.config.run.dataset.type
)
for model_id in self.model_ids:
self.logger.debug(f"model_id in list of models {model_id}")
with mlflow.start_run(
nested=True,
tags={"model_id": model_id},
description=f"Evaluate performance for: {model_id}",
run_name=self._create_run_name(model_id=model_id)
):
try:
_model = self._create_model(model_id=model_id)
except Exception as ex:
traceback.print_exception(ex)
self.logger.error(f"The following error occured during initalization of the model {ex}")
try:
MultilabelEvaluation(
config=self.config,
logger=self.logger,
dataset=self.train_ds,
model=_model
).evaluate()
except Exception as ex:
self.logger.warning(f"Benchmark failed for {model_id} with error: \n {traceback.format_exc()}")
mlflow.log_dict(
dictionary=self.config.to_dict(),
artifact_file="config.json"
)

    def __call__(self):
        """
        The call function delegates to the exec function; for more information check those docs.
        Example usage:
        >>> benchmark = BenchmarkWrapper(...)
        >>> benchmark()
        :return: Nothing at all
        """
        self.exec()

    @property
    def default_mlflow_tags(self) -> dict[str, str]:
        """
        This property provides a getter for the default mlflow tags that are defined by the selected benchmark class.
        Example usage:
        >>> benchmark = BenchmarkWrapper(...)
        >>> mlflow_tags = benchmark.default_mlflow_tags
        :return: tags for mlflow
        """
        return self._default_mlflow_tags

    @property
    def default_description(self) -> str:
        """
        This property provides a getter for the default description that should be provided for mlflow logging.
        Example usage:
        >>> benchmark = BenchmarkWrapper(...)
        >>> description = benchmark.default_description
        :return: string description for mlflow run
        """
        return self._default_description