Source code for src.enums.datasets

from __future__ import annotations
import enum



[docs]
class DatasetType(str, enum.Enum):
    """
    This enum is used to specify what type of dataset to use,
    """
    # default
    UNPROCESSED: str = "mirror"

    # single-label dataset
    SINGLE_BASIC: str = ""
    SINGLE_TOP_LEVEL_ALL_BASED: str = "s1_general"

    # multilabel dataset
    MULTI_SECOND_LEVEL_ALL_BASED: str = "m2_general"
    MULTI_TOP_LEVEL_ALL_BASED: str = "m1_general"
    MULTI_TOP_LEVEL_ARTICLE_BASED: str = "m1_article"
    MULTI_TOP_LEVEL_ARTICLE_SPLIT: str = "m1_article_split"
    MULTI_TOP_LEVEL_DESCRIPTION_BASED: str = "m1_description"
    MULTI_TOP_LEVEL_MOTIVATION_BASED: str = "m1_motivation"
    MULTI_TOP_LEVEL_SHORT_TITLE_BASED: str = "m1_shorttitle"

    DYNAMIC: str = "dynamic_general"
    # summary statistic dataset
    SUMMARY_STATISTIC_DATASET: str = "summary_stat_dataset"

    # other?


[docs]
    @classmethod
    def _list(cls):
        """
        internal classmethod that allows us to retrieve all possible datasets
        :return:
        """
        return list(map(lambda c: c.value, cls))



[docs]
    @staticmethod
    def get_multilevel_datasets(level: int = 1):
        """
        this function allows us to retrieve only the multilabel datasets of a specific level

        :param level: the label level you want to retrieve datasets for
        :return: a list with dataset that comply with the filter
        """
        lvl = None
        match level:
            case 1:
                lvl = "m1"

        return [v for v in DatasetType._list() if v.split("_")[0] == lvl]



[docs]
    @staticmethod
    def get_from_prefix(model_type: str):
        """
        this function allows us to retrieve only the models compliant with the prefix filter

        :param model_type: the string prefix to filter the models with
        :return: a list with models that comply with the filter
        """
        return [v for v in DatasetType._list() if v.startswith(model_type)]



[docs]
    def get_single_level_datasets(self):
        return NotImplementedError