Source code for src.topic_modeling

from __future__ import annotations

# typing imports
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .data_models import Taxonomy

from .dataset import DatasetBuilder
from .config import Config
from .sparql import RequestHandler
from .utils import LoggingBase
from .enums import DecisionQuery, DatasetType, ModelType
from .models import get_topic_model

import fire
import copy


def main(
    dataset_type: DatasetType,
    model_type: ModelType,
    checkpoint_folder: str | None = None,
):
    """
    This function is the entrypoint for the topic modeling functionality.
    It calls on the specified class to generate the topic modeling artifacts
    that can be used for further analysis.

    :param dataset_type: the type of dataset to use as input formatting
    :param model_type: the type of topic modeling to use
    :param checkpoint_folder: a checkpoint folder that can be used to restore
        the input data from
    :return:
    """
    config = Config()
    config.run.dataset.type = dataset_type

    logger = LoggingBase(config=config.logging).logger
    request_handler = RequestHandler(config=config, logger=logger)

    if checkpoint_folder is None:
        # Build the dataset from scratch via SPARQL and checkpoint it.
        dataset_builder = DatasetBuilder.from_sparql(
            config=config,
            logger=logger,
            request_handler=request_handler,
            query_type=DecisionQuery.ALL,
            do_train_test_split=False,
            taxonomy_uri="http://stad.gent/id/concepts/gent_words",
        )
        dataset_builder.create_checkpoint("/tmp/data")
    else:
        # Restore the previously checkpointed input data.
        dataset_builder = DatasetBuilder.from_checkpoint(
            config=config,
            logger=logger,
            checkpoint_folder=checkpoint_folder,
        )

    topic_model = get_topic_model(
        model_type=model_type,
        config=config,
        logger=logger,
        dataset_builder=dataset_builder,
    )
    topic_model.analyse()
if __name__ == "__main__":
    fire.Fire(main)
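
# Usage: Fire exposes main's parameters as CLI arguments, e.g.
# `python -m src.topic_modeling --dataset_type=... --model_type=...`.
# Below is a minimal programmatic sketch; the enum member names are
# hypothetical (check src.enums for the real values), and note that Fire
# passes CLI arguments as strings, so enum resolution depends on how
# DatasetType and ModelType are defined.
#
#     from src.enums import DatasetType, ModelType
#     from src.topic_modeling import main
#
#     # First run: builds the dataset via SPARQL and checkpoints it to /tmp/data.
#     main(dataset_type=DatasetType.DECISIONS, model_type=ModelType.BERTOPIC)
#
#     # Later runs can restore the input data from that checkpoint.
#     main(
#         dataset_type=DatasetType.DECISIONS,
#         model_type=ModelType.BERTOPIC,
#         checkpoint_folder="/tmp/data",
#     )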