Source code for stouputils.data_science.config.set

""" Configuration file for the project. """

# Imports
import os
from typing import Literal

from stouputils.decorators import LogLevels
from stouputils.io import get_root_path

# Environment variables
# Both values are written into the process environment as soon as this module is imported,
# overwriting whatever was set before.
os.environ.update({
	"TF_CPP_MIN_LOG_LEVEL": "9",	# Suppress TensorFlow logging
	"GRPC_VERBOSITY": "ERROR",		# Suppress gRPC logging
})


# Configuration class

class DataScienceConfig:
	""" Configuration class for the project.

	Every setting is a class attribute, so the class itself acts as the shared configuration:
	read a value with ``DataScienceConfig.SEED`` and override it by assigning to the attribute.

	The folder paths are derived from ``ROOT`` once, when the class is defined.
	Use ``update_root`` to move all of them to another root in one call.
	"""

	# Common
	SEED: int = 42
	""" Seed for the random number generator. """

	ERROR_LOG: LogLevels = LogLevels.WARNING_TRACEBACK
	""" Log level for errors for all functions. """

	AUGMENTED_FILE_SUFFIX: str = "_aug_"
	""" Suffix for augmented files, e.g. 'image_008_aug_1.png'. """

	AUGMENTED_DIRECTORY_PREFIX: str = "aug_"
	""" Prefix for augmented directories, e.g. 'data/hip_implant' -> 'data/aug_hip_implant'. """

	PREPROCESSED_DIRECTORY_SUFFIX: str = "_preprocessed"
	""" Suffix for preprocessed directories, e.g. 'data/hip_implant' -> 'data/hip_implant_preprocessed'. """


	# Directories
	ROOT: str = get_root_path(__file__, go_up=3)
	""" Root directory of the project. """

	MLFLOW_FOLDER: str = f"{ROOT}/mlruns"
	""" Folder containing the mlflow data. """
	MLFLOW_URI: str = f"file://{MLFLOW_FOLDER}"
	""" URI to the mlflow data.
	On Windows one more slash is needed after 'file:'; it is added after the class definition
	and again by ``update_root``.
	"""

	DATA_FOLDER: str = f"{ROOT}/data"
	""" Folder containing all the data (e.g. subfolders containing images). """

	TEMP_FOLDER: str = f"{ROOT}/temp"
	""" Folder containing temporary files (e.g. models checkpoints, plots, etc.). """

	LOGS_FOLDER: str = f"{ROOT}/logs"
	""" Folder containing the logs. """

	TENSORBOARD_FOLDER: str = f"{ROOT}/tensorboard"
	""" Folder containing the tensorboard logs. """


	# Behaviours
	TEST_SIZE: float = 0.2
	""" Size of the test set by default (0.2 means 80% training, 20% test). """

	VALIDATION_SIZE: float = 0.2
	""" Size of the validation set by default (0.2 means 80% training, 20% validation). """

	# Machine learning
	SAVE_MODEL: bool = False
	""" If the model should be saved in the mlflow folder using mlflow.*.save_model. """

	DO_SALIENCY_AND_GRADCAM: bool = True
	""" If the saliency and gradcam should be done during the run. """

	DO_LEARNING_RATE_FINDER: Literal[0, 1, 2] = 1
	""" If the learning rate finder should be done during the run.
	0: no, 1: only plot, 2: plot and use value for the remaining run
	"""

	DO_UNFREEZE_FINDER: Literal[0, 1, 2] = 0
	""" If the unfreeze finder should be done during the run.
	0: no, 1: only plot, 2: plot and use value for the remaining run
	"""

	DO_FIT_IN_SUBPROCESS: bool = True
	""" If the model should be fitted in a subprocess.
	Is memory efficient, and more stable. Turn it off only if you are having issues.

	Note: This allows a program to make lots of runs without getting killed by the OS for using too many resources.
	(e.g. LeaveOneOut Cross Validation, Grid Search, etc.)
	"""

	MIXED_PRECISION_POLICY: Literal["mixed_float16", "mixed_bfloat16", "float32"] = "mixed_float16"
	""" Mixed precision policy to use. Turn back to "float32" if you are having issues with a specific model or metrics.
	See: https://www.tensorflow.org/guide/mixed_precision
	"""

	TENSORFLOW_DEVICE: str = "/gpu:1"
	""" TensorFlow device to use. """


	@classmethod
	def update_root(cls, new_root: str) -> None:
		""" Update the root directory and recalculate all dependent paths.

		The new root is stored exactly as given (no normalisation), and each sub-folder
		is appended to it with a forward slash.

		Args:
			new_root: The new root directory path.
		"""
		cls.ROOT = new_root

		# Re-derive every folder that was built from ROOT when the class was defined
		cls.MLFLOW_FOLDER = f"{cls.ROOT}/mlruns"
		cls.DATA_FOLDER = f"{cls.ROOT}/data"
		cls.TEMP_FOLDER = f"{cls.ROOT}/temp"
		cls.LOGS_FOLDER = f"{cls.ROOT}/logs"
		cls.TENSORBOARD_FOLDER = f"{cls.ROOT}/tensorboard"

		# Fix MLFLOW_URI for Windows by adding a missing slash ("file:///" instead of "file://").
		# The prefix is chosen up front so that only the scheme is affected, never the folder path.
		uri_prefix: str = "file:///" if os.name == "nt" else "file://"
		cls.MLFLOW_URI = f"{uri_prefix}{cls.MLFLOW_FOLDER}"




# Fix MLFLOW_URI for Windows by adding a missing slash.
# The class body builds the URI as "file://<folder>"; on Windows the class attribute is
# patched here, once, at import time, so that it starts with "file:///" instead.
if os.name == "nt":
	DataScienceConfig.MLFLOW_URI = DataScienceConfig.MLFLOW_URI.replace(
		"file:",
		"file:/",
	)