# Source code for stouputils.data_science.config.set
""" Configuration file for the project. """
# Imports
import os
from typing import Literal
from stouputils.decorators import LogLevels
from stouputils.io import get_root_path
# Environment variables (applied as soon as this module is imported)
os.environ.update({
    "TF_CPP_MIN_LOG_LEVEL": "9",  # Suppress TensorFlow logging
    "GRPC_VERBOSITY": "ERROR",  # Suppress gRPC logging
})
# Configuration class
class DataScienceConfig:
    """ Configuration class for the project.

    Every setting is a class attribute, so values are read (and overridden)
    directly on the class. The folder attributes are derived from ``ROOT``
    when the class is created; call ``update_root`` to rebuild them for
    another root directory.
    """

    # Common
    SEED: int = 42
    """ Seed for the random number generator. """

    ERROR_LOG: LogLevels = LogLevels.WARNING_TRACEBACK
    """ Log level for errors for all functions. """

    AUGMENTED_FILE_SUFFIX: str = "_aug_"
    """ Suffix for augmented files, e.g. 'image_008_aug_1.png'. """

    AUGMENTED_DIRECTORY_PREFIX: str = "aug_"
    """ Prefix for augmented directories, e.g. 'data/hip_implant' -> 'data/aug_hip_implant'. """

    PREPROCESSED_DIRECTORY_SUFFIX: str = "_preprocessed"
    """ Suffix for preprocessed directories, e.g. 'data/hip_implant' -> 'data/hip_implant_preprocessed'. """

    # Directories
    ROOT: str = get_root_path(__file__, go_up=3)
    """ Root directory of the project. """

    MLFLOW_FOLDER: str = f"{ROOT}/mlruns"
    """ Folder containing the mlflow data. """

    MLFLOW_URI: str = f"file://{MLFLOW_FOLDER}"
    """ URI to the mlflow data. """

    DATA_FOLDER: str = f"{ROOT}/data"
    """ Folder containing all the data (e.g. subfolders containing images). """

    TEMP_FOLDER: str = f"{ROOT}/temp"
    """ Folder containing temporary files (e.g. models checkpoints, plots, etc.). """

    LOGS_FOLDER: str = f"{ROOT}/logs"
    """ Folder containing the logs. """

    TENSORBOARD_FOLDER: str = f"{ROOT}/tensorboard"
    """ Folder containing the tensorboard logs. """

    # Behaviours
    TEST_SIZE: float = 0.2
    """ Size of the test set by default (0.2 means 80% training, 20% test). """

    VALIDATION_SIZE: float = 0.2
    """ Size of the validation set by default (0.2 means 80% training, 20% validation). """

    # Machine learning
    SAVE_MODEL: bool = False
    """ If the model should be saved in the mlflow folder using mlflow.*.save_model. """

    DO_SALIENCY_AND_GRADCAM: bool = True
    """ If the saliency and gradcam should be done during the run. """

    DO_LEARNING_RATE_FINDER: Literal[0, 1, 2] = 1
    """ If the learning rate finder should be done during the run.

    0: no, 1: only plot, 2: plot and use value for the remaining run
    """

    DO_UNFREEZE_FINDER: Literal[0, 1, 2] = 0
    """ If the unfreeze finder should be done during the run.

    0: no, 1: only plot, 2: plot and use value for the remaining run
    """

    DO_FIT_IN_SUBPROCESS: bool = True
    """ If the model should be fitted in a subprocess.

    Is memory efficient, and more stable. Turn it off only if you are having issues.

    Note: This allows a program to make lots of runs without getting killed by the OS
    for using too much resources (e.g. LeaveOneOut Cross Validation, Grid Search, etc.).
    """

    MIXED_PRECISION_POLICY: Literal["mixed_float16", "mixed_bfloat16", "float32"] = "mixed_float16"
    """ Mixed precision policy to use.

    Turn back to "float32" if you are having issues with a specific model or metrics.
    See: https://www.tensorflow.org/guide/mixed_precision
    """

    TENSORFLOW_DEVICE: str = "/gpu:1"
    """ TensorFlow device to use. """

    @classmethod
    def update_root(cls, new_root: str) -> None:
        """ Switch the project root and rebuild every path derived from it.

        Args:
            new_root: The new root directory path.
        """
        cls.ROOT = new_root

        # Build the mlflow locations locally so the URI is stored already adjusted
        mlflow_folder: str = f"{new_root}/mlruns"
        mlflow_uri: str = f"file://{mlflow_folder}"
        if os.name == "nt":
            # Fix the URI for Windows by adding a missing slash after the scheme
            mlflow_uri = mlflow_uri.replace("file:", "file:/")
        cls.MLFLOW_FOLDER = mlflow_folder
        cls.MLFLOW_URI = mlflow_uri

        # Remaining folders located directly under the root
        cls.DATA_FOLDER = f"{new_root}/data"
        cls.TEMP_FOLDER = f"{new_root}/temp"
        cls.LOGS_FOLDER = f"{new_root}/logs"
        cls.TENSORBOARD_FOLDER = f"{new_root}/tensorboard"
# On Windows, add the missing slash to the MLFLOW_URI computed in the class body
if os.name == "nt":
    DataScienceConfig.MLFLOW_URI = DataScienceConfig.MLFLOW_URI.replace("file:", "file:/")