stouputils.data_science.utils module#

This module contains the Utils class, which provides static methods for common operations.

This class contains static methods for:

Safe division (handling a zero denominator or None operands)
Safe multiplication (with None)
Converting between one-hot encoding and class indices
Calculating ROC and Precision-Recall curves and their AUC scores

class Utils[source]#

Bases: object

Utility class providing common operations.

static safe_divide_float(a: float, b: float) → float[source]#

Safe division of two numbers, return 0 if denominator is 0.

Parameters:

a (float) – First number
b (float) – Second number

Returns:

Result of the division

Return type:

float

Examples

>>> Utils.safe_divide_float(10, 2)
5.0
>>> Utils.safe_divide_float(0, 5)
0.0
>>> Utils.safe_divide_float(10, 0)
0
>>> Utils.safe_divide_float(-10, 2)
-5.0

static safe_divide_none(a: float | None, b: float | None) → float | None[source]#

Safe division of two numbers, return None if either number is None or denominator is 0.

Parameters:

a (float | None) – First number
b (float | None) – Second number

Returns:

Result of the division, or None if either number is None or the denominator is 0

Return type:

float | None

Examples

>>> None == Utils.safe_divide_none(None, 2)
True
>>> None == Utils.safe_divide_none(10, None)
True
>>> None == Utils.safe_divide_none(10, 0)
True
>>> Utils.safe_divide_none(10, 2)
5.0

static safe_multiply_none(a: float | None, b: float | None) → float | None[source]#

Safe multiplication of two numbers, return None if either number is None.

Parameters:

a (float | None) – First number
b (float | None) – Second number

Returns:

Result of the multiplication or None if either number is None

Return type:

float | None

Examples

>>> None == Utils.safe_multiply_none(None, 2)
True
>>> None == Utils.safe_multiply_none(10, None)
True
>>> Utils.safe_multiply_none(10, 2)
20
>>> Utils.safe_multiply_none(-10, 2)
-20

static convert_to_class_indices(y: ndarray[Any, dtype[int32 | float32]] | list[ndarray[Any, dtype[int32 | float32]]]) → ndarray[Any, dtype[Any]][source]#

Convert array from one-hot encoded format to class indices. If the input is already class indices, it returns the same array.

Parameters:

y (NDArray[intc | single] | list[NDArray[intc | single]]) – Input array (either one-hot encoded or class indices)

Returns:

Array of class indices: [[0, 0, 1, 0], [1, 0, 0, 0]] -> [2, 0]

Return type:

NDArray[Any]

Examples

>>> Utils.convert_to_class_indices(np.array([[0, 0, 1, 0], [1, 0, 0, 0]])).tolist()
[2, 0]
>>> Utils.convert_to_class_indices(np.array([2, 0, 1])).tolist()
[2, 0, 1]
>>> Utils.convert_to_class_indices(np.array([[1], [0]])).tolist()
[[1], [0]]
>>> Utils.convert_to_class_indices(np.array([])).tolist()
[]

static convert_to_one_hot(y: ndarray[Any, dtype[int32 | float32]] | list[ndarray[Any, dtype[int32 | float32]]], num_classes: int) → ndarray[Any, dtype[Any]][source]#

Convert array from class indices to one-hot encoded format. If the input is already one-hot encoded, it returns the same array.

Parameters:

y (NDArray[intc|single] | list[NDArray[intc|single]]) – Input array (either class indices or one-hot encoded)
num_classes (int) – Total number of classes

Returns:

One-hot encoded array: [2, 0] -> [[0, 0, 1, 0], [1, 0, 0, 0]]

Return type:

NDArray[Any]

Examples

>>> Utils.convert_to_one_hot(np.array([2, 0]), 4).tolist()
[[0.0, 0.0, 1.0, 0.0], [1.0, 0.0, 0.0, 0.0]]
>>> Utils.convert_to_one_hot(np.array([[0, 0, 1, 0], [1, 0, 0, 0]]), 4).tolist()
[[0, 0, 1, 0], [1, 0, 0, 0]]
>>> Utils.convert_to_one_hot(np.array([0, 1, 2]), 3).shape
(3, 3)
>>> Utils.convert_to_one_hot(np.array([]), 3)
array([], shape=(0, 3), dtype=float32)

>>> array = np.array([[0.1, 0.9], [0.2, 0.8]])
>>> array = Utils.convert_to_class_indices(array)
>>> array = Utils.convert_to_one_hot(array, 2)
>>> array.tolist()
[[0.0, 1.0], [0.0, 1.0]]

static get_roc_curve_and_auc(y_true: ndarray[Any, dtype[int32 | float32]], y_pred: ndarray[Any, dtype[float32]]) → tuple[float, ndarray[Any, dtype[float32]], ndarray[Any, dtype[float32]], ndarray[Any, dtype[float32]]][source]#

Calculate ROC curve and AUC score.

Parameters:

y_true (NDArray[intc | single]) – True class labels (either one-hot encoded or class indices)
y_pred (NDArray[single]) – Predicted probabilities (must be probability scores, not class indices)

Returns:

Tuple containing AUC score, False Positive Rate, True Positive Rate, and Thresholds

Return type:

tuple[float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]

Examples

>>> # Binary classification example
>>> y_true = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
>>> y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])
>>> auc_value, fpr, tpr, thresholds = Utils.get_roc_curve_and_auc(y_true, y_pred)
>>> round(auc_value, 2)
0.92
>>> [round(x, 2) for x in fpr.tolist()]
[0.0, 0.0, 0.33, 0.67, 1.0]
>>> [round(x, 2) for x in tpr.tolist()]
[0.0, 0.5, 1.0, 1.0, 1.0]
>>> [round(x, 2) for x in thresholds.tolist()]
[inf, 0.9, 0.8, 0.3, 0.2]

static get_pr_curve_and_auc(y_true: ndarray[Any, dtype[int32 | float32]], y_pred: ndarray[Any, dtype[float32]], negative: bool = False) → tuple[float, float, ndarray[Any, dtype[float32]], ndarray[Any, dtype[float32]], ndarray[Any, dtype[float32]]][source]#

Calculate Precision-Recall Curve (or Negative Precision-Recall Curve) and AUC score.

Parameters:

y_true (NDArray[intc | single]) – True class labels (either one-hot encoded or class indices)
y_pred (NDArray[single]) – Predicted probabilities (must be probability scores, not class indices)
negative (bool) – Whether to calculate the negative Precision-Recall Curve

Returns:

Tuple containing either:

AUC score, Average Precision, Precision, Recall, and Thresholds
AUC score, Average Precision, Negative Predictive Value, Specificity, and Thresholds for the negative class

Return type:

tuple[float, float, NDArray[np.single], NDArray[np.single], NDArray[np.single]]

Examples

>>> # Binary classification example
>>> y_true = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
>>> y_pred = np.array([[0.2, 0.8], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.7, 0.3]])
>>> auc_value, average_precision, precision, recall, thresholds = Utils.get_pr_curve_and_auc(y_true, y_pred)
>>> round(auc_value, 2)
0.92
>>> round(average_precision, 2)
0.83
>>> [round(x, 2) for x in precision.tolist()]
[0.4, 0.5, 0.67, 1.0, 1.0]
>>> [round(x, 2) for x in recall.tolist()]
[1.0, 1.0, 1.0, 0.5, 0.0]
>>> [round(x, 2) for x in thresholds.tolist()]
[0.2, 0.3, 0.8, 0.9]

stouputils.data_science.utils module#

This Page