Skip to content

Metrics API

Metrics are used to evaluate the performance of the model. Unlike loss functions, they are not used for training (backpropagation).

Base Metric

mpneuralnetwork.metrics.Metric

Base class for evaluation metrics.

Metrics are used to judge the performance of the model. Unlike Loss functions, metrics are not used during backpropagation (optimization), only for reporting.

Source code in src/mpneuralnetwork/metrics.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class Metric:
    """Common parent of every evaluation metric.

    A metric scores the model's output for reporting purposes only. In contrast
    to a Loss function, it takes no part in backpropagation (optimization).
    """

    @abstractmethod
    def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
        """Evaluate the metric on a set of predictions.

        Args:
            y_true (ArrayType): Ground truth values.
            y_pred (ArrayType): Model predictions (probabilities or values).

        Returns:
            float: The metric score.
        """

    def get_config(self) -> dict:
        """Return the metric configuration: the concrete class name under ``"type"``."""
        class_name = self.__class__.__name__
        return dict(type=class_name)

__call__(y_true, y_pred) abstractmethod

Computes the metric value.

Parameters:

Name Type Description Default
y_true ArrayType

Ground truth values.

required
y_pred ArrayType

Model predictions (probabilities or values).

required

Returns:

Name Type Description
float float

The metric score.

Source code in src/mpneuralnetwork/metrics.py
17
18
19
20
21
22
23
24
25
26
27
28
@abstractmethod
def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
    """Evaluate the metric on a set of predictions.

    Args:
        y_true (ArrayType): Ground truth values.
        y_pred (ArrayType): Model predictions (probabilities or values).

    Returns:
        float: The metric score.
    """
    ...

get_config()

Returns the metric configuration.

Source code in src/mpneuralnetwork/metrics.py
13
14
15
def get_config(self) -> dict:
    """Return the metric configuration: the concrete class name under ``"type"``."""
    class_name = self.__class__.__name__
    return dict(type=class_name)

Regression Metrics

mpneuralnetwork.metrics.RMSE

Bases: Metric

Root Mean Squared Error.

Formula

RMSE = sqrt( (1/N) * sum((y_pred - y_true)^2) )

Used primarily for regression tasks. Lower is better.

Source code in src/mpneuralnetwork/metrics.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
class RMSE(Metric):
    """Root Mean Squared Error.

    Formula:
        `RMSE = sqrt( (1/N) * sum((y_pred - y_true)^2) )`

    A regression metric; smaller values indicate better predictions.
    """

    def from_mse(self, mse: float) -> float:
        """Convert an already computed MSE value into its RMSE (its square root)."""
        root: float = xp.sqrt(mse, dtype=DTYPE)
        return root

    def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
        """Compute the RMSE between targets and predictions.

        Args:
            y_true (ArrayType): Ground truth values.
            y_pred (ArrayType): Model predictions.

        Returns:
            float: The root of the mean squared error.
        """
        # Squared errors are summed along axis 1, then averaged over what remains.
        squared_error = xp.square(y_true - y_pred)
        per_row = xp.sum(squared_error, axis=1, dtype=DTYPE)
        mean_squared = float(xp.mean(per_row, dtype=DTYPE))
        return self.from_mse(mean_squared)

from_mse(mse)

Helper to compute RMSE from an existing MSE value.

Source code in src/mpneuralnetwork/metrics.py
44
45
46
47
def from_mse(self, mse: float) -> float:
    """Convert an already computed MSE value into its RMSE (its square root)."""
    root: float = xp.sqrt(mse, dtype=DTYPE)
    return root

mpneuralnetwork.metrics.MAE

Bases: Metric

Mean Absolute Error.

Formula

MAE = (1/N) * sum( |y_pred - y_true| )

Used for regression. Less sensitive to outliers than RMSE. Lower is better.

Source code in src/mpneuralnetwork/metrics.py
50
51
52
53
54
55
56
57
58
59
60
61
class MAE(Metric):
    """Mean Absolute Error.

    Formula:
        `MAE = (1/N) * sum( |y_pred - y_true| )`

    A regression metric that reacts less strongly to outliers than RMSE.
    Smaller values indicate better predictions.
    """

    def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
        """Compute the MAE between targets and predictions.

        Args:
            y_true (ArrayType): Ground truth values.
            y_pred (ArrayType): Model predictions.

        Returns:
            float: The mean absolute error.
        """
        # Absolute errors are summed along axis 1, then averaged over what remains.
        absolute_error = xp.abs(y_true - y_pred)
        per_row = xp.sum(absolute_error, axis=1, dtype=DTYPE)
        score: float = xp.mean(per_row, dtype=DTYPE)
        return score

mpneuralnetwork.metrics.R2Score

Bases: Metric

R^2 Score (Coefficient of Determination).

Measures how well the regression predictions approximate the real data points.

Formula

R2 = 1 - (SS_res / SS_tot), where SS_res = sum((y_true - y_pred)^2) and SS_tot = sum((y_true - mean(y_true))^2)

Range: (-inf, 1.0]. 1.0 is perfect prediction. 0.0 is equivalent to a constant model predicting the mean. Negative values indicate the model is worse than just predicting the mean.

Source code in src/mpneuralnetwork/metrics.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
class R2Score(Metric):
    """R^2 Score (Coefficient of Determination).

    Quantifies how closely the regression predictions follow the real data points.

    Formula:
        `R2 = 1 - (SS_res / SS_tot)`
        `SS_res = sum((y_true - y_pred)^2)`
        `SS_tot = sum((y_true - mean(y_true))^2)`

    Range: (-inf, 1.0].
    1.0 is a perfect prediction, 0.0 matches a constant model that predicts the mean,
    and negative values mean the model does worse than predicting the mean.

    Args:
        epsilon (float): Constant added to `SS_tot` in the denominator.
    """

    def __init__(self, epsilon: float = 1e-8) -> None:
        self.epsilon: float = epsilon

    def get_config(self) -> dict:
        """Return the parent configuration extended with ``"epsilon"``."""
        base_config = super().get_config()
        base_config["epsilon"] = self.epsilon
        return base_config

    def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
        """Compute the R^2 score of the predictions.

        Args:
            y_true (ArrayType): Ground truth values.
            y_pred (ArrayType): Model predictions.

        Returns:
            float: `1 - SS_res / (SS_tot + epsilon)`.
        """
        ss_res = xp.sum(xp.square(y_true - y_pred), dtype=DTYPE)

        # The mean is taken along axis 0 before centering the targets.
        centered = y_true - xp.mean(y_true, axis=0, dtype=DTYPE)
        ss_tot = xp.sum(xp.square(centered), dtype=DTYPE)

        score: float = 1 - ss_res / (ss_tot + self.epsilon)
        return score

Classification Metrics

mpneuralnetwork.metrics.Accuracy

Bases: Metric

Classification Accuracy.

Formula

Accuracy = (TP + TN) / Total Samples

Works for both binary classification (threshold at 0.5) and multi-class classification (argmax).

Source code in src/mpneuralnetwork/metrics.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
class Accuracy(Metric):
    """Classification Accuracy.

    Formula:
        `Accuracy = (TP + TN) / Total Samples`

    Supported inputs:
    - Binary classification (predictions are rounded, i.e. thresholded at 0.5).
    - Multi-class classification (argmax over axis 1).
    """

    def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
        """Compute the fraction of predictions that equal their target.

        Args:
            y_true (ArrayType): Ground truth labels.
            y_pred (ArrayType): Model predictions.

        Returns:
            float: The mean of the element-wise equality between labels and predictions.
        """
        # A 2-D target with more than one column takes the argmax path.
        multi_column = y_true.ndim == 2 and y_true.shape[1] > 1

        if not multi_column:
            y_pred = xp.round(y_pred)
        else:
            y_true = xp.argmax(y_true, axis=1)
            y_pred = xp.argmax(y_pred, axis=1)

        matches = y_true == y_pred
        score: float = xp.mean(matches, dtype=DTYPE)
        return score

mpneuralnetwork.metrics.Precision

Bases: Metric

Precision Metric (Positive Predictive Value).

Formula

Precision = TP / (TP + FP)

Measures the proportion of positive identifications that were actually correct.

Source code in src/mpneuralnetwork/metrics.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
class Precision(Metric):
    """Precision Metric (Positive Predictive Value).

    Formula:
        `Precision = TP / (TP + FP)`

    Measures the proportion of positive identifications that were actually correct.

    Binary problems (1-D labels or a single label column) are scored on the
    positive class (label 1). Multi-class problems are macro-averaged: the
    precision of every class is computed and the mean is returned.

    Args:
        epsilon (float): Constant added to the denominator to avoid division by zero.
    """

    def __init__(self, epsilon: float = 1e-8) -> None:
        self.epsilon: float = epsilon

    def get_config(self) -> dict:
        """Returns the parent configuration extended with ``"epsilon"``."""
        config = super().get_config()
        config.update({"epsilon": self.epsilon})
        return config

    def __call__(
        self,
        y_true: ArrayType,
        y_pred: ArrayType,
        num_classes: int = 1,
        no_check: bool = False,
    ) -> float:
        """Computes the precision.

        Args:
            y_true (ArrayType): Ground truth labels.
            y_pred (ArrayType): Model predictions.
            num_classes (int): Number of classes. Only honoured when `no_check`
                is True; otherwise it is derived from `y_true`. A value of 1
                means a binary problem.
            no_check (bool): When True, `y_true` and `y_pred` are taken to be
                class indices already and are used as-is.

        Returns:
            float: The precision score.
        """
        if not no_check:
            # Test ndim before touching shape[1], so 1-D label vectors do not raise.
            if y_true.ndim == 2 and y_true.shape[1] > 1:
                num_classes = y_true.shape[1]
                y_true = xp.argmax(y_true, axis=1)
                y_pred = xp.argmax(y_pred, axis=1)
            else:
                num_classes = 1
                # Flatten both so (N,) and (N, 1) inputs compare element-wise.
                y_true = y_true.reshape(-1)
                y_pred = xp.round(y_pred).reshape(-1)

        # A binary problem is scored on the positive label only; iterating
        # range(1) would score label 0, i.e. the negative class.
        classes = (1,) if num_classes == 1 else range(num_classes)

        sum_score: float = 0
        for c in classes:
            predicted_c = y_pred == c
            tp = xp.sum(predicted_c & (y_true == c))
            fp = xp.sum(predicted_c & (y_true != c))

            sum_score += tp / (tp + fp + self.epsilon)

        return sum_score / len(classes)

mpneuralnetwork.metrics.Recall

Bases: Metric

Recall Metric (Sensitivity / True Positive Rate).

Formula

Recall = TP / (TP + FN)

Measures the proportion of actual positives that were identified correctly.

Source code in src/mpneuralnetwork/metrics.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
class Recall(Metric):
    """Recall Metric (Sensitivity / True Positive Rate).

    Formula:
        `Recall = TP / (TP + FN)`

    Measures the proportion of actual positives that were identified correctly.

    Binary problems (1-D labels or a single label column) are scored on the
    positive class (label 1). Multi-class problems are macro-averaged: the
    recall of every class is computed and the mean is returned.

    Args:
        epsilon (float): Constant added to the denominator to avoid division by zero.
    """

    def __init__(self, epsilon: float = 1e-8) -> None:
        self.epsilon: float = epsilon

    def get_config(self) -> dict:
        """Returns the parent configuration extended with ``"epsilon"``."""
        config = super().get_config()
        config.update({"epsilon": self.epsilon})
        return config

    def __call__(
        self,
        y_true: ArrayType,
        y_pred: ArrayType,
        num_classes: int = 1,
        no_check: bool = False,
    ) -> float:
        """Computes the recall.

        Args:
            y_true (ArrayType): Ground truth labels.
            y_pred (ArrayType): Model predictions.
            num_classes (int): Number of classes. Only honoured when `no_check`
                is True; otherwise it is derived from `y_true`. A value of 1
                means a binary problem.
            no_check (bool): When True, `y_true` and `y_pred` are taken to be
                class indices already and are used as-is.

        Returns:
            float: The recall score.
        """
        if not no_check:
            # Test ndim before touching shape[1], so 1-D label vectors do not raise.
            if y_true.ndim == 2 and y_true.shape[1] > 1:
                num_classes = y_true.shape[1]
                y_true = xp.argmax(y_true, axis=1)
                y_pred = xp.argmax(y_pred, axis=1)
            else:
                num_classes = 1
                # Flatten both so (N,) and (N, 1) inputs compare element-wise.
                y_true = y_true.reshape(-1)
                y_pred = xp.round(y_pred).reshape(-1)

        # A binary problem is scored on the positive label only; iterating
        # range(1) would score label 0, i.e. the negative class.
        classes = (1,) if num_classes == 1 else range(num_classes)

        sum_score: float = 0
        for c in classes:
            actual_c = y_true == c
            tp = xp.sum((y_pred == c) & actual_c)
            fn = xp.sum((y_pred != c) & actual_c)
            sum_score += tp / (tp + fn + self.epsilon)

        return sum_score / len(classes)

mpneuralnetwork.metrics.F1Score

Bases: Metric

F1 Score.

Formula

F1 = 2 * (Precision * Recall) / (Precision + Recall)

Harmonic mean of Precision and Recall. Useful for imbalanced datasets.

Source code in src/mpneuralnetwork/metrics.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
class F1Score(Metric):
    """F1 Score.

    Formula:
        `F1 = 2 * (Precision * Recall) / (Precision + Recall)`

    Harmonic mean of Precision and Recall. Useful for imbalanced datasets.

    Args:
        epsilon (float): Constant forwarded to Precision and Recall and added to
            the denominator of the harmonic mean to avoid division by zero.
    """

    def __init__(self, epsilon: float = 1e-8) -> None:
        self.epsilon: float = epsilon

    def get_config(self) -> dict:
        """Returns the parent configuration extended with ``"epsilon"``."""
        config = super().get_config()
        config.update({"epsilon": self.epsilon})
        return config

    def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
        """Computes the F1 score.

        Args:
            y_true (ArrayType): Ground truth labels.
            y_pred (ArrayType): Model predictions.

        Returns:
            float: Harmonic mean of the Precision and Recall scores.
        """
        # Test ndim before touching shape[1], so 1-D label vectors do not raise.
        if y_true.ndim == 2 and y_true.shape[1] > 1:
            num_classes = y_true.shape[1]
            y_true = xp.argmax(y_true, axis=1)
            y_pred = xp.argmax(y_pred, axis=1)
        else:
            num_classes = 1
            # Flatten both so (N,) and (N, 1) inputs compare element-wise.
            y_true = y_true.reshape(-1)
            y_pred = xp.round(y_pred).reshape(-1)

        # Inputs are already reduced to labels here, so the sub-metrics skip their own checks.
        precision = Precision(self.epsilon)(y_true, y_pred, num_classes=num_classes, no_check=True)
        recall = Recall(self.epsilon)(y_true, y_pred, num_classes=num_classes, no_check=True)

        return 2 * precision * recall / (precision + recall + self.epsilon)

mpneuralnetwork.metrics.TopKAccuracy

Bases: Metric

Top-K Accuracy.

Considers a prediction correct if the true label is among the K highest predicted probabilities. Commonly used in ImageNet classification (Top-5).

Parameters:

Name Type Description Default
k int

Number of top predictions to consider.

required
Source code in src/mpneuralnetwork/metrics.py
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
class TopKAccuracy(Metric):
    """Top-K Accuracy.

    A prediction counts as correct when the true label is one of the K
    highest-scored classes. Commonly used in ImageNet classification (Top-5).

    Args:
        k (int): Number of top predictions to consider.
    """

    def __init__(self, k: int) -> None:
        self.k: int = k

    def get_config(self) -> dict:
        """Return the parent configuration extended with ``"k"``."""
        base_config = super().get_config()
        base_config["k"] = self.k
        return base_config

    def __call__(self, y_true: ArrayType, y_pred: ArrayType) -> float:
        """Compute the fraction of rows whose true label is among the top K predictions.

        Args:
            y_true (ArrayType): Ground truth labels.
            y_pred (ArrayType): Model predictions, ranked along axis 1.

        Returns:
            float: The top-K accuracy.
        """
        # TODO: no_check ?
        # argsort is ascending, so the last k columns hold the best-scored classes.
        ranking = xp.argsort(y_pred, axis=1)
        top_k_preds = ranking[:, -self.k :]

        if y_true.ndim == 2 and y_true.shape[1] > 1:
            y_true = xp.argmax(y_true, axis=1)

        # Column vector of labels so the comparison broadcasts across the k candidates.
        labels = y_true.reshape(-1, 1)

        hits = xp.any(top_k_preds == labels, axis=1)
        score: float = xp.mean(hits, dtype=DTYPE)
        return score