Skip to content

Layers API

Layers are the building blocks of neural networks. They store parameters (weights, biases) and implement the forward/backward propagation logic.

Base Layer

mpneuralnetwork.layers.Layer

Abstract base class for all neural network layers.

This class defines the interface that all layers must implement, including forward/backward passes and parameter management.

Attributes:

Name Type Description
input_shape tuple[int, ...]

Shape of the input data (excluding batch dimension).

output_shape tuple[int, ...]

Shape of the output data (excluding batch dimension).

input ArrayType

Caches the input for the backward pass.

output ArrayType

Caches the output.

Source code in src/mpneuralnetwork/layers/layer.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
class Layer:
    """Abstract base class for all neural network layers.

    This class defines the interface that all layers must implement, including
    forward/backward passes and parameter management.

    Attributes:
        input_shape (tuple[int, ...]): Shape of the input data (excluding batch dimension).
        output_shape (tuple[int, ...]): Shape of the output data (excluding batch dimension).
        input (ArrayType): Caches the input for the backward pass.
        output (ArrayType): Caches the output.
    """

    def __init__(self, output_shape: int | tuple[int, ...] | None = None, input_shape: int | tuple[int, ...] | None = None) -> None:
        """Initializes the Layer.

        Args:
            output_shape (int | tuple[int, ...], optional): Desired output shape.
            input_shape (int | tuple[int, ...], optional): Known input shape.
        """
        self.output_shape: tuple[int, ...]
        if output_shape is not None:
            if isinstance(output_shape, int):
                output_shape = (output_shape,)
            self.output_shape = output_shape

        self.input_shape: tuple[int, ...]
        if input_shape is not None:
            if isinstance(input_shape, int):
                input_shape = (input_shape,)
            self.input_shape = input_shape

        self.input: ArrayType
        self.output: ArrayType

    def get_config(self) -> dict:
        """Returns the configuration of the layer for serialization.

        Returns:
            dict: Dictionary containing layer configuration.
        """
        return {"type": self.__class__.__name__}

    def build(self, input_shape: int | tuple[int, ...]) -> None:
        """Configures the layer based on the input shape.

        Called automatically by the Model before training.

        Args:
            input_shape (int | tuple[int, ...]): The shape of the input.
        """
        if isinstance(input_shape, int):
            input_shape = (input_shape,)

        self.input_shape = input_shape

        if not hasattr(self, "output_shape"):
            self.output_shape = input_shape

    @abstractmethod
    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        """Performs the forward propagation pass.

        Args:
            input_batch (ArrayType): Input data of shape (batch_size, ...).
            training (bool, optional): Whether the layer is in training mode. Defaults to True.

        Returns:
            ArrayType: Output data.
        """
        pass

    @abstractmethod
    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        """Performs the backward propagation pass.

        Computes the gradient of the loss function with respect to the input.
        Also calculates gradients for any trainable parameters.

        Args:
            output_gradient_batch (ArrayType): Gradient of the loss w.r.t the output.

        Returns:
            ArrayType: Gradient of the loss w.r.t the input.
        """
        pass

    @property
    def params(self) -> dict[str, tuple[ArrayType, ArrayType]]:
        """Returns trainable parameters and their gradients.

        Returns:
            dict[str, tuple[ArrayType, ArrayType]]: A dictionary where keys are parameter names
            (e.g., "weights", "biases") and values are tuples of (parameter_value, parameter_gradient).
        """
        return {}

    def load_params(self, params: dict[str, ArrayType]) -> None:
        """Loads trainable parameters into the layer.

        Args:
            params (dict[str, ArrayType]): Dictionary mapping parameter names to values.
        """
        pass

    @property
    def state(self) -> dict[str, ArrayType]:
        """Returns non-trainable internal state (e.g., BatchNorm running means).

        Returns:
            dict[str, ArrayType]: Dictionary of state variables.
        """
        return {}

    @state.setter
    def state(self, state: dict[str, ArrayType]) -> None:
        """Restores non-trainable internal state.

        Args:
            state (dict[str, ArrayType]): Dictionary of state variables.
        """
        pass

    @property
    def input_size(self) -> int:
        """Returns the total number of elements in the input (excluding batch)."""
        return int(np.prod(self.input_shape))

    @property
    def output_size(self) -> int:
        """Returns the total number of elements in the output (excluding batch)."""
        return int(np.prod(self.output_shape))

input_size property

Returns the total number of elements in the input (excluding batch).

output_size property

Returns the total number of elements in the output (excluding batch).

params property

Returns trainable parameters and their gradients.

Returns:

Type Description
dict[str, tuple[ArrayType, ArrayType]]

A dictionary where keys are parameter names (e.g., "weights", "biases") and values are tuples of (parameter_value, parameter_gradient).

state property writable

Returns non-trainable internal state (e.g., BatchNorm running means).

Returns:

Type Description
dict[str, ArrayType]

Dictionary of state variables.

__init__(output_shape=None, input_shape=None)

Initializes the Layer.

Parameters:

Name Type Description Default
output_shape int | tuple[int, ...]

Desired output shape.

None
input_shape int | tuple[int, ...]

Known input shape.

None
Source code in src/mpneuralnetwork/layers/layer.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(self, output_shape: int | tuple[int, ...] | None = None, input_shape: int | tuple[int, ...] | None = None) -> None:
    """Initializes the Layer.

    Args:
        output_shape (int | tuple[int, ...], optional): Desired output shape.
        input_shape (int | tuple[int, ...], optional): Known input shape.
    """
    self.output_shape: tuple[int, ...]
    if output_shape is not None:
        if isinstance(output_shape, int):
            output_shape = (output_shape,)
        self.output_shape = output_shape

    self.input_shape: tuple[int, ...]
    if input_shape is not None:
        if isinstance(input_shape, int):
            input_shape = (input_shape,)
        self.input_shape = input_shape

    self.input: ArrayType
    self.output: ArrayType

backward(output_gradient_batch) abstractmethod

Performs the backward propagation pass.

Computes the gradient of the loss function with respect to the input. Also calculates gradients for any trainable parameters.

Parameters:

Name Type Description Default
output_gradient_batch ArrayType

Gradient of the loss w.r.t the output.

required

Returns:

Name Type Description
ArrayType ArrayType

Gradient of the loss w.r.t the input.

Source code in src/mpneuralnetwork/layers/layer.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
@abstractmethod
def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
    """Backward propagation hook that concrete layers override.

    An implementation turns the gradient of the loss with respect to the
    layer's output into the gradient with respect to its input, and computes
    the gradients of any trainable parameters along the way.

    Args:
        output_gradient_batch (ArrayType): Gradient of the loss w.r.t the output.

    Returns:
        ArrayType: Gradient of the loss w.r.t the input.
    """
    # The base class carries no logic of its own.
    return None

build(input_shape)

Configures the layer based on the input shape.

Called automatically by the Model before training.

Parameters:

Name Type Description Default
input_shape int | tuple[int, ...]

The shape of the input.

required
Source code in src/mpneuralnetwork/layers/layer.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def build(self, input_shape: int | tuple[int, ...]) -> None:
    """Configures the layer based on the input shape.

    Called automatically by the Model before training.

    Args:
        input_shape (int | tuple[int, ...]): The shape of the input.
    """
    if isinstance(input_shape, int):
        input_shape = (input_shape,)

    self.input_shape = input_shape

    if not hasattr(self, "output_shape"):
        self.output_shape = input_shape

forward(input_batch, training=True) abstractmethod

Performs the forward propagation pass.

Parameters:

Name Type Description Default
input_batch ArrayType

Input data of shape (batch_size, ...).

required
training bool

Whether the layer is in training mode. Defaults to True.

True

Returns:

Name Type Description
ArrayType ArrayType

Output data.

Source code in src/mpneuralnetwork/layers/layer.py
70
71
72
73
74
75
76
77
78
79
80
81
@abstractmethod
def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
    """Forward propagation hook that concrete layers override.

    Args:
        input_batch (ArrayType): Input data of shape (batch_size, ...).
        training (bool, optional): Whether the layer is in training mode. Defaults to True.

    Returns:
        ArrayType: Output data.
    """
    # The base class carries no logic of its own.
    return None

get_config()

Returns the configuration of the layer for serialization.

Returns:

Name Type Description
dict dict

Dictionary containing layer configuration.

Source code in src/mpneuralnetwork/layers/layer.py
46
47
48
49
50
51
52
def get_config(self) -> dict:
    """Describes the layer for serialization.

    Returns:
        dict: Dictionary holding the layer's class name under the key "type".
    """
    layer_name = self.__class__.__name__
    return {"type": layer_name}

load_params(params)

Loads trainable parameters into the layer.

Parameters:

Name Type Description Default
params dict[str, ArrayType]

Dictionary mapping parameter names to values.

required
Source code in src/mpneuralnetwork/layers/layer.py
108
109
110
111
112
113
114
def load_params(self, params: dict[str, ArrayType]) -> None:
    """Parameter-loading hook; the base implementation does nothing.

    Args:
        params (dict[str, ArrayType]): Dictionary mapping parameter names to values.
    """
    return None

1D Layers (Dense)

Layers typically used for Multi-Layer Perceptrons (MLP) or final classification stages.

mpneuralnetwork.layers.Dense

Bases: Layer

Fully Connected (Dense) Layer.

Every neuron in the input is connected to every neuron in the output.

Operation

Y = X @ W + b

Attributes:

Name Type Description
output_size int

Dimensionality of the output space.

input_size int

Dimensionality of the input space.

initialization Lit_W

Weight initialization method ("auto", "he", "xavier").

no_bias bool

Whether to disable the bias vector.

weights ArrayType

Weight matrix of shape (input_size, output_size).

biases ArrayType

Bias vector of shape (1, output_size).

Source code in src/mpneuralnetwork/layers/layer1d.py
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class Dense(Layer):
    """Fully connected (dense) layer.

    Each output unit is a weighted sum of every input feature, optionally
    shifted by a bias.

    Operation:
        `Y = X @ W + b`

    Attributes:
        output_size (int): Dimensionality of the output space.
        input_size (int): Dimensionality of the input space.
        initialization (Lit_W): Weight initialization method ("auto", "he", "xavier").
        no_bias (bool): Whether to disable the bias vector.
        weights (ArrayType): Weight matrix of shape (input_size, output_size).
        biases (ArrayType): Bias vector of shape (1, output_size).
    """

    def __init__(
        self,
        output_size: int,
        input_size: int | None = None,
        initialization: Lit_W = "auto",
        no_bias: bool = False,
    ) -> None:
        """Creates the layer and builds it right away when the input size is known.

        Args:
            output_size (int): Number of neurons in this layer.
            input_size (int | None, optional): Number of input features. If None, inferred at build time.
            initialization (Lit_W, optional): Weight init strategy. Defaults to "auto".
            no_bias (bool, optional): If True, bias is not used. Defaults to False.
        """
        super().__init__(output_shape=output_size, input_shape=input_size)

        self.initialization: Lit_W = initialization
        self.no_bias: bool = no_bias

        # Declared for type checkers only; the arrays are allocated by `init_weights`.
        self.weights: ArrayType
        self.biases: ArrayType
        self.weights_gradient: ArrayType
        self.biases_gradient: ArrayType

        if input_size is None:
            return
        self.build(input_size)

    def get_config(self) -> dict:
        """Returns the base configuration extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            "output_size": self.output_size,
            "input_size": self.input_size,
            "initialization": self.initialization,
            "no_bias": self.no_bias,
        }

    def build(self, input_shape: int | tuple[int, ...]) -> None:
        """Records the input shape and allocates weights unless initialization is "auto".

        Args:
            input_shape (int | tuple[int, ...]): The shape of the input.
        """
        super().build(input_shape)

        if self.initialization == "auto":
            # Nothing is allocated here; `init_weights` has to be called separately.
            return
        self.init_weights(self.initialization, self.no_bias)

    def init_weights(self, method: Lit_W, no_bias: bool) -> None:
        """Allocates randomly initialized weights and, unless disabled, biases.

        Weights are drawn from a standard normal distribution and scaled by a
        standard deviation that depends on `method`; biases are drawn from a
        standard normal distribution without scaling.

        Args:
            method (Lit_W): Initialization method.
                - "he": standard deviation `sqrt(2 / input_size)`.
                - "xavier": standard deviation `sqrt(1 / input_size)`.
                - anything else: fixed standard deviation of 0.1.
            no_bias (bool): Whether to disable bias (e.g. if followed by BatchNorm).
                Overwrites the layer's `no_bias` attribute.
        """
        fan_in = self.input_size
        fan_out = self.output_size

        if method == "he":
            scale = xp.sqrt(2.0 / fan_in, dtype=DTYPE)
        elif method == "xavier":
            scale = xp.sqrt(1.0 / fan_in, dtype=DTYPE)
        else:
            scale = 0.1

        self.weights = xp.random.randn(fan_in, fan_out).astype(DTYPE) * scale
        self.weights_gradient = xp.zeros_like(self.weights, dtype=DTYPE)

        self.no_bias = no_bias
        if self.no_bias:
            return

        self.biases = xp.random.randn(1, fan_out).astype(DTYPE)
        self.biases_gradient = xp.zeros_like(self.biases, dtype=DTYPE)

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        """Computes `input_batch @ weights`, plus the biases when they are enabled.

        The input is cached on the layer for use by `backward`.

        Args:
            input_batch (ArrayType): Input data of shape (batch_size, input_size).
            training (bool, optional): Unused for Dense layer. Defaults to True.

        Returns:
            ArrayType: Output data of shape (batch_size, output_size).
        """
        self.input = input_batch

        activations: ArrayType = self.input @ self.weights
        if self.no_bias:
            return activations

        activations += self.biases
        return activations

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        """Stores the parameter gradients and returns the gradient w.r.t the input.

        Args:
            output_gradient_batch (ArrayType): Gradient w.r.t output (batch_size, output_size).

        Returns:
            ArrayType: Gradient w.r.t input (batch_size, input_size).
        """
        upstream = output_gradient_batch

        self.weights_gradient = self.input.T @ upstream
        if not self.no_bias:
            # The bias is shared by every sample, so its gradient is summed over the batch axis.
            self.biases_gradient = xp.sum(upstream, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore

        input_gradient: ArrayType = upstream @ self.weights.T
        return input_gradient

    @property
    def params(self) -> dict[str, tuple[ArrayType, ArrayType]]:
        """Returns (value, gradient) pairs for "weights" and, when enabled, "biases"."""
        trainable = {"weights": (self.weights, self.weights_gradient)}
        if not self.no_bias:
            trainable["biases"] = (self.biases, self.biases_gradient)
        return trainable

    def load_params(self, params: dict[str, ArrayType]) -> None:
        """Copies the given values into the existing weight (and bias) arrays in place.

        Args:
            params (dict[str, ArrayType]): Must contain "weights", and "biases" when bias is enabled.
        """
        self.weights[:] = params["weights"]
        if self.no_bias:
            return
        self.biases[:] = params["biases"]

__init__(output_size, input_size=None, initialization='auto', no_bias=False)

Initializes the Dense layer.

Parameters:

Name Type Description Default
output_size int

Number of neurons in this layer.

required
input_size int | None

Number of input features. If None, inferred at build time.

None
initialization Lit_W

Weight init strategy. Defaults to "auto".

'auto'
no_bias bool

If True, bias is not used. Defaults to False.

False
Source code in src/mpneuralnetwork/layers/layer1d.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def __init__(
    self,
    output_size: int,
    input_size: int | None = None,
    initialization: Lit_W = "auto",
    no_bias: bool = False,
) -> None:
    """Creates the layer and builds it right away when the input size is known.

    Args:
        output_size (int): Number of neurons in this layer.
        input_size (int | None, optional): Number of input features. If None, inferred at build time.
        initialization (Lit_W, optional): Weight init strategy. Defaults to "auto".
        no_bias (bool, optional): If True, bias is not used. Defaults to False.
    """
    super().__init__(output_shape=output_size, input_shape=input_size)

    self.initialization: Lit_W = initialization
    self.no_bias: bool = no_bias

    # Declared for type checkers only; nothing is allocated here.
    self.weights: ArrayType
    self.biases: ArrayType
    self.weights_gradient: ArrayType
    self.biases_gradient: ArrayType

    if input_size is None:
        return
    self.build(input_size)

backward(output_gradient_batch)

Performs backward propagation.

Computes gradients for weights, biases, and inputs.

Parameters:

Name Type Description Default
output_gradient_batch ArrayType

Gradient w.r.t output (batch_size, output_size).

required

Returns:

Name Type Description
ArrayType ArrayType

Gradient w.r.t input (batch_size, input_size).

Source code in src/mpneuralnetwork/layers/layer1d.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
    """Stores the parameter gradients and returns the gradient w.r.t the input.

    Args:
        output_gradient_batch (ArrayType): Gradient w.r.t output (batch_size, output_size).

    Returns:
        ArrayType: Gradient w.r.t input (batch_size, input_size).
    """
    upstream = output_gradient_batch

    self.weights_gradient = self.input.T @ upstream
    if not self.no_bias:
        # The bias is shared by every sample, so its gradient is summed over the batch axis.
        self.biases_gradient = xp.sum(upstream, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore

    input_gradient: ArrayType = upstream @ self.weights.T
    return input_gradient

forward(input_batch, training=True)

Performs forward propagation.

Parameters:

Name Type Description Default
input_batch ArrayType

Input data of shape (batch_size, input_size).

required
training bool

Unused for Dense layer. Defaults to True.

True

Returns:

Name Type Description
ArrayType ArrayType

Output data of shape (batch_size, output_size).

Source code in src/mpneuralnetwork/layers/layer1d.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
    """Computes `input_batch @ weights`, plus the biases when they are enabled.

    The input is cached on the layer for use by the backward pass.

    Args:
        input_batch (ArrayType): Input data of shape (batch_size, input_size).
        training (bool, optional): Unused for Dense layer. Defaults to True.

    Returns:
        ArrayType: Output data of shape (batch_size, output_size).
    """
    self.input = input_batch

    activations: ArrayType = self.input @ self.weights
    if self.no_bias:
        return activations

    activations += self.biases
    return activations

init_weights(method, no_bias)

Initializes weights using the specified method.

Parameters:

Name Type Description Default
method Lit_W

Initialization method: "he" (Kaiming He initialization, for ReLU) or "xavier" (Xavier Glorot initialization, for Sigmoid/Tanh).

required
no_bias bool

Whether to disable bias (e.g. if followed by BatchNorm).

required
Source code in src/mpneuralnetwork/layers/layer1d.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def init_weights(self, method: Lit_W, no_bias: bool) -> None:
    """Allocates randomly initialized weights and, unless disabled, biases.

    Weights are drawn from a standard normal distribution and scaled by a
    standard deviation that depends on `method`; biases are drawn from a
    standard normal distribution without scaling.

    Args:
        method (Lit_W): Initialization method.
            - "he": standard deviation `sqrt(2 / input_size)`.
            - "xavier": standard deviation `sqrt(1 / input_size)`.
            - anything else: fixed standard deviation of 0.1.
        no_bias (bool): Whether to disable bias (e.g. if followed by BatchNorm).
            Overwrites the layer's `no_bias` attribute.
    """
    fan_in = self.input_size
    fan_out = self.output_size

    if method == "he":
        scale = xp.sqrt(2.0 / fan_in, dtype=DTYPE)
    elif method == "xavier":
        scale = xp.sqrt(1.0 / fan_in, dtype=DTYPE)
    else:
        scale = 0.1

    self.weights = xp.random.randn(fan_in, fan_out).astype(DTYPE) * scale
    self.weights_gradient = xp.zeros_like(self.weights, dtype=DTYPE)

    self.no_bias = no_bias
    if self.no_bias:
        return

    self.biases = xp.random.randn(1, fan_out).astype(DTYPE)
    self.biases_gradient = xp.zeros_like(self.biases, dtype=DTYPE)

mpneuralnetwork.layers.Dropout

Bases: Layer

Dropout Layer for regularization.

Randomly sets input units to 0 with a frequency of `probability` at each step during training, which helps prevent overfitting.

Training

`output = input * mask`, where `mask` is drawn from Bernoulli(1 - p). Values are scaled by `1 / (1 - p)` to preserve magnitude.

Inference

output = input (Identity function).

Attributes:

Name Type Description
probability float

The dropout rate (fraction of input units to drop).

Source code in src/mpneuralnetwork/layers/layer1d.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
class Dropout(Layer):
    """Dropout Layer for regularization.

    Randomly sets input units to 0 with a frequency of `probability` at each step during training time,
    which helps prevent overfitting.

    Training:
        `output = input * mask` (where mask is Bernoulli(1-p))
        Values are scaled by `1/(1-p)` to preserve magnitude.
        With `p == 1` every unit is dropped and the output is all zeros.

    Inference:
        `output = input` (Identity function).

    Attributes:
        probability (float): The dropout rate (fraction of input units to drop).
        mask (ArrayType): Scaled keep-mask drawn by the most recent training forward pass.
    """

    def __init__(self, probability: float = 0.5) -> None:
        """Initializes Dropout.

        Args:
            probability (float, optional): Fraction of the input units to drop. Defaults to 0.5.
        """
        super().__init__()
        self.probability: float = probability
        # Declared for type checkers only; assigned by `forward` in training mode.
        self.mask: ArrayType

    def get_config(self) -> dict:
        """Returns the base configuration extended with the dropout probability."""
        config = super().get_config()
        config.update({"probability": self.probability})
        return config

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        """Applies dropout to the input.

        Args:
            input_batch (ArrayType): Input data.
            training (bool, optional): If True, applies random dropout. If False, returns input as is.

        Returns:
            ArrayType: Processed input.
        """
        if not training:
            return input_batch

        keep_probability = 1 - self.probability
        if keep_probability == 0:
            # Every unit is dropped. Rescaling by 1 / keep_probability would divide
            # zero by zero and fill the mask with NaN, so use a plain zero mask.
            self.mask = xp.zeros(input_batch.shape, dtype=DTYPE)
        else:
            self.mask = xp.random.binomial(1, keep_probability, size=input_batch.shape).astype(DTYPE) / keep_probability

        res: ArrayType = input_batch * self.mask
        return res

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        """Propagates gradients through the dropout mask.

        Uses the mask stored by the most recent training forward pass.

        Args:
            output_gradient_batch (ArrayType): Gradient from next layer.

        Returns:
            ArrayType: Gradient w.r.t input (zeroed out where inputs were dropped).
        """
        grad: ArrayType = output_gradient_batch * self.mask
        return grad

__init__(probability=0.5)

Initializes Dropout.

Parameters:

Name Type Description Default
probability float

Fraction of the input units to drop. Defaults to 0.5.

0.5
Source code in src/mpneuralnetwork/layers/layer1d.py
158
159
160
161
162
163
164
165
166
def __init__(self, probability: float = 0.5) -> None:
    """Stores the dropout rate.

    Args:
        probability (float, optional): Fraction of the input units to drop. Defaults to 0.5.
    """
    super().__init__()

    # Declared for type checkers only; no mask is allocated here.
    self.mask: ArrayType
    self.probability: float = probability

backward(output_gradient_batch)

Propagates gradients through the dropout mask.

Parameters:

Name Type Description Default
output_gradient_batch ArrayType

Gradient from next layer.

required

Returns:

Name Type Description
ArrayType ArrayType

Gradient w.r.t input (zeroed out where inputs were dropped).

Source code in src/mpneuralnetwork/layers/layer1d.py
191
192
193
194
195
196
197
198
199
200
201
def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
    """Multiplies the incoming gradient element-wise by the stored dropout mask.

    Args:
        output_gradient_batch (ArrayType): Gradient from next layer.

    Returns:
        ArrayType: Gradient w.r.t input (zeroed out where inputs were dropped).
    """
    masked_gradient: ArrayType = output_gradient_batch * self.mask
    return masked_gradient

forward(input_batch, training=True)

Applies dropout to the input.

Parameters:

Name Type Description Default
input_batch ArrayType

Input data.

required
training bool

If True, applies random dropout. If False, returns input as is.

True

Returns:

Name Type Description
ArrayType ArrayType

Processed input.

Source code in src/mpneuralnetwork/layers/layer1d.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
    """Applies dropout to the input.

    In training mode a fresh keep-mask is drawn, scaled by `1 / (1 - p)`,
    and stored on the layer. With `p == 1` every unit is dropped and the
    output is all zeros.

    Args:
        input_batch (ArrayType): Input data.
        training (bool, optional): If True, applies random dropout. If False, returns input as is.

    Returns:
        ArrayType: Processed input.
    """
    if not training:
        return input_batch

    keep_probability = 1 - self.probability
    if keep_probability == 0:
        # Every unit is dropped. Rescaling by 1 / keep_probability would divide
        # zero by zero and fill the mask with NaN, so use a plain zero mask.
        self.mask = xp.zeros(input_batch.shape, dtype=DTYPE)
    else:
        self.mask = xp.random.binomial(1, keep_probability, size=input_batch.shape).astype(DTYPE) / keep_probability

    res: ArrayType = input_batch * self.mask
    return res

mpneuralnetwork.layers.BatchNormalization

Bases: Layer

Batch Normalization Layer (1D).

Normalizes the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.

Training

Uses batch statistics (mean, variance) to normalize. Updates running moving averages.

Inference

Uses learned running statistics (cache_m, cache_v) to normalize.

Attributes:

Name Type Description
momentum float

Momentum for the moving average updating.

epsilon float

Small float added to variance to avoid dividing by zero.

gamma ArrayType

Learnable scale parameter.

beta ArrayType

Learnable shift parameter.

Source code in src/mpneuralnetwork/layers/layer1d.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
class BatchNormalization(Layer):
    """Batch Normalization Layer (1D).

    Normalizes each feature over the batch axis, then applies a learnable
    scale (`gamma`) and shift (`beta`).

    Training:
        Uses batch statistics (mean, variance) to normalize. Updates running moving averages.

    Inference:
        Uses the running statistics (cache_m, cache_v) to normalize.

    Attributes:
        momentum (float): Momentum for the moving average updating.
        epsilon (float): Small float added to variance to avoid dividing by zero.
        gamma (ArrayType): Learnable scale parameter.
        beta (ArrayType): Learnable shift parameter.
    """

    def __init__(self, momentum: float = 0.9, epsilon: float = 1e-8) -> None:
        """Stores the hyper-parameters; arrays are allocated later by `build`.

        Args:
            momentum (float, optional): Momentum for moving average (typically 0.9 or 0.99). Defaults to 0.9.
            epsilon (float, optional): Epsilon for stability. Defaults to 1e-8.
        """
        super().__init__()

        self.momentum: float = momentum
        self.epsilon: float = epsilon

        # Declared for type checkers only; assigned in `build`.
        self.gamma: ArrayType
        self.beta: ArrayType
        self.cache_m: ArrayType
        self.cache_v: ArrayType

    def build(self, input_shape: int | tuple[int, ...]) -> None:
        """Allocates parameters, gradients and running statistics of shape (1, input_size).

        Args:
            input_shape (int | tuple[int, ...]): The shape of the input.
        """
        super().build(input_shape)

        per_feature = (1, self.input_size)

        # Scale starts at one and shift at zero.
        self.gamma = xp.ones(per_feature, dtype=DTYPE)
        self.gamma_gradient = xp.zeros_like(self.gamma, dtype=DTYPE)

        self.beta = xp.zeros(per_feature, dtype=DTYPE)
        self.beta_gradient = xp.zeros_like(self.beta, dtype=DTYPE)

        # Running mean starts at zero and running variance at one.
        self.cache_m = xp.zeros(per_feature, dtype=DTYPE)
        self.cache_v = xp.ones(per_feature, dtype=DTYPE)

        # Intermediate results of `forward` that `backward` reuses.
        self.std_inv: ArrayType
        self.x_centered: ArrayType
        self.x_norm: ArrayType

    def get_config(self) -> dict:
        """Returns the base configuration extended with momentum and epsilon."""
        return {
            **super().get_config(),
            "momentum": self.momentum,
            "epsilon": self.epsilon,
        }

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        """Normalizes the batch and applies the scale and shift.

        Args:
            input_batch (ArrayType): Input data of shape (batch_size, input_size).
            training (bool, optional): If True, uses batch stats and updates running averages.
                If False, uses running averages.

        Returns:
            ArrayType: Normalized and scaled data.
        """
        self.input = input_batch

        if training:
            mean = xp.mean(self.input, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore
            var = xp.var(self.input, axis=0, keepdims=True, dtype=DTYPE)

            # Exponential moving averages of the batch statistics.
            self.cache_m = self.momentum * self.cache_m + (1 - self.momentum) * mean
            self.cache_v = self.momentum * self.cache_v + (1 - self.momentum) * var
        else:
            mean, var = self.cache_m, self.cache_v

        self.std_inv = 1 / xp.sqrt(var + self.epsilon, dtype=DTYPE)
        self.x_centered = self.input - mean
        self.x_norm = self.x_centered * self.std_inv

        scaled: ArrayType = self.x_norm * self.gamma
        scaled += self.beta
        return scaled

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        """Stores the gamma/beta gradients and returns the gradient w.r.t the input.

        Relies on `x_norm` and `std_inv` cached by the preceding `forward` call.

        Args:
            output_gradient_batch (ArrayType): Gradient w.r.t output.

        Returns:
            ArrayType: Gradient w.r.t input.
        """
        upstream = output_gradient_batch

        self.gamma_gradient = xp.sum(self.x_norm * upstream, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore
        self.beta_gradient = xp.sum(upstream, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore

        batch_size = upstream.shape[0]
        dx_norm = upstream * self.gamma

        # Per-feature reductions over the batch axis.
        sum_dx_norm = xp.sum(dx_norm, axis=0, keepdims=True, dtype=DTYPE)
        sum_dx_norm_x = xp.sum(dx_norm * self.x_norm, axis=0, keepdims=True, dtype=DTYPE)

        input_gradient: ArrayType = (1 / batch_size) * self.std_inv * (batch_size * dx_norm - sum_dx_norm - self.x_norm * sum_dx_norm_x)
        return input_gradient

    @property
    def params(self) -> dict[str, tuple[ArrayType, ArrayType]]:
        """Returns (value, gradient) pairs for "gamma" and "beta"."""
        return {  # type: ignore
            "gamma": (self.gamma, self.gamma_gradient),
            "beta": (self.beta, self.beta_gradient),
        }

    def load_params(self, params: dict[str, ArrayType]) -> None:
        """Copies the given "gamma" and "beta" values into the existing arrays in place.

        Args:
            params (dict[str, ArrayType]): Must contain "gamma" and "beta".
        """
        for name in ("gamma", "beta"):
            getattr(self, name)[:] = params[name]

    @property
    def state(self) -> dict[str, ArrayType]:
        """Returns the running mean ("cache_m") and running variance ("cache_v")."""
        return {"cache_m": self.cache_m, "cache_v": self.cache_v}

    @state.setter
    def state(self, state: dict[str, ArrayType]) -> None:
        """Replaces the running statistics with the given arrays.

        Args:
            state (dict[str, ArrayType]): Must contain "cache_m" and "cache_v".
        """
        self.cache_m, self.cache_v = state["cache_m"], state["cache_v"]

__init__(momentum=0.9, epsilon=1e-08)

Initializes BatchNormalization.

Parameters:

Name Type Description Default
momentum float

Momentum for moving average (typically 0.9 or 0.99). Defaults to 0.9.

0.9
epsilon float

Epsilon for stability. Defaults to 1e-8.

1e-08
Source code in src/mpneuralnetwork/layers/layer1d.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def __init__(self, momentum: float = 0.9, epsilon: float = 1e-8) -> None:
    """Sets up a BatchNormalization layer.

    Only the hyper-parameters are stored here; the parameter and statistics
    arrays are declared but not allocated.

    Args:
        momentum (float, optional): Momentum for moving average (typically 0.9 or 0.99). Defaults to 0.9.
        epsilon (float, optional): Epsilon for stability. Defaults to 1e-8.
    """
    super().__init__()

    # Hyper-parameters.
    self.epsilon: float = epsilon
    self.momentum: float = momentum

    # Bare annotations: they declare the attributes for type checkers without assigning them.
    self.cache_m: ArrayType
    self.cache_v: ArrayType
    self.gamma: ArrayType
    self.beta: ArrayType

backward(output_gradient_batch)

Computes gradients for BN.

Parameters:

Name Type Description Default
output_gradient_batch ArrayType

Gradient w.r.t output.

required

Returns:

Name Type Description
ArrayType ArrayType

Gradient w.r.t input.

Source code in src/mpneuralnetwork/layers/layer1d.py
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
    """Backpropagates through batch normalization.

    Stores the parameter gradients in ``gamma_gradient`` and ``beta_gradient``
    and returns the gradient with respect to the layer input. Reads the
    ``x_norm`` and ``std_inv`` attributes of the layer.

    Args:
        output_gradient_batch (ArrayType): Gradient w.r.t. the layer output.

    Returns:
        ArrayType: Gradient w.r.t. the layer input.
    """
    upstream = output_gradient_batch

    # Parameter gradients are reduced over the batch axis (axis 0).
    self.gamma_gradient = xp.sum(self.x_norm * upstream, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore
    self.beta_gradient = xp.sum(upstream, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore

    batch_size = upstream.shape[0]

    # Gradient w.r.t. the normalized activations.
    d_normed = upstream * self.gamma

    sum_d_normed = xp.sum(d_normed, axis=0, keepdims=True, dtype=DTYPE)
    sum_d_normed_x = xp.sum(d_normed * self.x_norm, axis=0, keepdims=True, dtype=DTYPE)

    scale = (1 / batch_size) * self.std_inv
    bracket = batch_size * d_normed - sum_d_normed - self.x_norm * sum_d_normed_x

    input_gradient: ArrayType = scale * bracket
    return input_gradient

forward(input_batch, training=True)

Performs batch normalization.

Parameters:

Name Type Description Default
input_batch ArrayType

Input data of shape (batch_size, input_size).

required
training bool

If True, uses batch stats and updates running averages. If False, uses running averages.

True

Returns:

Name Type Description
ArrayType ArrayType

Normalized and scaled data.

Source code in src/mpneuralnetwork/layers/layer1d.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
    """Applies batch normalization to a batch of inputs.

    Args:
        input_batch (ArrayType): Input data of shape (batch_size, input_size).
        training (bool, optional): When True, normalizes with the statistics of the
            current batch and folds them into the running averages. When False,
            normalizes with the stored running averages. Defaults to True.

    Returns:
        ArrayType: Normalized data scaled by ``gamma`` and shifted by ``beta``.
    """
    self.input = input_batch

    if training:
        batch_mean: ArrayType = xp.mean(self.input, axis=0, keepdims=True, dtype=DTYPE)  # type: ignore
        batch_var: ArrayType = xp.var(self.input, axis=0, keepdims=True, dtype=DTYPE)

        # Exponential moving averages; these are what the inference branch reads.
        decay = self.momentum
        self.cache_m = decay * self.cache_m + (1 - decay) * batch_mean
        self.cache_v = decay * self.cache_v + (1 - decay) * batch_var
    else:
        batch_mean, batch_var = self.cache_m, self.cache_v

    # Intermediate results are kept on the instance.
    self.std_inv = 1 / xp.sqrt(batch_var + self.epsilon, dtype=DTYPE)
    self.x_centered = self.input - batch_mean
    self.x_norm = self.x_centered * self.std_inv

    output: ArrayType = self.x_norm * self.gamma
    output += self.beta
    return output

2D Layers (Convolutional)

Layers designed for processing grid-like data (e.g., images) using the im2col optimization.

mpneuralnetwork.layers.Convolutional

Bases: Layer

2D Convolutional Layer.

Applies a 2D convolution over an input signal composed of several input planes. Uses the im2col optimization to convert convolution into matrix multiplication, allowing for efficient vectorization.

Attributes:

Name Type Description
output_depth int

Number of output channels (filters).

kernel_size int

Size of the square convolution kernel.

stride int

Step size of the convolution.

padding int

Amount of zero-padding applied to both sides of the input.

initialization Lit_W

Weight initialization strategy.

no_bias bool

Whether to disable bias.

kernels ArrayType

Learnable filters (output_depth, input_depth, k, k).

biases ArrayType

Learnable biases (output_depth,).

Source code in src/mpneuralnetwork/layers/layer2d.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
class Convolutional(Layer):
    """2D Convolutional Layer.

    Applies a 2D convolution over an input signal composed of several input planes.
    Uses the `im2col` optimization to convert convolution into matrix multiplication,
    allowing for efficient vectorization.

    Attributes:
        output_depth (int): Number of output channels (filters).
        kernel_size (int): Size of the square convolution kernel.
        stride (int): Step size of the convolution.
        padding (int): Amount of zero-padding applied to both sides of the input.
        initialization (Lit_W): Weight initialization strategy.
        no_bias (bool): Whether to disable bias.
        kernels (ArrayType): Learnable filters (output_depth, input_depth, k, k).
        biases (ArrayType): Learnable biases (output_depth,).
    """

    def __init__(
        self,
        output_depth: int,
        kernel_size: int,
        input_shape: tuple | None = None,
        initialization: Lit_W = "auto",
        no_bias: bool = False,
        padding: int | Literal["valid", "same"] = "valid",
        stride: int = 1,
    ) -> None:
        """Initializes the Convolutional layer.

        Args:
            output_depth (int): Number of filters.
            kernel_size (int): Height/Width of the filter (assumed square).
            input_shape (tuple | None, optional): Shape of input (depth, height, width).
            initialization (Lit_W, optional): Weight init method ("auto", "he", "xavier").
            no_bias (bool, optional): Disable bias. Defaults to False.
            padding (int | str, optional): Padding strategy. Can be an integer (amount of padding),
                "valid" (no padding), or "same" (padding to preserve spatial dimensions with stride=1).
                Defaults to "valid".
            stride (int, optional): Stride of the convolution. Defaults to 1.
        """
        super().__init__()
        self.output_depth: int = output_depth
        self.kernel_size: int = kernel_size
        self.initialization: Lit_W = initialization
        self.no_bias: bool = no_bias
        self.stride: int = stride
        self.padding_arg: int | Literal["valid", "same"] = padding

        self.padding: int
        if self.padding_arg == "valid":
            self.padding = 0
        elif self.padding_arg == "same":
            self.padding = (self.kernel_size - 1) // 2
        elif isinstance(self.padding_arg, int):
            self.padding = self.padding_arg
        else:
            raise ValueError("Padding must be 'valid', 'same', or an integer.")

        self.kernels: ArrayType
        self.kernels_gradient: ArrayType
        self.biases: ArrayType
        self.biases_gradient: ArrayType
        self.input_padded_shape: tuple

        if input_shape is not None:
            self.build(input_shape)

    def get_config(self) -> dict:
        config = super().get_config()
        config.update(
            {
                "output_depth": self.output_depth,
                "kernel_size": self.kernel_size,
                "input_shape": self.input_shape,
                "initialization": self.initialization,
                "no_bias": self.no_bias,
                "stride": self.stride,
                "padding": self.padding_arg,
            }
        )
        return config

    def build(self, input_shape: int | tuple[int, ...]) -> None:
        super().build(input_shape)

        _, input_height, input_width = self.input_shape

        output_height = (input_height - self.kernel_size + 2 * self.padding) // self.stride + 1
        output_width = (input_width - self.kernel_size + 2 * self.padding) // self.stride + 1
        self.output_shape = (self.output_depth, output_height, output_width)

        if self.initialization != "auto":
            self.init_weights(self.initialization, self.no_bias)

    def init_weights(self, method: Lit_W, no_bias: bool) -> None:
        """Initializes kernels and biases."""
        std_dev = 0.1

        input_depth, _, _ = self.input_shape

        if method == "he":
            std_dev = xp.sqrt(2.0 / (input_depth * self.kernel_size * self.kernel_size), dtype=DTYPE)
        elif method == "xavier":
            std_dev = xp.sqrt(1.0 / (input_depth * self.kernel_size * self.kernel_size), dtype=DTYPE)

        kernels_shape = (
            self.output_depth,
            input_depth,
            self.kernel_size,
            self.kernel_size,
        )

        self.kernels = xp.random.randn(*kernels_shape).astype(DTYPE) * std_dev
        self.kernels_gradient = xp.zeros_like(self.kernels, dtype=DTYPE)

        self.no_bias = no_bias

        if not self.no_bias:
            self.biases = xp.random.randn(self.output_depth).astype(DTYPE)
            self.biases_gradient = xp.zeros_like(self.biases, dtype=DTYPE)

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        """Performs 2D Convolution.

        Args:
            input_batch (ArrayType): Input data (N, C_in, H, W).
            training (bool, optional): Unused. Defaults to True.

        Returns:
            ArrayType: Feature maps (N, C_out, H_out, W_out).
        """
        self.input = input_batch

        if self.padding > 0:
            input_batch_padded = xp.pad(
                input_batch,
                (
                    (0, 0),
                    (0, 0),
                    (self.padding, self.padding),
                    (self.padding, self.padding),
                ),
            )
        else:
            input_batch_padded = input_batch

        self.input_padded_shape = input_batch_padded.shape

        input_windows = im2col(input_batch_padded, self.kernel_size, self.stride)

        output = xp.tensordot(input_windows, self.kernels, axes=((3, 4, 5), (1, 2, 3)))

        if not self.no_bias:
            output += self.biases

        return output.transpose(0, 3, 1, 2)  # type: ignore[no-any-return]

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        """Backpropagates gradients through convolution.

        Uses `col2im` to reconstruct the gradient for the input image.

        Args:
            output_gradient_batch (ArrayType): Gradient w.r.t output.

        Returns:
            ArrayType: Gradient w.r.t input.
        """
        grad_transposed = output_gradient_batch.transpose(0, 2, 3, 1)

        if not self.no_bias:
            self.biases_gradient = xp.sum(grad_transposed, axis=(0, 1, 2), dtype=DTYPE)

        if self.padding > 0:
            input_batch_padded = xp.pad(
                self.input,
                (
                    (0, 0),
                    (0, 0),
                    (self.padding, self.padding),
                    (self.padding, self.padding),
                ),
            )
        else:
            input_batch_padded = self.input

        input_windows = im2col(input_batch_padded, self.kernel_size, self.stride)

        self.kernels_gradient = xp.tensordot(grad_transposed, input_windows, axes=((0, 1, 2), (0, 1, 2)))

        input_grad_windows = xp.tensordot(grad_transposed, self.kernels, axes=((3), (0)))

        input_grad_windows_transposed = input_grad_windows.transpose(0, 3, 1, 2, 4, 5)

        input_grad_padded = col2im(
            input_grad_windows_transposed,
            self.input_padded_shape,
            self.output_shape,
            self.kernel_size,
            self.stride,
        )

        if self.padding > 0:
            input_grad = input_grad_padded[:, :, self.padding : -self.padding, self.padding : -self.padding]
        else:
            input_grad = input_grad_padded

        return input_grad

    @property
    def params(self) -> dict[str, tuple[ArrayType, ArrayType]]:
        return {
            "kernels": (self.kernels, self.kernels_gradient),
            "biases": (self.biases, self.biases_gradient),
        }

    def load_params(self, params: dict[str, ArrayType]) -> None:
        self.kernels[:] = params["kernels"]
        self.biases[:] = params["biases"]

__init__(output_depth, kernel_size, input_shape=None, initialization='auto', no_bias=False, padding='valid', stride=1)

Initializes the Convolutional layer.

Parameters:

Name Type Description Default
output_depth int

Number of filters.

required
kernel_size int

Height/Width of the filter (assumed square).

required
input_shape tuple | None

Shape of input (depth, height, width).

None
initialization Lit_W

Weight init method ("auto", "he", "xavier").

'auto'
no_bias bool

Disable bias. Defaults to False.

False
padding int | str

Padding strategy. Can be an integer (amount of padding), "valid" (no padding), or "same" (padding to preserve spatial dimensions with stride=1). Defaults to "valid".

'valid'
stride int

Stride of the convolution. Defaults to 1.

1
Source code in src/mpneuralnetwork/layers/layer2d.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def __init__(
    self,
    output_depth: int,
    kernel_size: int,
    input_shape: tuple | None = None,
    initialization: Lit_W = "auto",
    no_bias: bool = False,
    padding: int | Literal["valid", "same"] = "valid",
    stride: int = 1,
) -> None:
    """Sets up a Convolutional layer.

    Args:
        output_depth (int): Number of filters.
        kernel_size (int): Height/Width of the filter (assumed square).
        input_shape (tuple | None, optional): Shape of input (depth, height, width).
            When given, ``build`` is called right away.
        initialization (Lit_W, optional): Weight init method ("auto", "he", "xavier").
        no_bias (bool, optional): Disable bias. Defaults to False.
        padding (int | str, optional): Padding strategy. Can be an integer (amount of padding),
            "valid" (no padding), or "same" (padding to preserve spatial dimensions with stride=1).
            Defaults to "valid".
        stride (int, optional): Stride of the convolution. Defaults to 1.

    Raises:
        ValueError: If ``padding`` is neither "valid", "same", nor an integer.
    """
    super().__init__()

    # Hyper-parameters, stored as given.
    self.output_depth: int = output_depth
    self.kernel_size: int = kernel_size
    self.initialization: Lit_W = initialization
    self.no_bias: bool = no_bias
    self.stride: int = stride
    self.padding_arg: int | Literal["valid", "same"] = padding

    # Translate the padding argument into a per-side amount.
    pad_amount: int
    if padding == "valid":
        pad_amount = 0
    elif padding == "same":
        pad_amount = (kernel_size - 1) // 2
    elif isinstance(padding, int):
        pad_amount = padding
    else:
        raise ValueError("Padding must be 'valid', 'same', or an integer.")
    self.padding: int = pad_amount

    # Bare annotations: declared for type checkers, not assigned here.
    self.kernels: ArrayType
    self.kernels_gradient: ArrayType
    self.biases: ArrayType
    self.biases_gradient: ArrayType
    self.input_padded_shape: tuple

    if input_shape is None:
        return
    self.build(input_shape)

backward(output_gradient_batch)

Backpropagates gradients through convolution.

Uses col2im to reconstruct the gradient for the input image.

Parameters:

Name Type Description Default
output_gradient_batch ArrayType

Gradient w.r.t output.

required

Returns:

Name Type Description
ArrayType ArrayType

Gradient w.r.t input.

Source code in src/mpneuralnetwork/layers/layer2d.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
    """Propagates the output gradient back through the convolution.

    Stores ``kernels_gradient`` (and ``biases_gradient`` when biases are enabled),
    then rebuilds the input gradient from per-window gradients with `col2im`.

    Args:
        output_gradient_batch (ArrayType): Gradient w.r.t output.

    Returns:
        ArrayType: Gradient w.r.t input.
    """
    # Channel axis moved to the end.
    grad_channels_last = output_gradient_batch.transpose(0, 2, 3, 1)

    if not self.no_bias:
        self.biases_gradient = xp.sum(grad_channels_last, axis=(0, 1, 2), dtype=DTYPE)

    # Re-create the padded input that forward() convolved over.
    padded_input = self.input
    if self.padding > 0:
        spatial_pad = (self.padding, self.padding)
        padded_input = xp.pad(self.input, ((0, 0), (0, 0), spatial_pad, spatial_pad))

    windows = im2col(padded_input, self.kernel_size, self.stride)

    self.kernels_gradient = xp.tensordot(grad_channels_last, windows, axes=((0, 1, 2), (0, 1, 2)))

    grad_windows = xp.tensordot(grad_channels_last, self.kernels, axes=((3), (0)))
    grad_windows = grad_windows.transpose(0, 3, 1, 2, 4, 5)

    padded_grad = col2im(
        grad_windows,
        self.input_padded_shape,
        self.output_shape,
        self.kernel_size,
        self.stride,
    )

    if self.padding > 0:
        # Drop the border that corresponds to the padding.
        return padded_grad[:, :, self.padding : -self.padding, self.padding : -self.padding]
    return padded_grad

forward(input_batch, training=True)

Performs 2D Convolution.

Parameters:

Name Type Description Default
input_batch ArrayType

Input data (N, C_in, H, W).

required
training bool

Unused. Defaults to True.

True

Returns:

Name Type Description
ArrayType ArrayType

Feature maps (N, C_out, H_out, W_out).

Source code in src/mpneuralnetwork/layers/layer2d.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
    """Convolves a batch of inputs with the layer's kernels.

    Args:
        input_batch (ArrayType): Input data (N, C_in, H, W).
        training (bool, optional): Unused. Defaults to True.

    Returns:
        ArrayType: Feature maps (N, C_out, H_out, W_out).
    """
    self.input = input_batch

    padded_batch = input_batch
    if self.padding > 0:
        spatial_pad = (self.padding, self.padding)
        padded_batch = xp.pad(input_batch, ((0, 0), (0, 0), spatial_pad, spatial_pad))

    # Stored on the instance alongside the raw input.
    self.input_padded_shape = padded_batch.shape

    windows = im2col(padded_batch, self.kernel_size, self.stride)

    # Contract the last three window axes with the kernels' last three axes.
    feature_maps = xp.tensordot(windows, self.kernels, axes=((3, 4, 5), (1, 2, 3)))

    if not self.no_bias:
        feature_maps += self.biases

    # Move the channel axis from last to second position.
    return feature_maps.transpose(0, 3, 1, 2)  # type: ignore[no-any-return]

init_weights(method, no_bias)

Initializes kernels and biases.

Source code in src/mpneuralnetwork/layers/layer2d.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def init_weights(self, method: Lit_W, no_bias: bool) -> None:
    """Creates the kernels (and, unless disabled, the biases) with random values.

    Args:
        method (Lit_W): "he" and "xavier" derive the kernel standard deviation from
            the fan-in; any other value uses a fixed 0.1.
        no_bias (bool): Stored as ``self.no_bias``. When True no bias arrays are created.
    """
    in_channels, _, _ = self.input_shape
    fan_in = in_channels * self.kernel_size * self.kernel_size

    if method == "he":
        numerator = 2.0
    elif method == "xavier":
        numerator = 1.0
    else:
        numerator = None

    scale = 0.1 if numerator is None else xp.sqrt(numerator / fan_in, dtype=DTYPE)

    shape = (self.output_depth, in_channels, self.kernel_size, self.kernel_size)
    self.kernels = xp.random.randn(*shape).astype(DTYPE) * scale
    self.kernels_gradient = xp.zeros_like(self.kernels, dtype=DTYPE)

    self.no_bias = no_bias
    if self.no_bias:
        return

    self.biases = xp.random.randn(self.output_depth).astype(DTYPE)
    self.biases_gradient = xp.zeros_like(self.biases, dtype=DTYPE)

mpneuralnetwork.layers.MaxPooling2D

Bases: Layer

Max Pooling 2D Layer.

Downsamples the input by taking the maximum value over a window. Reduces spatial dimensions and computation, while providing translational invariance.

Attributes:

Name Type Description
pool_size int

Size of the pooling window.

stride int

Stride of the pooling operation.

Source code in src/mpneuralnetwork/layers/layer2d.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
class MaxPooling2D(Layer):
    """Max Pooling 2D Layer.

    Downsamples the input by taking the maximum value over a window.
    Reduces spatial dimensions and computation, while providing translational invariance.

    Attributes:
        pool_size (int): Size of the pooling window.
        stride (int): Stride of the pooling operation.
    """

    def __init__(self, pool_size: int = 2, strides: int | None = None):
        super().__init__()
        self.pool_size: int = pool_size
        self.stride: int = strides if strides is not None else pool_size

        self.windows: ArrayType

    def build(self, input_shape: int | tuple[int, ...]) -> None:
        super().build(input_shape)
        C, H, W = self.input_shape

        out_h = (H - self.pool_size) // self.stride + 1
        out_w = (W - self.pool_size) // self.stride + 1

        self.output_shape = (C, out_h, out_w)

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        self.input_shape = input_batch.shape
        self.windows = im2col(input_batch, self.pool_size, self.stride)
        max_val = xp.max(self.windows, axis=(4, 5))

        return max_val.transpose(0, 3, 1, 2)  # type: ignore

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        grad_transposed = output_gradient_batch.transpose(0, 2, 3, 1)

        grad_expanded = grad_transposed[..., None, None]

        max_vals = xp.max(self.windows, axis=(4, 5), keepdims=True)
        mask = self.windows == max_vals

        d_windows = grad_expanded * mask
        d_windows = d_windows.transpose(0, 3, 1, 2, 4, 5)

        output_shape_no_batch = output_gradient_batch.shape[1:]

        return col2im(
            d_windows,
            self.input_shape,
            output_shape_no_batch,
            self.pool_size,
            self.stride,
        )

mpneuralnetwork.layers.AveragePooling2D

Bases: Layer

Average Pooling 2D Layer.

Downsamples the input by taking the average value over a window.

Attributes:

Name Type Description
pool_size int

Size of the pooling window.

stride int

Stride of the pooling operation.

Source code in src/mpneuralnetwork/layers/layer2d.py
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
class AveragePooling2D(Layer):
    """Average Pooling 2D Layer.

    Downsamples the input by taking the average value over a window.

    Attributes:
        pool_size (int): Size of the pooling window.
        stride (int): Stride of the pooling operation.
    """

    def __init__(self, pool_size: int = 2, strides: int | None = None):
        super().__init__()
        self.pool_size: int = pool_size
        self.stride: int = strides if strides is not None else pool_size

        self.windows: ArrayType

    def build(self, input_shape: int | tuple[int, ...]) -> None:
        super().build(input_shape)
        C, H, W = self.input_shape

        out_h = (H - self.pool_size) // self.stride + 1
        out_w = (W - self.pool_size) // self.stride + 1

        self.output_shape = (C, out_h, out_w)

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        self.input_shape = input_batch.shape
        self.windows = im2col(input_batch, self.pool_size, self.stride)
        means = xp.mean(self.windows, axis=(4, 5), dtype=DTYPE)

        return means.transpose(0, 3, 1, 2)  # type: ignore

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        grad_transposed = output_gradient_batch.transpose(0, 2, 3, 1)

        grad_expanded = grad_transposed[..., None, None]

        d_windows = grad_expanded * xp.ones_like(self.windows, dtype=DTYPE) / (self.pool_size * self.pool_size)
        d_windows = d_windows.transpose(0, 3, 1, 2, 4, 5)

        output_shape_no_batch = output_gradient_batch.shape[1:]

        return col2im(
            d_windows,
            self.input_shape,
            output_shape_no_batch,
            self.pool_size,
            self.stride,
        )

mpneuralnetwork.layers.Flatten

Bases: Layer

Flatten Layer.

Flattens the input tensor into a 1D tensor (vector) per sample. Crucial for connecting Convolutional/Pooling layers to Dense layers.

Input: (Batch, Channel, Height, Width) Output: (Batch, Channel * Height * Width)

Source code in src/mpneuralnetwork/layers/layer2d.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
class Flatten(Layer):
    """Flatten Layer.

    Collapses all non-batch axes of the input into a single axis, producing one
    vector per sample. Used between Convolutional/Pooling layers and Dense layers.

    Input: (Batch, Channel, Height, Width)
    Output: (Batch, Channel * Height * Width)
    """

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        """Reshapes the batch to (batch_size, -1); ``training`` is unused."""
        batch_size = input_batch.shape[0]
        return input_batch.reshape(batch_size, -1)

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        """Reshapes the gradient back to (batch_size, *self.input_shape)."""
        batch_size = output_gradient_batch.shape[0]
        restored_shape = (batch_size, *self.input_shape)
        return output_gradient_batch.reshape(*restored_shape)

mpneuralnetwork.layers.BatchNormalization2D

Bases: Layer

Batch Normalization Layer (2D) for Convolutional Networks.

Normalizes the activations of the previous layer for each batch. Operates on the channel dimension (axis 1), so statistics are computed over (Batch, Height, Width).

Attributes:

Name Type Description
momentum float

Momentum for the moving average.

epsilon float

Small float added to variance to avoid dividing by zero.

Source code in src/mpneuralnetwork/layers/layer2d.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
class BatchNormalization2D(Layer):
    """Batch Normalization Layer (2D) for Convolutional Networks.

    Normalize the activations of the previous layer at each batch.
    Operates on the channel dimension (axis 1), so statistics are computed
    over (Batch, Height, Width).

    Attributes:
        momentum (float): Momentum for the moving average.
        epsilon (float): Small float added to variance to avoid dividing by zero.
    """

    def __init__(self, momentum: float = 0.9, epsilon: float = 1e-8) -> None:
        super().__init__()
        self.momentum: float = momentum
        self.epsilon: float = epsilon

        self.gamma: ArrayType
        self.beta: ArrayType

        self.cache_m: ArrayType
        self.cache_v: ArrayType

    def build(self, input_shape: int | tuple[int, ...]) -> None:
        super().build(input_shape)
        C, H, W = self.input_shape

        self.gamma = xp.ones((1, C, 1, 1), dtype=DTYPE)
        self.gamma_gradient = xp.zeros_like(self.gamma, dtype=DTYPE)

        self.beta = xp.zeros((1, C, 1, 1), dtype=DTYPE)
        self.beta_gradient = xp.zeros_like(self.beta, dtype=DTYPE)

        self.cache_m = xp.zeros((1, C, 1, 1), dtype=DTYPE)
        self.cache_v = xp.ones((1, C, 1, 1), dtype=DTYPE)

        self.std_inv: ArrayType
        self.x_centered: ArrayType
        self.x_norm: ArrayType

    def get_config(self) -> dict:
        config = super().get_config()
        config.update({"momentum": self.momentum, "epsilon": self.epsilon})
        return config

    def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
        """Performs spatial batch normalization.

        Args:
            input_batch (ArrayType): Input (N, C, H, W).
            training (bool, optional): If True, updates running stats.

        Returns:
            ArrayType: Normalized input.
        """
        self.input = input_batch

        mean: ArrayType
        var: ArrayType

        if training:
            mean = xp.mean(self.input, axis=(0, 2, 3), keepdims=True, dtype=DTYPE)  # type: ignore
            var = xp.var(self.input, axis=(0, 2, 3), keepdims=True, dtype=DTYPE)

            self.cache_m = self.momentum * self.cache_m + (1 - self.momentum) * mean
            self.cache_v = self.momentum * self.cache_v + (1 - self.momentum) * var

        else:
            mean = self.cache_m
            var = self.cache_v

        self.std_inv = 1 / xp.sqrt(var + self.epsilon, dtype=DTYPE)
        self.x_centered = self.input - mean
        self.x_norm = self.x_centered * self.std_inv

        res: ArrayType = self.x_norm * self.gamma
        res += self.beta
        return res

    def backward(self, output_gradient_batch: ArrayType) -> ArrayType:
        self.gamma_gradient = xp.sum(  # type: ignore
            self.x_norm * output_gradient_batch,
            axis=(0, 2, 3),
            keepdims=True,
            dtype=DTYPE,
        )
        self.beta_gradient = xp.sum(output_gradient_batch, axis=(0, 2, 3), keepdims=True, dtype=DTYPE)  # type: ignore

        N = output_gradient_batch.shape[0] * output_gradient_batch.shape[2] * output_gradient_batch.shape[3]

        dx_norm = output_gradient_batch * self.gamma

        grad: ArrayType = (
            (1 / N)
            * self.std_inv
            * (
                N * dx_norm
                - xp.sum(dx_norm, axis=(0, 2, 3), keepdims=True, dtype=DTYPE)
                - self.x_norm * xp.sum(dx_norm * self.x_norm, axis=(0, 2, 3), keepdims=True, dtype=DTYPE)
            )
        )
        return grad

    @property
    def params(self) -> dict[str, tuple[ArrayType, ArrayType]]:
        return {  # type: ignore
            "gamma": (self.gamma, self.gamma_gradient),
            "beta": (self.beta, self.beta_gradient),
        }

    def load_params(self, params: dict[str, ArrayType]) -> None:
        self.gamma[:] = params["gamma"]
        self.beta[:] = params["beta"]

    @property
    def state(self) -> dict[str, ArrayType]:
        return {"cache_m": self.cache_m, "cache_v": self.cache_v}

    @state.setter
    def state(self, state: dict[str, ArrayType]) -> None:
        self.cache_m = state["cache_m"]
        self.cache_v = state["cache_v"]

forward(input_batch, training=True)

Performs spatial batch normalization.

Parameters:

Name Type Description Default
input_batch ArrayType

Input (N, C, H, W).

required
training bool

If True, updates running stats.

True

Returns:

Name Type Description
ArrayType ArrayType

Normalized input.

Source code in src/mpneuralnetwork/layers/layer2d.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
def forward(self, input_batch: ArrayType, training: bool = True) -> ArrayType:
    """Normalizes the input per channel, then applies scale and shift.

    In training mode the mean and variance are computed from the batch over
    axes (0, 2, 3) and blended into the running estimates ``cache_m`` and
    ``cache_v`` using ``momentum``. Otherwise the running estimates are used
    directly and left untouched.

    Args:
        input_batch (ArrayType): Input (N, C, H, W).
        training (bool, optional): If True, uses batch statistics and updates
            the running stats. Defaults to True.

    Returns:
        ArrayType: ``x_norm * gamma + beta``.
    """
    self.input = input_batch
    stat_axes = (0, 2, 3)

    mean: ArrayType
    var: ArrayType

    if not training:
        # Inference: rely on the statistics accumulated during training.
        mean, var = self.cache_m, self.cache_v
    else:
        mean = xp.mean(self.input, axis=stat_axes, keepdims=True, dtype=DTYPE)  # type: ignore
        var = xp.var(self.input, axis=stat_axes, keepdims=True, dtype=DTYPE)

        # Exponential moving average of the batch statistics.
        keep = self.momentum
        self.cache_m = keep * self.cache_m + (1 - keep) * mean
        self.cache_v = keep * self.cache_v + (1 - keep) * var

    # Intermediates are stored on the instance; backward reads std_inv and x_norm.
    self.std_inv = 1 / xp.sqrt(var + self.epsilon, dtype=DTYPE)
    self.x_centered = self.input - mean
    self.x_norm = self.x_centered * self.std_inv

    scaled: ArrayType = self.x_norm * self.gamma
    scaled += self.beta
    return scaled

Utilities

Low-level utility functions used for implementing efficient convolutions.

mpneuralnetwork.layers.utils.im2col(input_batch, window_size, stride=None)

Image to Column transformation.

Rearranges image blocks into columns to perform convolution as a matrix multiplication. This is the core optimization that enables vectorized convolution.

Transformation

Input: (N, C, H, W). Output: (N, H_out, W_out, C, K, K), i.e. one K x K window per channel and output position (before flattening to C * K * K for matmul).

Parameters:

Name Type Description Default
input_batch ArrayType

Input images of shape (N, C, H, W).

required
window_size int

Size of the kernel (K).

required
stride int | None

Stride of the operation. If None, the windows are not subsampled, which is equivalent to a stride of 1.

None

Returns:

Name Type Description
ArrayType ArrayType

The column matrix ready for GEMM (General Matrix Multiplication).

Source code in src/mpneuralnetwork/layers/utils.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def im2col(input_batch: ArrayType, window_size: int, stride: int | None = None) -> ArrayType:
    """Image to Column transformation.

    Rearranges image blocks into columns to perform convolution as a matrix multiplication.
    This is the core optimization that enables vectorized convolution.

    Transformation:
        Input:  (N, C, H, W)
        Output: (N, H_out, W_out, C * K * K) (before flattening for matmul)

    Args:
        input_batch (ArrayType): Input images of shape (N, C, H, W).
        window_size (int): Size of the kernel (K).
        stride (int | None, optional): Stride of the operation. Defaults to window_size if None (for pooling) or 1.

    Returns:
        ArrayType: The column matrix ready for GEMM (General Matrix Multiplication).
    """
    windows = xp.lib.stride_tricks.sliding_window_view(input_batch, window_shape=(window_size, window_size), axis=(2, 3))  # type: ignore[call-overload]

    if stride is not None:
        windows = windows[:, :, ::stride, ::stride, :, :]

    return windows.transpose(0, 2, 3, 1, 4, 5)  # type: ignore[no-any-return]

mpneuralnetwork.layers.utils.col2im(cols, input_shape, output_shape, window_size, stride=1)

Column to Image transformation (Reverse im2col).

Used during backpropagation to reconstruct the gradient of the input image from the gradients of the columns. Accumulates gradients in overlapping regions.

Parameters:

Name Type Description Default
cols ArrayType

The column matrix from the gradient calculation.

required
input_shape tuple[int, ...]

Original shape of the input image (N, C, H, W).

required
output_shape tuple[int, ...]

Shape of the output without the batch dimension: (C_out, H_out, W_out).

required
window_size int

Kernel size.

required
stride int

Stride. Defaults to 1.

1

Returns:

Name Type Description
ArrayType ArrayType

Reconstructed image gradient of shape (N, C, H, W).

Source code in src/mpneuralnetwork/layers/utils.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def col2im(
    cols: ArrayType,
    input_shape: tuple[int, ...],
    output_shape: tuple[int, ...],
    window_size: int,
    stride: int = 1,
) -> ArrayType:
    """Column to Image transformation (Reverse im2col).

    Used during backpropagation to reconstruct the gradient of the input image
    from the gradients of the columns. Accumulates gradients in overlapping regions.

    Args:
        cols (ArrayType): Window gradients, indexed as ``cols[:, :, :, :, i, j]``,
            i.e. laid out as (N, C, H_out, W_out, K, K).
        input_shape (tuple[int, ...]): Original shape of the input image (N, C, H, W).
        output_shape (tuple[int, ...]): Shape of the output, either
            (C_out, H_out, W_out) or (N, C_out, H_out, W_out). Only the last
            two entries (H_out, W_out) are used.
        window_size (int): Kernel size.
        stride (int, optional): Stride. Defaults to 1.

    Returns:
        ArrayType: Reconstructed image gradient of shape (N, C, H, W).
    """
    # Only the spatial extent is needed. Reading the trailing two entries works
    # whether or not output_shape carries a leading batch dimension; a fixed
    # three-way unpack would reject the 4-tuple form.
    H_out, W_out = output_shape[-2:]
    K = window_size

    im = xp.zeros(input_shape, dtype=DTYPE)

    # For each kernel offset (i, j), scatter-add the gradient of that kernel tap
    # onto the input positions it touched; overlapping windows sum up.
    for i in range(K):
        for j in range(K):
            im[:, :, i : i + H_out * stride : stride, j : j + W_out * stride : stride] += cols[:, :, :, :, i, j]

    return im