import torch
from torch import Tensor, nn
from torch.nn import functional as F
from robustML.advertrain.dependencies.dropblock import DropBlock2d
class Normalize(nn.Module):
def __init__(self, mean: Tensor, std: Tensor, device: torch.device) -> None:
"""
Initialize the Normalize module.
This module is used to normalize image data by subtracting the mean and
dividing by the standard deviation.
Args:
mean (Tensor): A tensor containing the mean values for each channel.
std (Tensor): A tensor containing the standard deviation for each channel.
device (torch.device): The device (CPU or GPU) to which the tensors should be moved.
"""
super().__init__()
self.mean = mean.unsqueeze(-1).unsqueeze(-1).to(device)
self.std = std.unsqueeze(-1).unsqueeze(-1).to(device)
def forward(self, x: Tensor) -> Tensor:
"""
Normalize the input tensor.
Applies the normalization operation on the input tensor using the mean and
standard deviation provided during initialization.
Args:
x (Tensor): The input tensor to be normalized.
Returns:
Tensor: The normalized tensor.
"""
return (x - self.mean) / self.std
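# Illustrative usage sketch (not part of the original module): Normalize expects
# per-channel statistics and broadcasts them over the spatial dimensions.
# The statistics and batch shape below are placeholder values for the example.
def _example_normalize_usage() -> Tensor:
    device = torch.device("cpu")
    norm = Normalize(
        torch.tensor([0.5, 0.5, 0.5]),     # hypothetical per-channel means
        torch.tensor([0.25, 0.25, 0.25]),  # hypothetical per-channel stds
        device,
    )
    batch = torch.rand(4, 3, 64, 128)  # (batch, channels, height, width)
    return norm(batch)  # same shape, standardized channel-wise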
class ConvNet(nn.Module):
"""
Convolutional Neural Network with dropout layers, designed for processing images of size 64x128.
This network includes a normalization layer, several convolutional layers with
ReLU activation and max pooling, followed by fully connected layers with dropout
for regularization. It is suited for tasks like image classification where dropout
can help reduce overfitting.
Attributes:
norm (Normalize): Normalization layer to preprocess the input images.
conv1, conv2_1, conv3_1, conv4_1 (nn.Conv2d): Convolutional layers for feature extraction.
pooling (nn.MaxPool2d): Max pooling layer to reduce spatial dimensions.
activation (nn.ReLU): Activation function.
dropout (nn.Dropout): Dropout layer for regularization.
linear1, linear2, linear3 (nn.Linear): Fully connected layers for classification.
"""
def __init__(self, device: torch.device, p: float = 0.2) -> None:
"""
Initializes the ConvNet model with dropout layers.
Args:
device (torch.device): The device to which the model and tensors should be moved.
p (float): Dropout probability. Default is 0.2.
"""
super().__init__()
self.norm = Normalize(
torch.Tensor([0.4632, 0.4532, 0.4485]),
torch.Tensor([0.1646, 0.1759, 0.1739]),
device,
)
self.conv1 = nn.Conv2d(3, 32, 7, padding=3)
self.conv2_1 = nn.Conv2d(32, 64, 5, padding=2)
self.conv3_1 = nn.Conv2d(64, 128, 5, padding=2)
self.conv4_1 = nn.Conv2d(128, 256, 5, padding=2)
self.pooling = nn.MaxPool2d(2)
self.activation = nn.ReLU()
self.dropout = nn.Dropout(p=p)
self.linear1 = nn.Linear(4 * 8 * 256, 2048)
self.linear2 = nn.Linear(2048, 1024)
self.linear3 = nn.Linear(1024, 2)
self.to(device)
def forward(self, x: Tensor) -> Tensor:
"""
Defines the forward pass of the ConvNet.
The input tensor is processed through normalization, convolutional layers,
pooling layers, dropout layers, and fully connected layers sequentially to
produce the output tensor.
Args:
x (Tensor): Input tensor of shape (batch_size, 3, 64, 128).
Returns:
Tensor: Output tensor after processing through the network.
"""
x = self.norm(x)
y = self.activation(self.conv1(x))
y = self.pooling(y)
y = self.activation(self.conv2_1(y))
y = self.pooling(y)
y = self.activation(self.conv3_1(y))
y = self.pooling(y)
y = self.activation(self.conv4_1(y))
y = self.pooling(y)
y = self.activation(self.linear1(torch.reshape(y, (-1, 4 * 8 * 256))))
y = self.dropout(y)
y = self.activation(self.linear2(y))
y = self.dropout(y)
y = self.linear3(y)
return y
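# Illustrative sanity check (assumption: CPU and a random batch). It exercises the
# expected shape flow of ConvNet for 64x128 RGB inputs: four conv + pool stages
# reduce 64x128 to 4x8 before the 4 * 8 * 256 flatten feeding the linear head.
def _example_convnet_forward() -> Tensor:
    device = torch.device("cpu")
    model = ConvNet(device, p=0.2)
    model.eval()  # disable dropout for a deterministic check
    with torch.no_grad():
        logits = model(torch.rand(2, 3, 64, 128))
    assert logits.shape == (2, 2)  # two output logits per sample
    return logits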
class ConvNetDropblock(nn.Module):
"""
Convolutional Neural Network with DropBlock regularization, designed for processing images of size 64x128.
This network includes a normalization layer, several convolutional layers with
ReLU activation and max pooling, followed by fully connected layers with dropout
and DropBlock for regularization. It is suited for tasks like image classification
where advanced regularization techniques can help reduce overfitting.
Attributes:
norm (Normalize): Normalization layer to preprocess the input images.
conv1, conv2_1, conv3_1, conv4_1 (nn.Conv2d): Convolutional layers for feature extraction.
pooling (nn.MaxPool2d): Max pooling layer to reduce spatial dimensions.
activation (nn.ReLU): Activation function.
dropout (nn.Dropout): Dropout layer for regularization.
dropblock (DropBlock2d): DropBlock layer for structured dropout.
linear1, linear2, linear3 (nn.Linear): Fully connected layers for classification.
"""
def __init__(self, device: torch.device, p: float = 0.2, drop_prob: float = 0.0, n_steps: int = 10) -> None:
"""
Initializes the ConvNetDropblock model with DropBlock layers.
Args:
device (torch.device): The device (CPU or GPU) to which the model and tensors should be moved.
p (float): Dropout probability for the standard dropout layers. Default is 0.2.
drop_prob (float): Initial drop probability for DropBlock. Default is 0.0.
n_steps (int): Number of steps over which DropBlock probability should reach its maximum. Default is 10.
"""
super().__init__()
self.norm = Normalize(
torch.Tensor([0.4632, 0.4532, 0.4485]),
torch.Tensor([0.1646, 0.1759, 0.1739]),
device,
)
self.conv1 = nn.Conv2d(3, 32, 7, padding=3)
self.conv2_1 = nn.Conv2d(32, 64, 5, padding=2)
self.conv3_1 = nn.Conv2d(64, 128, 5, padding=2)
self.conv4_1 = nn.Conv2d(128, 256, 5, padding=2)
self.pooling = nn.MaxPool2d(2)
self.activation = nn.ReLU()
self.dropout = nn.Dropout(p=p)
self.dropblock = DropBlock2d(drop_prob=drop_prob)
# DropBlock schedule bookkeeping: the target drop probability, the number of
# steps over which it is meant to ramp up, and a step counter (not updated in forward).
self.drop_prob = drop_prob
self.n_epochs = n_steps
self.epochs = 0
self.linear1 = nn.Linear(4 * 8 * 256, 2048)
self.linear2 = nn.Linear(2048, 1024)
self.linear3 = nn.Linear(1024, 2)
self.to(device)
def forward(self, x: Tensor) -> Tensor:
"""
Defines the forward pass of the ConvNetDropblock.
The input tensor is processed through normalization, convolutional layers,
pooling layers, DropBlock layers, dropout layers, and fully connected layers
sequentially to produce the output tensor.
Args:
x (Tensor): Input tensor of shape (batch_size, 3, 64, 128).
Returns:
Tensor: Output tensor after processing through the network.
"""
x = self.norm(x)
y = self.activation(self.conv1(x))
y = self.pooling(y)
y = self.dropblock(y)
y = self.activation(self.conv2_1(y))
y = self.pooling(y)
y = self.dropblock(y)
y = self.activation(self.conv3_1(y))
y = self.pooling(y)
y = self.dropblock(y)
y = self.activation(self.conv4_1(y))
y = self.pooling(y)
y = self.activation(self.linear1(torch.reshape(y, (-1, 4 * 8 * 256))))
y = self.dropout(y)
y = self.activation(self.linear2(y))
y = self.dropout(y)
y = self.linear3(y)
return y
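# Hedged sketch of one way a training loop could ramp the DropBlock probability
# using the bookkeeping attributes stored by ConvNetDropblock (drop_prob, n_epochs,
# epochs); the model itself does not update them in forward. The linear schedule
# and the assumption that DropBlock2d reads a mutable drop_prob attribute are
# illustrative, not taken from the original training code.
def _example_dropblock_schedule_step(model: ConvNetDropblock) -> float:
    model.epochs += 1
    progress = min(model.epochs / max(model.n_epochs, 1), 1.0)
    model.dropblock.drop_prob = model.drop_prob * progress  # assumed attribute on DropBlock2d
    return model.dropblock.drop_prob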
class ResNet(nn.Module):
"""
A custom implementation of a Residual Network (ResNet) for processing images.
This network consists of multiple convolutional layers, each followed by batch normalization,
and some layers include dropout for regularization. The network uses skip connections
similar to a ResNet architecture, adding the output of one layer to another layer.
"""
def __init__(self, device: torch.device, p: float = 0.2) -> None:
"""
Initializes the ResNet model.
Args:
device (torch.device): The device to which the model and tensors should be moved.
p (float): Dropout probability for the Dropout2d layers on conv9 and conv12. Default is 0.2.
"""
super().__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
self.conv1_bn = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
self.conv2_bn = nn.BatchNorm2d(32)
self.conv3 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
self.conv3_drop = nn.Dropout2d(p=0.2)
self.conv3_bn = nn.BatchNorm2d(32)
self.conv4 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
self.conv4_bn = nn.BatchNorm2d(64)
self.conv5 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
self.conv5_bn = nn.BatchNorm2d(64)
self.conv6 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
self.conv6_drop = nn.Dropout2d(p=0.2)
self.conv6_bn = nn.BatchNorm2d(64)
self.conv7 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.conv7_bn = nn.BatchNorm2d(128)
self.conv8 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
self.conv8_bn = nn.BatchNorm2d(128)
self.conv9 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
self.conv9_drop = nn.Dropout2d(p=p)
self.conv9_bn = nn.BatchNorm2d(128)
self.conv10 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
self.conv10_bn = nn.BatchNorm2d(256)
self.conv11 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.conv11_bn = nn.BatchNorm2d(256)
self.conv12 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.conv12_drop = nn.Dropout2d(p=p)
self.conv12_bn = nn.BatchNorm2d(256)
self.fc1 = nn.Linear(256 * 8 * 16, 2048)
self.fc1_bn = nn.BatchNorm1d(2048)
self.fc2 = nn.Linear(2048, 2)
self.to(device)
def forward(self, inp: Tensor) -> Tensor:
"""
Defines the forward pass of the ResNet.
The input tensor is processed through a series of convolutional layers with skip connections,
batch normalization, and dropout, followed by fully connected layers to produce the output tensor.
Args:
inp (Tensor): Input tensor of shape (batch_size, 3, 64, 128); the spatial size must match the
256 * 8 * 16 flattening applied before fc1.
Returns:
Tensor: Output tensor after processing through the network.
"""
res = F.relu(self.conv1_bn(self.conv1(inp)))
x = F.relu(self.conv2_bn(self.conv2(res)))
x = self.conv3_drop(self.conv3(x))
block1_out = F.relu(self.conv3_bn(F.max_pool2d(x + res, 2))) # 32x64
res = F.relu(self.conv4_bn(self.conv4(block1_out)))
x = F.relu(self.conv5_bn(self.conv5(res)))
x = self.conv6_drop(self.conv6(x))
block2_out = F.relu(self.conv6_bn(F.max_pool2d(x + res, 2))) # 16x32
res = F.relu(self.conv7_bn(self.conv7(block2_out)))
x = F.relu(self.conv8_bn(self.conv8(res)))
x = self.conv9_drop(self.conv9(x))
block3_out = F.relu(self.conv9_bn(F.max_pool2d(x + res, 2))) # 8x16
res = F.relu(self.conv10_bn(self.conv10(block3_out)))
x = F.relu(self.conv11_bn(self.conv11(res)))
x = F.relu(self.conv12_bn(self.conv12_drop(self.conv12(x + res))))
x = x.view(-1, 256 * 8 * 16)
x = F.relu(self.fc1_bn(self.fc1(x)))
x = F.dropout(x, training=self.training, p=0.2)
x = self.fc2(x)
return x
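# Illustrative sanity check (assumption: CPU and a random 64x128 RGB batch). Each
# of the three pooled residual blocks halves the resolution (64x128 -> 32x64 ->
# 16x32 -> 8x16), which is why the flatten before fc1 uses 256 * 8 * 16.
def _example_resnet_forward() -> Tensor:
    device = torch.device("cpu")
    model = ResNet(device, p=0.2)
    model.eval()  # BatchNorm uses running statistics, dropout is disabled
    with torch.no_grad():
        logits = model(torch.rand(2, 3, 64, 128))
    assert logits.shape == (2, 2)
    return logits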
class ResNetDropblock(nn.Module):
"""
A custom implementation of a Residual Network (ResNet) with DropBlock regularization for processing images.
This network consists of multiple convolutional layers, each followed by batch normalization,
with dropout and DropBlock applied at the end of each block for regularization. The network uses skip
connections similar to a ResNet architecture, adding the output of one layer to another layer.
"""
def __init__(self, device: torch.device, p: float = 0.2, drop_prob: float = 0.0) -> None:
"""
Initializes the ResNetDropblock model.
Args:
device (torch.device): The device to which the model and tensors should be moved.
p (float): Dropout probability for the Dropout2d layers on conv9 and conv12. Default is 0.2.
drop_prob (float): Drop probability for the DropBlock2d layer. Default is 0.0.
"""
super().__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
self.conv1_bn = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
self.conv2_bn = nn.BatchNorm2d(32)
self.conv3 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
self.conv3_drop = nn.Dropout2d(p=0.2)
self.conv3_bn = nn.BatchNorm2d(32)
self.conv4 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
self.conv4_bn = nn.BatchNorm2d(64)
self.conv5 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
self.conv5_bn = nn.BatchNorm2d(64)
self.conv6 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
self.conv6_drop = nn.Dropout2d(p=0.2)
self.conv6_bn = nn.BatchNorm2d(64)
self.conv7 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
self.conv7_bn = nn.BatchNorm2d(128)
self.conv8 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
self.conv8_bn = nn.BatchNorm2d(128)
self.conv9 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
self.conv9_drop = nn.Dropout2d(p=p)
self.conv9_bn = nn.BatchNorm2d(128)
self.conv10 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
self.conv10_bn = nn.BatchNorm2d(256)
self.conv11 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.conv11_bn = nn.BatchNorm2d(256)
self.conv12 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.conv12_drop = nn.Dropout2d(p=p)
self.conv12_bn = nn.BatchNorm2d(256)
# DropBlock layer; the configured drop probability is also stored on the model.
self.dropblock = DropBlock2d(drop_prob=drop_prob)
self.drop_prob = drop_prob
self.fc1 = nn.Linear(256 * 8 * 16, 2048)
self.fc1_bn = nn.BatchNorm1d(2048)
self.fc2 = nn.Linear(2048, 2)
self.to(device)
def forward(self, inp: Tensor) -> Tensor:
"""
Defines the forward pass of the ResNetDropblock.
The input tensor is processed through a series of convolutional layers with skip connections,
batch normalization, dropout, and DropBlock, followed by fully connected layers to produce the output tensor.
Args:
inp (Tensor): Input tensor of shape (batch_size, 3, 64, 128); the spatial size must match the
256 * 8 * 16 flattening applied before fc1.
Returns:
Tensor: Output tensor after processing through the network.
"""
res = F.relu(self.conv1_bn(self.conv1(inp)))
x = F.relu(self.conv2_bn(self.conv2(res)))
x = self.conv3_drop(self.conv3(x))
x = self.dropblock(x)
block1_out = F.relu(self.conv3_bn(F.max_pool2d(x + res, 2))) # 32x64
res = F.relu(self.conv4_bn(self.conv4(block1_out)))
x = F.relu(self.conv5_bn(self.conv5(res)))
x = self.conv6_drop(self.conv6(x))
x = self.dropblock(x)
block2_out = F.relu(self.conv6_bn(F.max_pool2d(x + res, 2))) # 16x32
res = F.relu(self.conv7_bn(self.conv7(block2_out)))
x = F.relu(self.conv8_bn(self.conv8(res)))
x = self.conv9_drop(self.conv9(x))
x = self.dropblock(x)
block3_out = F.relu(self.conv9_bn(F.max_pool2d(x + res, 2))) # 8x16
res = F.relu(self.conv10_bn(self.conv10(block3_out)))
x = F.relu(self.conv11_bn(self.conv11(res)))
x = F.relu(self.conv12_bn(self.conv12_drop(self.conv12(x + res))))
x = self.dropblock(x)
x = x.view(-1, 256 * 8 * 16)
x = F.relu(self.fc1_bn(self.fc1(x)))
x = F.dropout(x, training=self.training, p=0.2)
x = self.fc2(x)
return x
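# Hedged usage sketch for ResNetDropblock: under the usual nn.Module convention,
# Dropout2d, F.dropout, and (assuming the dependency follows it) DropBlock2d are
# stochastic only in training mode, so train()/eval() toggles the regularization.
# The drop probability and batch below are placeholder values.
def _example_resnet_dropblock_modes() -> tuple[Tensor, Tensor]:
    device = torch.device("cpu")
    model = ResNetDropblock(device, p=0.2, drop_prob=0.1)
    batch = torch.rand(2, 3, 64, 128)
    model.train()
    train_logits = model(batch)  # stochastic: dropout/DropBlock masks are sampled
    model.eval()
    with torch.no_grad():
        eval_logits = model(batch)  # deterministic: regularization disabled
    return train_logits, eval_logits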