Hand-Written ResNet

ResNet

Network degradation: once the network gets deeper, its performance becomes worse than that of a shallower network (this is not caused by vanishing or exploding gradients, nor by overfitting).

image-20221226111804113

The branch on the right is also called a shortcut connection. Introducing it adds no extra parameters and no extra computational complexity.

The sum is then passed through a ReLU activation. Note that the input and output must have the same dimensions (the addition is element-wise).

Previously the network fit \(H(x)\) directly; now it fits the residual \(F(x)=H(x)-x\).

Structure of a residual block:

image-20221226111612290

Advantages of residual networks: ① easy to optimize and converge. ② solves the network degradation problem. ③ the network can be made very deep, greatly improving accuracy.
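Before the full implementation below, here is a minimal sketch of the residual mapping \(F(x)+x\) (the wrapper name `IdentityResidual` is hypothetical, for illustration only, and is not part of the files that follow): the block computes the residual \(F(x)\) and adds the input back, so the identity shortcut itself contributes no parameters.

# A minimal residual wrapper: output = ReLU(F(x) + x).
# `body` must preserve the input's shape so the element-wise add is valid.
import torch
from torch import nn

class IdentityResidual(nn.Module):  # hypothetical helper, for illustration only
    def __init__(self, body: nn.Module):
        super().__init__()
        self.body = body  # computes the residual F(x)

    def forward(self, x):
        return torch.relu(self.body(x) + x)

blk = IdentityResidual(nn.Sequential(
    nn.Conv2d(16, 16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, padding=1),
))
print(blk(torch.randn(1, 16, 8, 8)).shape)  # torch.Size([1, 16, 8, 8])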

resnet_main.py

# Main program: steps 1, 3, 4
# 1 load data  2 build network  3 train network  4 test/validate
import torch
# dataset
from torchvision import datasets
# data transforms
from torchvision import transforms
# data loading
from torch.utils.data import DataLoader
# common neural-network modules
from torch import nn, optim
# the model we built ourselves
from resnet import ResNet18
# functional API (used by the optional softmax line below)
from torch.nn import functional as F

batch_size = 128

def main():
    # 1 load the data
    cifar_train = datasets.CIFAR10(
        root="/media/D/dataset/CIFAR10",
        train=True,
        transform=transforms.Compose([
            # resize images to 32x32 feature maps
            transforms.Resize((32, 32)),
            # convert the data to tensors
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]), download=False)
    cifar_train = DataLoader(
        dataset=cifar_train,
        batch_size=batch_size,
        shuffle=True,
    )
    cifar_test = datasets.CIFAR10(
        root="/media/D/dataset/CIFAR10",
        train=False,
        transform=transforms.Compose([
            # resize images to 32x32 feature maps
            transforms.Resize((32, 32)),
            # convert the data to tensors
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]), download=False)
    cifar_test = DataLoader(
        dataset=cifar_test,
        batch_size=batch_size,
        shuffle=True,
    )
    # preview one batch: iter() returns an iterator over the dataset, next() pulls a batch
    x, label = next(iter(cifar_train))
    print(x.shape, label.shape)

    # 3 train the network
    device = torch.device("cuda")
    # instantiate the network
    model = ResNet18().to(device)
    # build the CrossEntropyLoss criterion (it already includes softmax); it must be
    # instantiated before use, not called off the class directly
    criterion = nn.CrossEntropyLoss().to(device)
    # build the optimizer: it picks up all parameters that require gradients
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    for epoch in range(10000):
        model.train()
        for batch_idx, (x, label) in enumerate(cifar_train):
            # feed the training batch through the model
            x, label = x.to(device), label.to(device)
            logits = model(x)
            # compute the loss: cross-entropy on the raw logits
            loss = criterion(logits, label)
            # the usual three optimization steps:
            # clear old gradients
            optimizer.zero_grad()
            # back-propagate
            loss.backward()
            # update the parameters
            optimizer.step()
        print(epoch, loss.item())
        # 4 test: the test set is used here as a validation set
        model.eval()
        with torch.no_grad():
            total_correct = 0
            total_num = 0
            for x, label in cifar_test:
                x, label = x.to(device), label.to(device)
                logits = model(x)
                # logits = F.softmax(logits, dim=1)
                pred = logits.argmax(dim=1)
                correct = torch.eq(pred, label).float().sum().item()

                total_correct += correct
                total_num += x.size(0)
            acc = total_correct / total_num
            print(epoch, acc)


if __name__ == '__main__':
    main()
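As the comment above notes, nn.CrossEntropyLoss applies softmax (more precisely, log-softmax) internally, which is why the raw logits are passed in and why the commented-out F.softmax line is unnecessary. A standalone check of this equivalence (not part of the original files):

# CrossEntropyLoss on logits == log_softmax followed by NLLLoss
import torch
from torch import nn
from torch.nn import functional as F

logits = torch.randn(4, 10)          # a batch of raw network outputs
target = torch.randint(0, 10, (4,))  # ground-truth class indices
a = nn.CrossEntropyLoss()(logits, target)
b = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(a, b))  # True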

resnet.py

# Build the residual block and the residual network
import torch
from torch import nn
from torch.nn import functional as F


# Residual network
# Residual block: the shortcut design
class ResBlk(nn.Module):
    # define the layers held by the block
    # the parameters describe the feature maps: input/output channels
    def __init__(self, ch_in, ch_out, stride=1):
        super(ResBlk, self).__init__()
        # conv layer C1: changes only the channel count; kernel, stride, padding set the spatial size
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        # batch norm B2: leaves the channel count unchanged
        self.bn1 = nn.BatchNorm2d(ch_out)
        # conv layer C3
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        # batch norm B4
        self.bn2 = nn.BatchNorm2d(ch_out)
        # shortcut branch: identity by default; a 1x1 conv matches channels and size when they differ
        self.extra = nn.Sequential()
        if ch_out != ch_in:
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # element-wise sum with the shortcut, then ReLU (as described above)
        out = F.relu(out + self.extra(x))
        return out


# Build the 18-layer-style ResNet
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        # conv + batch norm stem
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64)
        )
        # stack the residual blocks
        self.blk1 = ResBlk(64, 128, stride=2)
        self.blk2 = ResBlk(128, 256, stride=2)
        self.blk3 = ResBlk(256, 512, stride=2)
        self.blk4 = ResBlk(512, 512)
        # the final fully-connected layer maps to the 10 classes
        self.outlayer = nn.Linear(8192, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.blk1(x)
        x = self.blk2(x)
        x = self.blk3(x)
        x = self.blk4(x)
        # x = F.adaptive_max_pool2d(x, [1, 1])
        # flatten [b, 512, 4, 4] -> [b, 8192] before the linear layer
        x = x.view(x.size(0), -1)
        x = self.outlayer(x)

        return x


def main():
    x = torch.randn(2, 3, 32, 32)
    model = ResNet18()
    out = model(x)
    print(out.shape)


if __name__ == '__main__':
    main()

# stride=2 has the most visible effect: it halves the height and width.
# Watch for errors from out + extra(x) and adjust the strides until the shortcut shapes match.
# After this series of transforms it is hard to know a tensor's dimensions in advance; the flattened features go into the linear layer, whose final output is [10].
# If the linear layer's dimensions (?, 10) are set arbitrarily, the error message contains both matrices (the matrix fed into the linear layer and the layer's own weight matrix), which reveals the required input size.
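Following up on the last two notes, one way to find the flatten dimension without reading shape-mismatch errors is to push a dummy tensor through the stages and print the shapes, a standalone sketch using the ResNet18 defined above:

# Trace intermediate shapes with a dummy input to size the final Linear layer.
import torch
from resnet import ResNet18

model = ResNet18()
x = torch.relu(model.conv1(torch.randn(2, 3, 32, 32)))
for name in ("blk1", "blk2", "blk3", "blk4"):
    x = getattr(model, name)(x)
    print(name, tuple(x.shape))
# blk1 (2, 128, 16, 16)   blk2 (2, 256, 8, 8)
# blk3 (2, 512, 4, 4)     blk4 (2, 512, 4, 4) -> 512*4*4 = 8192 in_features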