数据集处理
基础操作:TensorDataset 与 DataLoader
import torch
import torch.utils.data

# Demo: wrap paired feature/label tensors in a TensorDataset, then batch and
# shuffle them with a DataLoader.

# Feature tensor: 10 samples, 2 features each.
inputs = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0],
                       [6.0, 7.0], [7.0, 8.0], [8.0, 9.0], [9.0, 10.0], [10.0, 11.0]])
# Label tensor: one integer class label per sample.
labels = torch.tensor([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])

# TensorDataset pairs the tensors row-wise: dataset[i] == (inputs[i], labels[i]).
dataset = torch.utils.data.TensorDataset(inputs, labels)
print(dataset)
print(type(dataset))
for data, target in dataset:
    print(data, target)
print()

# DataLoader yields (feature batch, label batch) pairs of size 2, in shuffled order.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)
print(dataloader)
print(type(dataloader))
for batch_data, batch_label in dataloader:
    print(batch_data, batch_label)
输出
<torch.utils.data.dataset.TensorDataset object at 0x7ff28b5da220>
<class 'torch.utils.data.dataset.TensorDataset'>
tensor([1., 2.]) tensor(0)
tensor([2., 3.]) tensor(1)
tensor([3., 4.]) tensor(0)
tensor([4., 5.]) tensor(1)
tensor([5., 6.]) tensor(0)
tensor([6., 7.]) tensor(1)
tensor([7., 8.]) tensor(0)
tensor([8., 9.]) tensor(1)
tensor([ 9., 10.]) tensor(0)
tensor([10., 11.]) tensor(1)
<torch.utils.data.dataloader.DataLoader object at 0x7ff28b5dabb0>
<class 'torch.utils.data.dataloader.DataLoader'>
tensor([[4., 5.],
[6., 7.]]) tensor([1, 1])
tensor([[5., 6.],
[2., 3.]]) tensor([0, 1])
tensor([[ 7., 8.],
[10., 11.]]) tensor([0, 1])
tensor([[1., 2.],
[8., 9.]]) tensor([0, 1])
tensor([[ 9., 10.],
[ 3., 4.]]) tensor([0, 0])
经典数据集导入
MNIST
CNN
https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
神经网络搭建
LeNet-5
| 内容 | 链接 | 创作者 |
| --- | --- | --- |
| Python Class Tutorial | 链接🔗 | Corey Schafer |
CIFAR-10分类
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim  # fixed: was "optiM", which made the later optim.Adam(...) call a NameError
import torch.nn.functional as F

# Preprocessing: augment (random flip + padded random crop), convert to tensor,
# and normalize each RGB channel from [0, 1] to roughly [-1, 1].
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Training set with preprocessing applied; downloads CIFAR-10 on first run.
trainset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

# Test set with the same preprocessing.
# NOTE(review): applying random augmentation (flip/crop) to the test set skews
# evaluation; a deterministic transform (ToTensor + Normalize only) is usually
# intended here — confirm before relying on the reported accuracy.
testset = torchvision.datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True, num_workers=2)
# Define the convolutional neural network.
class SimpleCNN(nn.Module):
    """Three conv+pool stages followed by a dropout-regularized MLP head.

    Input:  (N, 3, 32, 32) image batch (e.g. CIFAR-10).
    Output: (N, 10) raw class logits (no softmax; pair with CrossEntropyLoss).
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()
        # Each conv preserves spatial size (padding=1); each pool halves it:
        # 32 -> 16 -> 8 -> 4, hence the flattened size 128 * 4 * 4 below.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)  # flatten; assumes 32x32 spatial input
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # active only in train mode
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Instantiate the model.
model = SimpleCNN()
# Loss function and optimizer for training.
# NOTE(review): the import at the top of the file reads "as optiM" (typo), so
# "optim" below raises NameError unless that import line is corrected.
criterion = nn.CrossEntropyLoss() # cross-entropy loss over the 10 class logits
optimizer = optim.Adam(model.parameters(), lr=0.001) # Adam optimizer, learning rate 0.001
# 4. 训练模型
# for epoch in range(10): # 训练10个epoch
# running_loss = 0.0
# for i, data in enumerate(trainloader, 0):
# inputs, labels = data
# optimizer.zero_grad() # 清除梯度
#
# outputs = model(inputs) # 前向传播
# loss = criterion(outputs, labels) # 计算损失
# loss.backward() # 反向传播
# optimizer.step() # 优化
#
# running_loss += loss.item()
# if i % 100 == 99: # 每100个批次打印一次
# print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
# running_loss = 0.0
#
# print("Finished Training")
# 5. Evaluate the model on the test set.
model.eval()  # fixed: disable dropout for inference (model was left in train mode)
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # index of the highest logit per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

# 6. Save the learned weights (state_dict only, not the whole module).
torch.save(model.state_dict(), 'simple_cnn.pth')