# 文件结构为:
# |--- data
#     |--- dog
#         |--- dog1_1.jpg
#         |--- dog1_2.jpg
#     |--- cat
#         |--- cat2_1.jpg
#         |--- cat2_2.jpg
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
# Fine-tune a ResNet-18 on an ImageFolder dataset, report test accuracy and
# save the trained model to "model56".

# Dataset root directory and the preprocessing applied to every image.
data_dir = '../data'  # root folder; ImageFolder expects one sub-directory per class
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize every image to 224x224
    transforms.ToTensor(),  # convert the PIL image to a tensor
    # Per-channel normalisation (the mean/std commonly used with torchvision's
    # ImageNet-pretrained models).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Build the ImageFolder dataset; labels are derived from the sub-directory names.
dataset = torchvision.datasets.ImageFolder(root=data_dir, transform=transform)

# Random 80/20 split into training and test subsets.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
print(len(train_dataset))
print(len(test_dataset))

# Data loaders: shuffle only the training data.
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Pretrained backbone (pretrained=True loads pretrained weights, False starts
# from random initialisation).
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; kept as-is so the script still runs on older versions.
model = torchvision.models.resnet18(pretrained=True)
num_classes = len(dataset.classes)  # number of image classes found by ImageFolder
# Replace the final fully connected layer so it outputs one logit per class
# (e.g. ['cat', 'dog']), reusing the original layer's input width.
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Training.
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
num_batches = len(train_loader)
for epoch in range(num_epochs):
    # Training mode: affects layers such as Dropout and BatchNorm.
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)  # move the batch to the selected device
        labels = labels.to(device)
        optimizer.zero_grad()  # clear gradients from the previous step
        outputs = model(images)  # forward pass
        loss = criterion(outputs, labels)  # loss between predictions and targets
        loss.backward()  # back-propagation
        optimizer.step()  # parameter update
        running_loss += loss.item()  # accumulate the per-batch loss
    # Mean of the per-batch losses; guarded so an empty loader cannot raise
    # ZeroDivisionError.
    mean_loss = running_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss:.4f}")

# Evaluation on the held-out test split.
# Evaluation mode: Dropout is disabled and BatchNorm uses its running statistics.
model.eval()
total_correct = 0
total_samples = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest logit per sample
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()
# Guarded so an empty test split reports 0 instead of raising ZeroDivisionError.
accuracy = total_correct / total_samples if total_samples else 0.0
print(f"测试集准确率: {accuracy * 100:.2f}%")

# NOTE: this pickles the whole model object (not just a state_dict); the
# prediction script loads it back with torch.load('model56').
torch.save(model, "model56")
# 文件结构为:
# |--- data
#     |--- dog
#         |--- dog1_1.jpg
#         |--- dog1_2.jpg
#     |--- cat
#         |--- cat2_1.jpg
#         |--- cat2_2.jpg
# 不同的模型构建细节
# AlexNet 模型结构
# torchvision.models.alexnet(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 构建一个resnet18模型
# torchvision.models.resnet18(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 构建一个ResNet-34 模型.
# torchvision.models.resnet34(pretrained=False, ** kwargs)
# Parameters: pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 构建一个ResNet-50模型
# torchvision.models.resnet50(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 构建一个ResNet-101模型
# torchvision.models.resnet101(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 构建一个ResNet-152模型
# torchvision.models.resnet152(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# VGG 11层模型(配置“A”)
# torchvision.models.vgg11(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 批量归一化的VGG 11层模型(配置“A”)
# torchvision.models.vgg11_bn(** kwargs)
#
# 构建一个VGG 13模型
# torchvision.models.vgg13(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 批量归一化的VGG 13层模型(配置“B”)
# torchvision.models.vgg13_bn(** kwargs)
#
# VGG 16层模型(配置“D”)
# torchvision.models.vgg16(pretrained=False, ** kwargs)
# Parameters: pretrained (bool) = True, returns a model pre-trained on ImageNet
#
# 批量归一化的VGG 16层模型(配置“D”)
# torchvision.models.vgg16_bn(** kwargs)
#
# VGG 19层模型(配置“E”)
# torchvision.models.vgg19(pretrained=False, ** kwargs)
# pretrained (bool) = True, 返回在ImageNet上训练好的模型。
#
# 批量归一化的VGG 19层模型(配置“E”)
# torchvision.models.vgg19_bn(** kwargs)
# predict.py:保存模型之后,加载模型进行预测:
from torchvision import datasets, transforms
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
from cov01 import Model
# Index -> label mapping. It must follow the class order used during training
# (ImageFolder's `dataset.classes`, i.e. the sorted sub-directory names).
classes = ('cat','dog')
if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load the fully pickled model written by the training script.
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    # NOTE: torch.load unpickles arbitrary objects - only load trusted files.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects fully pickled models; pass weights_only=False there if needed.
    model = torch.load('model56', map_location=device)
    model = model.to(device)
    model.eval()  # evaluation mode (Dropout off, BatchNorm uses running stats)

    # Inference preprocessing has to mirror the training transform
    # (Resize 224x224 + ToTensor + Normalize); the previous CenterCrop(32)
    # without normalisation fed the network inputs it was never trained on.
    trans = transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    # Force 3 channels so grayscale / RGBA files match the model's RGB input,
    # and close the file handle once the tensor has been built.
    with Image.open("../dog.jpg") as raw_img:
        img = trans(raw_img.convert("RGB"))
    img = img.to(device)
    # Add a batch dimension: [channels, height, width] -> [1, channels, height, width],
    # here [1, 3, 224, 224].
    img = img.unsqueeze(0)

    with torch.no_grad():  # inference only, no gradients needed
        output = model(img)
    prob = F.softmax(output, dim=1)  # per-class probabilities, one per entry in `classes`
    print(prob)
    # dim=1 takes the maximum over the class dimension; `value` is the largest
    # raw logit (not a probability) and `predicted` is its class index.
    value, predicted = torch.max(output, 1)
    print(predicted.item())
    print(value)
    pred_class = classes[predicted.item()]
    print(pred_class)