java-pytorch: Testing with a Manually Downloaded FashionMNIST Dataset
- Define the locations of the training and test data
- Inspect the format of the label data we read in
- Access the data with loc and iloc, for convenience below
- Use read_image to check the size of an image
- Write the dataset
- Load our own data with DataLoader
- Check the shape of the loaded dataloader data
- Loop over the dataloader and inspect each batch
- Write a simple model made of Linear layers
- Define the loss function and optimizer
- Write the test and training functions
- Start training
- Save the model
- Load the model
- Predict with the loaded model
- Image preprocessing
- Prediction
- Miscellaneous
- Convert an image to grayscale and then to a tensor
- Test a specific image
- Download the FashionMNIST dataset manually and convert it into actual image files plus labels, following https://blog.csdn.net/m0_60688978/article/details/137085740 (a rough sketch of that conversion follows this list)
- The goal is to simulate the real-world workflow where we collect the images and label them ourselves
- The FashionMNIST images are 28x28, and a record file output.txt maps each image file to its class label
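For reference, a minimal sketch of what that conversion step could look like (the file naming and output paths here are inferred from the label table further down, so treat them as assumptions rather than the exact script from the linked article):
import os
from torchvision import datasets
# class names without spaces/slashes, matching file names like Ankleboot1.jpg
classes = ["T-shirttop", "Trouser", "Pullover", "Dress", "Coat",
           "Sandal", "Shirt", "Sneaker", "Bag", "Ankleboot"]
def export(split_dir, train):
    # dump every sample as a JPEG and record "filename,label" lines in output.txt
    os.makedirs(split_dir, exist_ok=True)
    ds = datasets.FashionMNIST(root="../data", train=train, download=True)
    with open(os.path.join(split_dir, "output.txt"), "w") as f:
        for i, (img, label) in enumerate(ds, start=1):
            name = "{}{}.jpg".format(classes[label], i)
            img.save(os.path.join(split_dir, name))  # img is a PIL Image
            f.write("{},{}\n".format(name, label))
export("../data/imageandlableTrain", train=True)
export("../data/imageandlableTest", train=False)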
Define the locations of the training and test data
annotations_file="../data/imageandlableTrain/output.txt"
img_dire="../data/imageandlableTrain"
test_img_dire="../data/imageandlableTest"
test_annotations_file="../data/imageandlableTest/output.txt"
Inspect the format of the label data we read in
import pandas as pd
lables=pd.read_csv(annotations_file,header=None)
lables.head(10)
|   | 0 | 1 |
|---|---|---|
| 0 | Ankleboot1.jpg | 9 |
| 1 | T-shirttop2.jpg | 0 |
| 2 | T-shirttop3.jpg | 0 |
| 3 | Dress4.jpg | 3 |
| 4 | T-shirttop5.jpg | 0 |
| 5 | Pullover6.jpg | 2 |
| 6 | Sneaker7.jpg | 7 |
| 7 | Pullover8.jpg | 2 |
| 8 | Sandal9.jpg | 5 |
| 9 | Sandal10.jpg | 5 |
Access the data with loc and iloc, for convenience below
imageName,lable=lables.loc[3,:]
imageName,lable
('Dress4.jpg', 3)
lables.iloc[2,1]
0
Use read_image to check the size of an image
from torchvision.io import read_image
image1=read_image("../data/imageandlableTrain/T-shirttop2.jpg")
type(image1),image1.size(),image1[0].size(),image1
(torch.Tensor,
torch.Size([3, 28, 28]),
torch.Size([28, 28]),
tensor([[[ 0, 0, 1, ..., 1, 8, 0],
[13, 0, 0, ..., 10, 0, 0],
[ 0, 0, 22, ..., 10, 0, 1],
...,
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0]],
[[ 0, 0, 1, ..., 1, 8, 0],
[13, 0, 0, ..., 10, 0, 0],
[ 0, 0, 22, ..., 10, 0, 1],
...,
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0]],
[[ 0, 0, 1, ..., 1, 8, 0],
[13, 0, 0, ..., 10, 0, 0],
[ 0, 0, 22, ..., 10, 0, 1],
...,
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0]]], dtype=torch.uint8))
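As an aside, read_image decodes the JPEG as 3 identical channels even though the content is grayscale. If preferred, torchvision can decode it as a single channel directly (a small sketch; ImageReadMode comes from torchvision.io):
from torchvision.io import read_image, ImageReadMode
gray1 = read_image("../data/imageandlableTrain/T-shirttop2.jpg", mode=ImageReadMode.GRAY)
gray1.size()  # expected torch.Size([1, 28, 28])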
Write the dataset
The idea is simple: read the output.txt records in __init__, then have __getitem__ return a single image's tensor and its label.
One thing to note: read_image returns a tensor of size torch.Size([3, 28, 28]), while the model expects [28, 28], so we return image[0].
from torchvision.io import read_image
from torch.utils.data import Dataset
class CustomImageDataset(Dataset):
    def __init__(self):
        # read all labels from the training split's output.txt
        self.labels=pd.read_csv(annotations_file,header=None)
        self.imageDir=img_dire
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        imageName,lable=self.labels.loc[idx,:]
        image=read_image("{}/{}".format(self.imageDir,imageName))
        # keep only one channel: the model expects [28, 28], not [3, 28, 28]
        image=image[0]
        return image,lable

class CustomImageDatasetTest(Dataset):
    def __init__(self):
        # read all labels from the test split's output.txt
        self.labels=pd.read_csv(test_annotations_file,header=None)
        self.imageDir=test_img_dire
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        imageName,lable=self.labels.loc[idx,:]
        image=read_image("{}/{}".format(self.imageDir,imageName))
        image=image[0]
        return image,lable
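A quick sanity check of the dataset before wiring up the DataLoader (assuming the files above are in place; the expected values follow from the label table shown earlier):
ds = CustomImageDataset()
img, lbl = ds[0]
len(ds), img.size(), img.dtype, lbl
# expected roughly: (60000, torch.Size([28, 28]), torch.uint8, 9)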
Load our own data with DataLoader
from torch.utils.data import DataLoader
train_dataloader = DataLoader(CustomImageDataset(), batch_size=2)
len(train_dataloader)
30000
test_dataloader = DataLoader(CustomImageDatasetTest(), batch_size=2)
len(test_dataloader)
5000
Check the shape of the loaded dataloader data
for X, y in train_dataloader:
    print(f"Shape of X [N, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break
Shape of X [N, H, W]: torch.Size([2, 28, 28])
Shape of y: torch.Size([2]) torch.int64
Loop over the dataloader and inspect each batch
for batch, (X, y) in enumerate(train_dataloader):
print(X,batch,X.size(),y,type(y))
if batch==2:
break
Write a simple model made of Linear layers
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10),
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork().to("cpu")
print(model)
NeuralNetwork(
(flatten): Flatten(start_dim=1, end_dim=-1)
(linear_relu_stack): Sequential(
(0): Linear(in_features=784, out_features=512, bias=True)
(1): ReLU()
(2): Linear(in_features=512, out_features=512, bias=True)
(3): ReLU()
(4): Linear(in_features=512, out_features=10, bias=True)
)
)
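A quick shape check with a random dummy batch (purely illustrative) confirms that Flatten turns [N, 28, 28] into [N, 784] and the model returns one logit per class:
dummy = torch.rand(2, 28, 28)  # shaped like one batch from our dataloader
logits = model(dummy)          # Flatten makes this [2, 784] internally
logits.shape                   # torch.Size([2, 10])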
Define the loss function and optimizer
The key thing to watch here is the learning rate: if it is too high, the loss can blow up.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
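As a reminder of what CrossEntropyLoss expects, a toy example (not part of the training data): raw logits of shape [N, 10] and integer class indices of shape [N].
demo_logits = torch.randn(2, 10)     # fake model output for 2 samples
demo_targets = torch.tensor([9, 0])  # e.g. Ankle boot, T-shirt/top
loss_fn(demo_logits, demo_targets)   # returns a scalar loss tensor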
Write the test and training functions
The test function is straightforward: feed the test data through the model one batch at a time, accumulating the loss and the number of correct predictions.
Note how accuracy is counted: correct predictions are accumulated with (pred.argmax(1) == y).type(torch.float).sum().item().
The training function follows the standard PyTorch pattern and can be copied almost verbatim.
# test function
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            # X, y = X.to(device), y.to(device)
            pred = model(X.float().unsqueeze(1))
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# training function
def train():
    model.train()
    for batch,(onedatas,lable) in enumerate(train_dataloader):
        # cast to float and add a channel dim; Flatten then reshapes to [N, 784]
        pred=model(onedatas.float().unsqueeze(1))
        loss=loss_fn(pred,lable)
        lossitem=loss.item()
        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # log every 100 batches
        if batch % 100 == 0:
            print(f"loss {lossitem},batch is {batch}")
Start training
This is the standard pattern and can be applied as-is.
# training loop
for t in range(10):
print(f"Epoch {t+1}\n-------------------------------")
train()
test(test_dataloader, model, loss_fn)
print("Done!")
Save the model
# save model
torch.save(model.state_dict(),"model.pth")
print("Saved Model State to model.pth")
Saved Model State to model.pth
Load the model
model = NeuralNetwork().to("cpu")
model.load_state_dict(torch.load("model.pth"))
<All keys matched successfully>
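Since the loaded model is only used for inference below, it is also good practice to switch it to eval mode (it makes no functional difference for this particular architecture, but matters once Dropout or BatchNorm layers are involved):
model.eval()  # put the loaded model into inference mode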
Predict with the loaded model
Image preprocessing
- Requirements: 1. grayscale; 2. 28x28; 3. tensor data
from PIL import Image
import matplotlib.pyplot as plt
# open the original image
image = Image.open('lianxie.jpg')
# resize the image
new_size = (28,28) # new width and height in pixels
resized_image = image.resize(new_size)
# convert to grayscale
grayscaled_image = resized_image.convert("L")
grayscaled_image
transform_d = transforms.Compose([
    transforms.ToTensor()
])
image_t = transform_d(grayscaled_image)
plt.imshow(image_t[0])
<matplotlib.image.AxesImage at 0x1fabef58520>
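By default imshow renders a single-channel tensor with a color map; to view it as an actual grayscale image, a cmap can be passed:
plt.imshow(image_t[0], cmap="gray")  # render the single channel as grayscale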
Prediction
The predicted class is the index of the largest value in output (i.e. output.argmax(1)), which matched the actual item.
output=model(image_t)
output
tensor([[ 0.1207, -0.4304, 0.2356, 0.2038, 0.2823, -0.2736, 0.4910, -0.0614,
-0.1314, -0.4034]], grad_fn=<AddmmBackward0>)
Miscellaneous
Convert an image to grayscale and then to a tensor
import torch
import torchvision.transforms as transforms
from PIL import Image
# define the transform pipeline
grayscale_transform = transforms.Grayscale(num_output_channels=1) # grayscale conversion
tensor_transform = transforms.ToTensor() # tensor conversion
resized_transform=transforms.Resize((28,28))
# load the image
image = Image.open("lianxie.jpg")
# apply the transforms
gray_image = grayscale_transform(image)
resized_gray_tensor = resized_transform(gray_image)
gray_tensor = tensor_transform(resized_gray_tensor)
gray_tensor,gray_tensor.size()
output=model(gray_tensor)
output
tensor([[ 0.1208, -0.4305, 0.2355, 0.2039, 0.2823, -0.2737, 0.4913, -0.0619,
-0.1308, -0.4036]], grad_fn=<AddmmBackward0>)
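The three transforms can also be chained into a single Compose pipeline, which expresses the same preprocessing more idiomatically:
preprocess = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])
gray_tensor = preprocess(Image.open("lianxie.jpg"))
gray_tensor.size()  # torch.Size([1, 28, 28])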
Test a specific image
# troubleshooting: spot-check known test images
# Sandal13.jpg,5
# Sandal14.jpg,5
image=read_image("{}/Sandal13.jpg".format(img_dire))
imageData=image[0].unsqueeze(0)
print("unsqueeze: ",imageData)
print("unsqueeze after size : ",imageData.size())
print("original size: ",image.size())
output=model(imageData.float())
print("output content is ",output)
# argmax gives the index of the largest value
print(output.argmax(1))
# conclusion
# reducing the learning rate fixes it
unsqueeze: tensor([[[ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 3, 8,
5, 0, 0, 0, 0, 0, 5, 0, 0, 3, 0, 4, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0,
0, 0, 3, 5, 1, 1, 6, 1, 0, 0, 4, 0, 0, 4],
[ 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 6, 12, 6,
1, 3, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 4],
[ 0, 0, 0, 0, 0, 0, 0, 0, 7, 1, 0, 1, 1, 0,
0, 0, 0, 0, 2, 0, 0, 0, 5, 3, 0, 2, 8, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 0, 0, 0,
5, 11, 2, 0, 6, 9, 0, 0, 3, 0, 7, 6, 5, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 5,
0, 0, 7, 0, 0, 9, 2, 0, 4, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 1, 0, 0,
3, 29, 62, 3, 0, 3, 0, 0, 6, 0, 0, 20, 9, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 9, 0, 0,
96, 209, 143, 51, 1, 3, 0, 0, 6, 0, 2, 76, 57, 0],
[ 6, 0, 5, 7, 4, 0, 0, 7, 4, 0, 0, 11, 19, 167,
218, 176, 197, 184, 0, 5, 0, 11, 121, 134, 152, 117, 75, 0],
[ 0, 0, 0, 0, 6, 0, 4, 0, 0, 0, 0, 7, 56, 219,
168, 160, 204, 246, 95, 31, 105, 175, 199, 115, 18, 0, 5, 0],
[ 0, 7, 2, 8, 18, 0, 2, 8, 0, 5, 2, 0, 1, 182,
245, 152, 158, 185, 235, 200, 175, 78, 8, 17, 23, 2, 13, 0],
[ 0, 15, 0, 0, 0, 16, 5, 0, 4, 0, 21, 14, 11, 48,
230, 251, 240, 185, 221, 71, 0, 10, 0, 0, 0, 0, 12, 0],
[ 3, 0, 0, 31, 0, 0, 25, 120, 194, 0, 0, 0, 0, 19,
171, 225, 230, 205, 222, 150, 8, 0, 1, 37, 42, 43, 61, 38],
[ 4, 7, 0, 0, 2, 140, 194, 186, 201, 165, 13, 0, 11, 0,
0, 89, 135, 208, 180, 241, 178, 124, 132, 135, 161, 141, 143, 113],
[ 0, 3, 0, 0, 2, 107, 235, 196, 167, 219, 18, 8, 11, 0,
6, 49, 203, 221, 137, 170, 112, 65, 59, 75, 52, 55, 80, 59],
[ 9, 13, 3, 2, 0, 11, 184, 127, 5, 197, 111, 0, 14, 97,
109, 127, 148, 100, 89, 93, 64, 126, 106, 115, 87, 105, 115, 53],
[ 40, 109, 121, 120, 106, 91, 198, 207, 121, 187, 255, 127, 126, 96,
110, 71, 60, 93, 73, 74, 74, 73, 93, 60, 85, 82, 99, 40],
[ 41, 72, 36, 56, 64, 78, 99, 69, 92, 92, 109, 51, 76, 75,
84, 79, 104, 102, 74, 94, 94, 76, 105, 107, 60, 63, 87, 22],
[ 21, 95, 88, 115, 84, 105, 82, 83, 61, 64, 79, 88, 94, 89,
72, 88, 98, 92, 75, 103, 102, 72, 87, 92, 103, 99, 105, 14],
[ 17, 84, 79, 90, 66, 104, 73, 97, 75, 95, 96, 91, 75, 78,
74, 94, 59, 75, 70, 80, 81, 74, 81, 66, 47, 54, 70, 0],
[ 0, 78, 122, 127, 108, 109, 70, 90, 93, 102, 94, 85, 103, 115,
128, 129, 132, 146, 136, 126, 133, 154, 173, 164, 180, 153, 110, 9],
[ 0, 4, 6, 21, 70, 108, 103, 108, 105, 103, 95, 86, 98, 61,
54, 37, 13, 7, 0, 0, 0, 8, 23, 31, 17, 21, 15, 0],
[ 0, 6, 2, 2, 0, 0, 0, 5, 0, 8, 0, 0, 5, 0,
0, 0, 10, 0, 1, 3, 2, 0, 0, 4, 0, 9, 0, 0],
[ 0, 1, 0, 16, 0, 0, 3, 1, 0, 15, 0, 0, 2, 3,
10, 0, 0, 0, 3, 0, 0, 3, 0, 0, 0, 10, 0, 9],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]],
dtype=torch.uint8)
unsqueeze after size : torch.Size([1, 28, 28])
original size: torch.Size([3, 28, 28])
output content is tensor([[-6.7115, 1.3163, -7.2377, -1.6586, 2.7382, 3.2748, -6.3696, -0.6497,
-7.3979, -1.4042]], grad_fn=<AddmmBackward0>)
tensor([5])
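To turn the predicted index into a human-readable class, the standard FashionMNIST label order can be used (this is the usual ordering; the file names in output.txt are derived from it):
classes = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
           "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
classes[output.argmax(1).item()]  # 'Sandal' for the example above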