- 🍨 This article is a learning-record blog for the 🔗365天深度学习训练营 (365-day deep learning training camp)
- 🍖 Original author: K同学啊
Contents
- Environment
- Steps
- Environment Setup
- Data Preparation
- Utility Functions
- Model Design
- Model Training
- Results
- Summary and Reflections
Last week I went over the basic principles of ACGAN and wrote part of the code without actually running it. This week I reproduce a working ACGAN end to end.
Environment
PyTorch: 2.3.1+cu121
NVIDIA RTX 4090
Steps
Environment Setup
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Global hyperparameters
n_epochs = 200          # number of training epochs
batch_size = 64         # mini-batch size
lr = 0.0002             # Adam learning rate
b1 = 0.5                # Adam beta1
b2 = 0.999              # Adam beta2
n_cpu = 8               # dataloader worker processes
latent_dim = 100        # dimensionality of the noise vector z
n_classes = 10          # number of MNIST classes
img_size = 32           # images are resized to img_size x img_size
channels = 1            # grayscale images
sample_interval = 400   # save a sample grid every N batches
Data Preparation
# Create a folder for the intermediate sample images
import os
os.makedirs('images', exist_ok=True)
# Configure the dataset
os.makedirs('data/mnist', exist_ok=True)
dataloader = DataLoader(
    datasets.MNIST(
        'data/mnist',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),  # scale pixels to [-1, 1] to match the generator's Tanh output
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    num_workers=n_cpu,
)
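As a quick sanity check (an illustrative addition, not part of the original post), one batch can be pulled from the dataloader to confirm the tensor shapes implied by the settings above:
# Illustrative sanity check: inspect one batch from the dataloader
imgs, labels = next(iter(dataloader))
print(imgs.shape)    # expected: torch.Size([64, 1, 32, 32])
print(labels.shape)  # expected: torch.Size([64])
print(imgs.min().item(), imgs.max().item())  # roughly -1.0 and 1.0 after normalization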
Utility Functions
# Weight initialization function
def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm2d') != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
# Logging helper. In a Jupyter notebook the console output of a long-running job is easy to lose,
# so log messages are also written to a file.
import logging
import sys

def init_logger(filename, logger_name):
    '''
    @brief:
        initialize a logger that also writes to a file, in case we lose connection to the notebook
    @params:
        filename: the file to which all log messages are written
        logger_name: an alias for the logger
    '''
    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] %(name)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(filename=filename),
            logging.StreamHandler(sys.stdout)
        ]
    )
    # Test
    logger = logging.getLogger(logger_name)
    logger.info('### Init. Logger {} ###'.format(logger_name))
    return logger

# Initialize
my_logger = init_logger("./ml_notebook.log", "ml_logger")
# Save a grid of generated images
def sample_image(n_row, batches_done):
    """Save a grid of generated digits ranging over the classes (uses the global `generator` defined below)"""
    # Sample noise
    z = torch.randn((n_row**2, latent_dim), device=device)
    # Labels: each row of the grid runs through the classes 0..n_row-1
    labels = torch.tensor([num for _ in range(n_row) for num in range(n_row)], device=device)
    gen_imgs = generator(z, labels)
    save_image(gen_imgs.data.cpu(), 'images/%d.png' % batches_done, nrow=n_row, normalize=True)
Model Design
# Generator
class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        # Label embedding
        self.label_emb = nn.Embedding(n_classes, latent_dim)
        # Initial spatial size before upsampling
        self.init_size = img_size // 4
        # First linear layer
        self.l1 = nn.Sequential(
            nn.Linear(latent_dim, 128 * self.init_size ** 2)
        )
        # Convolutional blocks
        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, channels, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, noise, labels):
        # Mix the label embedding into the noise (element-wise multiplication)
        gen_input = torch.mul(self.label_emb(labels), noise)
        # First linear layer
        out = self.l1(gen_input)
        # Reshape to a feature map
        out = out.view(out.shape[0], 128, self.init_size, self.init_size)
        # Convolutions produce the image
        img = self.conv_blocks(out)
        return img
# Discriminator
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        # Helper that builds one discriminator block
        def discriminator_block(in_filters, out_filters, bn=True):
            """Return the layers of one discriminator block"""
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        # Convolutional blocks
        self.conv_blocks = nn.Sequential(
            *discriminator_block(channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )
        # Spatial size of the feature map after downsampling
        ds_size = img_size // 2 ** 4
        # Output layers: real/fake validity and auxiliary class prediction
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())
        # dim=1 made explicit; note that CrossEntropyLoss below applies log-softmax again,
        # so dropping this Softmax and emitting raw logits would be slightly cleaner
        self.aux_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, n_classes), nn.Softmax(dim=1))

    def forward(self, img):
        out = self.conv_blocks(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)
        label = self.aux_layer(out)
        return validity, label
# Model initialization
# Loss functions
adversarial_loss = nn.BCELoss()
auxiliary_loss = nn.CrossEntropyLoss()
# Instantiate the generator and the discriminator
generator = Generator().to(device)
discriminator = Discriminator().to(device)
# Initialize weights
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)
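Before training, a quick shape smoke test (my own illustration, not from the original post) can confirm that the two networks are wired together correctly:
# Illustrative smoke test: verify the output shapes of both networks
with torch.no_grad():
    z_test = torch.randn((4, latent_dim), device=device)
    y_test = torch.randint(0, n_classes, (4,), device=device)
    img_test = generator(z_test, y_test)
    validity_test, cls_test = discriminator(img_test)
print(img_test.shape)       # expected: torch.Size([4, 1, 32, 32])
print(validity_test.shape)  # expected: torch.Size([4, 1])
print(cls_test.shape)       # expected: torch.Size([4, 10])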
Model Training
# Training
# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=lr, betas=(b1, b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(b1, b2))

for epoch in range(n_epochs):
    for i, (imgs, labels) in enumerate(dataloader):
        batch_size = imgs.shape[0]
        # Target labels for "the image is real"
        valid = torch.ones((batch_size, 1), requires_grad=False, device=device)
        # Target labels for "the image is generated"
        fake = torch.zeros((batch_size, 1), requires_grad=False, device=device)
        real_imgs = imgs.to(device)
        labels = labels.to(device)

        # Train the generator
        optimizer_G.zero_grad()
        # Sample noise and labels as generator input
        z = torch.randn((batch_size, latent_dim), device=device)
        # Fixed: this was randint(0, 1), which always produced class 0
        gen_labels = torch.randint(0, n_classes, (batch_size,), device=device)
        # Generate a batch of images
        gen_imgs = generator(z, gen_labels)
        # Loss measures the generator's ability to fool the discriminator
        validity, pred_label = discriminator(gen_imgs)
        g_loss = 0.5 * (adversarial_loss(validity, valid) + auxiliary_loss(pred_label, gen_labels))
        g_loss.backward()
        optimizer_G.step()

        # Train the discriminator
        optimizer_D.zero_grad()
        # Loss on real images
        real_pred, real_aux = discriminator(real_imgs)
        d_real_loss = 0.5 * (adversarial_loss(real_pred, valid) + auxiliary_loss(real_aux, labels))
        # Loss on generated images
        fake_pred, fake_aux = discriminator(gen_imgs.detach())
        d_fake_loss = 0.5 * (adversarial_loss(fake_pred, fake) + auxiliary_loss(fake_aux, gen_labels))
        # Total discriminator loss
        d_loss = 0.5 * (d_real_loss + d_fake_loss)
        # Classification accuracy of the discriminator's auxiliary head
        pred = np.concatenate([real_aux.data.cpu().numpy(), fake_aux.data.cpu().numpy()], axis=0)
        gt = np.concatenate([labels.data.cpu().numpy(), gen_labels.data.cpu().numpy()], axis=0)
        d_acc = np.mean(np.argmax(pred, axis=1) == gt)
        d_loss.backward()
        optimizer_D.step()

        if i % 100 == 0:
            my_logger.info("[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %d%%] [G loss: %f]"
                           % (epoch, n_epochs, i, len(dataloader), d_loss.item(), 100 * d_acc, g_loss.item()))
        batches_done = epoch * len(dataloader) + i
        if batches_done % sample_interval == 0:
            sample_image(n_row=10, batches_done=batches_done)
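After training, the weights can be persisted so that sampling works later without retraining; a minimal sketch (the file names here are my own choice, not from the original post):
# Illustrative: save the trained weights (file names are arbitrary)
torch.save(generator.state_dict(), 'acgan_generator.pth')
torch.save(discriminator.state_dict(), 'acgan_discriminator.pth')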
Results
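The grids saved by sample_image during training can be inspected directly in the notebook. A minimal sketch, assuming matplotlib is installed and a grid such as images/0.png was produced by the run:
# Illustrative: display one of the sample grids saved during training
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

grid = mpimg.imread('images/0.png')  # assumes this file exists from sample_image
plt.figure(figsize=(6, 6))
plt.imshow(grid)
plt.axis('off')
plt.show()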
Summary and Reflections
Reproducing the model showed me that my earlier understanding of the discriminator was off: if the class information is fed into the discriminator's input, the answer is effectively handed to it directly, and the generated results actually turn out worse. Another point that differed from my expectation is in the generator: the label embedding is mixed into the feature vector with an element-wise multiplication (torch.mul) rather than a concatenation.
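To make that difference concrete, here is a small sketch (my own illustration, not from the original post) contrasting the two ways of conditioning the noise on the label:
# Illustrative comparison of two label-conditioning schemes
import torch
import torch.nn as nn

latent_dim, n_classes = 100, 10
emb = nn.Embedding(n_classes, latent_dim)
z = torch.randn(4, latent_dim)
y = torch.randint(0, n_classes, (4,))

# The mixing used in the generator above: element-wise product, dimensionality stays latent_dim
mixed = torch.mul(emb(y), z)             # shape: (4, 100)

# The alternative I had expected: concatenation, dimensionality grows to 2 * latent_dim,
# so the first linear layer would need a correspondingly larger input size
concat = torch.cat([emb(y), z], dim=1)   # shape: (4, 200)

print(mixed.shape, concat.shape)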