一、RNN特性
权重共享
$word_i \cdot weight + bias$
持久记忆单元
$word_i \cdot weight_{word} + bias_{word} + h_i \cdot weight_h + bias_h$
二、公式化表达
$h_t = f(h_{t-1}, x_t)$
$h_t = \tanh(W_{hh} h_{t-1} + W_{xh} x_t)$
$y_t = W_{hy} h_t$
三、RNN网络正弦波波形预测
环境准备
import numpy as np
import torch
from torch import nn,optim
from matplotlib import pyplot as plt
# Number of samples taken along the time axis
num_time_steps = 50
# Sizes handed to the RNN (input / hidden) and to the linear read-out (output)
input_size, hidden_size, output_size = 1, 16, 1
# Learning rate passed to the optimizer
lr = 0.01
RNN类
class Net(nn.Module):
    """Single-layer RNN followed by a linear read-out applied to every time step.

    Layer sizes are taken from the module-level ``input_size``,
    ``hidden_size`` and ``output_size``.
    """

    def __init__(self):
        super().__init__()
        # batch_first=True: tensors are laid out as [batch, seq, feature]
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        # Overwrite every RNN parameter with small Gaussian values
        for param in self.rnn.parameters():
            nn.init.normal_(param, mean=0.0, std=0.001)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev):
        """Run the RNN over ``x`` starting from ``hidden_prev``.

        Returns the per-step linear outputs with a leading dimension of 1
        and a clone of the final hidden state.
        """
        rnn_out, hidden_next = self.rnn(x, hidden_prev)
        # Collapse the leading dims: [1, seq, h] => [seq, h]
        flat = rnn_out.view(-1, hidden_size)
        # [seq, h] => [seq, 1]
        projected = self.linear(flat)
        # [seq, 1] => [1, seq, 1] so it lines up with y for the MSE loss
        return projected.unsqueeze(dim=0), hidden_next.clone()
正弦数据构建函数
def create_image():
    """Build one sine-wave training sample.

    A random integer start in {0, 1, 2} is drawn, ``num_time_steps`` points
    are sampled over ``[start, start + 10]``, and the sine of those points
    is split into an input sequence and the same sequence shifted one step
    ahead.

    Returns:
        (time_steps, x, y): the sampled time axis (NumPy array) and two
        float tensors of shape ``[1, num_time_steps - 1, 1]``.
    """
    start = np.random.randint(3, size=1)[0]
    time_steps = np.linspace(start, start + 10, num_time_steps)
    wave = np.sin(time_steps).reshape(num_time_steps, 1)

    seq_len = num_time_steps - 1
    # x is the wave without its last point; y is the wave without its first
    x = torch.tensor(wave[:-1]).float().view(1, seq_len, 1)
    y = torch.tensor(wave[1:]).float().view(1, seq_len, 1)
    return time_steps, x, y
训练模型
# Training: fit the RNN to predict the next sample of a sine wave.
model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)

# Initial hidden state, laid out as [num_layers, batch, hidden_size]
hidden_prev = torch.zeros(1, 1, hidden_size)

# `step` rather than `iter`, so the builtin iter() is not shadowed
for step in range(6000):
    time_steps, x, y = create_image()
    output, hidden_prev = model(x, hidden_prev)
    # Detach so back-propagation does not reach into earlier iterations
    hidden_prev = hidden_prev.detach()

    loss = criterion(output, y)
    optimizer.zero_grad()
    loss.backward()
    # Clip the norm of the whole gradient in a single call; clipping each
    # parameter tensor separately rescales them independently of each other.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
    optimizer.step()

    if step % 1000 == 0:
        # blue: input, red: target, green: model output
        plt.plot(time_steps[:-1], x.ravel(), c='b')
        plt.plot(time_steps[:-1], y.ravel(), c='r')
        plt.plot(time_steps[:-1], output.detach().numpy().ravel(), c='g')
        plt.show()
        print('Iteration:{} loss {}'.format(step, loss.item()))
可以看到第二次绘制图像的时候,输出曲线基本拟合了目标曲线
图像预测
# Prediction: feed a fresh sine sample through the trained model one time
# step at a time and collect the one-step-ahead outputs.
time_steps, x, y = create_image()
predictions = []
# Inference only, so no autograd graph is needed
with torch.no_grad():
    for i in range(x.shape[1]):
        step_input = x[:, i, :].view(1, 1, 1)
        # Carry the hidden state forward; the original bound the result to a
        # misspelled name, so every step reused the same stale state.
        pred, hidden_prev = model(step_input, hidden_prev)
        predictions.append(pred.numpy().ravel()[0])

x = x.numpy().ravel()
y = y.numpy()
# Inputs are drawn at t[:-1]; each prediction belongs to the following time step
plt.scatter(time_steps[:-1], x, s=90)
plt.plot(time_steps[:-1], x)
plt.scatter(time_steps[1:], predictions)
plt.show()
输出的预测曲线基本与目标曲线相同