Code from the Bilibili uploader's video 【强化学习的数学原理-作业】GridWorld示例代码(已更新至DQN、REINFORCE、A2C).
Examples of SGD, GD, and mini-batch GD (MGD):
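All three variants share the same training loop and differ only in batch_size: batch_size = 1 gives SGD, batch_size = 1000 (the whole dataset) gives plain GD, and anything in between gives mini-batch GD. Example calls follow the code below.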
import numpy as np
import matplotlib.pyplot as plt

# Build a dataset of 1000 points; mini-batches will be sampled from it later
np.random.seed(0)
X = np.linspace(-10, 10, 1000)
Y = 2 * X ** 2 + 3 * X + 5  # ground-truth targets

# Quadratic model: we want parameters a, b, c that minimize the loss
def quadratic_function(X, a, b, c):
    return a * X ** 2 + b * X + c

# Mean-squared-error loss
def loss_function(Y_pred, Y):
    return np.mean((Y_pred - Y) ** 2)
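The gradient expressions inside train below come from differentiating the MSE loss with respect to each parameter; a quick derivation for reference:

    L(a, b, c) = (1/N) Σ_i (ŷ_i − y_i)²,   where ŷ_i = a·x_i² + b·x_i + c

    ∂L/∂a = (2/N) Σ_i (ŷ_i − y_i)·x_i²
    ∂L/∂b = (2/N) Σ_i (ŷ_i − y_i)·x_i
    ∂L/∂c = (2/N) Σ_i (ŷ_i − y_i)

These match grad_a, grad_b, and grad_c in the code.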
def train(learning_rate, batch_size, note):
    # Random initial parameters
    a = np.random.randn()
    b = np.random.randn()
    c = np.random.randn()
    loss = 1000
    cnt = 0
    results = []  # training-loss history
    while loss > 0.01:
        cnt += 1
        # Sample batch_size indices from the 1000 data points
        batch = np.random.randint(0, 1000, size=batch_size)
        x = X[batch]
        y = Y[batch]
        if cnt < 2:  # inspect the first sampled batch
            print(batch)
            print(x)
        y_pred = quadratic_function(x, a, b, c)
        loss = loss_function(y_pred, y)
        results.append(loss)
        # Gradients of the MSE loss with respect to a, b, c
        grad_a = (2 * (y_pred - y) * x ** 2).mean()
        grad_b = (2 * (y_pred - y) * x).mean()
        grad_c = (2 * (y_pred - y)).mean()
        a -= learning_rate * grad_a
        b -= learning_rate * grad_b
        c -= learning_rate * grad_c
        # Validation: recompute the loss on a fresh random batch of 5 points;
        # the loop stops once this validation loss drops below 0.01
        valid_batch = np.random.randint(0, 1000, size=5)
        x = X[valid_batch]
        y = Y[valid_batch]
        y_pred = quadratic_function(x, a, b, c)
        loss = loss_function(y_pred, y)
    print("Final coefficients:", a, b, c)
    print("Total iterations:", cnt)
    # Plot the fitted curve against the target
    y_pred = quadratic_function(X, a, b, c)
    plt.figure(figsize=(8, 3))
    plt.plot(X, Y, label="target")
    plt.plot(X, y_pred, label="predict")
    plt.title(note)
    plt.legend()
    plt.show()

    # Plot the first 150 training losses
    plt.figure(figsize=(8, 3))
    plt.plot(results[:150], label="loss")
    plt.legend()
    plt.title(note)
    plt.show()
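A minimal sketch of how train might be called to contrast the three variants. The learning rates and batch sizes below are assumptions for illustration (they do not come from the original video) and may need tuning before the loop converges:

# Assumed hyperparameters, chosen only for illustration
train(learning_rate=5e-5, batch_size=1, note="SGD (batch_size = 1)")      # one sample per step
train(learning_rate=1e-4, batch_size=32, note="MGD (batch_size = 32)")    # mini-batch
train(learning_rate=3e-4, batch_size=1000, note="GD (full batch)")        # whole dataset per step

The loss curves make the trade-off visible: small batches give noisy but cheap updates, while full-batch GD produces a smooth curve at the cost of touching all 1000 samples on every step.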