Goal of this article:
Using the quadratic function $y = 2x^2 - 4x + 3$ as an example, design an algorithm that simulates the training process with gradient descent and ultimately recovers the parameters a, b, c.
How it works
Target function: $y_{\text{out}} = a x^2 + b x + c$

Loss function: $L = \frac{1}{2}\,(y_{\text{out}} - y_{\text{true}})^2$, i.e. the squared error (the $\frac{1}{2}$ just cancels when differentiating; averaged over a batch this is the MSE).

Expanding the loss: $L = \frac{1}{2}\,(a x^2 + b x + c - y_{\text{true}})^2$

Differentiating the loss with respect to a, b, c:

$\frac{\partial L}{\partial a} = (y_{\text{out}} - y_{\text{true}})\,x^2,\qquad \frac{\partial L}{\partial b} = (y_{\text{out}} - y_{\text{true}})\,x,\qquad \frac{\partial L}{\partial c} = y_{\text{out}} - y_{\text{true}}$
These derivatives are the gradient: they measure how the loss changes as each parameter moves, i.e. they encode the mismatch between the current parameters and the ones we want. Gradient descent shrinks that mismatch by stepping each parameter against its gradient:
$a \leftarrow a - \eta\,\frac{\partial L}{\partial a},\qquad b \leftarrow b - \eta\,\frac{\partial L}{\partial b},\qquad c \leftarrow c - \eta\,\frac{\partial L}{\partial c}$

where $\eta$ is the learning rate.
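As a minimal sketch (not from the original code; the sample point and values here are illustrative), one update step on a single training pair looks like this:

```python
import numpy as np

# current parameters (a, b, c) and learning rate (illustrative values)
w = np.array([-11/6, -395/3, -2400.0])
lr = 0.5

# one training sample drawn from the target curve y = 2x^2 - 4x + 3
x, y_true = 0.5, 2*0.5**2 - 4*0.5 + 3

y_out = w[0]*x**2 + w[1]*x + w[2]                    # forward pass
grad = (y_out - y_true) * np.array([x**2, x, 1.0])   # [dL/da, dL/db, dL/dc]
w = w - lr * grad                                    # gradient-descent update
```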
Repeating this process keeps updating the parameters; after each update you can check the new parameters by seeing whether the loss has actually dropped.
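As a quick sanity check on the derivation (my addition, not part of the original post), the analytic gradients can be compared against numerical central differences at an arbitrary point; the two should agree almost exactly:

```python
import numpy as np

def loss(w, x, y_true):
    a, b, c = w
    return 0.5 * (a*x**2 + b*x + c - y_true)**2

w = np.array([1.0, 2.0, 3.0])                  # arbitrary parameters
x, y_true = 0.5, 2*0.5**2 - 4*0.5 + 3          # one sample from the target curve

# analytic gradient: (y_out - y_true) * [x^2, x, 1]
y_out = w[0]*x**2 + w[1]*x + w[2]
analytic = (y_out - y_true) * np.array([x**2, x, 1.0])

# numerical gradient via central differences
eps = 1e-6
numeric = np.zeros(3)
for i in range(3):
    dw = np.zeros(3)
    dw[i] = eps
    numeric[i] = (loss(w + dw, x, y_true) - loss(w - dw, x, y_true)) / (2*eps)

print(analytic)  # [0.6875 1.375  2.75  ]
print(numeric)   # should match to ~1e-6
```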
Step-by-step procedure
- Fix the model's expression in advance as $y = a x^2 + b x + c$, i.e. a quadratic, and randomly initialize the parameters a, b, c. A function of a different form would not work in this version (the way the derivatives are computed would have to be changed first); in other words, the model's expression must be known ahead of time.
- Repeatedly feed in pairs (x_input, y_true) and compute $y_{\text{out}} = a\,x_{\text{input}}^2 + b\,x_{\text{input}} + c$; this y_out will differ from y_true.
- Wrap that difference in a loss function and differentiate it with respect to a, b, and c to get the gradients $\frac{\partial L}{\partial a}$, $\frac{\partial L}{\partial b}$, $\frac{\partial L}{\partial c}$.
- Feed these gradients, the current parameters a, b, c, and the learning rate into the gradient-descent update to produce new values of a, b, c.
- Repeat steps 2-4 until training ends or the loss drops to a small value.
Python implementation
```python
# Initialize (a, b, c) to (-11/6, -395/3, -2400); the target (a, b, c) is (2, -4, 3).
import random
import time

import matplotlib.pyplot as plt
import numpy as np


class QuadraticFunc():
    # draw the current fit and the target curve y = 2x^2 - 4x + 3
    def drew(self, w, name="show"):
        a, b, c = w
        x1 = np.array(range(-80, 80))
        y1 = a*x1*x1 + b*x1 + c
        y2 = 2*x1*x1 - 4*x1 + 3
        plt.clf()
        plt.plot(x1, y1)
        plt.plot(x1, y2)
        plt.scatter(x1, y1, c='r')  # set color
        plt.xlim((-50, 50))
        plt.ylim((-500, 500))
        plt.xlabel('X Axis')
        plt.ylabel('Y Axis')
        if name == "first":
            plt.pause(3)
        else:
            plt.pause(0.01)
        plt.ioff()

    # compute the loss (MSE)
    def cal_loss(self, y_out, y_true):
        # return np.dot((y_out - y_true), (y_out - y_true)) * 0.5
        return np.mean((y_out - y_true)**2)

    # compute the gradients of the loss w.r.t. a, b, c
    def cal_grad(self, x, y_out, y_true):
        # x(batch), y_out(batch), y_true(batch)
        a_grad = (y_out - y_true)*x**2
        b_grad = (y_out - y_true)*x
        c_grad = (y_out - y_true)
        return np.array([np.mean(a_grad), np.mean(b_grad), np.mean(c_grad)])

    # update the parameters with gradient descent
    def update_theta(self, step, w, grad):
        new_w = w - step*grad
        return new_w

    # per-sample (stochastic) training
    def run(self):
        feed_x = np.array(range(-400, 400))/400
        feed_y = 2*feed_x*feed_x - 4*feed_x + 3

        base_lr = 0.5
        lr = base_lr
        # initialize the parameters
        a, b, c = -11/6, -395/3, -2400  # -1, 10, 26
        w = np.array([a, b, c])
        self.drew(w, "first")

        epochs = 100
        for epoch in range(epochs):
            # halve the learning rate every 10 epochs
            lr = base_lr/(2**(int((epoch + 1)/10)))
            for i in range(len(feed_x)):
                x_input = feed_x[i]
                y_true = feed_y[i]
                y_out = w[0]*x_input*x_input + w[1]*x_input + w[2]
                # compute the loss
                loss = self.cal_loss(y_out, y_true)
                # compute the gradients
                grad = self.cal_grad(x_input, y_out, y_true)
                # update the parameters by gradient descent
                w = self.update_theta(lr, w, grad)
                # self.drew(w)
            grad = np.round(grad, 2)
            loss = np.round(loss, 2)
            w = np.round(w, 2)  # note: this also quantizes w for the next epoch
            print("train times is:", epoch, " grad is:", grad,
                  " loss is:", "%.4f" % loss, " w is:", w, "\n")
            self.drew(w)
            if loss < 1e-5:
                print("train finish:", w)
                break

    # mini-batch training
    def run_batch(self):
        feed_x = np.array(range(-400, 400))/400
        feed_y = 2*feed_x*feed_x - 4*feed_x + 3
        x_y = [[feed_x[i], feed_y[i]] for i in range(len(feed_x))]

        base_lr = 0.5
        lr = base_lr
        # initialize the parameters
        a, b, c = -11/6, -395/3, -2400  # -1, 10, 26
        w = np.array([a, b, c])
        self.drew(w, "first")

        batch_size = 16
        data_len = len(x_y)//batch_size
        epochs = 100
        for epoch in range(epochs):
            random.shuffle(x_y)
            # halve the learning rate every 10 epochs
            lr = base_lr/(2**(int((epoch + 1)/10)))
            print("epoch,lr:", epoch, lr)
            for i in range(data_len):
                x_y_list = x_y[i*batch_size:(i + 1)*batch_size]
                x_y_np = np.array(x_y_list)
                x_input = x_y_np[:, 0]
                y_true = x_y_np[:, 1]
                y_out = w[0]*x_input*x_input + w[1]*x_input + w[2]
                # compute the loss
                loss = self.cal_loss(y_out, y_true)
                # compute the gradients
                grad = self.cal_grad(x_input, y_out, y_true)
                # update the parameters by gradient descent
                w = self.update_theta(lr, w, grad)
            grad = np.round(grad, 2)
            loss = np.round(loss, 2)
            w = np.round(w, 2)
            print("train times is:", epoch, " grad is:", grad,
                  " loss is:", "%.4f" % loss, " w is:", w, "\n")
            self.drew(w)
            if loss < 1e-5:
                print("train finish:", w)
                # break
            time.sleep(0.1)


if __name__ == "__main__":
    qf = QuadraticFunc()
    qf.run()
```
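`run()` updates the parameters one sample at a time (plain stochastic gradient descent), while `run_batch()` shuffles the data each epoch and averages the gradient over mini-batches of 16 (`cal_grad` takes `np.mean` over the batch), which gives smoother updates. To try it, swap the last line for `qf.run_batch()`.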