一.LFM梯度下降算法
2.代码实现
# 0. 引入依赖
import numpy as np
import pandas as pd
# 1. 数据准备
# 评分矩阵R
R = np.array([[4,0,2,0,1],
[0,2,3,0,0],
[1,0,2,4,0],
[5,0,0,3,1],
[0,0,1,5,1],
[0,3,2,4,1],])
# 二维数组小技巧:取行数R.shape[0]和len(R),列数R.shape[1]和len(R[0])
# 2. 算法实现
"""
@输入参数:
R:M*N 的评分矩阵
K:隐特征向量维度
max_iter: 最大迭代次数
alpha:步长
lamda:正则化系数
@输出:
分解之后的 P,Q
P:初始化用户特征矩阵M*K
Q:初始化物品特征矩阵N*K
"""
# 给定超参数
K = 5
max_iter = 5000
alpha = 0.0002
lamda = 0.004
# 核心算法
def LFM_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002):
# 基本维度参数定义
M = len(R)
N = len(R[0])
# P,Q初始值,随机生成一个M*K的矩阵
P = np.random.rand(M, K)
Q = np.random.rand(N, K)
Q = Q.T # Q转置(变为K*M矩阵)
# 开始迭代
for step in range(max_iter):
# 对所有的用户u、物品i做遍历,对应的特征向量Pu、Qi梯度下降
for u in range(M):
for i in range(N):
# 对于每一个大于0的评分,求出预测评分误差,0分表示没评价过
if R[u][i] > 0:
eui = np.dot(P[u, :], Q[:, i]) - R[u][i] # 用户u对物品i的向量乘积减去该物品的实际评分
# 代入公式,按照梯度下降算法更新当前的Pu、Qi
for k in range(K):
# 循环每一步都递减所以不用再求和然后再减去
P[u][k] = P[u][k] - alpha * (2 * eui * Q[k][i] + 2 * lamda * P[u][k])
Q[k][i] = Q[k][i] - alpha * (2 * eui * P[u][k] + 2 * lamda * Q[k][i])
# u、i遍历完成,所有特征向量更新完成,可以得到P、Q,可以计算预测评分矩阵
predR = np.dot(P, Q)
# 计算当前损失函数
cost = 0
for u in range(M):
for i in range(N):
if R[u][i] > 0:
cost += (np.dot(P[u, :], Q[:, i]) - R[u][i]) ** 2
# 加上正则化项
for k in range(K):
cost += lamda * (P[u][k] ** 2 + Q[k][i] ** 2)
if cost < 0.0001:
break
return P, Q.T, cost
# 3. 测试
P, Q, cost = LFM_grad_desc(R, K, max_iter, alpha, lamda)
predR = P.dot(Q.T)
print("P矩阵:\n",P)
print("Q矩阵:\n",Q)
print("评分矩阵:\n",R)
print("预测误差:",cost)
print("预测矩阵:\n",predR)
P矩阵:
[[ 1.00438746 0.80908498 1.03554314 0.75883908 0.65112987]
[ 0.259309 0.30692238 0.84186825 1.31376124 1.07697641]
[-0.35224656 1.12598736 1.00862706 0.26789057 0.88263718]
[ 1.24601986 -0.30059379 0.33445767 1.04451048 1.21593543]
[ 1.34112239 1.20429668 0.83579321 0.08691934 0.47363809]
[ 0.90599105 0.81248866 0.39840001 0.55195613 1.0236363 ]]
Q矩阵:
[[ 1.95601706 -0.06282288 0.57841689 1.04259243 1.03590011]
[ 1.03926527 0.99645306 0.3604848 0.08656269 0.96649848]
[-0.14436891 0.20787068 0.45512168 1.05211096 1.10490284]
[ 0.96410499 1.56976268 1.44327334 0.39381784 1.11552898]
[ 0.51741592 0.15695691 0.11513832 0.25747499 0.10010323]]
评分矩阵:
[[4 0 2 0 1]
[0 2 3 0 0]
[1 0 2 4 0]
[5 0 0 3 1]
[0 0 1 5 1]
[0 3 2 4 1]]
预测误差: 0.5744316139475661
预测矩阵:
[[3.97841099 2.91834095 2.01229901 4.75817678 1.02647054]
[3.46023939 2.03342405 2.9816934 3.6656234 0.72534497]
[1.01729351 1.99576748 2.00104019 3.97376224 0.2679351 ]
[4.99816259 2.38159984 2.3522792 2.97990494 1.02669353]
[3.61230101 3.36039225 1.05188206 4.95230823 1.04896486]
[3.58738259 2.93191072 1.93115385 4.0831511 0.88675521]]