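"""Z-score outlier detection for 2-D points.

Reference for the principle of the Z-score algorithm:
https://blog.csdn.net/m0_59596937/article/details/128378641

The concrete implementation follows.
"""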
import numpy as np
class ZScoreOutlierDetector:
    """Flag 2-D points whose per-axis Z-score exceeds a threshold.

    The detector learns the mean and standard deviation of the x and y
    coordinates from sample data (``fit``) and then classifies a point as
    an outlier when the absolute Z-score of either coordinate is strictly
    greater than ``threshold`` (``is_outlier``).
    """

    def __init__(self, threshold=3):
        """Create an unfitted detector.

        Args:
            threshold: Z-score cut-off; a coordinate whose absolute
                Z-score is strictly greater than this marks the point as
                an outlier. Defaults to 3.
        """
        self.threshold = threshold
        self.x_mean = None  # mean of the x coordinates, set by fit()
        self.x_std = None   # standard deviation of the x coordinates
        self.y_mean = None  # mean of the y coordinates, set by fit()
        self.y_std = None   # standard deviation of the y coordinates

    def fit(self, points):
        """Compute the per-axis mean and standard deviation of the samples.

        Args:
            points: Non-empty sequence of points, convertible to a 2-D
                numeric array with at least two columns. Column 0 is
                used as x and column 1 as y; further columns are ignored.

        Raises:
            ValueError: If ``points`` is empty, is not 2-D, or has fewer
                than two columns.
        """
        samples = np.asarray(points, dtype=float)
        if samples.ndim != 2 or samples.shape[0] == 0 or samples.shape[1] < 2:
            raise ValueError(
                "points must be a non-empty sequence of (x, y) pairs; "
                f"got array of shape {samples.shape}"
            )

        xs = samples[:, 0]  # every row, column 0
        ys = samples[:, 1]  # every row, column 1

        # np.std defaults to ddof=0, i.e. the population standard deviation.
        self.x_mean = np.mean(xs)
        self.x_std = np.std(xs)
        self.y_mean = np.mean(ys)
        self.y_std = np.std(ys)

    def _calc_z_score(self, val, mean, std):
        """Return the Z-score of ``val`` for one axis.

        When ``std`` is zero (all samples were identical on this axis) the
        ordinary formula would divide by zero, so a value equal to the
        mean scores 0.0 and any other value scores positive infinity,
        which always exceeds a finite threshold.
        """
        if std == 0:
            return 0.0 if val == mean else float("inf")
        return (val - mean) / std

    def is_outlier(self, point):
        """Tell whether ``point`` is an outlier relative to the fitted data.

        Args:
            point: Indexable whose item 0 is the x coordinate and item 1
                the y coordinate.

        Returns:
            bool: True if the absolute Z-score of either coordinate is
            strictly greater than ``self.threshold``.

        Raises:
            RuntimeError: If any fitted statistic is still None, i.e.
                ``fit`` has not stored values yet.
        """
        stats = (self.x_mean, self.x_std, self.y_mean, self.y_std)
        if any(stat is None for stat in stats):
            raise RuntimeError("fit() must be called before is_outlier()")

        x_z = self._calc_z_score(point[0], self.x_mean, self.x_std)
        y_z = self._calc_z_score(point[1], self.y_mean, self.y_std)

        # Exceeding the threshold on either axis is enough. bool() turns
        # the numpy boolean produced by the comparison into a plain bool.
        return bool(abs(x_z) > self.threshold or abs(y_z) > self.threshold)
# Example usage
def main():
    """Fit the detector on five sample points and classify one test point."""
    # Sample data used to estimate the per-axis mean and standard deviation.
    sample_points = [
        (1.0, 2.0),
        (1.5, 3.0),
        (1.2, 2.5),
        (1.8, 3.2),
        (0.9, 2.1),
    ]

    # Build the detector and fit it to the samples.
    detector = ZScoreOutlierDetector(threshold=3)
    detector.fit(sample_points)

    # Point to classify.
    test_point = (5.0, 4.0)
    result = detector.is_outlier(test_point)
    print(f"检测点 {test_point} 是否为离群点:{result}")


if __name__ == "__main__":
    main()