1 聚类算法
聚类分析或简单聚类基本上是一种无监督的学习方法,它将数据点划分为若干特定的批次或组,使得相同组中的数据点具有相似的属性,而不同组中的数据点在某种意义上具有不同的属性。它包括许多基于差分进化的不同方法。
例如:K-均值(点之间的距离)、亲和传播(图距离)、均值偏移(点之间的距离)、DBSCAN(最近点之间的距离)、高斯混合(到中心的马氏距离)、谱聚类(图距离)等。
基本上,所有聚类方法都遵循相同的思路,即首先计算相似度,然后利用相似度将数据点划分为若干组(簇)。在这里,我们将重点讨论“具有噪声的基于密度的空间聚类”(DBSCAN)方法。
簇是数据空间中的密集区域,由点密度较低的区域隔开。DBSCAN算法基于“簇”和“噪声”这一直观概念。关键思想是,对于簇的每个点,给定半径的邻域必须至少包含最少数量的点。
2 DBSCAN
DBSCAN全称为Density-Based Spatial Clustering of Applications with Noise,中文译为:具有噪声的基于密度的空间聚类应用(DBSCAN)。
DBSCAN是一种基于密度的聚类技术。在基于密度的聚类中,数据按数据点高度集中的区域分组,而这些高密度区域被数据点稀疏的低密度区域所包围。基本上,该算法会找到数据点密集的地方,并把这些区域称为簇。
DBSCAN是基于密度的聚类的基本算法。它可以从包含噪声和异常值的大量数据中发现不同形状和大小的簇。DBSCAN聚类最吸引人的特性是它对异常值的鲁棒性。该算法只需要两个参数,即minPoints和epsilon:minPoints是一个区域要被视为密集区域所需聚集在一起的最小点数(阈值);epsilon是用于确定任意一点邻域范围的距离阈值(邻域半径)。DBSCAN围绕每个数据点创建一个半径为epsilon的圆,并据此将数据点分类为核心点、边界点和噪声点。
3 源程序
using System;
using System.Data;
using System.Collections.Generic;
using System.Drawing;
namespace Legalsoft.Truffer.Algorithm
{
    /// <summary>
    /// Splits a set of integer 2-D points into two groups, either with a
    /// k-means style centroid refinement (<see cref="Caverage"/>) or with a
    /// simplified density-based search (<see cref="DBSCAN"/>).
    /// </summary>
    /// <remarks>
    /// Both routines take the concatenation of <see cref="Cluster1"/> and
    /// <see cref="Cluster2"/> as input and overwrite
    /// <see cref="afterCluster1"/> and <see cref="afterCluster2"/>.
    /// </remarks>
    public class K_Means_DBSCAN_Algorithm
    {
        /// <summary>
        /// Upper bound on refinement passes in <see cref="Caverage"/>. Assigning by
        /// Manhattan distance while updating centroids with truncated means is not
        /// guaranteed to reach a fixed point, so the loop must be bounded.
        /// </summary>
        private const int MaxIterations = 1000;

        /// <summary>First part of the input point set.</summary>
        public List<Point> Cluster1 = new List<Point>();

        /// <summary>Second part of the input point set.</summary>
        public List<Point> Cluster2 = new List<Point>();

        /// <summary>First output group; cleared at the start of every run.</summary>
        public List<Point> afterCluster1 = new List<Point>();

        /// <summary>Second output group; cleared at the start of every run.</summary>
        public List<Point> afterCluster2 = new List<Point>();

        /// <summary>
        /// Partitions the input points into two groups by iterative centroid refinement.
        /// </summary>
        /// <remarks>
        /// The first two input points seed the centroids. Each pass assigns every point
        /// to the centroid with the strictly smaller Manhattan distance (ties go to the
        /// second group) and recomputes each centroid as the truncated integer mean of
        /// its members. Refinement stops when neither centroid moves or after
        /// <see cref="MaxIterations"/> passes. With fewer than two input points no
        /// second centroid can be seeded, so all points are placed in
        /// <see cref="afterCluster1"/>.
        /// </remarks>
        public void Caverage()
        {
            afterCluster1.Clear();
            afterCluster2.Clear();

            List<Point> points = MergeInput();
            if (points.Count < 2)
            {
                // Previously this case threw when indexing the seed points.
                afterCluster1.AddRange(points);
                return;
            }

            Point center1 = points[0];
            Point center2 = points[1];

            for (int pass = 0; pass < MaxIterations; pass++)
            {
                int count1 = 0;
                int count2 = 0;
                // 64-bit accumulators: summing many int coordinates can exceed int range.
                long sumX1 = 0;
                long sumY1 = 0;
                long sumX2 = 0;
                long sumY2 = 0;

                foreach (Point p in points)
                {
                    if (ManhattanDistance(p, center1) < ManhattanDistance(p, center2))
                    {
                        count1++;
                        sumX1 += p.X;
                        sumY1 += p.Y;
                    }
                    else
                    {
                        count2++;
                        sumX2 += p.X;
                        sumY2 += p.Y;
                    }
                }

                // A group that received no points keeps its centroid instead of dividing by zero.
                Point next1 = count1 > 0
                    ? new Point((int)(sumX1 / count1), (int)(sumY1 / count1))
                    : center1;
                Point next2 = count2 > 0
                    ? new Point((int)(sumX2 / count2), (int)(sumY2 / count2))
                    : center2;

                bool converged = next1 == center1 && next2 == center2;
                center1 = next1;
                center2 = next2;
                if (converged)
                {
                    break;
                }
            }

            // Final assignment against the settled centroids.
            foreach (Point p in points)
            {
                if (ManhattanDistance(p, center1) < ManhattanDistance(p, center2))
                {
                    afterCluster1.Add(p);
                }
                else
                {
                    afterCluster2.Add(p);
                }
            }
        }

        /// <summary>
        /// Simplified density-based search for at most two groups.
        /// </summary>
        /// <param name="radius">
        /// Neighbourhood radius; a point is a neighbour when its Euclidean distance is
        /// strictly less than this value. A non-positive radius matches nothing, so
        /// both outputs stay empty.
        /// </param>
        /// <param name="MinPts">
        /// Minimum number of input points (the candidate itself included) that must lie
        /// within <paramref name="radius"/> for a candidate to seed a group.
        /// </param>
        /// <remarks>
        /// The input is scanned in order. The first point with enough neighbours seeds
        /// <see cref="afterCluster1"/>; the next such point that is not already in
        /// <see cref="afterCluster1"/> seeds <see cref="afterCluster2"/>, after which the
        /// scan stops. A group consists of every input point within
        /// <paramref name="radius"/> of any neighbour of its seed. This is a single
        /// expansion step, not the transitive density-reachability closure of textbook
        /// DBSCAN. Points equal by value appear at most once across the two outputs;
        /// points reached by neither group are left out.
        /// </remarks>
        public void DBSCAN(int radius = 1, int MinPts = 12)
        {
            afterCluster1.Clear();
            afterCluster2.Clear();

            if (radius <= 0)
            {
                // No distance is strictly below a non-positive radius, so nothing can be grouped.
                return;
            }

            List<Point> points = MergeInput();

            // Value-based membership of both outputs; replaces repeated linear List.Contains scans.
            HashSet<Point> claimed = new HashSet<Point>();
            bool firstFound = false;

            foreach (Point candidate in points)
            {
                if (firstFound && claimed.Contains(candidate))
                {
                    continue;
                }

                List<Point> neighbours = FindNeighbours(points, candidate, radius);
                if (neighbours.Count < MinPts)
                {
                    continue;
                }

                if (!firstFound)
                {
                    Expand(points, neighbours, radius, claimed, afterCluster1);
                    firstFound = true;
                }
                else
                {
                    Expand(points, neighbours, radius, claimed, afterCluster2);
                    break;
                }
            }
        }

        /// <summary>Returns Cluster1 followed by Cluster2 as one new list.</summary>
        private List<Point> MergeInput()
        {
            List<Point> merged = new List<Point>(Cluster1.Count + Cluster2.Count);
            merged.AddRange(Cluster1);
            merged.AddRange(Cluster2);
            return merged;
        }

        /// <summary>Manhattan (L1) distance, computed in 64-bit to avoid int overflow.</summary>
        private static long ManhattanDistance(Point a, Point b)
        {
            return Math.Abs((long)a.X - b.X) + Math.Abs((long)a.Y - b.Y);
        }

        /// <summary>True when the Euclidean distance between a and b is strictly less than radius.</summary>
        private static bool IsWithin(Point a, Point b, int radius)
        {
            long dx = Math.Abs((long)a.X - b.X);
            long dy = Math.Abs((long)a.Y - b.Y);
            if (dx >= radius || dy >= radius)
            {
                // Already at least radius apart on one axis; also keeps the squares below within long range.
                return false;
            }
            return dx * dx + dy * dy < (long)radius * radius;
        }

        /// <summary>Collects, in input order, every point within radius of center.</summary>
        private static List<Point> FindNeighbours(List<Point> points, Point center, int radius)
        {
            List<Point> found = new List<Point>();
            foreach (Point p in points)
            {
                if (IsWithin(p, center, radius))
                {
                    found.Add(p);
                }
            }
            return found;
        }

        /// <summary>Appends to target every not-yet-claimed point within radius of any seed.</summary>
        private static void Expand(List<Point> points, List<Point> seeds, int radius,
            HashSet<Point> claimed, List<Point> target)
        {
            foreach (Point seed in seeds)
            {
                foreach (Point p in points)
                {
                    // HashSet.Add returns false for an already-claimed point, so each value is emitted once.
                    if (IsWithin(p, seed, radius) && claimed.Add(p))
                    {
                        target.Add(p);
                    }
                }
            }
        }
    }
}
4 源代码
using System;
using System.Data;
using System.Collections.Generic;
using System.Drawing;
namespace Legalsoft.Truffer.Algorithm
{
    /// <summary>
    /// Splits a set of integer 2-D points into two groups, either with a
    /// k-means style centroid refinement (<see cref="Caverage"/>) or with a
    /// simplified density-based search (<see cref="DBSCAN"/>).
    /// </summary>
    /// <remarks>
    /// Both routines take the concatenation of <see cref="Cluster1"/> and
    /// <see cref="Cluster2"/> as input and overwrite
    /// <see cref="afterCluster1"/> and <see cref="afterCluster2"/>.
    /// </remarks>
    public class K_Means_DBSCAN_Algorithm
    {
        /// <summary>
        /// Upper bound on refinement passes in <see cref="Caverage"/>. Assigning by
        /// Manhattan distance while updating centroids with truncated means is not
        /// guaranteed to reach a fixed point, so the loop must be bounded.
        /// </summary>
        private const int MaxIterations = 1000;

        /// <summary>First part of the input point set.</summary>
        public List<Point> Cluster1 = new List<Point>();

        /// <summary>Second part of the input point set.</summary>
        public List<Point> Cluster2 = new List<Point>();

        /// <summary>First output group; cleared at the start of every run.</summary>
        public List<Point> afterCluster1 = new List<Point>();

        /// <summary>Second output group; cleared at the start of every run.</summary>
        public List<Point> afterCluster2 = new List<Point>();

        /// <summary>
        /// Partitions the input points into two groups by iterative centroid refinement.
        /// </summary>
        /// <remarks>
        /// The first two input points seed the centroids. Each pass assigns every point
        /// to the centroid with the strictly smaller Manhattan distance (ties go to the
        /// second group) and recomputes each centroid as the truncated integer mean of
        /// its members. Refinement stops when neither centroid moves or after
        /// <see cref="MaxIterations"/> passes. With fewer than two input points no
        /// second centroid can be seeded, so all points are placed in
        /// <see cref="afterCluster1"/>.
        /// </remarks>
        public void Caverage()
        {
            afterCluster1.Clear();
            afterCluster2.Clear();

            List<Point> points = MergeInput();
            if (points.Count < 2)
            {
                // Previously this case threw when indexing the seed points.
                afterCluster1.AddRange(points);
                return;
            }

            Point center1 = points[0];
            Point center2 = points[1];

            for (int pass = 0; pass < MaxIterations; pass++)
            {
                int count1 = 0;
                int count2 = 0;
                // 64-bit accumulators: summing many int coordinates can exceed int range.
                long sumX1 = 0;
                long sumY1 = 0;
                long sumX2 = 0;
                long sumY2 = 0;

                foreach (Point p in points)
                {
                    if (ManhattanDistance(p, center1) < ManhattanDistance(p, center2))
                    {
                        count1++;
                        sumX1 += p.X;
                        sumY1 += p.Y;
                    }
                    else
                    {
                        count2++;
                        sumX2 += p.X;
                        sumY2 += p.Y;
                    }
                }

                // A group that received no points keeps its centroid instead of dividing by zero.
                Point next1 = count1 > 0
                    ? new Point((int)(sumX1 / count1), (int)(sumY1 / count1))
                    : center1;
                Point next2 = count2 > 0
                    ? new Point((int)(sumX2 / count2), (int)(sumY2 / count2))
                    : center2;

                bool converged = next1 == center1 && next2 == center2;
                center1 = next1;
                center2 = next2;
                if (converged)
                {
                    break;
                }
            }

            // Final assignment against the settled centroids.
            foreach (Point p in points)
            {
                if (ManhattanDistance(p, center1) < ManhattanDistance(p, center2))
                {
                    afterCluster1.Add(p);
                }
                else
                {
                    afterCluster2.Add(p);
                }
            }
        }

        /// <summary>
        /// Simplified density-based search for at most two groups.
        /// </summary>
        /// <param name="radius">
        /// Neighbourhood radius; a point is a neighbour when its Euclidean distance is
        /// strictly less than this value. A non-positive radius matches nothing, so
        /// both outputs stay empty.
        /// </param>
        /// <param name="MinPts">
        /// Minimum number of input points (the candidate itself included) that must lie
        /// within <paramref name="radius"/> for a candidate to seed a group.
        /// </param>
        /// <remarks>
        /// The input is scanned in order. The first point with enough neighbours seeds
        /// <see cref="afterCluster1"/>; the next such point that is not already in
        /// <see cref="afterCluster1"/> seeds <see cref="afterCluster2"/>, after which the
        /// scan stops. A group consists of every input point within
        /// <paramref name="radius"/> of any neighbour of its seed. This is a single
        /// expansion step, not the transitive density-reachability closure of textbook
        /// DBSCAN. Points equal by value appear at most once across the two outputs;
        /// points reached by neither group are left out.
        /// </remarks>
        public void DBSCAN(int radius = 1, int MinPts = 12)
        {
            afterCluster1.Clear();
            afterCluster2.Clear();

            if (radius <= 0)
            {
                // No distance is strictly below a non-positive radius, so nothing can be grouped.
                return;
            }

            List<Point> points = MergeInput();

            // Value-based membership of both outputs; replaces repeated linear List.Contains scans.
            HashSet<Point> claimed = new HashSet<Point>();
            bool firstFound = false;

            foreach (Point candidate in points)
            {
                if (firstFound && claimed.Contains(candidate))
                {
                    continue;
                }

                List<Point> neighbours = FindNeighbours(points, candidate, radius);
                if (neighbours.Count < MinPts)
                {
                    continue;
                }

                if (!firstFound)
                {
                    Expand(points, neighbours, radius, claimed, afterCluster1);
                    firstFound = true;
                }
                else
                {
                    Expand(points, neighbours, radius, claimed, afterCluster2);
                    break;
                }
            }
        }

        /// <summary>Returns Cluster1 followed by Cluster2 as one new list.</summary>
        private List<Point> MergeInput()
        {
            List<Point> merged = new List<Point>(Cluster1.Count + Cluster2.Count);
            merged.AddRange(Cluster1);
            merged.AddRange(Cluster2);
            return merged;
        }

        /// <summary>Manhattan (L1) distance, computed in 64-bit to avoid int overflow.</summary>
        private static long ManhattanDistance(Point a, Point b)
        {
            return Math.Abs((long)a.X - b.X) + Math.Abs((long)a.Y - b.Y);
        }

        /// <summary>True when the Euclidean distance between a and b is strictly less than radius.</summary>
        private static bool IsWithin(Point a, Point b, int radius)
        {
            long dx = Math.Abs((long)a.X - b.X);
            long dy = Math.Abs((long)a.Y - b.Y);
            if (dx >= radius || dy >= radius)
            {
                // Already at least radius apart on one axis; also keeps the squares below within long range.
                return false;
            }
            return dx * dx + dy * dy < (long)radius * radius;
        }

        /// <summary>Collects, in input order, every point within radius of center.</summary>
        private static List<Point> FindNeighbours(List<Point> points, Point center, int radius)
        {
            List<Point> found = new List<Point>();
            foreach (Point p in points)
            {
                if (IsWithin(p, center, radius))
                {
                    found.Add(p);
                }
            }
            return found;
        }

        /// <summary>Appends to target every not-yet-claimed point within radius of any seed.</summary>
        private static void Expand(List<Point> points, List<Point> seeds, int radius,
            HashSet<Point> claimed, List<Point> target)
        {
            foreach (Point seed in seeds)
            {
                foreach (Point p in points)
                {
                    // HashSet.Add returns false for an already-claimed point, so each value is emitted once.
                    if (IsWithin(p, seed, radius) && claimed.Add(p))
                    {
                        target.Add(p);
                    }
                }
            }
        }
    }
}