NSGA-II介绍
NSGA-II(Non-dominated Sorting Genetic Algorithm II)是一种多目标优化算法,用于解决具有多个相互冲突目标的优化问题。它通过模拟进化过程中的自然选择和遗传操作(选择、交叉、变异),逐步改进种群中的解,以找到一组尽可能好的解,这些解在多个目标下互不支配,即Pareto最优解。
建模目的
用NSGA-II对SVR的超参数进行寻优:找到使测试集RMSE最小的超参数C及其对应的评价指标RMSE。超参数C的取值范围为(0.01, 10),迭代次数为5,种群大小为5。
注:超参数范围、迭代次数和种群大小均可自定义(分别对应代码中的x_range、gen_size和pop_size)。
模型源码
import random
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
# --- Genetic-algorithm settings ---
pop_size, gen_size = 5, 5  # population size, number of generations
pc, pm = 1, 0.3            # crossover probability, mutation probability
num_obj = 1                # number of objective functions
x_range = (0.01, 10)       # (lower, upper) bounds of the decision variable

# --- Data set: inputs and targets are read from two separate workbooks ---
data = pd.read_excel('C:/Users/孙海涛/Desktop/x.xlsx', sheet_name='Sheet1')
target = pd.read_excel('C:/Users/孙海涛/Desktop/y.xlsx', sheet_name='Sheet1')

# Seeded split that holds out 25 % of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.25,
    random_state=22,
)
class Individual:
    """One candidate solution: a decision-variable value plus its scores."""

    def __init__(self, x):
        """Store the decision variable ``x`` and reset the bookkeeping fields."""
        self.x = x
        self.objs = [None for _ in range(num_obj)]  # one slot per objective
        self.rank = None       # front index, assigned by the sorting step
        self.distance = 0.0    # crowding distance, assigned during selection

    def evaluate(self):
        """Fit an SVR with ``C = self.x`` and store the test RMSE in ``objs[0]``.

        Reads the module-level ``x_train``, ``y_train``, ``x_test`` and
        ``y_test``; a new model is trained on every call.
        """
        predictions = SVR(C=self.x).fit(x_train, y_train).predict(x_test)
        mse = mean_squared_error(y_test, predictions)
        self.objs[0] = np.sqrt(mse)  # RMSE
def _dominates(a, b):
    """Return True when ``a`` Pareto-dominates ``b`` (every objective is minimised).

    Both individuals must already be evaluated, i.e. no entry of ``objs``
    may be ``None``.  With a single objective this reduces to
    ``a.objs[0] < b.objs[0]``.
    """
    pairs = list(zip(a.objs, b.objs))
    return all(p <= q for p, q in pairs) and any(p < q for p, q in pairs)


def _non_dominated_sort(population):
    """Split ``population`` into fronts and set ``rank`` (1 = best) on each member.

    Returns a list of non-empty lists; ``fronts[0]`` holds the individuals
    that nobody dominates.
    """
    size = len(population)
    dominated = [[] for _ in range(size)]  # indices each individual dominates
    counts = [0] * size                    # how many individuals dominate each one
    for i, a in enumerate(population):
        for j, b in enumerate(population):
            if _dominates(a, b):
                dominated[i].append(j)
            elif _dominates(b, a):
                counts[i] += 1

    fronts = []
    current = [i for i in range(size) if counts[i] == 0]
    rank = 1
    while current:
        following = []
        for i in current:
            population[i].rank = rank
            for j in dominated[i]:
                counts[j] -= 1
                if counts[j] == 0:
                    following.append(j)
        fronts.append([population[i] for i in current])
        current = following
        rank += 1
    return fronts


def _assign_crowding_distance(front):
    """Recompute ``distance`` for every member of one front.

    The distance is reset first so values from earlier generations do not
    accumulate.  For each objective the front is ordered by that objective,
    the two boundary members get an infinite distance (so they are always
    preferred) and interior members accumulate the gap between their
    neighbours, normalised by the objective's span within the front.
    """
    if not front:
        return
    for ind in front:
        ind.distance = 0.0
    for i in range(num_obj):
        ordered = sorted(front, key=lambda ind: ind.objs[i])
        ordered[0].distance = ordered[-1].distance = float('inf')
        span = ordered[-1].objs[i] - ordered[0].objs[i]
        if span == 0:
            continue  # all members tie on this objective
        for j in range(1, len(ordered) - 1):
            gap = ordered[j + 1].objs[i] - ordered[j - 1].objs[i]
            ordered[j].distance += gap / span


def _clip(value):
    """Clamp ``value`` to the closed interval described by ``x_range``."""
    return max(x_range[0], min(x_range[1], value))


# Initial population: decision variable sampled uniformly from x_range.
pop = [Individual(random.uniform(*x_range)) for _ in range(pop_size)]

# Evolution
for gen in range(gen_size):
    print(f"第{gen}次迭代")

    # Evaluate only individuals whose objectives are still unknown.  Elites
    # carried over from the previous generation keep their x unchanged, so
    # re-fitting the model for them would only repeat the same computation.
    for ind in pop:
        if any(obj is None for obj in ind.objs):
            ind.evaluate()

    # Non-dominated sorting
    fronts = _non_dominated_sort(pop)

    # Environmental selection: take whole fronts while they fit, then fill
    # the remaining slots from the next front by descending crowding distance.
    pop_for_cross = []
    for front in fronts:
        _assign_crowding_distance(front)
        room = pop_size - len(pop_for_cross)
        if room <= 0:
            break
        if len(front) <= room:
            pop_for_cross.extend(front)
        else:
            front.sort(key=lambda ind: ind.distance, reverse=True)
            pop_for_cross.extend(front[:room])
            break

    # Crossover.  Parents are kept in a list because random.sample() rejects
    # sets on Python 3.11+.
    new_pop = []
    while len(new_pop) < len(pop_for_cross):
        if len(pop_for_cross) >= 2:
            x1, x2 = random.sample(pop_for_cross, 2)
        else:
            # A single parent cannot be paired; use it for both roles.
            x1 = x2 = pop_for_cross[0]
        if random.random() < pc:
            # Midpoint of the parents, perturbed by up to their distance.
            new_x = (x1.x + x2.x) / 2 + abs(x1.x - x2.x) * random.uniform(-1, 1)
        else:
            # No crossover: the child copies a parent.  This keeps the loop
            # finite even when pc is 0.
            new_x = x1.x
        new_pop.append(Individual(_clip(new_x)))

    # Mutation (children only, so the elites stay untouched)
    for ind in new_pop:
        if random.random() < pm:
            ind.x = _clip(ind.x + random.uniform(-1, 1) * (x_range[1] - x_range[0]))

    # Next generation = children + the elites that produced them.
    pop = new_pop + pop_for_cross
# Score the final population; children created in the last generation have
# not been evaluated yet.
for ind in pop:
    ind.evaluate()

# Keep every individual that no other individual beats on objs[0].
pareto_front = {
    cand
    for cand in pop
    if not any(rival.objs[0] < cand.objs[0] for rival in pop)
}

print("Pareto front:")
for ind in pareto_front:
    print(f"x={ind.x:.4f}, y1={ind.objs[0]:.4f}")