粒子群算法(PSO)优化随机森林超参数python实现

Ilingis 发表于 2021-11-28 12:46

PSO-RF算法首先对超参数 n_estimators、max_depth 随机初始化一群粒子，计算相应的适应度值，并通过不断更新粒子的速度和位置来达到最佳的适应度值，从而得到最佳RF模型的超参数 n_estimators、max_depth，进而提高RF模型的收敛速度及预测性能。（注意：下文示例代码实际寻优的两个参数是 n_estimators 和 max_features。）粒子群优化算法在解决实际问题中展现了其实现容易、精度高、收敛速度快等优点。

样本数据：
（附件已经上传）
trainX：shape为 n x 132 的特征矩阵
trainY：长度为 n 的 list
废话不多说，直接上代码。

# -*- coding: utf-8 -*-
"""
Created on Sat Dec 1 17:00:23 2018

@author: lj
"""
import os
import pickle
import random
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn import model_selection
from sklearn import svm

## 1.加载数据
from sklearn.ensemble import RandomForestClassifier
from tqdm import tqdm

# Sample-count suffix used in the default pickle file names.
max_count = 300


def load_data(data_dir="I:/样本数据_非60m点/pickle", count=None):
    """Load the training features and labels from two pickle files.

    The files read are ``<data_dir>/features_all_<count>.pkl`` and
    ``<data_dir>/labels_all_<count>.pkl``.

    Args:
        data_dir: Directory that contains the two pickle files.
        count: Number embedded in the file names; defaults to the
            module-level ``max_count``.

    Returns:
        A ``(X, y)`` tuple: the unpickled features and the unpickled labels.

    Raises:
        OSError: If either file cannot be opened.
    """
    if count is None:
        count = max_count

    # SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
    # Only load pickle files that come from a trusted source.
    with open(f"{data_dir}/labels_all_{count}.pkl", "rb") as f:
        labels = pickle.load(f)
    with open(f"{data_dir}/features_all_{count}.pkl", "rb") as f:
        features = pickle.load(f)

    return features, labels

## 2. PSO优化算法
class PSO(object):
    """Particle swarm search over two random-forest hyper-parameters.

    A particle is a position vector of length ``particle_dim``.  When a
    particle is scored, component 0 is used as ``n_estimators`` and
    component 1 as ``max_features`` of a ``RandomForestClassifier``, so
    ``fitness`` and ``main`` need ``particle_dim >= 2``.

    The training data is read from the module-level names ``trainX`` and
    ``trainY``; they must be defined before ``fitness`` or ``main`` runs.
    """

    def __init__(self, particle_num, particle_dim, iter_num, c1, c2, w, max_value, min_value):
        """Store the swarm configuration.

        Args:
            particle_num (int): Number of particles in the swarm.
            particle_dim (int): Length of each position vector, i.e. the
                number of parameters being searched.
            iter_num (int): Number of iterations to run.
            c1 (float): Cognitive factor; weight of the pull towards a
                particle's own best position (pbest).  Commonly 2.0.
            c2 (float): Social factor; weight of the pull towards the swarm's
                best position (gbest).  Commonly 2.0.
            w (float): Inertia weight applied to the previous velocity.
            max_value (float): Upper bound of every parameter.
            min_value (float): Lower bound of every parameter.
        """
        self.particle_num = particle_num
        self.particle_dim = particle_dim
        self.iter_num = iter_num
        self.c1 = c1
        self.c2 = c2
        self.w = w
        self.max_value = max_value
        self.min_value = min_value

    # ------------------------------------------------------------------
    # 2.1 Swarm initialisation
    # ------------------------------------------------------------------
    def swarm_origin(self):
        """Create the initial swarm.

        Returns:
            tuple: ``(particle_loc, particle_dir)``.  Both are lists of
            ``particle_num`` lists of ``particle_dim`` floats.  Positions are
            drawn uniformly from ``[min_value, max_value]`` and velocities
            from ``[0, 1)``.
        """
        span = self.max_value - self.min_value
        particle_loc = []
        particle_dir = []
        for _ in range(self.particle_num):
            position = []
            velocity = []
            for _ in range(self.particle_dim):
                # One draw for the position, then one for the velocity.
                position.append(random.random() * span + self.min_value)
                velocity.append(random.random())
            particle_loc.append(position)
            particle_dir.append(velocity)

        return particle_loc, particle_dir

    # ------------------------------------------------------------------
    # 2.2 Fitness evaluation
    # ------------------------------------------------------------------
    def _evaluate(self, position):
        """Return the mean 3-fold CV accuracy of the forest described by ``position``."""
        print(f"particle_loc 0 {position[0]} particle_loc 1 {position[1]}")

        # Positions are continuous, the forest needs positive integers.
        model = RandomForestClassifier(n_estimators=max(1, int(position[0])),
                                       max_features=max(1, int(position[1])),
                                       n_jobs=-1, verbose=1)
        cv_scores = model_selection.cross_val_score(model, trainX, trainY, cv=3, scoring='accuracy')
        return cv_scores.mean()

    def fitness(self, particle_loc):
        """Score every particle and report the best one of this round.

        Args:
            particle_loc (list): Positions of all particles.

        Returns:
            tuple: ``(fitness_value, current_fitness, current_parameter)``
            where ``fitness_value`` lists the score of each particle,
            ``current_fitness`` is the highest score (``0.0`` if no score
            exceeds zero) and ``current_parameter`` is a copy of the matching
            position (``[]`` if no score exceeds zero).
        """
        # 1. Fitness = mean accuracy of a 3-fold cross-validated random forest.
        fitness_value = [self._evaluate(particle_loc[i]) for i in range(self.particle_num)]

        # 2. Best score of the current swarm and the position that produced it.
        current_fitness = 0.0
        current_parameter = []
        for position, score in zip(particle_loc, fitness_value):
            if score > current_fitness:
                current_fitness = score
                # Copy so later position updates cannot alter the record.
                current_parameter = list(position)

        return fitness_value, current_fitness, current_parameter

    # ------------------------------------------------------------------
    # 2.3 Position update
    # ------------------------------------------------------------------
    def update(self, particle_loc, particle_dir, gbest_parameter, pbest_parameters):
        """Advance every particle by one step.

        The velocity rule is
        ``v = w*v + c1*r1*(pbest - x) + c2*r2*(gbest - x)`` with a fresh
        random factor per component.  After the move, every dimension is
        min-max rescaled so that the swarm spans ``[min_value, max_value]``;
        a dimension in which all particles coincide is clipped to the bounds
        instead, because it cannot be rescaled.

        Args:
            particle_loc (list): Current positions.
            particle_dir (list): Current velocities.
            gbest_parameter (list): Best position found by the swarm.
            pbest_parameters (list): Best position found by each particle.

        Returns:
            tuple: ``(particle_loc, particle_dir)`` as new lists of lists of
            floats.  The arguments are not modified.
        """
        if len(particle_loc) == 0:
            return [], []

        loc = np.array(particle_loc, dtype=float)
        vel = np.array(particle_dir, dtype=float)
        pbest = np.asarray(pbest_parameters, dtype=float)
        gbest = np.asarray(gbest_parameter, dtype=float)

        # 1. New velocity and position of every particle.
        for i in range(self.particle_num):
            r1 = np.array([random.random() for _ in range(self.particle_dim)])
            r2 = np.array([random.random() for _ in range(self.particle_dim)])
            vel[i] = (self.w * vel[i]
                      + self.c1 * r1 * (pbest[i] - loc[i])
                      # The social term is measured from the position, not
                      # from the velocity.
                      + self.c2 * r2 * (gbest - loc[i]))
            loc[i] = loc[i] + vel[i]

        # 2. Bring the positions back into [min_value, max_value].
        lowest = loc.min(axis=0)
        highest = loc.max(axis=0)
        target_span = self.max_value - self.min_value
        for j in range(loc.shape[1]):
            span = highest[j] - lowest[j]
            if span > 0:
                loc[:, j] = (loc[:, j] - lowest[j]) / span * target_span + self.min_value
            else:
                # Zero spread: rescaling would divide by zero.
                loc[:, j] = np.clip(loc[:, j], self.min_value, self.max_value)

        return loc.tolist(), vel.tolist()

    # ------------------------------------------------------------------
    # 2.4 Convergence plot
    # ------------------------------------------------------------------
    def plot(self, results):
        """Plot the best fitness per iteration.

        Args:
            results (list): Best fitness recorded after each iteration; at
                most the first ``iter_num`` entries are drawn.
        """
        values = list(results[:self.iter_num])
        iterations = list(range(1, len(values) + 1))
        plt.plot(iterations, values)
        plt.xlabel('Number of iteration', size=15)
        plt.ylabel('Value of CV', size=15)
        plt.title('PSO_RF parameter optimization')
        plt.show()

    def _save_iteration_figures(self, i, fitness_values, best_fitness, particle_loc):
        """Save and show the per-particle fitness curve and the position scatter of iteration ``i``."""
        # Fitness of every particle.
        plt.figure()
        plt.plot(list(range(0, len(fitness_values))), fitness_values)
        plt.title(i)
        plt.savefig(f"fig/{i}_适应度_{best_fitness}.png")
        plt.show()
        plt.close()  # avoid stacking later plots on the same canvas

        # Positions of every particle (first two dimensions).
        a = np.array(particle_loc)
        x = a[:, 0].tolist()
        y = a[:, 1].tolist()
        plt.figure()
        plt.scatter(x, y)
        plt.title(i)
        plt.savefig(f"fig/{i}_粒子位置_{best_fitness}.png")
        plt.show()
        plt.close()

    # ------------------------------------------------------------------
    # 2.5 Driver
    # ------------------------------------------------------------------
    def main(self):
        """Run the optimisation.

        Side effects: prints progress, writes two figures per iteration into
        ``fig/``, shows the convergence plot and pickles the per-iteration
        log ``(iteration, *gbest_parameter, best_fitness)`` to ``log.pkl``.

        Returns:
            tuple: ``(gbest_parameter, best_fitness)``.
        """
        results = []
        log = []
        best_fitness = 0.0

        # 1. Initial swarm.
        particle_loc, particle_dir = self.swarm_origin()

        # 2. Best-so-far records.
        gbest_parameter = [0.0] * self.particle_dim
        # Every particle starts with its own position as its personal best.
        pbest_parameters = [list(position) for position in particle_loc]
        pbest_fitness = [0.0] * self.particle_num

        os.makedirs('fig', exist_ok=True)

        # 3. Iterate.
        for i in tqdm(range(self.iter_num)):
            # 3.1 Score the current swarm.
            current_fitness_value, current_best_fitness, current_best_parameter = self.fitness(particle_loc)

            # 3.2 Refresh pbest (against each particle's historical best, not
            #     merely the previous iteration) and gbest.
            for j in range(self.particle_num):
                if current_fitness_value[j] > pbest_fitness[j]:
                    pbest_fitness[j] = current_fitness_value[j]
                    pbest_parameters[j] = list(particle_loc[j])
            if current_best_fitness > best_fitness:
                best_fitness = current_best_fitness
                gbest_parameter = list(current_best_parameter)

            print('iteration is :', i + 1, ';Best parameters:', gbest_parameter, ';Best fitness', best_fitness)
            results.append(best_fitness)

            # 3.3 Move the swarm.
            particle_loc, particle_dir = self.update(particle_loc, particle_dir, gbest_parameter, pbest_parameters)

            # 3.4 Log and plot.
            log.append((i, *gbest_parameter, best_fitness))
            print(particle_loc)
            self._save_iteration_figures(i, current_fitness_value, current_best_fitness, particle_loc)

        # 4. Report.  ``results`` is already non-decreasing, no sort needed.
        self.plot(results)
        print('Final parameters are :', gbest_parameter)
        with open("log.pkl", "wb") as f:
            pickle.dump(log, f)

        return gbest_parameter, best_fitness

if __name__ == '__main__':
    # Script entry point: load the data, configure the swarm, run the search.
    print('----------------1.Load Data-------------------')
    # trainX / trainY are module-level names that PSO.fitness reads.
    trainX, trainY = load_data()
    print('----------------2.Parameter Seting------------')
    particle_num = 100  # swarm size
    particle_dim = 2    # two parameters are searched
    iter_num = 7        # number of iterations
    c1 = 2              # cognitive factor (pull towards pbest)
    c2 = 2              # social factor (pull towards gbest)
    w = 0.8             # inertia weight
    max_value = 100     # upper bound of both parameters
    min_value = 1       # lower bound of both parameters
    print('----------------3.PSO_RF-----------------')
    pso = PSO(particle_num, particle_dim, iter_num, c1, c2, w, max_value, min_value)
    pso.main()

输出：

样本数据：
样例数据。仅供测试
features_all_300.pkl
1.3M
· 百度网盘

labels_all_300.pkl
4.8K
· 百度网盘

页: [1]

Unity开发者联盟's Archiver

粒子群算法(PSO)优化随机森林超参数python实现