在跑遗传算法的过程中,运行结果全是nan,但是源文件中没有nan,并且损失值与迭代次数的函数图也正常,这是为什么
该算法的目的是对一系列数据进行神经网络拟合,原数据包含若干输入参数和一个输出参数,输入参数和输出参数之间包含一定的关系,然后使用遗传算法来预测输入参数的最小值,同时预测当输出参数为最小值时,各个输入参数的值
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from geneticalgorithm import geneticalgorithm as ga
from tensorflow.keras import layers
data = pd.read_excel('D:/test.xlsx', header=None)
X = data.iloc[:, :-1] # input parameters
y = data.iloc[:, -1] # output parameters
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 定义约束(设置定义域,可以不看,只需知道输入参数有离散型定义域和连续型定义域即可)
def check_constraints(params):
# 列1到9上的约束(0-索引)
discrete_bounds = [
[0, 0.67, 1, 1.2, 999],
[0, 1],
[0, 0.67, 1, 1.2, 999],
[0, 1],
[500, 1000, 2000, 3000, 7000],
[0, 13, 15, 20, 24, 28, 36],
[0.67, 1.2, 2],
[1.5, 2, 3],
[1000, 2000, 7000, 12000]
]
for i in range(9):
if params[i] not in discrete_bounds[i]:
return False
# 列10到40上的约束(0-索引)
if np.any(params[9:33] < 0) or np.any(params[9:33] > 100):
return False
return True
# 定义适应度函数
def fitness_function(params):
if not check_constraints(params):
return 1e9 # 违反约束的惩罚
input_params = np.array(params).reshape(1, -1)
output = model.predict(input_params).flatten()[0]
if np.isnan(output):
print("Warning: NaN encountered in fitness function")
print("Input parameters:", params)
return 1e9 # NaN值的惩罚
if output <= 0 or output > 100: # 输出范围约束
return 1e9 # 违反约束的惩罚
else:
return output
def on_generation_callback(ga_instance):
generation = ga_instance.generations_completed
best_solution = ga_instance.best_solution
best_fitness = ga_instance.best_function
print(f"Generation {generation}: Best solution = {best_solution}, Best fitness = {best_fitness}")
# 定义遗传算法参数
algorithm_parameters = {
'max_num_iteration': 4000,
'population_size': 50,
'mutation_probability': 0.1,
'elit_ratio': 0.01,
'crossover_probability': 0.5,
'parents_portion': 0.3,
'crossover_type': 'uniform',
'max_iteration_without_improv': None
}
# 建立神经网络模型
model = tf.keras.Sequential()
ANN_number = 50
model.add(tf.keras.layers.Dense(ANN_number, input_shape=[X.shape[1],], activation='relu'))
model.add(tf.keras.layers.Dense(ANN_number, activation='relu'))
model.add(tf.keras.layers.Dense(ANN_number, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1))
# model.summary() # 查看模型基本信息
model.compile(optimizer='adam', loss='mse') # 优化神经网络模型设置
history = model.fit(X_train, y_train, epochs=100, verbose=0)
# 为遗传算法定义变量边界
varbound = np.vstack((np.array([-np.inf]*40), np.array([np.inf]*40))).T
model_genetic = ga(function=fitness_function, dimension=40,
variable_type='real', variable_boundaries=varbound,
algorithm_parameters=algorithm_parameters)
for generation in range(algorithm_parameters['max_num_iteration']):
model_genetic.run()
best_solution = model_genetic.best_variable
best_fitness = model_genetic.best_function
print(f"Generation {generation+1}: Best solution = {best_solution}, Best fitness = {best_fitness}")
print("最优输入参数: ", model_genetic.output_dict['variable'])
print("最小输出值: ", model_genetic.output_dict['function'])
下面是代码运行结果:
检查自己做完预处理的数据,看下是否存在nan值(比如需要计算0/0和log0的情况)
"""检验下input_data中是否存在nan值"""
input_data = np.array(input_data).reshape([-1,n_input])
# 这里的input_data 是三维数组必须转成2d
input_data_pd = pd.DataFrame(input_data)
if np.any(input_data_pd.isnull()) == True:
print("input data has nan value!")
list_nan = list(map(tuple, np.argwhere(np.isnan(input_data_pd.values))))
print(list_nan)