################### Shuffle the data
import random
import numpy as np
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import f1_score
from xgboost import XGBClassifier

# Shuffle an index list so features and labels stay aligned
a = list(range(len(labels)))
random.shuffle(a)
shuffled_features = [features[i] for i in a]
shuffled_labels = [labels[i] for i in a]

########## Split into training and test sets
# (train_test_split shuffles on its own, so the manual shuffle above is
# redundant for this step; it is kept for the K-fold split further down.)
X = shuffled_features
y = shuffled_labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(len(X_train), len(X_test), len(y_train), len(y_test))
# Check the class balance of each split (the labels are the strings '0' and '1')
print(y_train.count('0'), y_train.count('1'), y_test.count('0'), y_test.count('1'))
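########## Optional: stratified split (added sketch, not in the original)
# If the class counts printed above are skewed, train_test_split can keep the
# '0'/'1' ratio the same in both splits via its stratify argument. The *_s
# names below are illustrative only.
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y)
print(y_train_s.count('0'), y_train_s.count('1'), y_test_s.count('0'), y_test_s.count('1'))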
################## K-fold cross-validation of the model
lg_scores_1 = []  # F1 per fold with default hyperparameters
lg_scores_2 = []  # F1 per fold after grid search
X_all = np.array(shuffled_features)
# Recent xgboost releases require integer class labels, so cast the
# string labels '0'/'1' to int up front.
y_all = np.array(shuffled_labels).astype(int)
kf = KFold(n_splits=5)
for train_index, test_index in kf.split(X_all):
    # Split into training and test folds
    X_train, X_test = X_all[train_index], X_all[test_index]
    y_train, y_test = y_all[train_index], y_all[test_index]

    ########################################## Train the XGBoost model
    # (The original comment said "logistic regression", but the model used here
    # is XGBClassifier; the lg_* list names are left over from that version.)
    model = XGBClassifier()  # renamed from "xgb", which shadowed the xgboost module
    model.fit(X_train, y_train)
    # Predict on the test fold
    y_pred = model.predict(X_test)
    # Evaluate
    f1_lg1 = f1_score(y_test, y_pred, average='weighted')
    lg_scores_1.append(f1_lg1)
    ####################### Grid search over hyperparameters
    # 'C' is a LogisticRegression parameter and means nothing to XGBClassifier,
    # so the original grid could not work; tune learning_rate instead. The
    # scoring argument was the undefined name `fscore`; 'f1_weighted' matches
    # the f1_score(average='weighted') used above.
    parameters = {'learning_rate': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]}
    clf = GridSearchCV(XGBClassifier(), parameters, cv=5, scoring='f1_weighted')  # search for the best parameters
    clf.fit(X_train, y_train)
    print(clf.best_params_)
    y_pred = clf.predict(X_test)
    # Evaluate
    f1_lg2 = f1_score(y_test, y_pred, average='weighted')
    lg_scores_2.append(f1_lg2)
    print(f1_lg1, f1_lg2)  # before tuning vs. after tuning
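####################### Summarize the folds (added suggestion)
# The two lists collect one F1 score per fold but were never aggregated in the
# original; the mean over folds is the usual single number to report.
print('mean F1, default params:', np.mean(lg_scores_1))
print('mean F1, after tuning:  ', np.mean(lg_scores_2))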
This is my first write-up and I'm still pretty confused; if anything in the code looks odd, corrections are welcome.
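One simplification worth noting: the un-tuned half of the K-fold loop can be collapsed into a single scikit-learn call. This is just a sketch of the equivalent, reusing the X_all and y_all arrays from above:

from sklearn.model_selection import cross_val_score
# One F1 score per fold, same 5-fold setup as the manual loop
scores = cross_val_score(XGBClassifier(), X_all, y_all, cv=5, scoring='f1_weighted')
print(scores, scores.mean())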