Kaggle Automobile Customer: machine learning with a decision tree and LightGBM

I'm a machine-learning beginner and have been practicing recently. With both a decision tree and a LightGBM classifier the accuracy stays below 0.5, so I'd appreciate any pointers.
Data: https://www.kaggle.com/akashdeepkuila/automobile-customer
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

customer_df = pd.read_csv('train-set.csv')
customer_df.head(5)

# Inspect which features contain missing values
print('Shape:', customer_df.shape)
print('\nFeature types:\n', customer_df.dtypes.value_counts())
isnull_series = customer_df.isnull().sum()
print('\nNull counts:\n', isnull_series[isnull_series > 0].sort_values(ascending=False))

# Fill numeric missing values with the column mean
# (numeric_only=True is needed on recent pandas, where DataFrame.mean() no longer skips object columns)
customer_df.fillna(customer_df.mean(numeric_only=True), inplace=True)
null_column_count = customer_df.isnull().sum()[customer_df.isnull().sum() > 0]
print('## Types of columns that still contain nulls:\n', customer_df.dtypes[null_column_count.index])

# Fill remaining missing values in the categorical (object) columns with a placeholder
for c in customer_df.columns:
    if customer_df[c].dtype == 'object':
        customer_df[c] = customer_df[c].fillna('N')

# Binary-encode Gender / Married / Graduated and map the target segments A-D to 1-4
customer_df['Gender'] = customer_df['Gender'].apply(lambda x: 1 if x == 'Male' else 0)
customer_df['Married'] = customer_df['Married'].apply(lambda x: 1 if x == 'Yes' else 0)
customer_df['Graduated'] = customer_df['Graduated'].apply(lambda x: 1 if x == 'Yes' else 0)
customer_df['Segmentation'] = customer_df['Segmentation'].apply(lambda x: 1 if x == 'A' else (2 if x == 'B' else (3 if x == 'C' else 4)))

# Separate the target and drop the ID column
y = customer_df['Segmentation']
customer_df.drop(['CustomerID', 'Segmentation'], axis=1, inplace=True)

# Integer-encode the remaining categorical features
features = ['Profession', 'SpendingScore', 'Category']
for feature in features:
    le = LabelEncoder()
    customer_df[feature] = le.fit_transform(customer_df[feature])
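
Side note, not part of the pipeline above: LabelEncoder gives these nominal categories an arbitrary integer order. Tree-based models usually cope with that, but for comparison a one-hot version of the same three columns is easy to build; this is only a sketch, and the rest of the code keeps using the label-encoded frame.

# Sketch: one-hot encode the nominal columns instead of label-encoding them
customer_ohe_df = pd.get_dummies(customer_df, columns=['Profession', 'SpendingScore', 'Category'])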

# Scale every feature to the [0, 1] range
scaler = MinMaxScaler()
customer_scaled = scaler.fit_transform(customer_df)
# Rebuild a DataFrame from the scaled array, reusing the original column names
customer_scaled_df = pd.DataFrame(data=customer_scaled, columns=customer_df.columns)

# Note: X drops the last column ('Category'), but the splits below actually use
# the full customer_scaled_df / customer_df, so X is currently unused.
X = customer_scaled_df.iloc[:, :-1]
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings

warnings.filterwarnings('ignore')

# Train and evaluate a decision tree on an 80/20 split of the scaled features
dt_clf = DecisionTreeClassifier(random_state=156)
X_train, X_test, y_train, y_test = train_test_split(customer_scaled_df, y, test_size=0.2, random_state=11)
dt_clf.fit(X_train, y_train)
pred = dt_clf.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print('Decision tree accuracy: {0:.4f}'.format(accuracy))
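
A single 80/20 split can be noisy, so as an extra check (not in the pipeline above, just a sketch using the same customer_scaled_df and y) a cross-validated score gives a more stable accuracy estimate:

# Sketch: 5-fold cross-validated accuracy for the same decision tree
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(DecisionTreeClassifier(random_state=156), customer_scaled_df, y, cv=5, scoring='accuracy')
print('Decision tree CV accuracy: {0:.4f} (+/- {1:.4f})'.format(cv_scores.mean(), cv_scores.std()))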

from lightgbm import LGBMClassifier, early_stopping, log_evaluation

# Carve a validation set out of the training portion for early stopping
X_train, X_test, y_train, y_test = train_test_split(customer_df, y, test_size=0.2, random_state=156)
X1_train, X1_val, y1_train, y1_val = train_test_split(X_train, y_train, test_size=0.2, random_state=156)

lgbm_wrapper = LGBMClassifier(n_estimators=400)
evals = [(X1_val, y1_val)]
# Note: lightgbm >= 4 removed the early_stopping_rounds / verbose arguments from fit();
# the callbacks below are the equivalent.
lgbm_wrapper.fit(X1_train, y1_train, eval_metric='logloss', eval_set=evals,
                 callbacks=[early_stopping(stopping_rounds=100), log_evaluation(period=50)])
lgbm_preds = lgbm_wrapper.predict(X_test)

accuracy = accuracy_score(y_test, lgbm_preds)
print('LightGBM accuracy: {0:.4f}'.format(accuracy))
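
For context on the "below 0.5" number: the four segments look roughly balanced (worth confirming with y.value_counts()), so random guessing sits near 0.25. A quick sketch, not part of the pipeline above, of a majority-class baseline on the same LightGBM train/test split:

# Sketch: majority-class baseline for comparison with the models above
from sklearn.dummy import DummyClassifier
dummy = DummyClassifier(strategy='most_frequent')
dummy.fit(X_train, y_train)
print('Majority-class baseline accuracy: {0:.4f}'.format(dummy.score(X_test, y_test)))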

Could anyone share working code or help me fix mine? Many thanks!
