import matplotlib.pyplot as plt
import pandas
from sklearn import tree
import pydotplus
from sklearn.tree import DecisionTreeClassifier
import matplotlib.image as pltimg
# --- Sanity-check input: dump the raw text file ------------------------
# FIX(review): the original leaked the file handle (open() without close);
# a context manager guarantees the file is closed.
with open("test1.txt", "r") as f:
    print(f.read())

# Load the training data.
df = pandas.read_csv("test_csv.csv")
print(df)

# Encode the categorical columns as integers — sklearn decision trees
# only accept numeric feature/target values.
d1 = {'UK': 0, 'USA': 1, 'N': 2}
df['Nationality'] = df['Nationality'].map(d1)
d2 = {'YES': 1, 'NO': 0}
df['Go'] = df['Go'].map(d2)
print(df)

# Feature matrix X and target vector y.
features = ['Age', 'Experience', 'Rank', 'Nationality']
X = df[features]
y = df['Go']
print(X)
print(y)

# Fit an unconstrained decision tree (default Gini criterion, no depth
# limit) — the tree grows until every leaf is pure, so features that
# never improve purity simply won't appear in the plot.
dtree = DecisionTreeClassifier()
dtree = dtree.fit(X, y)

# Export the fitted tree as Graphviz DOT text, render it to a PNG via
# pydotplus, then display the image with matplotlib.
data = tree.export_graphviz(dtree, out_file=None, feature_names=features)
graph = pydotplus.graph_from_dot_data(data)
graph.write_png('mydecisiontree.png')

img = pltimg.imread('mydecisiontree.png')
plt.imshow(img)  # FIX(review): dropped the unused `impplot` binding
plt.show()
# Q: 图1是我自己运行出来的，图2是教程运行出来的。可以看出我自己的决策树只对两个特征进行了判断，
# 代码是照着教程写的，会是什么原因造成的呢？谢谢！
# A: 这与数据本身有关：前面的特征可能已经把样本划分得足够纯净。比如 Rank 已经把数据划分干净了，
# 基尼指数在剩余特征上无法再产生有效划分，就会出现决策树只用到部分特征的情况。