import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this silences every warning for the whole process, including
# library deprecation notices. Narrow the filter if those should stay visible.
warnings.filterwarnings('ignore')
import ssl
# SECURITY NOTE(review): swaps the default HTTPS context factory for the
# unverified one, so HTTPS connections that rely on this default skip
# certificate verification. The dataset URL below is plain http, so this
# override may not be needed for it -- confirm before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context
# Load the training data; the first CSV column is used as the row index.
df_train=pd.read_csv('http://fdp.csmar.com:7092/group1/M00/12/EE/wKhlJmNJityAGXrnABLiIV8SXRM985.csv',index_col=0)
# Summary of the object-dtype (non-numeric) columns, one row per column.
# Printed explicitly: a bare expression statement displays nothing when the
# file is run as a script rather than in an interactive cell.
print(df_train.describe(include=['O']).T)
# Convert a string column to floating-point numbers.
def str_column_to_float(dataset, column):
    """Convert one column of a row-oriented dataset to ``float`` in place.

    Args:
        dataset: Iterable of mutable rows (for example a list of lists or a
            list of dicts). Each row is indexed with ``column`` and the
            converted value is written back into the same row.
            NOTE(review): iterating a pandas DataFrame yields column labels,
            not rows, so a DataFrame should not be passed here directly.
        column: Index or key of the cell to convert in every row.

    Returns:
        None. The rows of ``dataset`` are modified in place.

    Raises:
        ValueError: If a cell cannot be converted. The message names the
            row position, the column and the offending value so the bad
            record can be located immediately.
    """
    for row_index, row in enumerate(dataset):
        value = row[column]
        try:
            # Only strings carry surrounding whitespace to strip; values that
            # are already numeric are passed to float() unchanged.
            row[column] = float(value.strip() if isinstance(value, str) else value)
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"row {row_index}, column {column!r}: "
                f"cannot convert {value!r} to float"
            ) from err
# Quick overview of the loaded frame: dtypes, column names, summary
# statistics and the number of missing values per column.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
df_train.info()
print(df_train.columns)
print(df_train.describe())
print(df_train.isna().sum())
# Separate the features (every column except the last) from the label
# (the last column).
import pandas as pd
X=df_train.iloc[:,:-1]
Y=df_train.iloc[:,-1]
# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,random_state=180)
print('1.1打印各个数据集的形状=')
print(X_train.shape,X_test.shape,Y_train.shape,Y_test.shape)
# The two headers below previously opened with ''' but closed with a single
# quote, which swallowed the following lines into one string and made the
# file a SyntaxError. Plain single-quoted literals restore the intended output.
print('1.2打印训练集中因变量(标签)的各类别数目情况=')
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
print(Y_train.value_counts())
print('1.3打印验证集集中因变量的各类别数目情况=')
print(Y_test.value_counts())
# Pie charts showing how two features of the training split are distributed
# after each is cut into 10 equal-width bins.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Monthly base salary ('月基本工资'): number of training rows per bin.
sa=pd.cut(X_train['月基本工资'].values,10).value_counts()
# Start a fresh figure so this chart never reuses axes from an earlier plot.
plt.figure()
sa.plot(kind='pie')
plt.ylabel('月基本工资')
plt.show()
print('月基本工资的分布情况=')
# The header above announces the distribution; print the bin counts it
# refers to, as every other header in this script does.
print(sa)

# Monthly balance ('月余额'): number of training rows per bin.
storage=pd.cut(X_train['月余额'],10).value_counts()
# A new figure again: if plt.show() did not close the previous figure (for
# example under a non-interactive backend), the second pie would otherwise be
# drawn on top of the first.
plt.figure()
storage.plot(kind='pie')
plt.ylabel('月余额')
plt.show()
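# column =   (NOTE(review): incomplete assignment left in the source; no value was given)
# Question: a string cannot be converted to float -- which line raises the error?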