TensorFlow 1.x:在泰坦尼克号生存预测任务中,使用 feature_column 在训练集上训练时没有问题,但在测试集上计算 acc 时,acc = sess.run([accuracy], feed_dict={x: x_eva, y: y_eva}) 报错:TypeError: unhashable type: 'dict'。为什么会这样?有解决的办法吗?
import os
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import pprint
# tf.feature_column is mainly aimed at categorical (discrete) and continuous
# features; the two lists below name which CSV columns fall in each group.
categorical_columns = ['class', 'sex', 'n_siblings_spouses', 'parch', 'deck', 'embark_town', 'alone']
numeric_columns = ['age', 'fare']

# Read the training split first, then the evaluation split, from the same
# directory.
data_dir = 'tan2'
train_df, eval_df = (
    pd.read_csv(os.path.join(data_dir, csv_name))
    for csv_name in ('train.csv', 'eval.csv')
)

# pop() removes the label column from each frame in place, so train_df and
# eval_df hold only feature columns from here on.
y_train, y_eval = train_df.pop('survived'), eval_df.pop('survived')
# feature_columns acts as a selector over the input dict: it names the columns
# the model consumes and one-hot encodes the categorical ones.

# One indicator (one-hot) column per categorical feature. The vocabulary is the
# set of distinct values that feature takes in the training data.
categorical_feature_columns = [
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            name, train_df[name].unique()))
    for name in categorical_columns
]

# One dense float32 column per numeric feature.
numeric_feature_columns = [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in numeric_columns
]

# Categorical columns come first, then numeric ones.
feature_columns = categorical_feature_columns + numeric_feature_columns
print('feature_columns:', feature_columns)
def make_dataset(data_df, label_df, epochs=10, shuffle=True, batch_size=32):
    """Build a tf.data input pipeline and return its next-element tensors.

    Args:
        data_df: Feature table; converted with ``dict(data_df)`` so each
            column becomes one entry of the features dict.
        label_df: Labels, sliced row by row alongside the features.
        epochs: Number of times the data is repeated.
        shuffle: Whether to shuffle (buffer of 10000) before repeating.
        batch_size: Number of rows per emitted batch.

    Returns:
        The ``get_next()`` result of a one-shot iterator over the batched
        dataset: a ``(features, labels)`` pair with the same structure that
        was passed to ``from_tensor_slices``.
    """
    # Each dataset element pairs one row of features with its label.
    features = dict(data_df)
    dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
    print(dataset)
    if shuffle:
        dataset = dataset.shuffle(10000)
    # Repeat the data `epochs` times, then cut the stream into batches.
    batched = dataset.repeat(epochs).batch(batch_size)
    iterator = batched.make_one_shot_iterator()
    return iterator.get_next()
epochs = 100
batch_size = 32

# Training input: `x` is a Python dict {feature name -> tensor} and `y` is the
# label tensor, both produced by a one-shot iterator over the training set.
x, y = make_dataset(train_df, y_train, epochs=epochs, batch_size=batch_size)

# Evaluation input: a single, unshuffled batch containing the whole evaluation
# set, so one fetch returns every evaluation row exactly once.
x_eva, y_eva = make_dataset(eval_df, y_eval, epochs=1, shuffle=False,
                            batch_size=len(eval_df))

params = {'feature_columns': feature_columns, 'hidden_units': [100, 100], 'n_classes': 2}

# Model: feature-column input layer -> ReLU hidden layers -> class logits.
input_for_next_layers = tf.feature_column.input_layer(x, params['feature_columns'])
for hidden_layer in params['hidden_units']:
    input_for_next_layers = tf.layers.dense(input_for_next_layers, hidden_layer, activation=tf.nn.relu)
logits = tf.layers.dense(input_for_next_layers, params['n_classes'])
predicted_classes = tf.argmax(logits, 1)

loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted_classes, y), tf.float32))
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss, global_step=tf.train.get_global_step())

n_batch = train_df.shape[0] // batch_size

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())

    # Why the original `feed_dict={x: x_eva, y: y_eva}` failed:
    #   * `x` is a Python dict of tensors. feed_dict keys must be individual
    #     (hashable) tensors, so using the dict itself as a key raises
    #     "TypeError: unhashable type: 'dict'".
    #   * feed_dict values must be concrete Python/NumPy values, not tf.Tensor
    #     objects such as `x_eva` / `y_eva`.
    # Fix: evaluate the eval tensors once to get NumPy arrays, then feed every
    # feature tensor of `x` separately. Because all iterator outputs are fed,
    # the evaluation run does not consume a batch from the training iterator.
    x_eva_val, y_eva_val = sess.run([x_eva, y_eva])
    eval_feed = {x[name]: x_eva_val[name] for name in x}
    eval_feed[y] = y_eva_val

    for epoch in range(epochs):
        for step in range(n_batch):
            loss_val, acc_val, _ = sess.run([loss, accuracy, train_op])
        # Metrics of the last training batch of this epoch.
        print(epoch, '/', epochs, 'loss_val:', loss_val, ', acc_val:', acc_val)
        # Accuracy over the full evaluation set with the current weights.
        acc = sess.run(accuracy, feed_dict=eval_feed)
        print(epoch, '/', epochs, ':acc ', acc)