# Import TensorFlow and the other required libraries
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Enable GPU memory growth so TensorFlow does not grab all GPU memory upfront;
# guard against machines without a GPU, where the list would be empty
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], True)
# Download and explore the dataset
import pathlib
dataset_url = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
data_dir = pathlib.Path(data_dir)
# Count the images in the dataset
image_count = len(list(data_dir.glob('*/*.jpg')))
print(image_count)
# Display a sample image from the dataset
# roses = list(data_dir.glob('roses/*'))
# img = PIL.Image.open(str(roses[0]))
# plt.figure("Image")  # window title
# plt.imshow(img)
# plt.axis('on')  # show the axes; use 'off' to hide them
# plt.title('image')  # figure title
# # plt.show() is required, otherwise nothing is displayed
# plt.show()
# Create the training and validation datasets
batch_size = 32
img_height = 180
img_width = 180
train_gen = ImageDataGenerator(rescale=1 / 255.,
                               validation_split=0.2)
train_ds = train_gen.flow_from_directory(
    data_dir,
    subset='training',
    seed=123,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse'
)
val_ds = train_gen.flow_from_directory(
    data_dir,
    subset='validation',
    seed=123,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse'
)
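# Note: recent TF versions provide tf.keras.utils.image_dataset_from_directory,
# which the current official tutorial uses instead of the now-deprecated
# ImageDataGenerator. A minimal equivalent sketch, kept commented out so it
# does not override train_ds above; labels default to int encoding (matching
# class_mode='sparse'), and rescaling would then move into a
# layers.Rescaling(1. / 255) layer at the top of the model:
# train_ds = tf.keras.utils.image_dataset_from_directory(
#     data_dir,
#     validation_split=0.2,
#     subset='training',
#     seed=123,
#     image_size=(img_height, img_width),
#     batch_size=batch_size
# )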
# View the class names; flow_from_directory infers them from the subdirectory
# names, so derive them from the generator instead of hardcoding the list
class_names = sorted(train_ds.class_indices, key=train_ds.class_indices.get)
# Build the baseline model
num_classes = len(class_names)
print(num_classes)
model = Sequential([
    layers.Input(shape=(img_height, img_width, 3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)
])
# Compile the model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
# Inspect the model layers
model.summary()
# Train the model
epochs = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs
)
# Visualize the training results
# acc
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# loss
loss = history.history['loss']
val_loss = history.history['val_loss']
# epochs
epochs_range = range(epochs)
# Plot the curves
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
Based on the official TensorFlow tutorial.

20 epochs is too few: increase the epoch count, or swap in a different network architecture or extra data preprocessing to improve further.

Judging from the loss curves, the training loss is still falling and the validation loss is also still decreasing, which shows the model is still fitting the data. You can therefore train for more epochs, adjust the batch_size, or try a different optimizer or loss function; for classification tasks a cross-entropy loss is recommended (this script already uses sparse categorical cross-entropy).
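As a concrete sketch of those suggestions: augment the training data through ImageDataGenerator, add a Dropout layer before the classifier head, and train longer, while keeping the un-augmented val_ds for validation. The augmentation ranges, the Dropout rate, and the epoch count below are illustrative assumptions, not tuned values.

# Augmented training generator; the transform ranges are illustrative
# assumptions, not tuned hyperparameters
aug_gen = ImageDataGenerator(rescale=1 / 255.,
                             validation_split=0.2,
                             rotation_range=20,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.1,
                             horizontal_flip=True)
train_ds_aug = aug_gen.flow_from_directory(
    data_dir,
    subset='training',
    seed=123,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse'
)
# Same convolutional stack as above, plus Dropout to curb overfitting
model_aug = Sequential([
    layers.Input(shape=(img_height, img_width, 3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)
])
model_aug.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
# Train longer, as suggested above; validation still uses the
# rescale-only val_ds so the metrics stay comparable
history_aug = model_aug.fit(train_ds_aug, validation_data=val_ds, epochs=30)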