请问如何更改新数据预测这一行让他读取这几个文件啊


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def get_encoding(file):
    encodings = ['utf-8', 'iso-8859-1', 'cp1252', 'gb2312', 'gbk']
    for e in encodings:
        try:
            pd.read_csv(file, encoding=e, nrows=1)  # 尝试读取文件的第一行，如果没有问题则返回该编码
            return e
        except:
            pass
    return None  # 如果所有编码都失败，返回 None
    

def predict_heart_disease(file_path):
    encoding = get_encoding(file_path)

    if encoding is None:
        print(f"无法确定文件 {file_path} 的编码")
        return

    # 数据加载，跳过解析错误的行
    chunksize = 10 ** 6
    chunks = []
    for chunk in pd.read_csv(file_path, chunksize=chunksize, dtype=str, encoding=encoding, error_bad_lines=False):
        chunks.append(chunk)

    data = pd.concat(chunks, axis=0)

    # 分离特征和目标变量
    if '是否患有心血管疾病' in data.columns:
        y = data['是否患有心血管疾病']
        X = data.drop('是否患有心血管疾病', axis=1)

        # 数据预处理
        le = LabelEncoder()
        for column in X.columns:
            if X[column].dtype == 'object':
                X[column] = le.fit_transform(X[column])
        y = le.fit_transform(y)

        scaler = StandardScaler()
        X = scaler.fit_transform(X)

        # 数据划分
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # 80% 训练集, 20% 测试集

        # 构建模型
        model = Sequential()
        model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        # 模型编译
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        # 模型训练
        model.fit(X_train, y_train, epochs=10, batch_size=32)

        # 模型评估
        score = model.evaluate(X_test, y_test, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        # 保存模型
        model.save("C:\\Users\\1\\Desktop\\heart_disease\\heart_disease_model_{file_path.split('/')[-1]}.h5")

        # 新数据预测
        new_data = [r"C:\Users\1\Desktop\heart_disease\1-首次病程记录.csv", r"C:\Users\1\Desktop\heart_disease\2-日常病程记录.csv",
                  r"C:\Users\1\Desktop\heart_disease\3-出院记录.csv", r"C:\Users\1\Desktop\heart_disease\4-检验记录表.csv",
                  r"C:\Users\1\Desktop\heart_disease\5-检验明细表.csv", r"C:\Users\1\Desktop\heart_disease\6-细菌结果表.csv",
                  r"C:\Users\1\Desktop\heart_disease\7-影像检查报告表.csv"]  # 这个列表需要根据你的实际特征来修改
        new_data = le.transform(new_data)
        new_data = scaler.transform(new_data.reshape(1, -1))
        prediction = model.predict(new_data)
        if prediction[0][0] >= 0.5:
            prediction_result = '有心血管疾病'
        else:
            prediction_result = '无心血管疾病'

        # 返回预测结果
        return prediction_result

# 文件列表
files = [r"C:\Users\1\Desktop\heart_disease\1-首次病程记录.csv", r"C:\Users\1\Desktop\heart_disease\2-日常病程记录.csv",
          r"C:\Users\1\Desktop\heart_disease\3-出院记录.csv", r"C:\Users\1\Desktop\heart_disease\4-检验记录表.csv",
          r"C:\Users\1\Desktop\heart_disease\5-检验明细表.csv", r"C:\Users\1\Desktop\heart_disease\6-细菌结果表.csv",
          r"C:\Users\1\Desktop\heart_disease\7-影像检查报告表.csv", r"C:\Users\1\Desktop\heart_disease\8-输出结果.csv"]

# 用于保存预测结果的列表
prediction_results = []

# 针对每一个文件运行函数】
for file in files:
    print(f"Processing {file}")
    result = predict_heart_disease(file)
    prediction_results.append({r"C:\Users\1\Desktop\result.xlsx": file, '预测结果': result})

# 将预测结果保存为DataFrame
prediction_df = pd.DataFrame(prediction_results)

# 输出预测结果到CSV文件
prediction_df.to_csv('heart_disease_predictions.csv', index=False)

# 输出预测结果到Excel文件
prediction_df.to_excel('heart_disease_predictions.xlsx', index=False)


new_data_files = [r"C:\Users\1\Desktop\heart_disease\1-首次病程记录.csv", 
                  r"C:\Users\1\Desktop\heart_disease\2-日常病程记录.csv",
                  r"C:\Users\1\Desktop\heart_disease\3-出院记录.csv",
                  r"C:\Users\1\Desktop\heart_disease\4-检验记录表.csv",
                  r"C:\Users\1\Desktop\heart_disease\5-检验明细表.csv",
                  r"C:\Users\1\Desktop\heart_disease\6-细菌结果表.csv",
                  r"C:\Users\1\Desktop\heart_disease\7-影像检查报告表.csv"]

for file in new_data_files:
    file = file.replace('\\', '/')  # 将反斜杠替换为正斜杠（如果需要）
    print(f"Processing {file}")
    result = predict_heart_disease(file)
    prediction_results.append({'文件名': file, '预测结果': result})


prediction_df = pd.DataFrame(prediction_results)

prediction_df.to_csv('heart_disease_predictions.csv', index=False)

prediction_df.to_excel('heart_disease_predictions.xlsx', index=False)

参考chatgpt的回答
要实现这一点，可以进行以下更改：

更新predict_heart_disease函数，使其接受文件路径列表作为输入。
修改函数以读取和预处理新数据。
对每个新数据文件进行预测并存储结果。
以下是更新后的代码：

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# ...（保留现有的get_encoding和predict_heart_disease函数）...

def predict_new_data(file_paths):
    # 加载训练好的模型
    model_path = "C:\\Users\\1\\Desktop\\heart_disease\\heart_disease_model.h5"
    model = load_model(model_path)

    # 加载在训练过程中使用的标签编码器和标准化器
    label_encoder_path = "C:\\Users\\1\\Desktop\\heart_disease\\label_encoder.pkl"
    scaler_path = "C:\\Users\\1\\Desktop\\heart_disease\\scaler.pkl"
    with open(label_encoder_path, 'rb') as le_file, open(scaler_path, 'rb') as scaler_file:
        le = pickle.load(le_file)
        scaler = pickle.load(scaler_file)

    # 用于保存预测结果的列表
    prediction_results = []

    for file_path in file_paths:
        # 读取新数据文件
        encoding = get_encoding(file_path)
        if encoding is None:
            print(f"无法确定文件 {file_path} 的编码")
            continue

        new_data = pd.read_csv(file_path, dtype=str, encoding=encoding, error_bad_lines=False)

        # 预处理新数据
        for column in new_data.columns:
            if new_data[column].dtype == 'object':
                new_data[column] = le.transform(new_data[column])
        new_data = scaler.transform(new_data)

        # 进行预测
        predictions = model.predict(new_data)
        prediction_results.append({
            '文件名': file_path,
            '预测结果': '有心血管疾病' if predictions[0][0] >= 0.5 else '无心血管疾病'
        })

    # 返回预测结果
    return prediction_results

# ...（保留其余代码不变）...

在更新的代码中，我们添加了一个新的函数predict_new_data，它接受一个文件路径列表作为输入。在函数内部，我们加载训练好的模型、标签编码器和标准化器。然后，我们遍历文件路径列表，逐个读取文件，预处理数据，并使用加载的模型进行预测。预测结果被追加到prediction_results列表中。

请注意，模型、标签编码器和标准化器应该在训练阶段保存下来。如果你还没有保存它们，你需要重新训练模型，并在预测时保存这些组件以便后续使用。另外，在预测过程中最好处理异常和错误，以避免意外崩溃。此外，如果需要，你可以将预测结果保存到Excel或CSV文件中，就像原始代码中一样。

不知道你这个问题是否已经解决, 如果还没有解决的话:

帮你找了个相似的问题, 你可以看下: https://ask.csdn.net/questions/7641155
这篇博客也不错, 你可以看下如何将文件夹中读取到的图片保存到另一个文件夹
您还可以看一下刘国柱老师的热更新框架设计之热更流程与热补丁视频课程课程中的创建校验文件_递归算法遍历指定路径所有文件思路小节, 巩固相关知识点
除此之外, 这篇博客: 迁移学习的模型训练中的 依据标注类型创建标签映射文件 部分也许能够解决你的问题, 你可以仔细阅读以下内容或跳转源博客中阅读:
新建label_map.pbtxt文件。
如果有两个类型，那么id的编号从1到2，如果有四个类型，那么id就到4
```
item {
   id:1
  name: 'cat'
}
item{
   id:2
  name:'dog'
}
```
放到这里

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^