对目录下的 JSON 文件进行过滤、合并、去重,并转换为 TXT 文件,运行时报错,不知道我错在哪里


>>>
>>>
>>> import os
f merge_and_remove_dup>>> import json
licates(json_data):
    merged_data = {}
    for item in json_data:
        key = item['question']
        value = item[>>>
答,如果问题已存在,则保留第>>> def filter_json(json_data):
    return [{'question': k, 'answer': v} f...     filtered_data = [x for x in json_data if x['size'] <= 700]
output_file_json = "/storage/emulated/0/下载/Doc/merg...     return filtered_data
所有JSON文件
json_fi...
 [file for file in os.listdir(directory) if file.endswith('.json')]

# 存储所有 JSON 数据                            al>>> def merge_and_remove_duplicates(json_data):
(directory, file), 'r') ...     merged_data = {} all_json_data.exte...     for item in json_data:
# 合并并去重问答                                merged_j...         key = item['question']
e_json, 'w') a...         value = item['answer']
...         # 合并问答,如果问题已存在,则保留第一个回答
成...         if key not in merged_data:
...             merged_data[key] = value        ...     return [{'question': k, 'answer': v} for k, v in merged_data.items()]                   ...
>>> # 定义目录路径和输出文件路径
>>> directory = "/storage/emulated/0/下载/Document"                                             >>> output_file_json = "/storage/emulated/0/下载/Doc/merged.json"
>>> output_file_txt = "/storage/emulated/0/下载/Doc/merged.txt"
>>>
>>> # 读取目录下的所有JSON文件
>>> json_files = [file for file in os.listdir(directory) if file.endswith('.json')]
>>>
>>> # 存储所有 JSON 数据
>>> all_json_data = []                          >>>
>>> # 遍历每个 JSON 文件,读取数据并添加到 all_json_data 列表                                   >>> for file in json_files:
...     with open(os.path.join(directory, file), 'r') as f:                                     ...         json_data = json.load(f)
...         all_json_data.extend(json_data)
...
>>> # 过滤大于700m的 JSON 数据
>>> filtered_json = filter_json(all_json_data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>             File "<stdin>", line 2, in filter_json
  File "<stdin>", line 2, in <listcomp>
TypeError: string indices must be integers, not 'str'
>>>                                             >>> # 合并并去重问答
>>> merged_json = merge_and_remove_duplicates(filtered_json)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>           NameError: name 'filtered_json' is not defined. Did you mean: 'filter_json'?                    >>>
>>> # 保存为JSON文件
>>> with open(output_file_json, 'w') as f:      ...     json.dump(merged_json, f)
...
>>> # 保存为TXT文件 (每行一个问答对)
>>> with open(output_file_txt, 'w') as f:
...     for item in merged_json:
...         f.write(f"Question: {item['question']}\n")
...         f.write(f"Answer: {item['answer']}\n")
...         f.write('\n')
...
Traceback (most recent call last):                File "<stdin>", line 3, in <module>           KeyError: 'question'
>>> print("合并和保存已完成。")
合并和保存已完成。
>>>
>>>

img

不知道你这个问题是否已经解决, 如果还没有解决的话:
  • 你可以参考下这个问题的回答, 看看是否对你有帮助, 链接: https://ask.csdn.net/questions/793222
  • 这篇博客也不错, 你可以看下:《目标检测数据集转换 json文件转换为txt文件格式》
  • 除此之外, 博客《json文件批量转为txt文件》中的「3 每个json文件内容」部分也许能够解决你的问题, 你可以仔细阅读以下内容, 或者直接跳转到源博客中阅读:
    [
     {
      "type": 1,
      "x": 1168,
      "y": 639,
      "width": 457,
      "height": 245,
      "segmentation": []
     },
     {
      "type": 1,
      "x": 831,
      "y": 626,
      "width": 77,
      "height": 57,
      "segmentation": []
     },
     {
      "type": 1,
      "x": 810,
      "y": 627,
      "width": 36,
      "height": 48,
      "segmentation": []
     },
     {
      "type": 2,
      "x": 753,
      "y": 628,
      "width": 44,
      "height": 52,
      "segmentation": []
     },
     {
      "type": 1,
      "x": 615,
      "y": 619,
      "width": 31,
      "height": 22,
      "segmentation": []
     }
    ]
    

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^

你这代码粘贴得可够乱的。
你先离开交互式控制台,新建一个 .py 文件,好好在 .py 文件里写代码。