用 python-docx 合并多个 Word 文档,但只需要每个文档的第一页,其他页都不需要,要怎样才能实现?
import os
from docx import Document
from docxcompose.composer import Composer
from docx import Document as Document_compose
# Paths of every matching file found so far; search() appends to this list.
result = []


def search(path=".", name=""):
    """Recursively collect files under *path* whose file name contains *name*.

    Matching paths are appended to the module-level ``result`` list and
    printed as they are found; nothing is returned.

    Args:
        path: Directory to scan. Sub-directories are scanned recursively.
        name: Substring that a file name must contain to be collected.
            The default empty string matches every file.
    """
    # os.listdir() returns entries in arbitrary order; sort them so the
    # collected list is reproducible from run to run.
    for item in sorted(os.listdir(path)):
        item_path = os.path.join(path, item)
        if os.path.isdir(item_path):
            search(item_path, name)
        elif os.path.isfile(item_path) and name in item:
            # Appending mutates the list in place, so no ``global`` is needed.
            result.append(item_path)
            print(item_path)
# Collect every .docx file under the converted-documents folder.
# The path is built with os.path.join rather than a literal backslash: "\转"
# is an invalid escape sequence in a normal string, and a hard-coded
# backslash only works as a separator on Windows.
search(path=os.path.join("申请报告汇总", "转换后docx文件夹"), name=".docx")
print(result)
# List of documents to merge
files = result
def combine_all_docx(filename_master, files_list, output_path=None):
    """Merge several .docx files into one document and save it.

    The master document is opened first and every remaining entry of
    *files_list* is appended to it through ``Composer``. Documents are
    appended as a whole; no page selection is performed here.

    Args:
        filename_master: Path of the document that the others are appended to.
        files_list: Paths of all documents to merge. The first entry is
            skipped because it is expected to be the master itself.
        output_path: Where to save the merged document. Defaults to
            ``汇总后.docx`` inside the ``申请报告汇总`` folder.
    """
    if output_path is None:
        # Built with os.path.join: the original literal contained "\汇", an
        # invalid escape sequence, and a Windows-only separator.
        output_path = os.path.join("申请报告汇总", "汇总后.docx")

    master = Document_compose(filename_master)
    composer = Composer(master)
    # Start at index 1: files_list[0] is the master, which is already loaded.
    for extra_path in files_list[1:]:
        composer.append(Document_compose(extra_path))
    composer.save(output_path)
# Merge only when the search found something; result[0] would raise
# IndexError on an empty list.
if result:
    combine_all_docx(result[0], result)
else:
    print("No .docx files found; nothing to merge.")
python-docx 库没有这个功能:它只能识别文档中的分页符,而每一页的实际范围是 Word 在排版渲染时才确定的,python-docx 无法根据生成出的文档识别每一页。目前我只知道通过 win32(pywin32 的 win32com 调用 Word)可以实现,可以参考下面这个链接。
https://www.zhihu.com/question/435942687