用 python-docx 模块读取 Word 表格内容,结果报错。
程序如下:
# Open the Word document located at doc_Path (defined elsewhere in the script).
mydocumnet = docx.Document(doc_Path)
# Work on the first table found in the document.
mytable = mydocumnet.tables[0]
print("表格行数", len(mytable.rows))
for myRow in mytable.rows:
    print(type(myRow))
    # NOTE: `myRow.cells` asks the table for its column count, which
    # python-docx reads from the table's <w:tblGrid> element. When the file
    # lacks that element this line raises InvalidXmlError, which is exactly
    # the failure shown in the traceback below.
    for cell in myRow.cells:
        print(cell.text)
# Writes the document back to the same path, although nothing above
# modifies it.
mydocumnet.save(doc_Path)
报错信息如下:
F:\Python3.11.3\python.exe E:\pythonProject1\main.py
Hi, PyCharm
表格行数 5
<class 'docx.table._Row'>
Traceback (most recent call last):
File "E:\pythonProject1\main.py", line 21, in <module>
for cell in myRow.cells:
^^^^^^^^^^^
File "F:\Python3.11.3\Lib\site-packages\docx\table.py", line 401, in cells
return tuple(self.table.row_cells(self._index))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "F:\Python3.11.3\Lib\site-packages\docx\table.py", line 103, in row_cells
column_count = self._column_count
^^^^^^^^^^^^^^^^^^
File "F:\Python3.11.3\Lib\site-packages\docx\table.py", line 185, in _column_count
return self._tbl.col_count
^^^^^^^^^^^^^^^^^^^
File "F:\Python3.11.3\Lib\site-packages\docx\oxml\table.py", line 140, in col_count
return len(self.tblGrid.gridCol_lst)
^^^^^^^^^^^^
File "F:\Python3.11.3\Lib\site-packages\docx\oxml\xmlchemy.py", line 513, in get_child_element
raise InvalidXmlError(
docx.oxml.exceptions.InvalidXmlError: required ``<w:tblGrid>`` child element not present
进程已结束,退出代码1
你参考一下我这个,如有帮助请给个采纳。
我这个是读取 Word 中的表格,以及 Word 中指定信息的。
如有帮助请给个采纳,谢谢!
# 导入库
import os
from docx import Document
import pandas as pd
# Folder that holds the Word files to read; the Excel export is written
# into the same folder.
folder_path = "wordData"
# Text that marks the paragraph carrying the person's name.
# NOTE(review): the marker ends with an ASCII colon; confirm the documents
# do not use the full-width colon instead.
name_marker = '姓 名:'


def _read_table_rows(document):
    """Return the stripped cell texts of every usable table row.

    Tables with fewer than two rows are skipped because they carry no data
    beneath their first row. Each returned item is one row, given as a list
    of cell strings.
    """
    collected = []
    for table in document.tables:
        rows = table.rows
        if len(rows) < 2:
            continue
        for row in rows:
            data = [cell.text.strip() for cell in row.cells]
            # A row without any cell produces an empty list and is dropped.
            if data:
                collected.append(data)
    return collected


def _find_name(document):
    """Return the name that follows the marker, or None when it is absent.

    Every paragraph is printed until the marker paragraph is reached. The
    name is the text between the marker and the next colon (or the end of
    the paragraph), so a colon appearing earlier in the same paragraph no
    longer shifts the result.
    """
    for paragraph in document.paragraphs:
        text = paragraph.text
        print(text)
        if name_marker in text:
            after_marker = text.split(name_marker, 1)[1]
            return after_marker.split(":", 1)[0].strip()
    return None


# Table rows per file, and the name found in the same file (parallel lists).
tableList = []
name_list = []
# Sorted so the export order does not depend on the directory listing order.
for filename in sorted(os.listdir(folder_path)):
    # Only .docx files are read. Files prefixed with "~$" are the temporary
    # owner files Word keeps next to an open document; they share the
    # extension but cannot be opened as documents.
    if not filename.lower().endswith(".docx") or filename.startswith("~$"):
        continue
    file_path = os.path.join(folder_path, filename)
    document = Document(file_path)

    sub_list = _read_table_rows(document)
    name = _find_name(document)
    if name is None:
        print(f'文件{filename}没有找到指定的信息!')
    if not sub_list:
        # Without a header row there is nothing to build a DataFrame from.
        print(f'文件{filename}没有找到表格数据,已跳过!')
        continue
    tableList.append(sub_list)
    name_list.append(name)

# Build one DataFrame per file: the first collected row supplies the column
# labels and the remaining rows are the data.
df_list = []
for d, name in zip(tableList, name_list):
    df = pd.DataFrame(d[1:], columns=d[0])
    # The name is kept outside the table rows, so the column holds the real
    # name (or None when the file had no name paragraph) in every row.
    df['姓名'] = name
    df_list.append(df)

if df_list:
    # Merge the per-file frames and export them to a single Excel sheet.
    all_data = pd.concat(df_list)
    with pd.ExcelWriter('wordData/output.xlsx') as writer:
        all_data.to_excel(writer, index=True)
else:
    # pd.concat raises on an empty list, so report instead of crashing.
    print('没有可导出的表格数据!')