All strings must be XML compatible: Unicode or
ASCII, no NULL bytes or control characters
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36\lib\site-packages\docx\oxml\xmlchemy.py", line 273, in _add_child
setattr(child, key, value)
File "src\lxml\etree.pyx", line 1041, in lxml.etree._Element.text.set
File "src\lxml\apihelpers.pxi", line 748, in lxml.etree._setNodeText
File "src\lxml\apihelpers.pxi", line 736, in lxml.etree._createTextNode
File "src\lxml\apihelpers.pxi", line 1541, in lxml.etree._utf8
ValueError: All strings must be XML compatible: Unicode or
ASCII, no NULL bytes or control characters
```python
from docx import Document
#获取本文档中的所有章节
document = Document()
f = open("./data/W.OUT","r",encoding='gbk') # 注意编码类型
line = f.readline()
while line:
#这行有错误
document.add_paragraph(line)
#document.add_paragraph(line.decode('utf-8'))
line = f.readline()
f.close()
```
报错复制发过来
"""
读取Word文件,这个代码存储进入word后是
b'\n'
b' ///////////////////////////////////////////////////////////////////////////\n'
b' | \xb9\xab\xcb\xbe\xc3\xfb\xb3\xc6: |\n'
类似这种
"""
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# word_1.py
# 导入库
from docx import Document
from docx.enum.section import WD_ORIENT
from docx.shared import Cm,Inches,Pt
#获取本文档中的所有章节
document = Document()
paragraph=document.add_paragraph()
import codecs
#file = codecs.open("./data/W1.OUT", "r", "gbk")
file = codecs.open("./data/W1.OUT", "r")
lines = file.readline().encode("gbk")
#lines = file.readline().encode("utf8")
#document.add_paragraph(u" " + line)
while lines:
#document.add_paragraph(u"" +str(line.decode('gbk')))
document.add_paragraph(str(lines))
#document.add_paragraph(u"" +str(lines))
#document.add_paragraph(str(lines,encoding='utf8'))
#document.add_paragraph(u"" +str(lines,encoding='utf8'))
#document.add_paragraph(u"" +str(line.decode("utf8","ignore")))
lines = file.readline().encode("gbk")
#lines = file.readline().encode("utf8")
#line = f.readline().encode("gbk")
print(str(lines))
document.save('test3.docx')
这篇文章:python将word文档转换为txt 也许能够解决你的问题,你可以看下