from lxml import etree
# Sample HTML fragment that the XPath query further down is run against.
# The last <li> has no closing tag.
text = '''
<body>
<div>
<ul>
<li>01</li>
<li>02</li>
<li>03</li>
<li>04</li>
<li>05</li>
<li>06</li>
<li>07
</ul>
</div>
</body>
'''
# Parse the HTML string into an element tree.
htmla = etree.HTML(text)
# NOTE: the parsed tree is rooted at <html>, so this absolute path, which
# expects <body> to be the document root, selects nothing and prints [].
bc = htmla.xpath("/body/div/ul/li[1]/text()")
print(bc)
from lxml import etree
# Sample HTML fragment; the last <li> has no closing tag.
text = '''
<body>
<div id="content">
<ul>
<li>01</li>
<li>02</li>
<li>03</li>
<li>04</li>
<li>05</li>
<li>06</li>
<li>07
</ul>
</div>
</body>
'''
# Parse the HTML string into an element tree.
htmla = etree.HTML(text)
# '//li' selects every <li> element anywhere in the document.
bc = htmla.xpath('//li')
for c in bc:
    # .text is the text directly inside the element, before any child element.
    print(c.text)
将 /body/div/ul/li[1]/text() 中 body 前的斜杠(/)去掉,即改为 body/div/ul/li[1]/text()。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from lxml import etree
# Sample HTML fragment; the last <li> has no closing tag.
text = '''
<body>
<div id="content">
<ul>
<li>01</li>
<li>02</li>
<li>03</li>
<li>04</li>
<li>05</li>
<li>06</li>
<li>07
</ul>
</div>
</body>
'''
# Parse the HTML string into an element tree.
htmla = etree.HTML(text)
# When cleaning data, we usually anchor on an HTML attribute with a clear
# marker, such as id or class, and walk down from there to reach the data.
bc = htmla.xpath('//div[@id="content"]/ul/li')
for c in bc:
    # './text()' yields the text nodes directly under this <li>; print the first.
    print(c.xpath('./text()')[0])
代码如下,望采纳:
from lxml import etree
# Sample HTML fragment; the last <li> has no closing tag.
text = '''
<body>
<div>
<ul>
<li>01</li>
<li>02</li>
<li>03</li>
<li>04</li>
<li>05</li>
<li>06</li>
<li>07
</ul>
</div>
</body>
'''

# Text of the first <li> under body/div/ul; the leading '//' lets <body> match
# at any depth instead of requiring it to be the document root.
li_query = "//body/div/ul/li[1]/text()"

# Parse the HTML string into an element tree, then run the query against it.
htmla = etree.HTML(text)
bc = htmla.xpath(li_query)
print(bc)