<!-- Sample markup: seven <tr> rows, each wrapping a <div> whose <span> holds a
     number (7 to 13). Only the first row carries class="curr". -->
<tr class="curr" _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">7</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">8</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">9</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">10</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">11</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">12</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">13</span>
</div>
</tr>
用正则:/_nk="8\/KP5a">(\d+)</g,第一个捕获组即为所需的数字。
如果格式都是如此的话,可以用正则表达式:
// Sample input: the HTML rows to scan, inlined as a single string.
String str = ("<tr class=\"curr\" _nk=\"8/KPc6\">\n" +
" <div _nk=\"8/KP3b\">\n" +
" <span _nk=\"8/KP5a\">7</span>\n" +
" </div>\n" +
"</tr> \n" +
"<tr _nk=\"8/KPc6\">\n" +
" <div _nk=\"8/KP3b\">\n" +
" <span _nk=\"8/KP5a\">8</span>\n" +
" </div>\n" +
"</tr> \n" +
"<tr _nk=\"8/KPc6\">\n" +
" <div _nk=\"8/KP3b\">\n" +
" <span _nk=\"8/KP5a\">9</span>\n" +
" </div>\n" +
"</tr> \n" +
"<tr _nk=\"8/KPc6\">\n" +
" <div _nk=\"8/KP3b\">\n" +
" <span _nk=\"8/KP5a\">10</span>\n" +
" </div>\n" +
"</tr> \n" +
"<tr _nk=\"8/KPc6\">\n" +
" <div _nk=\"8/KP3b\">\n" +
" <span _nk=\"8/KP5a\">11</span>\n" +
" </div>\n" +
"</tr> \n" +
"<tr _nk=\"8/KPc6\">\n" +
" <div _nk=\"8/KP3b\">\n" +
" <span _nk=\"8/KP5a\">12</span>\n" +
" </div>\n" +
"</tr> \n" +
"<tr _nk=\"8/KPc6\">\n" +
" <div _nk=\"8/KP3b\">\n" +
" <span _nk=\"8/KP5a\">13</span>\n" +
" </div>\n" +
"</tr> ");
// Match a positive integer (no leading zero) sitting directly between '>' and
// "</span>". The digits are captured in group 1 so they can be read as-is,
// rather than stripping the surrounding markup from the whole match afterwards.
Pattern pattern = Pattern.compile(">([1-9]\\d*)</span>");
Matcher m = pattern.matcher(str);
// Print every captured number, one per line, in document order.
while (m.find()) {
    System.out.println(m.group(1));
}
from bs4 import BeautifulSoup
# Fetch the page source with selenium; parsing it with BeautifulSoup then makes
# the values fairly easy to extract.
page_source = '''<tr class="curr" _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">7</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">8</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">9</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">10</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">11</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">12</span>
</div>
</tr>
<tr _nk="8/KPc6">
<div _nk="8/KP3b">
<span _nk="8/KP5a">13</span>
</div>
</tr>'''
soup = BeautifulSoup(page_source, 'html.parser')
# Select every <span> nested inside a <div> inside a <tr>, then keep its text.
matched_spans = soup.select('tr div span')
result = [span.text for span in matched_spans]
print(result)