正则表达式爬取并列标签


# Download the history page at `url` and collect the inner text of every
# <div class="th140"> cell with a regular expression.
import re

import requests

url = "https://lishi.tianqi.com/hangzhou/202212.html"
headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54"}
r = requests.get(url, headers=headers, timeout=30)
r.encoding = "utf-8"
# No sleep here: requests.get() has already returned the complete body, so a
# delay after this single request would only slow the script down.
text = r.text
# Match the full closing tag. Ending the pattern at "div>" left a trailing
# "</" in every captured value.
pattern = re.compile('<div class="th140">(.*?)</div>', re.S)
results = pattern.findall(text)
# print() so the result is visible when run as a script, not only in a REPL.
print(results)

img


问题
如何提取出最高气温和最低气温,
然后将数据写入 txt 文件,格式大致如下:

img

from bs4 import BeautifulSoup
import requests

# Download the history page at `url`, collect the "th200" and "th140" <div>
# cells with BeautifulSoup, and write one comma-separated line per group of
# four "th140" cells (prefixed by the matching "th200" cell) to e:/t.txt.
url = "https://lishi.tianqi.com/hangzhou/202212.html"
headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54"}
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, headers=headers, timeout=30)
# Decode explicitly, as the other snippets for this page do, instead of
# relying on the charset requests guesses from the response headers.
r.encoding = "utf-8"
d = BeautifulSoup(r.text, 'html.parser')
data = d.find_all('div', class_='th140')
date = d.find_all('div', class_='th200')

# The context manager closes the file even if a write fails; the explicit
# encoding keeps non-ASCII cell text independent of the platform default.
with open('e:/t.txt', 'w', encoding='utf-8') as f:
    for i, cell in enumerate(data):
        if i % 4 == 0:
            # Start of a new row: emit the "th200" cell that belongs to it.
            f.write('{},'.format(date[i // 4].text))
        f.write('{},\t'.format(cell.text))
        if i % 4 == 3:
            # Fourth cell of the row: terminate the line.
            f.write('\n')


img


# import time
import re

from jsonref import requests

def extract_cell_pairs(html):
    """Return the first two "th140" cell texts of every four-cell row.

    The page markup is scanned for ``<div class="th140">...</div>`` cells,
    which are taken in consecutive groups of four. For each group the first
    two values are returned (presumably the highest and lowest temperature;
    verify against the page layout).

    Args:
        html: The page source as a string.

    Returns:
        A list of ``(first, second)`` string tuples, one per group. A trailing
        group with fewer than two cells is skipped. The list is empty when no
        cells are found.
    """
    cells = re.findall('<div class="th140">(.*?)</div>', html, re.S)
    # Step by four cells per row; stopping at len - 1 guarantees that
    # cells[i + 1] exists for every index produced.
    return [(cells[i], cells[i + 1]) for i in range(0, len(cells) - 1, 4)]


if __name__ == '__main__':
    url = "https://lishi.tianqi.com/hangzhou/202212.html"
    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"}  # replace with your own request headers
    r = requests.get(url, headers=headers, timeout=30)
    r.encoding = "utf-8"
    # The list grows with the page, so any number of rows works; the previous
    # fixed 25-slot table raised IndexError for any other row count.
    pairs = extract_cell_pairs(r.text)
    # Open the text file and write one "first second" line per row.
    with open('example.txt', 'w', encoding='utf-8') as f:
        for first, second in pairs:
            print(first, second)
            f.write(first + " " + second + "\n")
    print("finsh")

img