爬取网站文档的时候邮件报错

这段代码运行后在邮件中报这个错误:" target="_blank" class="NavigationItem__external-link">服务商相关指引
请各位朋友指点一下

#coding=utf-8
import time
import re
import requests
import datetime
import smtplib
from bs4 import BeautifulSoup
from urllib.parse import urljoin


def qingqiu(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 \
                    (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'}     #设置headers信息,模拟成浏览器取访问网站
    req = requests.get(url, headers=headers)   #向网站发起请求,并获取响应对象
    content = req.text   #获取网站源码
    pattern = re.compile('.html(.*?)</a>').findall(content)  #正则化匹配字符,根据网站源码设置
    return pattern  #运行qingqiu()函数,会返回pattern的值


def get_all_urls(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 \
                    (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'}     #设置headers信息,模拟成浏览器取访问网站
    req = requests.get(url, headers=headers)   #向网站发起请求,并获取响应对象
    soup = BeautifulSoup(req.text, 'html.parser')  #使用BeautifulSoup解析HTML

    urls = set()  #使用集合来存储URL,避免重复
    for a in soup.find_all('a'):  #找到所有a标签
        href = a.get('href')  #提取href属性
        if href:  #判断是否存在href属性
            href = re.sub(r'\s*target="_blank"\s*class="NavigationItem__external-link"', '', href) #去掉多余部分
            abs_url = urljoin(url, href)  #将相对URL转化为绝对URL
            if abs_url.startswith("http") or abs_url.startswith("https"):
                urls.add(abs_url)  #加入集合

    return urls  #返回URL集合



def send_email():
    HOST = 'smtp.163.com'   # 网易邮箱smtp
    PORT = '465'
    fajianren = 'yaozx0922@163.com'   #发送人邮箱
    shoujianren = '1578407333@qq.com'   #收件人邮箱
    title = '更新信息通知'     # 邮件标题
    new_urls = get_all_urls('https://developers.weixin.qq.com/doc/channels/Operational_Guidelines/Shop_opening_guidelines.html')  #提取页面内所有URL
    new_pattern = qingqiu('https://developers.weixin.qq.com/doc/channels/Operational_Guidelines/Shop_opening_guidelines.html')  #获取新的页面内容列表
    context = new_pattern[0]  # 邮件内容
    smtp = smtplib.SMTP_SSL(HOST, 465)  # 启用SSL发信, 端口一般是465
    res = smtp.login(user=fajianren, password='xxxx') # 登录验证,password是邮箱授权码而非密码,需要去网易邮箱手动开启
    print('发送结果:', res)
    msg = '\n'.join(
        ['From: {}'.format(fajianren), 'To: {}'.format(shoujianren), 'Subject: {}'.format(title), '', context])
    smtp.sendmail(from_addr=fajianren, to_addrs=shoujianren, msg=msg.encode('utf-8')) # 发送邮件
    print(context)


def update():
    print('通知系统启动中')
    old_urls = set()  #使用集合来存储URL,避免重复
    while True:
        urls = get_all_urls('https://developers.weixin.qq.com/doc/channels/Operational_Guidelines/Shop_opening_guidelines.html')  #获取页面内所有URL
        for url in urls:  #遍历URLs
            if url not in old_urls:  #判断URL是否已经访问过
                old_urls.add(url)  #记录已访问的URLs
                new_pattern = qingqiu(url)  #获取新的页面内容列表
                if new_pattern:  #判断页面内容是否为None
                    send_email()  #发送邮件
        now=datetime.datetime.now()
        print(now,"暂无更新")
        time.sleep(3600) # 一小时检测一次

if __name__ == '__main__':
    update()
   

你是正则提取部分的问题?你想提取a标签里面的url?