用 Python 获取网站内所有页面中的所有链接(包括分页在内)
import logging
from urllib.parse import urldefrag, urljoin

import requests
from bs4 import BeautifulSoup
def gl(url, timeout=10):
    """Return the absolute http(s) links found on the page at *url*.

    Every ``<a href>`` is resolved against the final response URL, so
    relative links such as ``?page=2`` or ``/list/3`` (the usual form of
    pagination links) are kept instead of being dropped. The ``#fragment``
    part is removed because it addresses a position inside a page, not a
    different page. Links with other schemes (``mailto:``, ``javascript:``,
    ...) are skipped.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a single
            unresponsive server would block the caller indefinitely.

    Returns:
        A list of absolute URL strings in document order. Duplicates are
        not removed.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    links = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not href:
            continue
        # Resolve against response.url (not url) so that relative links stay
        # correct when the request was redirected.
        absolute = urldefrag(urljoin(response.url, href.strip())).url
        if absolute.startswith(('http://', 'https://')):
            links.append(absolute)
    return links
def gal(bu, *, max_depth=1, fetch=None):
    """Collect the links reachable from *bu* with a breadth-first crawl.

    The start page is downloaded, then every page it links to, and so on
    for ``max_depth`` further levels. Each URL is downloaded and reported
    at most once, so pages that link to each other are not fetched
    repeatedly and the result contains no duplicates.

    Args:
        bu: URL of the start page.
        max_depth: How many levels of linked pages to download beyond the
            start page. ``0`` downloads only ``bu``; the default ``1`` also
            downloads every page linked from ``bu``.
        fetch: Callable taking a URL and returning the list of links on
            that page. Defaults to ``gl``.

    Returns:
        A list starting with ``bu`` followed by every discovered link, in
        the order the links were first seen.

    Raises:
        requests.RequestException: If the start page itself cannot be
            downloaded. Failures on linked pages are logged and skipped so
            that one dead link does not discard the whole crawl.
    """
    if fetch is None:
        fetch = gl

    seen = {bu}
    collected = [bu]
    frontier = [bu]
    for depth in range(max_depth + 1):
        discovered = []
        for page in frontier:
            try:
                links = fetch(page)
            except requests.RequestException as exc:
                if depth == 0:
                    # Nothing can be crawled without the start page.
                    raise
                logging.getLogger(__name__).warning(
                    'skipping %s: %s', page, exc
                )
                continue
            for link in links:
                if link not in seen:
                    seen.add(link)
                    collected.append(link)
                    discovered.append(link)
        if not discovered:
            break
        frontier = discovered
    return collected
# Script entry: crawl from the start URL and print every collected link.
# The host below is a placeholder ("你的网站" means "your website"); replace
# it with the real site before running.
all_links = gal(bu='https://www.你的网站.com')
print(all_links)