import requests
from bs4 import BeautifulSoup
import csv
# Browser-like User-Agent header so sc.chinaz.com serves the normal page
# instead of rejecting/blocking the request as an obvious bot.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
def get_html(url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    Sends the module-level browser-like ``headers``.  A timeout is set so a
    stalled connection cannot hang the scraper forever (requests has no
    default timeout).
    """
    response = requests.get(url, headers=headers, timeout=10)
    # Force UTF-8: the page contains Chinese text and requests' charset
    # autodetection can pick the wrong codec, garbling the alt-text names.
    response.encoding = 'utf-8'
    return response.text
def parse_html(html):
    """Extract image name/link pairs from one chinaz listing page.

    Parameters
    ----------
    html : str
        HTML text of a https://sc.chinaz.com/tupian/ listing page.

    Returns
    -------
    list[dict]
        One ``{'图片名': ..., '图片链接': ...}`` dict per image anchor.
    """
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('div', id="container")
    lst = []
    if container is None:
        # Layout changed or an error page came back — return no records
        # instead of crashing on None.
        return lst
    # Use find_all on descendants instead of iterating container's direct
    # children: direct iteration also yields NavigableString nodes (bare
    # whitespace/text) which have no .find(), raising AttributeError.
    # The site marks its image links with target="_blank", not "blank".
    for a in container.find_all('a', target="_blank"):
        lj = a.get('href')
        if lj is None:  # anchor without a link — nothing to record
            continue
        # hrefs on this site are protocol-relative ("//sc.chinaz.com/...");
        # prepend a scheme so the stored link is directly usable.
        if lj.startswith('//'):
            lj = 'https:' + lj
        lst.append({'图片名': a.get('alt'), '图片链接': lj})
    return lst
def save_data(lst):
    """Write the scraped records to ``tupian.csv`` (UTF-8, header row first).

    Each element of *lst* is a dict with keys '图片名' and '图片链接'.
    """
    fieldnames = ['图片名', '图片链接']
    with open('tupian.csv', 'w', encoding='utf-8', newline='') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        writer.writeheader()
        for record in lst:
            writer.writerow(record)
# Crawl listing pages 1-10 and save every image name/link pair found.
moxie_content = []
for i in range(1, 11):
    if i == 1:
        # The first listing page has no page number in its filename.
        url = 'https://sc.chinaz.com/tupian/index.html'
    else:
        # Paginated URLs are index_2.html ... index_10.html; the original
        # f-string produced index2.html, which the site does not serve.
        url = f'https://sc.chinaz.com/tupian/index_{i}.html'
    html = get_html(url)
    moxie_content.extend(parse_html(html))
save_data(moxie_content)
你题目的解答代码如下:
import requests
from bs4 import BeautifulSoup
import csv
# Browser-like User-Agent header so sc.chinaz.com serves the normal page
# instead of rejecting/blocking the request as an obvious bot.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}
def get_html(url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    Sends the module-level browser-like ``headers``.  A timeout is set so a
    stalled connection cannot hang the scraper forever (requests has no
    default timeout).
    """
    response = requests.get(url, headers=headers, timeout=10)
    # Force UTF-8: the page contains Chinese text and requests' charset
    # autodetection can pick the wrong codec, garbling the alt-text names.
    response.encoding = 'utf-8'
    return response.text
def parse_html(html):
    """Parse one listing page into ``[{'图片名': ..., '图片链接': ...}, ...]``.

    Selects the image anchors under the #container element, prefixes each
    protocol-relative href with "https:", and echoes every record to stdout.
    """
    soup = BeautifulSoup(html, 'lxml')
    anchors = soup.select('#container>div>div>a[target="_blank"]')
    records = []
    for anchor in anchors:
        record = {
            '图片名': anchor.get('alt'),
            '图片链接': "https:" + anchor.get('href'),
        }
        records.append(record)
        print(record)
    return records
def save_data(lst):
    """Dump the record dicts in *lst* to ``tupian.csv`` with a header row."""
    with open('tupian.csv', 'w', encoding='utf-8', newline='') as out:
        dict_writer = csv.DictWriter(out, fieldnames=['图片名', '图片链接'])
        dict_writer.writeheader()
        dict_writer.writerows(lst)
# Crawl listing pages 1-10 and save every image name/link pair found.
# Page 1 is index.html; pages 2-10 are index_2.html ... index_10.html.
moxie_content = []
page_urls = ['https://sc.chinaz.com/tupian/index.html'] + [
    f'https://sc.chinaz.com/tupian/index_{i}.html' for i in range(2, 11)
]
for url in page_urls:
    moxie_content = moxie_content + parse_html(get_html(url))
save_data(moxie_content)
如有帮助,请点击我的回答下方的【采纳该答案】按钮帮忙采纳下,谢谢!