The screenshot above shows the hyperlink URLs I crawled.
Below is how a single URL is handled. If I want to keep downloading the files inside a for loop, how should I write it?
from urllib.request import Request, urlopen  # Request and urlopen both need importing

def save_image(self, url, name):
    req = Request(url=url, headers=self.headers)
    content = urlopen(req).read()  # open the image link and read the response bytes
    with open("path", 'wb') as f:  # write to a file ("path" is a placeholder)
        f.write(content)
    print('finish...')
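A minimal sketch of such a loop, assuming the crawled links have been collected into a list (img_urls is a name invented here for illustration) and that save_image is called as a plain function with self dropped:

    # Hypothetical list of the crawled image links; fill it from your parsing step.
    img_urls = []

    # Give every file a distinct name so each download doesn't overwrite the last.
    for i, img_url in enumerate(img_urls):
        save_image(img_url, '%d.jpg' % i)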
from bs4 import BeautifulSoup
import time
import requests
from selenium import webdriver
from urllib.request import Request, urlopen

driver = webdriver.Chrome("/usr/local/bin/chromedriver")
respond = requests.get('https://www.manhuaren.com/m1009898/').content  # fetched with requests as well, but never used
url = driver.get('https://www.manhuaren.com/m1009898/')
soup = BeautifulSoup(url, 'html.parser')
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'}
for img in soup.select("div.img-link,img[src]"):
    def save_image(self, url, name):
        req = Request(url=url, headers=headers)
        content = urlopen(req).read()  # open the image link and read the response bytes
        with open("/Users/Documents/crawler/photo/", 'wb') as f:  # write to a file
            f.write(content)
        print('finish...')
Hi, this is my code. What else needs improvement? (And how should the function's parameters be changed?)
I tried it a few times and got the errors below.
You didn't write the file name and type.
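A minimal sketch of the fixes those remarks point at, assuming the aim is to parse the page Selenium rendered (driver.get returns None, so pass driver.page_source to BeautifulSoup) and to give open() a full path ending in a file name and extension. The selector, directory, and User-Agent come from the post above; the counter-based naming and the protocol-relative check are my own illustration:

    from urllib.request import Request, urlopen
    from bs4 import BeautifulSoup
    from selenium import webdriver

    headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'}

    def save_image(url, name):  # plain function: no self parameter
        req = Request(url=url, headers=headers)
        content = urlopen(req).read()
        with open('/Users/Documents/crawler/photo/' + name, 'wb') as f:
            f.write(content)  # the path now ends in a file name, not a directory

    driver = webdriver.Chrome("/usr/local/bin/chromedriver")
    driver.get('https://www.manhuaren.com/m1009898/')        # get() returns None; don't parse it
    soup = BeautifulSoup(driver.page_source, 'html.parser')  # parse the rendered HTML instead
    driver.quit()

    count = 1
    for tag in soup.select("div.img-link,img[src]"):
        src = tag.get('src')
        if not src:                   # the div.img-link matches carry no src; skip them
            continue
        if src.startswith('//'):      # some sites return protocol-relative links
            src = 'https:' + src
        save_image(src, '%d.jpg' % count)  # call the function instead of defining it inside the loop
        count += 1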
#coding: UTF-8
from bs4 import BeautifulSoup
from selenium import webdriver
import urllib.request
import os
import time

driver = webdriver.Chrome()  # drive a simulated Chrome browser
url = "https://www.kanman.com/25934/dpcq_1h.html"  # URL of the comic to save
openurl = driver.get(url)  # open the site in the simulated browser
html = driver.page_source  # read the source of the opened page
soup = BeautifulSoup(html, 'html.parser')  # feed the page source to BeautifulSoup for matching
driver.quit()  # close the browser window

def Requ(url):  # define a function that fetches an image's raw bytes
    headers = {"User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.22 Safari/537.36", "Referer": url}
    requ = urllib.request.Request(url, headers=headers)
    html = urllib.request.urlopen(requ).read()
    return html

count = 1
for img in soup.find_all("img", width="800"):  # use find_all to locate each comic page's image address
    with open(os.getcwd() + '\\2\\' + '%d.jpg' % count, 'wb') as f:  # write the file; the path must include the file name and location
        f.write(Requ(r"https:" + img.get("src")))
    time.sleep(1)
    print("save %d pic" % count)
    count = count + 1
print("end app")
You can use this as a reference; it's a rewrite, using the 看漫画 (kanman) comic site instead.
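One caveat with this rewrite: open() raises FileNotFoundError if the 2 subdirectory doesn't exist yet, and the '\\' separators only work on Windows. A small sketch of a more portable path setup (the folder name "2" is the one used above; the rest is illustrative):

    import os

    save_dir = os.path.join(os.getcwd(), '2')  # the "2" subfolder the rewrite saves into
    os.makedirs(save_dir, exist_ok=True)       # create it up front; no error if it exists

    count = 1
    path = os.path.join(save_dir, '%d.jpg' % count)  # portable on Windows, macOS, and Linux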