import os
import re
import urllib.request

from bs4 import BeautifulSoup
def getHtml(url, encoding='gb18030'):
    """Fetch *url* and return its body decoded as text.

    Args:
        url: Address to open; anything ``urllib.request.urlopen`` accepts.
        encoding: Codec used to decode the raw bytes. Defaults to
            ``'gb18030'``, the value this function has always used.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid in *encoding*.
    """
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(url) as page:
        raw = page.read()
    print('func 1 ok')
    return raw.decode(encoding)
# Absolute http(s) URL ending in a known image extension. The dot before the
# extension is escaped so it only matches a literal ".", and the character
# class stops at quotes, whitespace and ">" so a match stays inside a single
# attribute value. Compiled once at import time instead of once per tag.
_IMG_URL_RE = re.compile(r'https?://[^"\'\s>]*\.(gif|png|jpe?g)', re.IGNORECASE)


def getImg(html, out_dir='./img'):
    """Download every image referenced by an ``<img>`` tag in *html*.

    Each tag is serialised back to text and scanned with ``_IMG_URL_RE``, so
    a URL is picked up from whichever attribute carries it. Duplicate URLs
    are downloaded once. Files are written to *out_dir* as ``<n>.<ext>``,
    where ``n`` counts from 0 and ``ext`` is the lower-cased extension taken
    from the URL.

    Args:
        html: HTML document text to scan.
        out_dir: Directory to save into; created if missing.

    Returns:
        List of the file paths written, in download order.

    Raises:
        urllib.error.URLError: If a download fails; remaining images are not
            attempted.
        OSError: If *out_dir* cannot be created or a file cannot be written.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # url -> extension. A dict keeps first-seen order and drops duplicates.
    found = {}
    for tag in soup.find_all('img'):
        for match in _IMG_URL_RE.finditer(str(tag)):
            found.setdefault(match.group(0), match.group(1).lower())

    if not found:
        return []

    os.makedirs(out_dir, exist_ok=True)

    saved = []
    for index, (url, ext) in enumerate(found.items()):
        target = os.path.join(out_dir, '%s.%s' % (index, ext))
        urllib.request.urlretrieve(url, target)
        print('download%s' % index)
        saved.append(target)
    return saved
# Script entry: fetch the PCauto front page and download the images it
# references. Runs at import time, exactly as before.
START_URL = "http://www.pcauto.com.cn/"
html = getHtml(START_URL)
getImg(html)
# Source: https://jingyan.baidu.com/article/cbcede070055a802f40b4dd7.html