我想知道我哪一步写错了 怎么样更改才可以开始爬

#-*- codeing = utf-8 -*-

from bs4 import BeautifulSoup   #网页解析,获取数据
import re               #正则表达式,进行文字匹配
import  urllib.request,urllib.error         #制定URL,获取网页数据
import xlwt             #进行excel操作
import sqlite3          #进行sqlite数据库操作







def main():
   baseurl="https://xueshu.baidu.com/s?wd=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&tn=SE_baiduxueshu_c1gjeupa&cl=3&ie=utf-8&bs=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&f=8&rsv_bp=1&rsv_sug2=0&sc_f_para=sc_tasktype%3D%7BfirstSimpleSearch%7D&rsv_spt=3"
   #爬取网页
   datalise = getData(baseurl)
   savepath =r".\\日本企业管理模式对我国企业发展的启示.xls"
   #保存数据
   saveData(savepath)
   askURL("https://xueshu.baidu.com/s?wd=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&tn=SE_baiduxueshu_c1gjeupa&cl=3&ie=utf-8&bs=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&f=8&rsv_bp=1&rsv_sug2=0&sc_f_para=sc_tasktype%3D%7BfirstSimpleSearch%7D&rsv_spt=3")



#爬取网页
def getData(baseurl):
    datalist = []
    for i in  range(0,10):
        url =baseurl + str(i*25)
        html = askURL(url)



    # 2.逐一解析数据



    return datalist



#得到指定一个url的网页内容
def askURL(url):
   #模拟浏览器头部信息,想服务区发送消息 伪装用

   head = {"User-Agent:Mozilla/5.0(Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML,like Gecko)Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66"}

   request = urllib.request.Request(url,headers = head)


   html = ""


   try:
       response = urllib.request.urlopen(request)
       html = response.read().decode("uf-8")
       print(html)
   except urllib.error.URLError as e:
       if hasattr(e,"code"):
           print(e.code)
       if hasattr(e,"reason"):
           print(e.reason)
   return html





#3.保存数据
def saveData(savepath):




    print("savepath")







if __name__ =="__main__":       #当程序执行时
#调用函数
   main()

我测试了一下 askURL函数中 的伪装头文件有问题 

head = {  # 模拟浏览器头部信息
       "User-Agent": "Mozilla / 5.0(Windows NT 10.0;Win64;x64) AppleWebKit / 537.36(KHTML, like; Gecko) Chrome / 86.0; .4240; .198; Safari / 537.36"
   }

 

希望采纳 已运行

#head 改成这个
head = {"User-Agent":"Mozilla/5.0(Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML,like Gecko)Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66"}

##这句是utf-8
html = response.read().decode("utf-8")


##然后可以跑通了,但你只是打印了网页源码,并没有写处理逻辑

 

我直接在getData的循环中打印的 可以打印出来 你这边可以试试 加油

#-*- codeing = utf-8 -*-
from bs4 import BeautifulSoup   #网页解析,获取数据
import re               #正则表达式,进行文字匹配
import  urllib.request,urllib.error         #制定URL,获取网页数据
import xlwt             #进行excel操作
import sqlite3          #进行sqlite数据库操作

def main():
   baseurl="https://xueshu.baidu.com/s?wd=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&tn=SE_baiduxueshu_c1gjeupa&cl=3&ie=utf-8&bs=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&f=8&rsv_bp=1&rsv_sug2=0&sc_f_para=sc_tasktype%3D%7BfirstSimpleSearch%7D&rsv_spt=3"
   #爬取网页
   datalise = getData(baseurl)
   savepath =r".\日本企业管理模式对我国企业发展的启示.xls"
   #保存数据
   saveData(savepath)
   askURL("https://xueshu.baidu.com/s?wd=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&tn=SE_baiduxueshu_c1gjeupa&cl=3&ie=utf-8&bs=%E6%97%A5%E6%9C%AC%E4%BC%81%E4%B8%9A%E7%AE%A1%E7%90%86%E6%A8%A1%E5%BC%8F%E5%AF%B9%E6%88%91%E5%9B%BD%E4%BC%81%E4%B8%9A%E5%8F%91%E5%B1%95%E7%9A%84%E5%90%AF%E7%A4%BA&f=8&rsv_bp=1&rsv_sug2=0&sc_f_para=sc_tasktype%3D%7BfirstSimpleSearch%7D&rsv_spt=3")



#爬取网页
def getData(baseurl):
    datalist = []
    for i in  range(0,10):
        url =baseurl + str(i*25)
        html = askURL(url)
    return datalist



#得到指定一个url的网页内容
def askURL(url):
   #模拟浏览器头部信息,想服务区发送消息 伪装用

   head = {"User-Agent" : "Mozilla/5.0(Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML,like Gecko)Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66"}

   request = urllib.request.Request(url,headers = head)


   html = ""


   try:
       response = urllib.request.urlopen(request)
       html = response.read().decode("utf-8")
       print(html)
   except urllib.error.URLError as e:
       if hasattr(e,"code"):
           print(e.code)
       if hasattr(e,"reason"):
           print(e.reason)
   return html


#3.保存数据
def saveData(savepath):
    print("savepath")


if __name__ =="__main__":       #当程序执行时
#调用函数
   main()

 


1 字符串前加了r 就不用写双 \ 了
   savepath =r".\日本企业管理模式对我国企业发展的启示.xls"

2 url =baseurl + str(i*25)
你确定 str(i*25)要加在 baseurl 后面??

3    head = {"User-Agent" : "Mozilla/5.0(Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML,like Gecko)Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66"}
字典键名和键值要分成两个字符串

4  html = response.read().decode("utf-8")
你utf-8写成了uf-8