python初级爬虫爬不出来

问题遇到的现象和发生背景
用 urllib 请求页面后,再用正则匹配评论内容,但什么都爬不出来,这是为什么?
问题相关代码,请勿粘贴截图


from urllib.request import urlopen, Request
import re

# Send a desktop-browser User-Agent instead of urllib's default one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}
url = 'https://read.douban.com/ebook/57468782/?&dcs=provider-63687123-%E5%8D%9A%E9%9B%86%E5%A4%A9%E5%8D%B7'
# This object describes the outgoing request; the response only exists
# once urlopen() has been called on it.
request = Request(url, headers=headers)
# The context manager closes the HTTP response as soon as the body is read,
# and the timeout keeps a stalled connection from blocking the script forever.
with urlopen(request, timeout=10) as res:
    data = res.read().decode('utf-8')

# Capture the text between each `"Linkify">` marker and the next `<`.
# A lazy group with nothing after it, `(.*?)`, always matches the empty
# string, so the pattern needs a terminator to capture anything.
# re.S lets a captured comment span several lines.
comments = r'"Linkify">(.*?)<'
commentss = re.compile(comments, re.S).findall(data)

# Print each match in turn. Indexing with a constant (commentss[1]) would
# repeat one entry and raise IndexError when there is exactly one match.
# If nothing is printed, the marker is simply not present in `data`.
for comment in commentss:
    print(comment)

运行结果及报错内容
程序运行后直接结束,没有任何输出,也没有报错。

因为你要找的内容根本不在这个 url 返回的页面里,它是通过 ajax 加载的,如图:

img

打开浏览器,重新抓包找到正确的url再请求就行了
改成这样:

import requests
import json


# HTTP headers for the JSON/XHR request. Single-quoted literals are used so
# the header values that contain double quotes need no escaping.
headers = {
    'Accept': 'application/json',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    'Origin': 'https://read.douban.com',
    'Referer': 'https://read.douban.com/ebook/57468782/?&dcs=provider-63687123-%E5%8D%9A%E9%9B%86%E5%A4%A9%E5%8D%B7',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'X-CSRF-Token': 'null',
    'X-Requested-With': 'XMLHttpRequest',
    'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
}
# Cookie name/value pairs, written out literally. Single-quoted literals
# keep the values that contain double quotes free of escapes.
cookies = {
    'bid': 'bm5xbp_pnnk',
    '__gads': 'ID=405da3e1060fea95-22aba81db4d000a6:T=1645595906:RT=1645595906:S=ALNI_MZG4dJ-Q8qISb1TMQYcXCyUrhH_5Q',
    'll': '"108288"',
    'viewed': '"1095634"',
    'gr_user_id': 'f393e052-d9b1-42eb-8f4c-24a3f8852010',
    'douban-fav-remind': '1',
    '__utma': '30149280.705976680.1645595907.1652087977.1659088871.5',
    '__utmz': '30149280.1659088871.5.4.utmcsr=baidu|utmccn=(organic)|utmcmd=organic',
    '_ga': 'GA1.1.705976680.1645595907',
    '_gid': 'GA1.3.813058082.1661849611',
    '_gat': '1',
    '_pk_id.100001.a7dd': 'cfe2c464cd4bf4b3.1661849611.1.1661849611.1661849611.',
    '_pk_ses.100001.a7dd': '*',
    '__gpi': 'UID=0000081fa54ffd56:T=1659089077:RT=1661849690:S=ALNI_MZLGLQU4CI3g3YMotiscG-l-oNEXw',
    '_ga_RXNMP372GL': 'GS1.1.1661849611.1.1.1661849649.22.0.0',
}
# GraphQL endpoint the request is posted to.
url = "https://read.douban.com/j/graphql"
# Request payload: the query text, its variables, and the operation to run.
data = {
    # The query is a single string. It is written as adjacent literals
    # (implicit concatenation) because a double-quoted Python string cannot
    # continue across physical lines; the pieces join without any separator.
    "query": (
        "\n    query getWorksComment($worksId: ID!, $limit: Int) {\n      works: works(worksId: $worksId) {\n        worksType\n        \n    ... on WorksBase {\n      comments: mixedComments(limit: $limit) {\n        \n  ... on CommentBase {\n    id\n    isHidden\n    isDeleted\n    \n  ... on CommentBase {\n    id\n    works {\n      agent {\n        id\n      }\n    }\n    user {\n      id\n      avatar: picture(size: MEDIUM)\n      name\n      url\n      isVip\n      ... on Agent {\n        agentName\n        hasMedal\n        agentId\n      }\n    }\n    createTime\n    commentType\n    donation {\n      amount\n    }\n    ... on Review {\n      url\n      badge {\n        url\n        image\n        title\n        color\n      }\n    }\n    ... on Annotation {\n      url\n    }\n    ... on WorksRecommend {\n      score\n      isEditorChoice\n    }\n    hasPurchasedAllBadge\n  }\n\n    \n  ... on CommentBase {\n    id\n    content\n    commentType\n    user {\n      id\n      ... on Agent {\n        agentName\n      }\n    }\n    ... on Discussion {\n      refComment {\n        id\n        user {\n          id\n          name\n          url\n          ... on Agent {\n            agentName\n            agentId\n          }\n        }\n        isDeleted\n        createTime\n        content\n      }\n    }\n    ... on Review {\n      title\n      badge {\n        label\n        color\n      }\n    }\n    ... on Annotation {\n      \n  ... on Annotation {\n    originContent {\n      rawTexts\n      startOffset\n      endOffset\n      image {\n        url\n        size { width height }\n      }\n    }\n  }\n\n    }\n    ... on WorksRecommend {\n      title\n    }\n    ... on ReviewComment {\n      refComment {\n        id\n        content\n        createTime\n        isDeleted\n        user {\n          id\n          name\n          ... on Agent {\n            agentName\n            agentId\n          }\n        }\n      }\n    }\n    ... "
        "on AnnotationComment {\n      refComment {\n        id\n        content\n        createTime\n        isDeleted\n        user {\n          id\n          name\n          ... on Agent {\n            agentName\n            agentId\n          }\n        }\n      }\n    }\n    ... on WorksRecommendComment {\n      refComment {\n        id\n        content\n        createTime\n        isDeleted\n        user {\n          id\n          name\n          ... on Agent {\n            agentName\n            agentId\n          }\n        }\n      }\n    }\n  }\n\n    \n  ... on CommentBase {\n    id\n    commentType\n    isHidden\n    isDeleted\n    content\n    user {\n      id\n      name\n      isBlocked\n      ... on Agent {\n        agentName\n      }\n    }\n    works {\n      id\n      title\n      cover(useSmall: true)\n      url\n      isChapter\n    }\n    operationInfo {\n      editor {\n        id\n        name\n        url\n      }\n      time\n    }\n    ... on Review {\n      url\n      upvoted\n      upvoteCount\n      commentCount\n    }\n    ... on Annotation {\n      url\n      upvoted\n      upvoteCount\n      commentCount\n    }\n    ... on Discussion {\n      targetId\n      upvoted\n      upvoteCount\n      works {\n        title\n        url\n      }\n    }\n    ... on WorksRecommend {\n      url\n      upvoted\n      upvoteCount\n      commentCount\n    }\n    ... on ReviewComment {\n      targetId\n      upvoted\n      upvoteCount\n    }\n    ... on AnnotationComment {\n      targetId\n      upvoted\n      upvoteCount\n    }\n    ... on WorksRecommendComment {\n      targetId\n      upvoted\n      upvoteCount\n    }\n  }\n\n    \n  ... "
        "on WorksRecommend {\n    id\n    works {\n      id\n      \n    title\n    cover(useSmall: true)\n    url\n    isBundle\n    coverLabel(preferVip: true)\n  \n      \n  url\n  title\n\n      \n  author {\n    name\n    url\n  }\n  origAuthor {\n    name\n    url\n  }\n  translator {\n    name\n    url\n  }\n\n      isColumn\n      isFinished\n      wordCount\n      wordCountUnit\n      isInLibrary\n      \n    }\n  }\n\n    \n  ... on CommentBase {\n    id\n    commentType\n    content\n    works {\n      id\n      title\n    }\n    user {\n      id\n      name\n      isBlocked\n      ... on Agent {\n        agentName\n      }\n    }\n    isHidden\n    isDeleted\n    operationInfo {\n      editor {\n        id\n        name\n        url\n      }\n      time\n    }\n    ... on Review {\n      title\n      rating\n      reviewId\n      upvoted\n      upvoteCount\n      commentCount\n    }\n    ... on Annotation {\n      upvoted\n      upvoteCount\n      commentCount\n    }\n    ... on WorksRecommend {\n      upvoted\n      upvoteCount\n      commentCount\n    }\n    ... on Discussion {\n      targetId\n    }\n    ... on ReviewComment {\n      targetId\n    }\n    ... on AnnotationComment {\n      targetId\n    }\n    ... on WorksRecommendComment {\n      targetId\n    }\n  }\n  \n  ... on CommentBase {\n    id\n    commentType\n    content\n    works {\n      id\n    }\n    user {\n      id\n      name\n      ... on Agent {\n        agentName\n      }\n    }\n    ... on Review {\n      reviewId\n    }\n  }\n\n  \n  ... on CommentBase {\n    id\n    commentType\n    works {\n      id\n    }\n    ... on Review {\n      reviewId\n    }\n    ... on Discussion {\n      targetId\n    }\n    ... on ReviewComment {\n      targetId\n    }\n    ... on AnnotationComment {\n      targetId\n    }\n    ... on WorksRecommendComment {\n      targetId\n    }\n  }\n\n\n  }\n\n      }\n      commentTotal: mixedCommentCount\n    }\n    \n  ... "
        "on WorksBase {\n    id\n    title\n    review {\n      id\n      content\n      ... on Review {\n        reviewId\n        rating\n        title\n        url\n      }\n    }\n  }\n\n  \n      }\n    }\n  "
    ),
    "variables": {
        # Bound to $worksId in the query above.
        "worksId": "57468782",
        # Bound to $limit, which the query passes to mixedComments(limit: ...).
        "limit": 6,
    },
    "operationName": "getWorksComment",
}
# Serialize the payload under its own name rather than rebinding `data`,
# so the original dict stays available for inspection afterwards.
payload = json.dumps(data)
# requests applies no timeout by default, so a stalled connection would
# block the script forever unless one is passed explicitly.
response = requests.post(url, headers=headers, cookies=cookies, data=payload, timeout=10)

# Raw response body first, then the Response object itself.
print(response.text)
print(response)

你想要的数据不是你请求的这个url返回的。
是这个

img


我从CSDN技能树中找了一些相关资料给你, 你可以看下: