Python的pyquery爬取音乐MV时爬不到视频网址

我在用Python的pyquery爬取QQ音乐的音乐MV。代码里的url就是这个网页的网址;我用find方法先找到video节点,再获取它的src属性(也就是视频地址),但是打印出来的视频地址是None。
img

我的部分代码:

from pyquery import PyQuery as pq

# Load the MV page by URL and parse whatever HTML comes back.
url_mv = pq(url='https://y.qq.com/n/ryqq/mv/d0023bpqirq')

# Select the video element by its id, then read its src attribute.
video_node = url_mv.find('#video_player__source')
url_mp4 = video_node.attr('src')

print(url_mp4)

我试过打印出这个网页的HTML内容,但是在其中找不到这个节点,难道说我获取到的网页内容并不完整?我想知道这是为什么,以及该怎么解决。

这个节点是由JavaScript动态创建的,所以直接请求得到的HTML里没有它。可以配合selenium.webdriver的无头浏览器来抓取。

输入mv ID 获取播放地址


# -*- coding:utf-8 -*-
import json
import random
import re
import requests

class QQYingYue():
    """Look up the playback URL of a QQ Music MV by its ID.

    The lookup sends one request to ``https://u.y.qq.com/cgi-bin/musicu.fcg``
    and extracts the ``freeflow_url`` lists from the response text.
    """

    def __init__(self, proxy):
        """Store the proxies mapping that is passed on to every request."""
        self.proxy = proxy

    def unify_requests(self, method="GET", url="", headers=None, proxies=None, data=None, params=()):
        """Send one HTTP request and return the ``requests`` response.

        Args:
            method: HTTP method name; defaults to ``"GET"``.
            url: Target URL.
            headers: Optional header mapping.
            proxies: Optional proxies mapping.
            data: Optional request body; omitted when empty.
            params: Query-string parameters.

        Returns:
            The response object produced by ``requests``.
        """
        # None defaults replace the former shared mutable ``{}`` defaults.
        # Every method goes through requests.request so that non-GET calls
        # no longer fall through and silently return None.
        return requests.request(
            method,
            url,
            headers=headers or {},
            proxies=proxies or {},
            params=params,
            data=data or None,
            timeout=5,
        )

    def down_load_single_song(self, song_id, song_uid=''):
        """Query the MV endpoint for ``song_id`` and return its playback URL.

        Args:
            song_id: MV ID, substituted into both the info and URL queries.
            song_uid: Unused by this method; kept for interface compatibility.

        Returns:
            Whatever ``get_url_re`` returns for the response text: a URL on
            success, ``-1`` or ``-2`` otherwise.
        """
        headers = {
            # A fresh random value on every call.
            # NOTE(review): presumably consumed by a tunnel proxy -- confirm.
            "Proxy-Tunnel": str(random.randint(1, 10000)),
            'authority': 'u.y.qq.com',
            'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Mobile Safari/537.36',
            'accept': '*/*',
            'sec-fetch-site': 'same-site',
            'sec-fetch-mode': 'no-cors',
            'sec-fetch-dest': 'script',
            'referer': 'https://i.y.qq.com/n2/m/share/details/mv.html?ADTAG=newyqq.mv&vid=r0033wmeu4q',
            'accept-language': 'zh-CN,zh;q=0.9',
        }

        params = (
            ('ct', '23'),
            ('cv', '0'),
            ('format', 'json'),
            ('callback', 'qmv_jsonp_2'),
            ('data',
             '{"getMVInfo":{"module":"video.VideoDataServer","method":"get_video_info_batch","param":{"vidlist":["%s"],"required":["vid","sid","gmid","type","name","cover_pic","video_switch","msg"],"from":"h5.mvplay"}},"getMVUrl":{"module":"gosrf.Stream.MvUrlProxy","method":"GetMvUrls","param":{"vids":["%s"],"from":"h5.mvplay"},"request_typet":10001}}'%(song_id,song_id)),
            ('platform', 'h5'),
        )

        response = self.unify_requests(url='https://u.y.qq.com/cgi-bin/musicu.fcg', headers=headers, params=params, proxies=self.proxy)
        return self.get_url_re(response.text)

    def get_url_re(self, text):
        """Extract a playback URL from the raw response text.

        Args:
            text: Response body to search for ``"freeflow_url":[...]`` lists.

        Returns:
            The second entry of the last ``freeflow_url`` list that has more
            than one entry; ``-1`` if lists were found but none had more than
            one entry; ``-2`` if no ``freeflow_url`` list was found at all.
        """
        # Raw string: "\[" is an invalid escape sequence in a plain literal.
        info = re.findall(r'"freeflow_url":(\[.*?\])', text)
        if not info:
            return -2

        # Several lists may be present -- possibly one per quality level
        # (HD / SD); the choice of the last one may need revisiting.
        url_list = []
        for e in info:
            each_json_list = json.loads(e)
            if isinstance(each_json_list, list) and len(each_json_list) > 1:
                url_list.append(each_json_list[1])

        if url_list:
            return url_list[-1]

        print("请注意 url_list为空但是找到了 freeflow_url,请检查freeflow_url是否都是空 会员???")
        return -1

    # Batch download of songs.
    def down_load_songs(self):
        """Placeholder; not implemented."""
        pass

if __name__ == '__main__':
    # Demo run: look up one MV ID with no proxy configured and print the result.
    # NOTE(review): the original note on this ID mentioned a "delisted / rule 2"
    # case -- its meaning is unclear; confirm with the author.
    qqyy = QQYingYue(proxy={})
    print(qqyy.down_load_single_song(song_id="d0023bpqirq"))

https://www.jianshu.com/p/1b63c5f3c98e
https://blog.csdn.net/bizcatt/article/details/88693982

使用selenium获取源码,然后再用pyquery进行解析

# coding=utf-8

from pyquery import PyQuery as pq
from selenium import webdriver


from pyquery import PyQuery as pq

# Load the page in a Chrome session driven by selenium and take the HTML
# from the browser rather than from a plain HTTP fetch.
driver = webdriver.Chrome()
try:
    url_mv = 'https://y.qq.com/n/ryqq/mv/d0023bpqirq'
    driver.get(url_mv)
    # NOTE(review): if the video element is inserted late, this may be read
    # too early -- consider an explicit wait before taking page_source.
    source = driver.page_source
finally:
    # Always shut the browser down, even when loading the page raised.
    driver.quit()

# The HTML is already captured as a string, so parsing needs no live browser.
url_mp4 = pq(source).find('#video_player__source').attr('src')

print(url_mp4)

img

至于selenium怎么用,可以看我之前的一些文章,或者在网上查询。以上代码已经亲测可用。

已解决:https://blog.csdn.net/weixin_52132159/article/details/119055557