# coding=utf-8
import requests
from bs4 import BeautifulSoup
import json
import lxml
def configdata():
requests.packages.urllib3.disable_warnings()
indexurl = 'https://xxx/index.php?main_page=info_check&action=ajax_get_search_result_left'
# 设置用于访问的请求头
headers = {
'accept': 'application/json, text/javascript, */*; q=0.01',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'origin': 'www.xxx.com',
'referer': 'https://www.xxx.com/index.php?main_page=advanced_search_result&inc_subcat=1&search_in_description=0&prm=1.5.62.0&keyword=123',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36 Edg/89.0.774.68',
}
data = {
'keyword_filter': '123',
'keyword': '123',
'cate_count': '17664:1,5632:1,225:1,16580:1,2500:1,548:1,20837:1,229:1,18982:1,76:1,368:1,4050:1,4150:2,1015:3,54425:2,59321:1,109819:1',
'ns_array': 'null',
'result_use_best_seller': '0',
'product_optimization': '',
'get_query': '{"main_page":"advanced_search_result","inc_subcat":"1","search_in_description":"0","keyword":"123","language":"en"}',
'light_select': '',
}
def parse_index():
session = requests.Session()
resp = session.post(url=indexurl, data=data, headers=headers)
print(resp)
# print(resp.content)
bsboj = BeautifulSoup(resp.content, 'lxml')
a_list = bsboj.find_all('a')
text = ''
for a in a_list:
# print(a.get('href'))
href = a.get('href')
text += str(href) + '\n'
with open('baiduurl.txt', 'w') as f:
f.write(text)
if __name__ == '__main__':
configdata()
parse_index()
第二个def用不了第一个def的参数值....求大神帮忙,小白刚学
把赋值的参数返回,并传递到下个方法,这样就可以了,望采纳,谢谢!
# coding=utf-8
import requests
from bs4 import BeautifulSoup
import json
import lxml
def configdata():
requests.packages.urllib3.disable_warnings()
indexurl = 'https://xxx/index.php?main_page=info_check&action=ajax_get_search_result_left'
# 设置用于访问的请求头
headers = {
'accept': 'application/json, text/javascript, */*; q=0.01',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'origin': 'www.xxx.com',
'referer': 'https://www.xxx.com/index.php?main_page=advanced_search_result&inc_subcat=1&search_in_description=0&prm=1.5.62.0&keyword=123',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36 Edg/89.0.774.68',
}
data = {
'keyword_filter': '123',
'keyword': '123',
'cate_count': '17664:1,5632:1,225:1,16580:1,2500:1,548:1,20837:1,229:1,18982:1,76:1,368:1,4050:1,4150:2,1015:3,54425:2,59321:1,109819:1',
'ns_array': 'null',
'result_use_best_seller': '0',
'product_optimization': '',
'get_query': '{"main_page":"advanced_search_result","inc_subcat":"1","search_in_description":"0","keyword":"123","language":"en"}',
'light_select': '',
}
return indexurl,headers,data
def parse_index(indexurl,headers,data):
session = requests.Session()
resp = session.post(url=indexurl, data=data, headers=headers)
print(resp)
# print(resp.content)
bsboj = BeautifulSoup(resp.content, 'lxml')
a_list = bsboj.find_all('a')
text = ''
for a in a_list:
# print(a.get('href'))
href = a.get('href')
text += str(href) + '\n'
with open('baiduurl.txt', 'w') as f:
f.write(text)
if __name__ == '__main__':
indexurl,headers,data=configdata()
print(indexurl)
parse_index(indexurl,headers,data)
你可以不写在函数内部,作为全局变量声明即可
可以把公共参数放在全局变量中,这样就可以共享了。例如这样处理:
# coding=utf-8
import requests
from bs4 import BeautifulSoup
import json
import lxml
# 设置用于访问的请求头
headers = {
'accept': 'application/json, text/javascript, */*; q=0.01',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'origin': 'www.xxx.com',
'referer': 'https://www.xxx.com/index.php?main_page=advanced_search_result&inc_subcat=1&search_in_description=0&prm=1.5.62.0&keyword=123',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36 Edg/89.0.774.68',
}
data = {
'keyword_filter': '123',
'keyword': '123',
'cate_count': '17664:1,5632:1,225:1,16580:1,2500:1,548:1,20837:1,229:1,18982:1,76:1,368:1,4050:1,4150:2,1015:3,54425:2,59321:1,109819:1',
'ns_array': 'null',
'result_use_best_seller': '0',
'product_optimization': '',
'get_query': '{"main_page":"advanced_search_result","inc_subcat":"1","search_in_description":"0","keyword":"123","language":"en"}',
'light_select': '',
}
indexurl = 'https://xxx/index.php?main_page=info_check&action=ajax_get_search_result_left'
requests.packages.urllib3.disable_warnings()
def parse_index():
session = requests.Session()
resp = session.post(url=indexurl, data=data, headers=headers)
print(resp)
# print(resp.content)
bsboj = BeautifulSoup(resp.content, 'lxml')
a_list = bsboj.find_all('a')
text = ''
for a in a_list:
# print(a.get('href'))
href = a.get('href')
text += str(href) + '\n'
with open('baiduurl.txt', 'w') as f:
f.write(text)
if __name__ == '__main__':
parse_index()