"""Upload a document to the Baidu Wenku organisation upload endpoint."""
import json
import os
import time
import urllib
import urllib.parse
import requests
import requests_toolbelt
import hashlib
from requests_toolbelt import MultipartEncoder

WENKU_BASE_URL = 'https://wenku.baidu.com'
# Production endpoint:
# WENKU_UPLOAD_URL = WENKU_BASE_URL + '/doc/orgapi/orgupload'
# Test endpoint:
WENKU_UPLOAD_URL = WENKU_BASE_URL + '/doc/orgapi/fororguploadtest'
# Production app key:
# WENKU_APPKEY = '<your production appkey>'
# Test app key:
WENKU_APPKEY = 'test'
# Production secret:
# WENKU_SECRET = '<your production secret>'
# Test secret:
WENKU_SECRET = 'g75a50a20baed9bbd12a6e69d091f4d6'


def upload(file_path='', tmp_dir='C:/Users/Administrator/Desktop/1'):
    """Upload one document to ``WENKU_UPLOAD_URL`` and print the outcome.

    The file is renamed (moved) into ``tmp_dir`` under an ASCII-only name
    built from the MD5 of its original base name; it is not renamed back.

    Args:
        file_path: Path of the document to upload.
        tmp_dir: Directory that receives the renamed file.

    Returns:
        None. Status code, raw response and doc_id / error are printed.

    Raises:
        OSError: If the file cannot be renamed or opened. Errors raised by
            ``requests``, ``json.loads`` or missing response keys propagate.
    """
    file_name = os.path.basename(file_path)
    file_tail = get_file_tail(file_name)
    # Only strip an extension that is really there: for dot-less names
    # get_file_tail() returns the placeholder 'default', which must not be
    # sliced off the end of the name.
    stem, dot, _ = file_name.rpartition('.')
    file_name_without_tail = stem if dot else file_name
    new_file_name = get_md5(file_name_without_tail) + '.' + file_tail
    # Give the file a name without Chinese characters before sending it.
    tmp_file_path = os.path.join(tmp_dir, new_file_name)
    os.rename(file_path, tmp_file_path)

    timestamp = str(int(time.time()))
    # flag: 0 = free, 3 = organisation, 10 = paid, 19 = commercial document.
    flag = '10'
    # price is expressed in fen (1/100 yuan).
    price = '2'
    title = urllib.parse.quote(file_name_without_tail)
    summary = urllib.parse.quote(file_name_without_tail)
    # '&timestamp' had been mangled to the HTML entity for '&times' + 'tamp'.
    # NOTE(review): field order is taken from the original string; confirm
    # it against the API documentation.
    pre_sign_str = (
        'appkey={0}&flag={1}&price={2}&summary={3}&timestamp={4}'
        '&title={5}&secret={6}&filemd5={7}'
    ).format(
        WENKU_APPKEY, flag, price, summary, timestamp, title,
        WENKU_SECRET, get_file_md5(tmp_file_path)
    )
    print('pre_sign_str1:', pre_sign_str)
    sign_str = get_md5(pre_sign_str)
    print('sign_str:', sign_str)

    # The encoder streams the file while the request is sent, so the POST
    # has to happen while the handle is still open.
    with open(tmp_file_path, 'rb') as doc_file:
        fields = {
            'object_file': (os.path.basename(tmp_file_path), doc_file,
                            'multipart/form-data'),
            'appkey': WENKU_APPKEY,
            'timestamp': timestamp,
            'sign': sign_str,
            'title': title,
            'flag': flag,
            'price': price,
            'summary': summary,
        }
        encoder = MultipartEncoder(fields=fields)
        print('ct:', encoder.content_type)
        # NOTE(review): verify=False disables TLS certificate checking.
        result = requests.Session().post(
            url=WENKU_UPLOAD_URL, data=encoder, verify=False,
            headers={'Content-Type': encoder.content_type})

    print('result.status_code:', result.status_code)
    if result.status_code != 200:
        print('网络请求出错')
        return
    result_str = result.text
    print('result_str:', result_str)
    result_json = json.loads(result_str)
    code = result_json['status']['code']
    msg = result_json['status']['msg']
    if code == 0:
        print('msg:', msg)
        payload = result_json['data']
        print('上传成功,doc_id:', payload['doc_id'])
    else:
        print('code:', code)
        print('msg:', msg)


def get_file_tail(file_name=''):
    """Return the lower-cased text after the last '.', or 'default' if none."""
    file_arr = file_name.split(".")
    if len(file_arr) >= 2:
        return file_arr[-1].lower()
    return 'default'


def get_file_md5(file_path=''):
    """Return the hex MD5 of a file's content, or '' if it cannot be read."""
    digest = hashlib.md5()
    try:
        with open(file_path, 'rb') as fp:
            # Hash in chunks so large documents are not loaded into memory.
            for chunk in iter(lambda: fp.read(65536), b''):
                digest.update(chunk)
    except (OSError, ValueError) as e:
        print(e.args)
        return ''
    file_md5 = digest.hexdigest()
    print(file_md5)
    return file_md5


def get_md5(source_str=''):
    """Return the hex MD5 of ``source_str`` encoded as UTF-8."""
    return hashlib.md5(source_str.encode('utf8')).hexdigest()


if __name__ == '__main__':
    # ``your_absolute_file_path`` is a placeholder and is not defined in this
    # script; assign the absolute path of the document to it (or pass the
    # path string directly) before running.
    upload(your_absolute_file_path)
运行时提示:
Traceback (most recent call last):
  File "I:/new-python/down/baidu.py", line 110, in <module>
    upload(your_absolute_file_path)
NameError: name 'your_absolute_file_path' is not defined
如何解决呢?
在最后一行调用 upload() 之前,先把 your_absolute_file_path 定义为你要上传文档的绝对路径(或者直接把该路径字符串作为参数传给 upload())。
import json
import os
import time
import urllib
import requests
import requests_toolbelt
import hashlib
from requests_toolbelt import MultipartEncoder
# Base address shared by every Wenku endpoint used in this script.
WENKU_BASE_URL = 'https://wenku.baidu.com'
# Production upload endpoint:
# WENKU_UPLOAD_URL = WENKU_BASE_URL + '/doc/orgapi/orgupload'
# Test upload endpoint (currently active):
WENKU_UPLOAD_URL = '{0}{1}'.format(WENKU_BASE_URL, '/doc/orgapi/fororguploadtest')
# Production app key:
# WENKU_APPKEY = '<your production appkey>'
# Test app key (currently active):
WENKU_APPKEY = 'test'
# Production secret:
# WENKU_SECRET = '<your production secret>'
# Test secret (currently active):
WENKU_SECRET = 'g75a50a20baed9bbd12a6e69d091f4d6'
def upload(file_path='', tmp_dir=r'C:\Users\Administrator\Desktop\1'):
    """Upload one document to ``WENKU_UPLOAD_URL`` and print the outcome.

    The file is first renamed (moved) into ``tmp_dir`` under an ASCII-only
    name built from the MD5 of its original base name, then sent as a
    multipart POST together with a signed set of form fields. The file is
    not renamed back afterwards.

    Args:
        file_path: Path of the document to upload.
        tmp_dir: Directory that receives the renamed file. Defaults to the
            folder that was previously hard-coded in this function.

    Returns:
        None. The HTTP status code, the raw response text and either the
        returned ``doc_id`` or the error code/message are printed.

    Raises:
        OSError: If the file cannot be renamed or opened. Errors raised by
            ``requests``, by ``json.loads`` or by missing keys in the
            response are not caught and propagate to the caller.
    """
    # ``import urllib`` on its own does not guarantee that the ``parse``
    # submodule has been loaded, so import it explicitly here.
    import urllib.parse

    file_name = os.path.basename(file_path)
    file_tail = get_file_tail(file_name)
    # Only strip an extension that is really there: for dot-less names
    # get_file_tail() returns the placeholder 'default', and slicing
    # len('default') + 1 characters off the end would truncate the title.
    stem, dot, _ = file_name.rpartition('.')
    file_name_without_tail = stem if dot else file_name
    new_file_name = get_md5(file_name_without_tail) + '.' + file_tail
    # Give the file a name without Chinese characters before sending it.
    tmp_file_path = os.path.join(tmp_dir, new_file_name)
    os.rename(file_path, tmp_file_path)

    timestamp = str(int(time.time()))
    # flag: 0 = free, 3 = organisation, 10 = paid, 19 = commercial document.
    flag = '10'
    # price is expressed in fen (1/100 yuan).
    price = '2'
    title = urllib.parse.quote(file_name_without_tail)
    summary = urllib.parse.quote(file_name_without_tail)
    # The '&timestamp' separator had been mangled into the HTML entity for
    # '&times' followed by 'tamp', which produced an invalid signature.
    # NOTE(review): the field order is taken from the original string;
    # confirm it against the API documentation.
    pre_sign_str = (
        'appkey={0}&flag={1}&price={2}&summary={3}&timestamp={4}'
        '&title={5}&secret={6}&filemd5={7}'
    ).format(
        WENKU_APPKEY, flag, price, summary, timestamp, title,
        WENKU_SECRET, get_file_md5(tmp_file_path)
    )
    print('pre_sign_str1:', pre_sign_str)
    sign_str = get_md5(pre_sign_str)
    print('sign_str:', sign_str)

    # The encoder streams the file while the request is being sent, so the
    # POST has to happen while the handle is still open; the ``with`` block
    # then closes it even if the request raises.
    with open(tmp_file_path, 'rb') as doc_file:
        fields = {
            'object_file': (os.path.basename(tmp_file_path), doc_file,
                            'multipart/form-data'),
            'appkey': WENKU_APPKEY,
            'timestamp': timestamp,
            'sign': sign_str,
            'title': title,
            # Reuse the signed values so form and signature cannot diverge.
            'flag': flag,
            'price': price,
            'summary': summary,
        }
        encoder = MultipartEncoder(fields=fields)
        print('ct:', encoder.content_type)
        # NOTE(review): verify=False disables TLS certificate checking.
        result = requests.Session().post(
            url=WENKU_UPLOAD_URL, data=encoder, verify=False,
            headers={'Content-Type': encoder.content_type})

    print('result.status_code:', result.status_code)
    if result.status_code != 200:
        print('网络请求出错')
        return

    result_str = result.text
    print('result_str:', result_str)
    result_json = json.loads(result_str)
    code = result_json['status']['code']
    msg = result_json['status']['msg']
    if code == 0:
        print('msg:', msg)
        payload = result_json['data']
        print('上传成功,doc_id:', payload['doc_id'])
    else:
        print('code:', code)
        print('msg:', msg)
# Extract the extension of a file name.
def get_file_tail(file_name=''):
    """Return the lower-cased text after the last '.' in ``file_name``.

    When the name contains no '.', the placeholder string 'default' is
    returned instead. A name ending in '.' yields an empty string.
    """
    _, dot, suffix = file_name.rpartition('.')
    if not dot:
        return 'default'
    return suffix.lower()
def get_file_md5(file_path=''):
    """Return the hexadecimal MD5 digest of a file's content.

    The file is hashed in fixed-size chunks so that large documents are
    not loaded into memory in one piece. The digest is also printed.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The 32-character hex digest, or '' when the file cannot be opened
        or read (the exception's ``args`` are printed in that case).
    """
    digest = hashlib.md5()
    try:
        with open(file_path, 'rb') as fp:
            for chunk in iter(lambda: fp.read(65536), b''):
                digest.update(chunk)
    except (OSError, ValueError) as e:
        # Best effort, as before: report the problem and hand back ''.
        print(e.args)
        return ''
    file_md5 = digest.hexdigest()
    print(file_md5)
    return file_md5
def get_md5(source_str=''):
    """Return the hexadecimal MD5 digest of ``source_str`` encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(source_str.encode('utf8'))
    return digest.hexdigest()
if __name__ == '__main__':
    import glob

    # Hand every file in the folder to upload(), one at a time.
    for f in glob.glob('C:/Users/Administrator/Desktop/1/*'):
        # The wildcard also matches sub-directories, which cannot be
        # uploaded as a document, so skip anything that is not a file.
        if not os.path.isfile(f):
            continue
        upload(f)
        # Every upload() call issues an HTTP request; pause between calls
        # to avoid hitting the server too frequently. Adjust as needed.
        time.sleep(2)
"""
假设你的所有文档都在 C:/Users/Administrator/Desktop/1/ 下,比如有 m1.jpg、m2.doc、m3.md、m4.txt。
用 glob 模块遍历该文件夹下的所有文件,把每个文件的路径依次作为参数传给 upload() 函数。
另外,upload() 的参数不必设置缺省值,函数定义可以写成:
def upload(file_path):
"""
if __name__ == '__main__':
    import glob

    # Hand every file in the folder to upload(), one at a time.
    for f in glob.glob('C:/Users/Administrator/Desktop/1/*'):
        # The wildcard also matches sub-directories, which cannot be
        # uploaded as a document, so skip anything that is not a file.
        if not os.path.isfile(f):
            continue
        upload(f)
        # Every upload() call issues an HTTP request; pause between calls
        # to avoid hitting the server too frequently. Adjust as needed.
        time.sleep(2)