我想把每个网页爬取到的内容写入 json 文件的一行里,即一个网页占一行。但我自己写的程序总是让一个网页的内容在 json 文件中占很多行,希望得到大家的帮助。
import requests
# Browser-like request headers sent with every query.
headers = {
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Referer': 'http://www.wikipedia.org/',
    'Connection': 'keep-alive',
}

# The context manager closes the output file even when a request raises
# part-way through the loop (a manual close() at the end would be skipped).
with open('gongshi.json', 'w', encoding='utf-8') as fileObject:
    for i in range(10):
        # str(i) + '1' makes the query's "start" value 01, 11, 21, ..., 91
        # while "limit" stays at 10.
        response = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/sdq/sdqagent.cgi?infmt=json&outfmt=json&query={%22select%22:%22*%22,'
            '%22collection%22:%22compound%22,%22where%22:{%22ands%22:[{%22*%22:%22CCCC%22}]},'
            '%22order%22:[%22relevancescore,desc%22],%22start%22:' + str(i) + '1,%22limit%22:10,%22width%22:1000000,'
            '%22listids%22:0}',
            headers=headers,
        )
        # The body is written verbatim followed by one newline, so any line
        # breaks inside the response text are carried into the file as-is.
        fileObject.write(response.text + '\n')
import json
import re

import requests
# Browser-like request headers sent with every query.
headers = {
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Referer': 'http://www.wikipedia.org/',
    'Connection': 'keep-alive',
}


def compact_json(text):
    """Return *text* squeezed onto a single line.

    Valid JSON is parsed and re-serialized without any inter-token
    whitespace, so spaces and '+' characters inside string values are kept
    intact (a blanket whitespace-stripping regex would delete them).
    Text that is not valid JSON only has its line breaks removed.
    """
    try:
        parsed = json.loads(text)
    except ValueError:
        # Not JSON (for example an HTML error page): still keep it on one
        # line so the one-response-per-line layout of the file holds.
        return ''.join(text.splitlines())
    # ensure_ascii=False keeps non-ASCII characters readable; the output file
    # is opened as UTF-8.
    return json.dumps(parsed, ensure_ascii=False, separators=(',', ':'))


def main():
    """Fetch ten result pages and write each response as one line of gongshi.json."""
    # The context manager guarantees the file is flushed and closed, even if
    # a request raises part-way through the loop.
    with open('gongshi.json', 'w', encoding='utf-8') as fileObject:
        for i in range(10):
            # str(i) + '1' makes the query's "start" value 01, 11, 21, ..., 91
            # while "limit" stays at 10.
            response = requests.get(
                'https://pubchem.ncbi.nlm.nih.gov/sdq/sdqagent.cgi?infmt=json&outfmt=json&query={%22select%22:%22*%22,'
                '%22collection%22:%22compound%22,%22where%22:{%22ands%22:[{%22*%22:%22CCCC%22}]},'
                '%22order%22:[%22relevancescore,desc%22],%22start%22:' + str(i) + '1,%22limit%22:10,%22width%22:1000000,'
                '%22listids%22:0}',
                headers=headers,
            )
            fileObject.write(compact_json(response.text) + '\n')


if __name__ == '__main__':
    main()
把 JSON 串里的换行符(\n)去掉,每个网页的内容就只占一行了。