有哪位大能可以帮我改一下这个代码吗?

from user_agent import *
from lxml import etree
from tubiao import *
import pymysql
import time
import os

def get_html(url, timeout=10):
    """Fetch *url* and return the response with UTF-8 text decoding.

    Args:
        url: Address to download.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests.get`` can block indefinitely on a stalled connection.

    Returns:
        The ``requests`` response object, with ``encoding`` forced to UTF-8.
    """
    # NOTE(review): ``requests`` and ``head()`` are not imported by name in
    # this file; presumably they arrive through ``from user_agent import *``.
    r = requests.get(url, headers=head(), timeout=timeout)
    r.encoding = "utf8"
    return r

def down_pic(wen_date, t_pic_url, picpath):
    """Download every image of one article into *picpath*.

    Args:
        wen_date: Article date string; used as the file-name prefix.
        t_pic_url: Sequence of image URLs.
        picpath: Existing directory that receives the image files.

    Returns:
        The concatenation of ``"<wen_date>_<n>.jpg,"`` for every image that
        was downloaded successfully (empty string if there were none).
    """
    saved = []
    for index, pic_url in enumerate(t_pic_url, start=1):
        try:
            r = get_html(pic_url)
            # NOTE(review): the "_" separators were reconstructed -- the
            # pasted source had lost them. The file on disk keeps the last
            # seven characters of the URL as its suffix, while the name
            # reported/returned below always ends in ".jpg".
            filename = os.path.join(
                picpath, wen_date + '_' + str(index) + '_' + pic_url[-7:]
            )
            with open(filename, 'wb') as fp:
                fp.write(r.content)
        except Exception as exc:
            # Best effort: one broken image must not abort the article,
            # but the failure is reported instead of silently swallowed.
            print('图片下载失败: {} ({})'.format(pic_url, exc))
            continue
        print('{}图片下载完成'.format(wen_date + '_' + str(index) + '.jpg'))
        saved.append(wen_date + '_' + str(index) + '.jpg,')
        time.sleep(1)  # be polite to the server between downloads
    return ''.join(saved)
def _split_source_line(lai1, wdate):
    """Split the "date|source" info line into (source, date, year)."""
    if '|' in lai1:
        fields = lai1.split('|')
        # The first three characters of each field are dropped; presumably
        # a label such as "日期:" / "来源:" -- verify against the page.
        wen_date = fields[0][3:]
        return fields[1][3:], wen_date, wen_date[0:4]
    # No separator: the whole line is the source and the list-page date
    # stands in for the year directory.
    return lai1, '', wdate


def _join_content(parts):
    """Join text nodes, turning each run of CRLF nodes into one newline."""
    noise = ('\n \t', '\xa0', '\r\n\xa0')
    parts = [p for p in parts if p not in noise]
    pieces = []
    for i, part in enumerate(parts):
        if part != '\r\n':
            pieces.append(part)
        elif i + 1 < len(parts) and parts[i + 1] != '\r\n':
            # Only the last CRLF of a run emits a newline; the bounds check
            # keeps a trailing CRLF from indexing past the end.
            pieces.append('\n')
    return ''.join(pieces)


def get_text(url, tit, wdate):
    """Download one article, save its text and images, report the row count.

    The text is written to ``<cwd>/<year>/<date>/<date>_<title[:7]>.txt`` and
    the images go into the same directory.

    Args:
        url: Address of the article page.
        tit: Article title (converted with ``str``); its first seven
            characters become part of the file name.
        wdate: Date taken from the list page; surrounding ``[]`` are stripped.
    """
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         port=3306, db="gxjdnews")
    try:
        cursor = db.cursor()
        ss = str(tit)
        wdate = str(wdate).strip('[]')
        r = get_html(url)
        res = etree.HTML(r.text)
        lai = res.xpath('//div[@id="notice2txtArea"]/p[2]//text()')
        t_pic_url = res.xpath('//div[@id="text"]/div//img//@src')
        # An article without the info paragraph yields an empty list.
        lai1 = lai[0].replace('\n', '').replace(' ', '') if lai else ''
        wen_laiyuan, wen_date, wen_year = _split_source_line(lai1, wdate)
        str1 = _join_content(res.xpath('//div[@id="text"]//text()'))

        # makedirs creates the year and date levels in one step and copes
        # with an empty date (files then land in the year directory).
        picpath = os.path.join(os.getcwd(), wen_year, wen_date)
        os.makedirs(picpath, exist_ok=True)
        # Drop characters that are illegal in Windows file names.
        safe_title = ''.join(ch for ch in ss[:7] if ch not in '\\/:*?"<>|')
        txtpath = os.path.join(picpath, wen_date + '_' + safe_title + '.txt')
        with open(txtpath, 'w', encoding='utf-8') as fp:
            fp.write(lai1)
            fp.write(str1)

        img = down_pic(wen_date, t_pic_url, picpath) if t_pic_url else ''
        print('%s-%s' % (wen_date, ss))

        # NOTE(review): no INSERT statement is visible here, so
        # ``wen_laiyuan`` and ``img`` are computed but never stored and the
        # commit below has nothing to persist. The statement was probably
        # lost from the paste -- restore it before relying on the table.
        try:
            db.commit()
        except Exception:
            db.rollback()
        sql = 'select * from news;'
        print('库中共有%s 条数据' % cursor.execute(sql))
    finally:
        # Always release the connection, even when the download fails.
        db.close()

def get_yaowen(url, page):
    """Scrape one list page and download every article linked from it.

    Args:
        url: Address of the news list page.
        page: Page number, used only in the progress messages.
    """
    r = get_html(url)
    res = etree.HTML(r.text)
    title = res.xpath('//div[@id="contentArea"]/dl/dt//text()')
    t_url = res.xpath('//div[@id="contentArea"]/dl/dt//@href')
    # Drop the bullet markers; the remaining nodes alternate title, date.
    title = [text for text in title if text != '▪']
    title_list = title[0::2]
    date_list = title[1::2]
    print("已经开始爬取第{}页数据".format(page))
    # zip stops at the shortest sequence, so a missing link or date cannot
    # raise an IndexError.
    for link, tit, wdate in zip(t_url, title_list, date_list):
        try:
            get_text(link, tit, wdate)
        except Exception as exc:
            # Best effort: skip the broken article but say why.
            print('文章抓取失败: {} ({})'.format(link, exc))
            continue
    print("第{}页数据已爬完".format(page))
    time.sleep(3)  # pause between list pages

def spider():
    """Ask for a page range and scrape every list page in it (inclusive)."""
    jd_url = 'http://www.gxcme.edu.cn/news/p-'
    try:
        start_page = int(input("请输入爬取的开始页:"))
        end_page = int(input("请输入爬取的结束页:"))
    except ValueError:
        # A typo should return to the menu rather than crash the program.
        print('页码必须是整数')
        return
    for i in range(start_page, end_page + 1):
        url = jd_url + str(i) + '.html'
        get_yaowen(url, i)
        time.sleep(2)
def fengxi():
    """Chart the number of articles per source over all scraped news."""
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         port=3306, db="gxjdnews")
    try:
        cursor = db.cursor()
        # Rows are staged in news_new and never committed, so (assuming a
        # transactional table) they are discarded when the connection closes.
        sql1 = 'insert news_new select * from news;'
        # The original overwrote this query with 'select * from news;',
        # which fed raw table columns into the chart instead of the counts.
        sql = ' select laiyuan,count(laiyuan) from news_new group by laiyuan;'
        print('\n查询所得数据有%s条' % cursor.execute(sql1))
        coun = cursor.execute(sql)
        print('统计查询所得数据有%s条' % coun)
        data = cursor.fetchall()
        print(data)
    finally:
        db.close()
    if coun != 0:
        laiy_list = [row[0] for row in data]
        laiy_count = [row[1] for row in data]
        # The chart helper is given the series label as the first element.
        laiy_count.insert(0, "新闻来源")
        tu = Tubiao()
        tu.zexian(laiy_list, [laiy_count], ['机电学院新闻统计', '篇数'], '机电学院新闻统计')
        tu.zuxian(laiy_list, [laiy_count], ['机电学院新闻统计', '篇数'], '机电学院新闻统计')
        print('\n')
        for name, count in zip(laiy_list, laiy_count[1:]):
            print(name, count, end=',')
        print('\n')
def year_month_find():
    """Chart the number of articles per source for one year and month."""
    yy = str(input("请输入查询的年份:"))
    mm = str(input("请输入查询的月份:"))
    try:
        # Integers keep MySQL's numeric comparison ('05' = 5) that the
        # original string-formatted query relied on.
        year, month = int(yy), int(mm)
    except ValueError:
        print('年份和月份必须是整数')
        return
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         port=3306, db="gxjdnews")
    try:
        cursor = db.cursor()
        # Bound parameters instead of str.format: the values are user input.
        # The staged rows are never committed, so (assuming a transactional
        # table) they are discarded when the connection closes.
        sql1 = ('insert news_new select * from news '
                'where (mid(wdate,6,2)=%s and year=%s);')
        sql = ' select laiyuan,count(*) from news_new group by laiyuan;'
        print('\n查询所得数据有%s条' % cursor.execute(sql1, (month, year)))
        coun = cursor.execute(sql)
        print('统计查询所得数据有%s条' % coun)
        data = cursor.fetchall()
    finally:
        db.close()
    if coun != 0:
        laiy_list = [row[0] for row in data]
        laiy_count = [row[1] for row in data]
        # The chart helper is given the series label as the first element.
        laiy_count.insert(0, "新闻来源")
        tb_tit = '{}年{}月各部门文章发表情况'.format(yy, mm)
        tb_name = '{}年{}月各部门图表状态'.format(yy, mm)
        tu = Tubiao()
        tu.zexian(laiy_list, [laiy_count], [tb_tit, '篇数'], tb_name)
        tu.zuxian(laiy_list, [laiy_count], [tb_tit, '篇数'], tb_name)
        print('\n')
        for name, count in zip(laiy_list, laiy_count[1:]):
            print(name, count, end=' ')
        print('\n')
def year_many_month():
    """Chart the number of articles per source for a range of months."""
    yy = str(input("请输入查询的年份:"))
    start_m = str(input("请输入查询的开始月份:"))
    end_m = str(input("请输入查询的结束月份:"))
    try:
        year = int(yy)
        months = list(range(int(start_m), int(end_m) + 1))
    except ValueError:
        print('年份和月份必须是整数')
        return
    if not months:
        # An empty range would produce "in ()", which is invalid SQL.
        print('开始月份不能大于结束月份')
        return
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         port=3306, db="gxjdnews")
    try:
        cursor = db.cursor()
        # One placeholder per month: formatting a tuple into the query
        # produced "(5,)" for a single month, which MySQL rejects.
        placeholders = ','.join(['%s'] * len(months))
        sql1 = ('insert news_new select * from news '
                'where (mid(wdate,6,2) in ({}) and year=%s);').format(placeholders)
        sql = ' select laiyuan,count(*) from news_new group by laiyuan;'
        # The staged rows are never committed, so (assuming a transactional
        # table) they are discarded when the connection closes.
        print('\n查询所得数据有%s条' % cursor.execute(sql1, months + [year]))
        coun = cursor.execute(sql)
        print('统计查询所得数据有%s条' % coun)
        data = cursor.fetchall()
    finally:
        db.close()
    if coun != 0:
        laiy_list = [row[0] for row in data]
        laiy_count = [row[1] for row in data]
        # The chart helper is given the series label as the first element.
        laiy_count.insert(0, "文章来源")
        tb_tit = '{}年{}-{}月各部门文章发表情况'.format(yy, start_m, end_m)
        tb_name = '{}年{}-{}月各部门图表状态'.format(yy, start_m, end_m)
        tu = Tubiao()
        tu.zexian(laiy_list, [laiy_count], [tb_tit, '篇数'], tb_name)
        tu.zuxian(laiy_list, [laiy_count], [tb_tit, '篇数'], tb_name)
        print('\n')
        for name, count in zip(laiy_list, laiy_count[1:]):
            print(name, count, end=' ')
        print('\n')
def year_find():
    """Chart the number of scraped articles per year."""
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         port=3306, db="gxjdnews")
    try:
        cursor = db.cursor()
        sql = ' select year,count(*) from news group by year;'
        # The query used to run twice just to print two counts; one
        # execution gives the same number for both messages.
        coun = cursor.execute(sql)
        print('\n查询所得数据有%s条' % coun)
        print('统计所得数据有%s条' % coun)
        data = cursor.fetchall()
        print(data)
    finally:
        db.close()
    # A leftover exit() here used to terminate the whole program before any
    # chart was drawn; it has been removed.
    if coun != 0:
        laiy_list = [row[0] for row in data]
        laiy_count = [row[1] for row in data]
        # The chart helper is given the series label as the first element.
        laiy_count.insert(0, "年度")
        tb_tit = '机电学院校园网新闻发布年度统计'
        tb_name = '机电新闻发布年度统计'
        tu = Tubiao()
        tu.zexian(laiy_list, [laiy_count], [tb_tit, '篇数'], tb_name)
        tu.zuxian(laiy_list, [laiy_count], [tb_tit, '篇数'], tb_name)
        print('\n')
        for name, count in zip(laiy_list, laiy_count[1:]):
            print(name, count, end=',')
        print('\n')
def choose():
    """Show the menu in a loop and run the selected action.

    Any input other than ``1``-``5`` (including ``6``) leaves the loop.
    """
    actions = {
        '1': spider,
        '2': year_month_find,
        '3': year_many_month,
        '4': fengxi,
        '5': year_find,
    }
    while True:
        print('-----------------------------------------')
        print('1.从网站爬取数据')
        print('2.按年月统计单个月的数据')
        print('3.按年月统计多个月的数据')
        print('4.统计爬取的所有数据')
        print('5.按年统计所有数据')
        print('6.退出')
        print('-----------------------------------------')
        # Strip so that stray whitespace around the digit is not mistaken
        # for a request to quit.
        ch = input("\n请输入你要选择的操作:").strip()
        action = actions.get(ch)
        if action is None:
            break
        action()
def main():
    """Program entry point: run the interactive menu."""
    choose()


# The double underscores were lost in the paste; ``name == 'main'`` raises
# NameError as soon as the file is loaded.
if __name__ == '__main__':
    main()

你这个代码重新编辑一下,用插入代码块的方式,要不然,别人就算想帮你修改也没心情了,还要重新调整缩进

另外,你把你的问题描述一下,哪里的结果不符合预期,或者哪里有报错信息,也一起发上来