想把两个表格按照最左边一列的标题进行合并,求解,需要用来做数据分析。
from bs4 import BeautifulSoup
import requests
import csv
import bs4
import easygui
import sys
# HTTP headers sent with every request; the User-Agent imitates a desktop
# Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
    )
}
# Tag name searched for by get_contents(); also passed to main() as the
# output file stem by the crawl loop.
name = "tr"
# NOTE(review): the path separator was lost in the pasted source; "d:/" is
# assumed. This constant is not referenced by the functions in this script.
save = "d:/主板A股01.csv"
def check_link(url, timeout=30):
    """Fetch *url* with an HTTP POST and return the decoded response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so a stalled connection would
            otherwise block the crawl indefinitely.

    Returns:
        The response text, decoded with the encoding detected from the body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = requests.post(url, headers=headers, timeout=timeout)
    # Fail fast on an error status instead of handing an error page on.
    r.raise_for_status()
    # Decode with the encoding guessed from the content rather than the one
    # declared in the response headers.
    r.encoding = r.apparent_encoding
    return r.text
def get_contents(ulist, rurl):
    """Extract table rows from an HTML document and append them to *ulist*.

    Args:
        ulist: List that receives one list per matched element; it is
            modified in place.
        rurl: HTML source text to parse (despite the name, not a URL).

    Every element whose tag equals the module-level ``name`` is collected.
    For each one, the ``.string`` of every direct child is recorded, so a
    child without a single string yields ``None``.
    """
    soup = BeautifulSoup(rurl, "html.parser")
    for row in soup.find_all(name):
        ulist.append([child.string for child in row])
def save_contents(urlist, d, keyd, go, a, name):
    """Append the data rows of *urlist* to ``d:/<name>.csv``.

    Args:
        urlist: List of rows (each a list of cell values) to write.
        d: Unused; kept so existing callers keep working.
        keyd: Unused; kept so existing callers keep working.
        go: Unused; kept so existing callers keep working.
        a: Unused; kept so existing callers keep working.
        name: File stem of the CSV file to append to.

    Rows at index 26 and below are skipped, and a row is written only when
    its first cell differs from the first cell of the row before it. Empty
    rows are never written.
    """
    # NOTE(review): the comparison operator in the pasted source was lost
    # ("if a 26"); ">" is assumed, i.e. the leading rows are page layout
    # rather than data - confirm against the live page.
    last_skipped_index = 26
    # newline="" is what the csv module requires of its file object;
    # without it every row is followed by a blank line on Windows.
    with open(
        "d:/" + name + ".csv", "a+", encoding="utf-8-sig", newline=""
    ) as out_file:
        writer = csv.writer(out_file)
        for index in range(last_skipped_index + 1, len(urlist)):
            row = urlist[index]
            previous = urlist[index - 1]
            if not row:
                # Nothing to compare or write.
                continue
            if previous and row[0] == previous[0]:
                # Same leading cell as the preceding row: treated as a repeat.
                continue
            writer.writerow(row)
def main(url, a, name):
    """Download one page, extract its rows and append them to a CSV file.

    Args:
        url: Page address handed to check_link().
        a: Page counter, forwarded unchanged to save_contents().
        name: File stem of the CSV file, forwarded to save_contents().
    """
    urli = []
    rs = check_link(url)
    get_contents(urli, rs)
    save_contents(urli, 0, True, 0, a, name)
# Crawl result pages 1 to 5, appending the rows of each page to the CSV file.
for u in range(1, 6):
    print(u)
    urs1 = (
        "https://stock.cfi.cn/cfidata.aspx?sortfd=&sortway=&curpage="
        + str(u)
        + "&fr=content&ndk=A0A1934A1939A1959A1960&xztj=&mystock="
    )
    main(urs1, u, name)
可以在源代码上进行修改,这是个爬虫的源码,爬完会存到表格里,需要把2个表格合并,谢谢了
import pandas as pd
def merge_tables(left: pd.DataFrame, right: pd.DataFrame, key: str = "列1") -> pd.DataFrame:
    """Attach the columns of *right* to *left*, matching rows on *key*.

    Args:
        left: Table whose rows, and row order, are kept as they are.
        right: Table that supplies the extra columns.
        key: Name of the column both tables share.

    Returns:
        A new frame with one row per row of *left*, indexed from 1. When
        *right* holds several rows for a key only the first is used; when
        it holds none, the added columns are left empty (NaN) and the key
        value from *left* is kept. Other column names present in both
        tables keep the *left* value, and the *right* value gets a
        ``_t2`` suffix.
    """
    # Use only the first right-hand row per key.
    first_matches = right.drop_duplicates(subset=key, keep="first")
    merged = left.merge(
        first_matches, on=key, how="left", suffixes=("", "_t2")
    )
    # Number the rows from 1 in the exported sheet.
    merged.index = merged.index + 1
    return merged


if __name__ == "__main__":
    df1 = pd.read_csv("t1.csv", encoding='gbk')
    df2 = pd.read_csv("t2.csv", encoding='gbk')
    df = merge_tables(df1, df2)
    # to_excel() no longer accepts an ``encoding`` argument (removed in
    # pandas 2.0); .xlsx files store text as Unicode regardless.
    df.to_excel("tt.xlsx")
解决思路是:假设已获取到了多个表格,表头格式一样,把每个表格读取成df0,在循环中用df=df.append(df0)将多个表格的数据依次添加到表格后面(注意:pandas 2.0起已移除DataFrame.append,应改用df=pd.concat([df, df0]))。