Python合并两个csv表,按照每一行的标题追加写入到后面

img
想把两个表按照最左边一列的标题进行合并,求解,需要用来做数据分析。


from bs4 import BeautifulSoup

import requests

import csv

import bs4

import easygui

import sys


# HTTP request headers sent with every request: a desktop-browser User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
    ),
}

# Tag name searched for by get_contents(); the crawl loop also passes it to
# main() as the output file name.
name = "tr"

# NOTE(review): the path punctuation was lost when the code was pasted;
# "d:/..." is a best-guess reconstruction -- confirm. Not referenced by the
# rest of the visible code.
save = "d:/主板A股01.csv"


def check_link(url, timeout=30):
    """Fetch *url* with an HTTP POST and return the decoded response body.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        The response text, decoded with the encoding detected from the body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    r = requests.post(url, headers=headers, timeout=timeout)
    # Check the status before using the body; an error page is not data.
    r.raise_for_status()
    # Decode with the encoding detected from the content itself rather than
    # the one guessed from the response headers.
    r.encoding = r.apparent_encoding
    return r.text



def get_contents(ulist, rurl):
    """Parse the HTML text *rurl* and append its table rows to *ulist*.

    Every element matching the module-level tag ``name`` is turned into a
    list holding the ``.string`` of each of its children.

    Args:
        ulist: List that receives the rows; it is modified in place.
        rurl: HTML document text (despite the parameter name, not a URL).

    Returns:
        None. The result is delivered through *ulist*.
    """
    soup = BeautifulSoup(rurl, "html.parser")
    for tr in soup.find_all(name):
        ui = []
        for td in tr:
            ui.append(td.string)
            # NOTE: the row list is appended once per cell, so ulist holds
            # repeated references to the same row. save_contents() only
            # writes a row whose first cell differs from the previous
            # entry's, which collapses these repeats.
            ulist.append(ui)




def save_contents(urlist, d, keyd, go, a, name):
    """Append the rows in *urlist* to the CSV file ``<d><name>.csv``.

    Entries at indexes 0 through 26 are skipped. After that, an entry is
    written only when its first cell differs from the first cell of the
    entry before it, so consecutive repeats of a row are written once.

    Args:
        urlist: List of rows, each a non-empty list of cell values.
        d: Prefix placed in front of the file name (for example a directory
            ending in a path separator). Converted with ``str()`` because
            ``main()`` passes the integer 0.
        keyd: Unused; kept so existing callers keep working.
        go: Unused; kept so existing callers keep working.
        a: Unused; kept so existing callers keep working.
        name: File name without the ``.csv`` extension.
    """
    path = str(d) + name + ".csv"
    # newline="" lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open(path, "a+", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f)
        for i, row in enumerate(urlist):
            # NOTE(review): the comparison operator was lost when the code
            # was pasted ("if a  26"); ">" is assumed here -- confirm.
            if i > 26 and row[0] != urlist[i - 1][0]:
                writer.writerow(list(row))



def main(url, a, name):
    """Download one page, extract its table rows and append them to a CSV.

    Args:
        url: Address of the page to fetch.
        a: Page number; forwarded to save_contents().
        name: Output file name without the ``.csv`` extension.
    """
    urli = []
    rs = check_link(url)
    get_contents(urli, rs)
    # Pass an empty string as the path prefix: save_contents() concatenates
    # it with the file name, and the integer 0 used before cannot be added
    # to a string.
    save_contents(urli, "", True, 0, a, name)


# Crawl pages 1 through 5 of the listing and append each page to the CSV.
# NOTE(review): the URL punctuation ("://", "/", "?") was lost when the code
# was pasted and has been reconstructed -- confirm against the real site.
for u in range(1, 6):
    print(u)
    urs1 = (
        "https://stock.cfi.cn/cfidata.aspx?sortfd=&sortway=&curpage="
        + str(u)
        + "&fr=content&ndk=A0A1934A1939A1959A1960&xztj=&mystock="
    )
    main(urs1, u, name)

可以在源代码上进行修改,这是个爬虫的源码,爬完会存到表格里,需要把2个表格合并,谢谢了


import pandas as pd


# Merge t2.csv onto t1.csv by the shared key column: every row of t1 is kept
# and the columns of the matching t2 row are appended to its right.
KEY = "列1"

df1 = pd.read_csv("t1.csv", encoding='gbk')
df2 = pd.read_csv("t2.csv", encoding='gbk')

# Keep only the first t2 row per key so each t1 row appears exactly once.
# Rows of t2 without a key are dropped first, because merge() would
# otherwise pair missing keys with each other.
df2_first = df2.dropna(subset=[KEY]).drop_duplicates(subset=KEY, keep="first")

# Left join: t1 rows without a match keep their key and get empty t2 cells.
# A non-key column present in both tables keeps its t1 values under the
# original name; the t2 values go into a "<column>_t2" column, so nothing is
# overwritten.
df = df1.merge(df2_first, on=KEY, how="left", suffixes=("", "_t2"))

# Number the rows from 1 in the exported sheet.
df.index = df.index + 1
# to_excel() has no `encoding` argument in pandas >= 2.0; xlsx files are
# always written as Unicode.
df.to_excel("tt.xlsx")

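解决思路是:假设已获取到了多个表格,且表头格式一样,将每个表格读取成 df0,在循环中用 df = df.append(df0) 把各个表格的数据依次追加到总表 df 的后面。(注意:pandas 2.0 起 DataFrame.append 已被移除,应改用 df = pd.concat([df, df0], ignore_index=True)。)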

img