原始数据如下
# Sample drug catalogue: one row per product, keyed by product number and barcode.
a = pd.DataFrame(
    {
        '药品名': ['感康', '感冒灵', '皮炎平'],
        '商品号': ['8124', '0023', '0045'],
        '条形码': ['69000000001', '69000002103', '69000000003'],
        '规格': ['12片', '8袋', '15克'],
    }
)
# Sample approval/price table: shares the 商品号 and 条形码 columns with ``a``.
b = pd.DataFrame(
    {
        '批准文号': [
            '国药准字Z43018942',
            '国药准字Z23010042',
            '国药准字H13018942',
            '国药准字B12018332',
        ],
        '商品号': ['0012', '0023', '1023', '0045'],
        '条形码': ['69000000001', '69000000002', '69000000003', '69000000553'],
        '零售价': [10, 9, 7.5, 25],
    }
)
我想通过‘商品号’和‘条形码’这两列,合并这两个表格
但这两列,只要其中一列相等即可合并,我要的结果如下:
如何用最短的代码实现这个目的呢?
照你这个说法,这两个红框代表的都是皮炎平,可是价格却不一样,该怎么合并?
如果两个表的数据不一致,以哪一个为准?例如:a[0]的商品号和b[0]的商品号不一致,合并后以谁为准?
# -*- coding: utf-8 -*-
import pandas as pd


class Handle:
    """Merge table ``a`` with table ``b`` when 商品号 OR 条形码 is equal.

    Each row of ``a`` is paired with at most one row of ``b``. Matches on
    商品号 are collected first (in ``a`` row order), then matches on 条形码
    for the rows of ``a`` that are still unpaired. When several rows of
    ``b`` carry the same key value, the first one wins.

    The merged rows take 药品名 / 商品号 / 条形码 / 规格 from ``a`` and
    批准文号 / 零售价 from ``b``; the result is stored column-wise in
    ``self.c`` (a dict of lists suitable for ``pd.DataFrame``).

    Args:
        a: DataFrame with columns 药品名, 商品号, 条形码, 规格.
        b: DataFrame with columns 批准文号, 商品号, 条形码, 零售价.
    """

    # Key columns tried in priority order.
    _KEY_COLUMNS = ('商品号', '条形码')
    # Columns copied from each side into the merged result.
    _A_COLUMNS = ('药品名', '商品号', '条形码', '规格')
    _B_COLUMNS = ('批准文号', '零售价')

    def __init__(self, a, b):
        self.a = a
        self.b = b
        self.c = {'药品名': [], '商品号': [], '条形码': [], '规格': [],
                  '批准文号': [], '零售价': []}
        # Positions of rows of ``a`` that already made it into ``self.c``.
        self._merged_rows = set()

        for key in self._KEY_COLUMNS:
            # First position in ``b`` of every key value; missing values are
            # skipped because a missing key must never count as a match.
            first_in_b = {}
            for pos_b, value in enumerate(self.b[key]):
                if not pd.isna(value):
                    first_in_b.setdefault(value, pos_b)

            for pos_a, value in enumerate(self.a[key]):
                if pd.isna(value):
                    continue
                pos_b = first_in_b.get(value)
                if pos_b is not None:
                    self.combine(num_a=pos_a, num_b=pos_b)

    def combine(self, num_a, num_b):
        """Append the pairing of ``a`` row ``num_a`` and ``b`` row ``num_b``.

        Both arguments are 0-based row positions (not index labels). A row
        of ``a`` that has already been merged is ignored, so an earlier
        (higher-priority) match is never overwritten or duplicated.
        """
        if num_a in self._merged_rows:
            return
        self._merged_rows.add(num_a)
        # ``.iloc`` keeps the lookup positional even for a non-default index.
        for column in self._A_COLUMNS:
            self.c[column].append(self.a[column].iloc[num_a])
        for column in self._B_COLUMNS:
            self.c[column].append(self.b[column].iloc[num_b])


if __name__ == '__main__':
    a = pd.DataFrame(
        {'药品名': ['感康', '感冒灵', '皮炎平'],
         '商品号': ['8124', '0023', '0045'],
         '条形码': ['69000000001', '69000002103', '69000000003'],
         '规格': ['12片', '8袋', '15克']})
    b = pd.DataFrame(
        {'批准文号': ['国药准字Z43018942', '国药准字Z23010042',
                  '国药准字H13018942', '国药准字B12018332'],
         '商品号': ['0012', '0023', '1023', '0045'],
         '条形码': ['69000000001', '69000000002', '69000000003',
                 '69000000553'],
         '零售价': [10, 9, 7.5, 25]})
    handle = Handle(a, b)
    c = pd.DataFrame(handle.c)
    print(c)