批量导入数据,比较最后列的时间,时间是更新的则更新,没有则创建
导入90多万行的数据,效率很低
class IpAddr(models.Model):
ip_addr = models.CharField(verbose_name='IP地址', max_length=16)
mac_addr = models.CharField(verbose_name='MAC地址', max_length=14)
interface = models.CharField(verbose_name='接口', max_length=16)
cap_datetime = models.DateTimeField(verbose_name='采集时间')
class Meta:
# 模型对象返回的记录结果按照哪个字段排序
ordering = ['cap_datetime']
# 模型类在后台管理中显示的名称
verbose_name = 'ARP表'
def ip_multi(request):
"""
ip arp 地址对应关系导入
:param request:
:return:
"""
if request.method == 'GET':
return HttpResponse('请post上传csv文件!')
# 是否上传空文件
csv_File = request.FILES.get('csv_file', None)
if not csv_File:
return redirect('/ip/list/')
elif os.path.splitext(csv_File.name)[1] != '.csv':
return redirect('/ip/list/')
path = 'media/uploads/'
if not os.path.exists(path):
os.makedirs(path)
# 保存文件
with open(os.path.join(path, csv_File.name), 'wb+') as fp:
for chunk in csv_File.chunks():
fp.write(chunk)
with open(os.path.join(path, csv_File.name), encoding='utf-8') as fp:
count = 0
csv_read = csv.reader(fp)
for row in csv_read:
# 先把字符串转成不带时区的datetime
dt = datetime.strptime(row[3], '%Y-%m-%d %H:%M:%S')
# 把不带时区的datetime转成timestamp再转成秒
ts = int(dt.timestamp())
# 把秒转成带时区的datetime
t = datetime.fromtimestamp(ts, pytz.timezone('Asia/Shanghai'))
data_dict = {
'ip_addr': row[0],
'mac_addr': row[1],
'interface': row[2],
# 解决时区问题 时间字符串转为时间 datetime.strptime('2018-03-02 17:41:20', '%Y-%m-%d %H:%M:%S')
'cap_datetime': t
# 'cap_datetime': datetime.strptime(row[3], '%Y-%m-%d %H:%M:%S')
}
queryset = models.IpAddr.objects.filter(ip_addr=data_dict['ip_addr'],
mac_addr=data_dict['mac_addr'],
interface=data_dict['interface'],
# cap_datetime__lte=data_dict['cap_datetime']
)
count += 1
print('count:', count)
if queryset.exists():
print('找到:', queryset.count())
for query_obj in queryset:
print("data_dict['cap_datetime']", data_dict['cap_datetime'])
print('query_obj.cap_datetime', query_obj.cap_datetime)
if query_obj.cap_datetime <= data_dict['cap_datetime']:
query_obj.cap_datetime = data_dict['cap_datetime']
query_obj.save()
else:
print('新增数据:', end='')
pprint(data_dict)
models.IpAddr.objects.create(**data_dict)
return redirect('/ip/list/')
csv 文件的的格式:
10.155.88.121,a471-743f-b926,GE1/0/1,2022-04-01 00:00:01
能正常插入,但是效率很低
是不是先插入前,先处理下csv文件,之找出最新的数据?
慢是因为频繁执行sql,消耗了大量的iO,建议是将数据全部从库里面取出来,分更新和新增两组,计算之后,然后通过批量更新新增方法