RT,自己随便写了个,显示错误为
ValueError: Can only compare identically-labeled DataFrame objects
但总感觉思路都错了....请指教一下
import pandas as pd
from pandas import DataFrame
L=[]
file=open('F:\\FCD\\实验文本.txt')
a=file.readlines()
for line in a:
L.append(line.split(','))
data=pd.DataFrame(L)
for i in range(len(data)):
for u in range(len(data)):
a=data[i:i+1]
b=data[u:u+1]
if u != i and a==b:
data.drop(data.index[[i,i+1]],inplace=True)
import pandas as pd
from pandas import DataFrame
L=[]
file=open('D:\FCD\1.txt')
a=file.readlines()
#print a
for line in a:
#print line
#print line.strip('\n')
L.append(line.strip('\n'))
print L
data=pd.DataFrame(L)
print data
print len(data)
IsDuplicated = data.duplicated()
print IsDuplicated
print type(IsDuplicated)
data = data.drop_duplicates()
print data
##for i in range(len(data)):
用这个试试,上面的格式有问题
import pandas as pd
from pandas import DataFrame
L=[]
file=open('F:\FCD\实验文本.txt')
a=file.readlines()
for line in a:
L.append(line.strip('\n'))
print L
data=pd.DataFrame(L)
print data
print len(data)
IsDuplicated = data.duplicated()
print IsDuplicated
print type(IsDuplicated)
data = data.drop_duplicates()
print data
你好,显示的行是正确的。
7 20110101,718,H ,粤B00G18 ,114.113983,22.5874...
8 20110101,759,H ,粤B00G18 ,114.114571,22.5886...
9 20110101,838,H ,粤B00G18 ,114.114883,22.5900...
10 20110101,936,H ,粤B00G18 ,114.114891,22.5893...
18 20110101,1016,H ,粤B00G18 ,114.114326,22.587...
19 20110101,1056,H ,粤B00G18 ,114.11412,22.5872...
20 20110101,1136,H ,粤B00G18 ,114.113983,22.585...
但是为什么后面的字符不显示了,只有省略号
开头加上下面2行试下
import numpy as np
np.set_printoptions(threshold=np.inf)
第一行import pandas as pd
后面加入下面3行
pd.set_option('display.width', 10000) # 设置字符显示宽度
pd.set_option('display.max_rows', None) # 设置显示最大行
pd.set_option('display.max_colwidth',500)
改进一下你的代码可以吗?
for i in range(len(data)):
for u in range(len(data)):
a=data.iloc[i,:]
b=data.iloc[u,:]
if u != i and a==b:
data.drop([u],inplace=True) #先以索引i为参考,然后删除和索引i一样的数据。新创一个文件,用我改进的这个试一下。
加一行这个:
np.set_printoptions(threshold=np.inf)