对爬取下来的数据:第二列取最小的三个数,第三列取最大的三个数(0和负数不取),并且每个数值要与第一列中对应的名称一一对应。
# Scraped rows: [name, second-column percentage string, third-column percentage string].
data = [
    ["dell", "34%", "17%"],
    ["hpe", "19%", "19%"],
    ["ms", "33%", "22%"],
    ["amazon", "13%", "13%"],
    ["nv", "26%", "32%"],
    ["oracle", "66%", "66%"],
    ["ali", "32%", "23%"],
    # ... remaining scraped rows go here (a bare `...` element would be an
    # Ellipsis object and crash the row indexing below).
]


def _positive_pairs(rows, col):
    """Return ``(value, name)`` pairs for column ``col``, skipping values <= 0.

    ``value`` is ``row[col]`` with its trailing "%" removed and parsed as a
    float; ``name`` is ``row[0]``, so each number stays tied to the entry in
    the first column of the same row.
    """
    pairs = []
    for row in rows:
        value = float(row[col].rstrip("%"))
        if value > 0:  # zero and negative values are excluded by requirement
            pairs.append((value, row[0]))
    return pairs


# Second column: the three smallest positive values, in ascending order.
top_second_col = sorted(_positive_pairs(data, 1))[:3]
print("Bottom 3 companies by second column:")
for value, company in top_second_col:
    print(f"{company}: {value}%")

# Third column: the three largest positive values, in descending order.
top_third_col = sorted(_positive_pairs(data, 2), reverse=True)[:3]
print("\nTop 3 companies by third column:")
for value, company in top_third_col:
    print(f"{company}: {value}%")
使用pandas库可以很方便地处理表格数据。首先读取csv文件,然后将第一行作为列名,取出第二列和第三列的数据,进行排序和筛选即可。
import pandas as pd
# Load the CSV file; the first row supplies the column names.
df = pd.read_csv('data.csv', header=0)

# Select the second and third columns by position.
col2 = df.iloc[:, 1]
col3 = df.iloc[:, 2]

# Second column: drop zero/negative values, then keep the three smallest.
col2_sorted = col2[col2 > 0].sort_values().head(3)
# Third column: drop zero/negative values, then keep the three largest.
col3_sorted = col3[col3 > 0].sort_values(ascending=False).head(3)

# Label each result with the header of the column it was actually taken from
# (positions 1 and 2, matching the iloc selections above); using positions
# 0 and 1 here would print the wrong header next to each list.
print(df.columns[1], col2_sorted.tolist())
print(df.columns[2], col3_sorted.tolist())
输出结果如下:
name [2, 3, 5]
age [9, 7, 6]
其中,tolist() 方法可以将 pandas 的 Series 对象转换为 Python 的列表。
先按第二列排序,然后取前3个
再按第3列倒序排,取前3个
## Compute avg_H, avg_S, avg_V for every entry in a folder and return the
## [img_path, avg_H, avg_S, avg_V] records plus avg_Hlist, avg_Slist, avg_Vlist ##
def compute_dir_HSV(root_path):
    """Run ``compute_HSV`` on every entry listed in ``root_path``.

    Each name returned by ``os.listdir(root_path)`` is joined onto
    ``root_path`` and passed to ``compute_HSV``, which is expected to yield
    three values (avg_H, avg_S, avg_V).

    Returns a 4-tuple ``(I_pathHSV, avg_Hlist, avg_Slist, avg_Vlist)``:
    ``I_pathHSV`` holds one ``[img_path, avg_H, avg_S, avg_V]`` list per
    entry (handy for selecting images by combined constraints), and the
    other three lists hold the per-entry H, S and V values in the same order.
    """
    records = []
    hues, sats, vals = [], [], []
    for entry_name in os.listdir(root_path):
        full_path = os.path.join(root_path, entry_name)
        h_mean, s_mean, v_mean = compute_HSV(full_path)
        records.append([full_path, h_mean, s_mean, v_mean])
        hues.append(h_mean)
        sats.append(s_mean)
        vals.append(v_mean)
    return records, hues, sats, vals