# 一、示例:原始数据
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
# 二、示例:分组结果
part_1 = [393, 393, 394, 394, 394]
part_2 = [423, 424, 424, 425, 425]
part_3 = [454, 454, 454, 454, 456]
# 三、数组说明
# 1、确定的事项
# 1-1 给定的列表,已按由小到大排列好;
# 1-2【同组相邻数字的差额】一定明显小于【两组间相邻数字的差额】。以上述案例为例:【同组相邻数字的差额】小于3,【两组间相邻数字的差额】远大于3;
# 2、不确定事项
# 2-1 列表元素总个数不确定;
# 2-2 可拆分成几组不确定;可能拆分成4-5组
# 2-3 每组元素的个数不确定;可能是案例中的5个,也可能是8-10个
# 2-4 【同组相邻数字的差额】不一定小于3,比如可能出现[1,5,7,21,25,29]这种场景
# 请问如何使用python实现,或提供拆分思路也可以,谢谢
我想到可以用求导(即相邻元素作差)的方法把组间差值放大,然后据此划分。
下面给出2种方法:一阶导数和二阶导数(一阶方法至少需要3个数,二阶方法至少需要4个数)
想法:
"""
# 一、示例:原始数据
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
# 二、示例:分组结果
part_1 = [393, 393, 394, 394, 394]
part_2 = [423, 424, 424, 425, 425]
part_3 = [454, 454, 454, 454, 456]
下面各行依次为:原始数据、一阶导数、二阶导数、归一化后的二阶导数、提取变化最大特征后还原得到的一阶导数标记、还原到原始数据长度的标记
[393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
[0, -1, 0, 0, -29, -1, 0, -1, 0, -29, 0, 0, 0, -2]
[1, -1, 0, 29, -28, -1, 1, -1, 29, -29, 0, 0, 2]
[0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0] 这个可以代表分界线
"""
num_list = [0, 394, 454, 456]  # the first-difference method needs at least 3 values

# Forward differences between neighbours ("first derivative"). Only the
# magnitude of each gap matters below, so the sign is discarded.
firstD = [left - right for left, right in zip(num_list, num_list[1:])]
tmpFirstD = [abs(item) for item in firstD]

# Min-max normalise every gap to 0 or 1: gaps close to the largest gap
# become 1 (a group boundary), the rest become 0. Built-in min/max are used
# so this snippet does not depend on numpy being imported beforehand.
minValue = min(tmpFirstD)
maxValue = max(tmpFirstD)
span = maxValue - minValue
# When every gap is identical there is nothing to separate; treat all gaps
# as "no boundary" instead of dividing by zero.
newFirstD = [0] + [
    int(round((item - minValue) / span)) if span else 0
    for item in tmpFirstD
]
newFirstDLen = len(newFirstD)

# Map the boundary flags back onto the original values: newFirstD[k] == 1
# means num_list[k] opens a new group. The leading 0 is padding for the
# first value, which has no left neighbour and always opens the first group.
res = []
for value, starts_group in zip(num_list, newFirstD):
    if starts_group or not res:
        res.append([value])
    else:
        res[-1].append(value)
print(res)
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]  # the second-difference method needs at least 4 values

# First and second differences of the sorted values.
firstD = [left - right for left, right in zip(num_list, num_list[1:])]
secondD = [left - right for left, right in zip(firstD, firstD[1:])]

# Min-max normalise the magnitude of the second differences to 0 or 1.
# Built-in min/max are used so this snippet does not depend on numpy being
# imported beforehand.
tmpSecondD = [abs(item) for item in secondD]
minValue = min(tmpSecondD)
maxValue = max(tmpSecondD)
span = maxValue - minValue
# A zero span means all second differences are equal: flag nothing rather
# than dividing by zero.
tmpSecondD = [
    int(round((item - minValue) / span)) if span else 0
    for item in tmpSecondD
]
tmpSecondDLen = len(tmpSecondD)

# Second difference -> first difference.
# One large gap produces two consecutive 1s (entering and leaving the gap).
# Clear the first flag of each such pair so only one marker per gap remains.
i = 0
while i < tmpSecondDLen - 1:
    if tmpSecondD[i] == 1 and tmpSecondD[i + 1] == 1:
        tmpSecondD[i] = 0
        i += 2
    else:
        i += 1

# The second difference is two entries shorter than num_list; padding one 0
# on each side realigns it so that newFirstD[k] == 1 marks num_list[k] as
# the first value of a new group.
newFirstD = [0] + tmpSecondD + [0]
newFirstDLen = len(newFirstD)

# First difference -> original values: split num_list at the flagged positions.
res = []
for value, starts_group in zip(num_list, newFirstD):
    if starts_group or not res:
        res.append([value])
    else:
        res[-1].append(value)
print(res)
其他案例:
如果个数不多就用1阶
可以用kmeans聚类,也可以用普通的算法硬编码
import numpy as np
from pandas import Series,DataFrame
def threshold_cluster(Data_set, threshold):
    """Greedily cluster numbers that lie within ``threshold`` of a seed value.

    The input is flattened to one dimension. The first remaining value is
    taken as the seed of a new cluster, every other remaining value whose
    absolute distance to that seed is ``<= threshold`` joins the cluster,
    and the process repeats until no values are left.

    Args:
        Data_set: Array-like of numbers (any shape; it is flattened in
            C order).
        threshold: Maximum absolute distance from the seed for a value to
            join the seed's cluster.

    Returns:
        A tuple ``(index_list, class_k)`` of two parallel lists:
        ``index_list[i]`` holds the positions (in the flattened input) of
        the members of cluster ``i`` and ``class_k[i]`` holds their values.
        Both are empty for empty input.
    """
    # Normalise the input to a one-dimensional Series.
    stand_data = Series(np.asarray(Data_set).ravel('C'))
    index_list, class_k = [], []
    # Loop until every value has been assigned. Testing emptiness (rather
    # than truthiness via .any()) matters: .any() is False when the only
    # remaining values are 0, which would silently drop them.
    while not stand_data.empty:
        seed_index = stand_data.index[0]
        seed_value = stand_data.iloc[0]
        stand_data = stand_data.drop(seed_index)
        # Members are measured against the seed only, not against each
        # other. The selection may be empty, giving a one-element cluster.
        members = stand_data[abs(stand_data - seed_value) <= threshold]
        stand_data = stand_data.drop(members.index)
        index_list.append([seed_index] + list(members.index))
        class_k.append([seed_value] + list(members))
    return index_list, class_k
# Sample data
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
num_list.sort()  # the adjacent-gap computation below assumes ascending order
# Gap between each pair of neighbouring values.
time_gap_list = [right - left for left, right in zip(num_list, num_list[1:])]
# Use the mean adjacent gap, truncated to an integer, as the clustering
# threshold. The builtin sum() is used directly instead of rebinding the
# name `sum`; with fewer than two values there are no gaps, so fall back to 0.
avg_gap = sum(time_gap_list) // len(time_gap_list) if time_gap_list else 0
# Cluster the values using the mean adjacent gap as the threshold.
index_list, class_k = threshold_cluster(num_list, avg_gap)
print(class_k)
for part_no, part in enumerate(class_k, start=1):
    print(f"part_{part_no}=", part)
是这个意思哇
# Sample input with three clearly separated groups.
num_list = [
    393, 393, 394, 394, 394,
    423, 424, 424, 425, 425,
    454, 454, 454, 454, 456,
]
# Longer sample whose first run drifts upwards in small steps.
num_list1 = [
    393, 393, 394, 394, 394, 395, 396, 397,
    400, 401, 402, 403, 404, 406, 407, 408,
    415,
    423, 424, 424, 425, 425,
    454, 454, 454, 454, 456,
]
def grouping(num_list, gap=3):
    """Split an ascending sequence into runs separated by large gaps.

    A new group starts whenever a value exceeds its predecessor by more
    than ``gap``.

    Args:
        num_list: Iterable of numbers, expected to be sorted ascending.
        gap: Largest difference between neighbours that still keeps them in
            the same group. Defaults to 3.

    Returns:
        A list of groups (lists) in input order; an empty list for empty
        input.
    """
    result = []
    current = []
    for value in num_list:
        # Compare against the previous value; the first value has no
        # predecessor and always opens the first group.
        if current and value - current[-1] > gap:
            result.append(current)
            current = []
        current.append(value)
    # Flush the trailing group; skipped for empty input so that no empty
    # group is reported.
    if current:
        result.append(current)
    return result
# Show the groups that grouping() produces for the sample list.
print(grouping(num_list))
上面的 grouping 以相邻差值大于3作为分组条件;下面是另一种同样按差值大于3拆分的写法及其结果
num_list = [393, 393, 393, 394, 394, 394, 395, 396, 397, 423, 424, 424, 425, 425, 425, 426, 427, 454, 454, 454, 454, 456]

# Split the ascending list wherever two neighbours differ by more than 3.
# Only adjacent gaps are compared, so a long run that drifts by more than 3
# overall but never jumps (e.g. [1, 2, 3, 4, 5]) stays in one group instead
# of indexing past the end of the list.
groups = []
current = []
for value in num_list:
    if current and value - current[-1] > 3:
        groups.append(current)
        current = []
    current.append(value)
if current:
    groups.append(current)

# Print one group per line.
for group in groups:
    print(group)
如果对你有帮助,望采纳!!!!
[393, 393, 393, 394, 394, 394, 395, 396, 397]
[423, 424, 424, 425, 425, 425, 426, 427]
[454, 454, 454, 454, 456]