【python】给定数组,按相邻元素差额分组


# 一、示例:原始数据
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]


# 二、示例:分组结果
part_1 = [393, 393, 394, 394, 394]
part_2 = [423, 424, 424, 425, 425]
part_3 = [454, 454, 454, 454, 456]


# 三、数组说明
# 1、确定的事项
#   1-1 给定的列表,已按由小到大排列好;
#   1-2【同组相邻数字的差额】一定明显小于【两组间相邻数字的差额】。以上述案例为例:【同组相邻数字的差额】小于3,【两组间相邻数字的差额】远大于3;
# 2、不确定事项
#   2-1 列表元素总个数不确定;
#   2-2 可拆分成几组不确定;可能拆分成4-5组
#   2-3 每组元素的个数不确定;可能是案例中的5个,也可能是8-10个
#   2-4 【同组相邻数字的差额】不一定小于3,比如可能出现[1,5,7,21,25,29]这种场景


# 请问如何使用python实现,或提供拆分思路也可以,谢谢

我想到可以用导数的方法把差值拉大,然后可以划分
2种方法如下,一阶和二阶导数(这也就要求至少有2个以上的数)
想法:

"""
# 一、示例:原始数据
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]

# 二、示例:分组结果
part_1 = [393, 393, 394, 394, 394]
part_2 = [423, 424, 424, 425, 425]
part_3 = [454, 454, 454, 454, 456]

以下各行依次为:原始序列,一阶导数,二阶导数,归一化后的二阶导数(只保留变化最大的特征),由二阶导数还原出的一阶标记,对应到原始序列的分界线标记
[393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
    [0, -1,   0,   0,   -29,  -1,  0,  -1,  0,  -29,   0,  0,    0,  -2]    
      [1,  -1,  0,  29,   -28,  -1,  1,  -1,  29,  -29, 0, 0, 2]
      [0,  0,   0,   1,    1,    0,   0,   0,   1,  1,  0,  0, 0]
    [0,  0,   0,   0,   1,     0,    0,    0,  0,   1,  0,  0, 0,  0]  
 [0, 0,   0,   0,   0,    1,     0,    0,   0,  0,   1,  0,  0, 0, 0]  这个可以代表分界线  
"""

一阶导数


def group_by_first_difference(values):
    """Split an ascending sequence into groups at its unusually large gaps.

    The absolute differences between neighbours are min-max normalised to
    the range [0, 1] and rounded; a neighbour pair whose normalised gap
    rounds to 1 marks the start of a new group.

    Args:
        values: Numbers sorted in ascending order.

    Returns:
        A list of groups (lists) that together contain every input value in
        its original order. Empty input gives ``[]``. When there are fewer
        than two values, or all gaps are equal, everything stays in a single
        group because no gap stands out.
    """
    values = list(values)
    if not values:
        return []

    gaps = [abs(right - left) for left, right in zip(values, values[1:])]
    groups = [[values[0]]]
    if not gaps:
        return groups

    smallest, largest = min(gaps), max(gaps)
    spread = largest - smallest
    for value, gap in zip(values[1:], gaps):
        # spread == 0 means every gap is identical: there is nothing to
        # normalise against, and dividing would raise ZeroDivisionError.
        # round() rounds halves to even, so exactly 0.5 is not a boundary.
        starts_new_group = (
            spread > 0 and int(round((gap - smallest) / spread)) == 1
        )
        if starts_new_group:
            groups.append([value])
        else:
            groups[-1].append(value)
    return groups


# Three values is the smallest input for which a gap can stand out.
num_list = [0, 394, 454, 456]
res = group_by_first_difference(num_list)
print(res)

二阶求导

def group_by_second_difference(values):
    """Split an ascending sequence into groups using its second differences.

    The second differences of the sequence are taken in absolute value,
    min-max normalised to [0, 1] and rounded to 0/1 "spikes". When two
    spikes are adjacent, only the second one is kept. Each remaining spike,
    shifted by one position, marks the first element of a new group.

    Args:
        values: Numbers sorted in ascending order.

    Returns:
        A list of groups (lists) that together contain every input value in
        its original order. Empty input gives ``[]``. When there are fewer
        than three values, or all second differences are equal, everything
        stays in a single group because no spike stands out.
    """
    values = list(values)
    if not values:
        return []

    first_diff = [left - right for left, right in zip(values, values[1:])]
    second_diff = [
        abs(left - right) for left, right in zip(first_diff, first_diff[1:])
    ]
    if not second_diff:
        return [values]

    smallest, largest = min(second_diff), max(second_diff)
    spread = largest - smallest
    if spread == 0:
        # Nothing to normalise against; dividing would raise
        # ZeroDivisionError.
        return [values]

    spikes = [int(round((item - smallest) / spread)) for item in second_diff]

    # Second difference -> first difference: of two adjacent spikes keep
    # only the later one.
    pos = 0
    last = len(spikes) - 1
    while pos < last:
        if spikes[pos] == 1 and spikes[pos + 1] == 1:
            spikes[pos] = 0
            pos += 2
        else:
            pos += 1

    # Pad both ends so that the markers line up one-to-one with `values`.
    markers = [0] + spikes + [0]

    # First difference -> original sequence.
    groups = [[values[0]]]
    for value, marker in zip(values[1:], markers[1:]):
        if marker == 1:
            groups.append([value])
        else:
            groups[-1].append(value)
    return groups


# The author recommends at least four values for this variant.
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
res = group_by_second_difference(num_list)
print(res)

img

其他案例:
如果个数不多就用1阶

img

img

img

可以用kmeans聚类,也可以用普通的算法硬编码


import numpy as np
from pandas import Series,DataFrame

def threshold_cluster(Data_set,threshold):
    """Greedily cluster values that lie within ``threshold`` of a seed value.

    The input is flattened to one dimension. Repeatedly, the first value
    still unassigned becomes the seed of a new cluster, and every other
    unassigned value whose absolute distance to that seed is at most
    ``threshold`` joins it. Distances are measured to the seed, not between
    neighbouring values.

    Args:
        Data_set: Array-like of numbers (any shape; it is flattened in
            C order).
        threshold: Maximum absolute distance from the seed for a value to
            join the seed's cluster.

    Returns:
        A tuple ``(index_list, class_k)``. ``index_list[i]`` holds the
        positions (in the flattened input) of the members of cluster ``i``
        and ``class_k[i]`` holds the corresponding values, seed first.
    """
    # Normalise the input to a flat one-dimensional Series.
    remaining = Series(np.asarray(Data_set).ravel('C'))
    index_list, class_k = [], []
    # Loop on emptiness, not on `.any()`: `.any()` is False as soon as every
    # remaining value is 0, which would silently drop those zeros.
    while not remaining.empty:
        seed_value = remaining.iloc[0]
        in_cluster = abs(remaining - seed_value) <= threshold
        # The seed always belongs to its own cluster, even if the distance
        # test above rejects it (e.g. a negative threshold).
        in_cluster.iloc[0] = True
        members = remaining[in_cluster]
        index_list.append(members.index.tolist())
        class_k.append(members.tolist())
        remaining = remaining[~in_cluster]
    return index_list,class_k
# Sample data
num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
num_list.sort()  # the adjacent-gap computation below needs ascending order
# Differences between neighbouring values
time_gap_list = [later - earlier for earlier, later in zip(num_list, num_list[1:])]
# Use the mean adjacent gap, truncated to an integer, as the clustering
# threshold. With fewer than two values there are no gaps, so fall back to 0
# instead of dividing by zero. The result is kept in `avg_gap` so that the
# builtin `sum` is not overwritten.
avg_gap = int(sum(time_gap_list) / len(time_gap_list)) if time_gap_list else 0
# Cluster the values using the mean adjacent gap as threshold
index_list, class_k = threshold_cluster(num_list, avg_gap)
print(class_k)
for part_no, part in enumerate(class_k, start=1):
    print("part_" + str(part_no) + "=", part)

img

是这个意思哇

num_list = [393, 393, 394, 394, 394, 423, 424, 424, 425, 425, 454, 454, 454, 454, 456]
num_list1 = [393, 393, 394, 394, 394,395,396,397,400,401,402,403,404,406,407,408,415 ,423, 424, 424, 425, 425, 454, 454, 454, 454, 456]


def grouping(num_list, threshold=3):
    """Split a sequence into runs whose neighbouring values are close.

    A new group starts whenever a value exceeds its predecessor by more
    than ``threshold``.

    Args:
        num_list: Numbers in ascending order.
        threshold: Largest difference between neighbours that still keeps
            them in the same group. Defaults to 3.

    Returns:
        A list of groups (lists) in input order; ``[]`` for empty input.
    """
    result = []
    previous = None  # no predecessor yet, so the first value opens a group
    for value in num_list:
        if previous is None or value - previous > threshold:
            result.append([value])
        else:
            result[-1].append(value)
        previous = value
    return result


print(grouping(num_list))

img

img

上面大于等于3,下面是大于3的结果

img


def split_by_gap(values, max_gap=3):
    """Split an ascending sequence wherever neighbours differ by more than max_gap.

    Args:
        values: Numbers in ascending order.
        max_gap: Largest difference between neighbours that still keeps
            them in the same group. Defaults to 3.

    Returns:
        A list of groups (lists) in input order; ``[]`` for empty input.
    """
    values = list(values)
    if not values:
        return []

    groups = []
    start = 0
    # Only look at real neighbour pairs, so the scan can never index past
    # the end of the list, however wide the final group is.
    for pos in range(1, len(values)):
        if values[pos] - values[pos - 1] > max_gap:
            groups.append(values[start:pos])
            start = pos
    groups.append(values[start:])
    return groups


num_list = [393, 393, 393,394, 394, 394,395,396,397, 423, 424, 424, 425, 425,425,426,427, 454, 454, 454, 454, 456]
# Print one group per line.
for part in split_by_gap(num_list):
    print(part)

如果对你有帮助,望采纳!!!!

[393, 393, 393, 394, 394, 394, 395, 396, 397]
[423, 424, 424, 425, 425, 425, 426, 427]
[454, 454, 454, 454, 456]