配对样本二分类资料一开始采用了错误的卡方检验报错,修改为麦克尼马尔还是报错

问题遇到的现象和发生背景

别人帮写的卡方检验代码,报下面的错误,
现在需要修改代码为麦克尼马尔检验,数据为配对样本二分类资料

用代码块功能插入代码,请勿粘贴截图

```python
def foursquare_chi_test(cross_table):
    chi2, pvalue, dof, expected = chi2_contingency(cross_table, correction=False)

    chi2 = round(chi2, 3)
    pvalue = round(pvalue, 3)
    value = stats.chi2.ppf(0.95, df=dof)  # 变量相关概率为0.95时对应的卡方值
    print('自由度{}'.format(dof))
    print('卡方值{:.2f}'.format(chi2))
    # print('p值{:.2f}'.format(pvalue))
    if pvalue<=0.05:
        print('p值 == {:.2f}<0.05,有统计学差异'.format(pvalue))
    else:
        print('p值 == {:.2f}>0.05,无统计学差异'.format(pvalue))
    return chi2, pvalue


def confusion_matrix_statistic(ytrue,pred):
    conf_mat = confusion_matrix(ytrue, pred)
    TP = conf_mat[1, 1]
    TN = conf_mat[0, 0]
    FP = conf_mat[0, 1]
    FN = conf_mat[1, 0]
    Accuracy = ((TP + TN) * 1.0) / (TN + TP + FN + FP)
    Precision = (TP * 1.0) / (TP + FP)
    Sensitivity = (TP * 1.0) / (TP + FN)
    Specificity = (TN * 1.0) / (TN + FP)
    return TP,TN,FP,FN,Accuracy,Precision,Sensitivity,Specificity


def Sensitivity_Specificity_Statistic(original_df,lowerage_group_1,lowerage_group_2,higherage_group_1,
                  higherage_group_2,radiomics_group,label,group_list):

    real_group_index = original_df[original_df['REAL'] == label].index
    lowerage_group_1_selected = lowerage_group_1.loc[real_group_index]
    lowerage_group_2_selected = lowerage_group_2.loc[real_group_index]
    higherage_group_1_selected = higherage_group_1.loc[real_group_index]
    higherage_group_2_selected = higherage_group_2.loc[real_group_index]
    radiomics_group_selected = radiomics_group.loc[real_group_index]

    group_data = [lowerage_group_1_selected, lowerage_group_2_selected, higherage_group_1_selected,
                  higherage_group_2_selected, radiomics_group_selected]
    for i in range(len(group_data)):
        for j in range(i + 1, len(group_data)):
            print("############################")
            print('卡方检验:%s-%s' % (group_list[i], group_list[j]))
            result_con = confusion_matrix_statistic(group_data[i], group_data[j])
            x = [[result_con[0], result_con[1]], [result_con[2], result_con[3]]]
            foursquare_chi_test(x)

def Accuracy_Statistic(original_df,lowerage_group_1,lowerage_group_2,higherage_group_1,
                  higherage_group_2,radiomics_group,group_list):

    real_group = original_df['REAL']

    temp = lowerage_group_1-real_group
    real_group_index = temp[temp== 0].index
    lowerage_group_1_selected = lowerage_group_1.loc[real_group_index]
    temp = lowerage_group_2 - real_group
    real_group_index = temp[temp == 0].index
    lowerage_group_2_selected = lowerage_group_2.loc[real_group_index]
    temp = higherage_group_1 - real_group
    real_group_index = temp[temp == 0].index
    higherage_group_1_selected = higherage_group_1.loc[real_group_index]
    temp = higherage_group_2 - real_group
    real_group_index = temp[temp == 0].index
    higherage_group_2_selected = higherage_group_2.loc[real_group_index]
    temp = radiomics_group - real_group
    real_group_index = temp[temp == 0].index
    radiomics_group_selected = radiomics_group.loc[real_group_index]

    group_data = [lowerage_group_1_selected, lowerage_group_2_selected, higherage_group_1_selected,
                  higherage_group_2_selected, radiomics_group_selected]
    for i in range(len(group_data)):
        for j in range(i + 1, len(group_data)):
            print("############################")
            print('卡方检验:%s-%s' % (group_list[i], group_list[j]))
            first_index = group_data[i].index
            second_index = group_data[j].index
            intersect = set(first_index).intersection(second_index)
            # diff1 = set(first_index_array).difference(second_index_array)

            group_data_i = group_data[i].loc[intersect]
            group_data_j = group_data[j].loc[intersect]
            result_con = confusion_matrix_statistic(group_data_i, group_data_j)
            x = [[result_con[0], result_con[1]], [result_con[2], result_con[3]]]
            print('TP;TN;FP;FN: ',x)

            if result_con[2]==0 and  result_con[3]==0:
                continue
            else:
                foursquare_chi_test(x)


def main():
    input_root = 'E:/research/data'
    ####merge.csv要自己去做,病例有改变就要去更新
    input_path_data = 'E:/research/data/merge.csv'
    # input_path_data = './data/merge.xlsx'
    original_df = pd.read_csv(input_path_data, encoding='utf=8', index_col=0)
    real_group = original_df['REAL']
    lowerage_group_1 = original_df['lowerage1']
    lowerage_group_2 = original_df['lowerage2']
    higherage_group_1 = original_df['higherage1']
    higherage_group_2 = original_df['higherage2']
    radiomics_group = original_df['radiomics']


    # index_positive = original_df[original_df["REAL"] == 1].index
    # 低年资1,real
    group_list = ['lowerage1', 'lowerage2', 'higherage1', 'higherage2', 'radiomics']

    for i in range(5):
        if i == 0:
            result = confusion_matrix_statistic(real_group, lowerage_group_1)
        elif i == 1:
            result = confusion_matrix_statistic(real_group, lowerage_group_2)
        elif i == 2:
            result = confusion_matrix_statistic(real_group, higherage_group_1)
        elif i == 3:
            result = confusion_matrix_statistic(real_group, higherage_group_2)
        else:
            result = confusion_matrix_statistic(real_group, radiomics_group)

        print("%s 量化指标:TP真阳性:%.3f,TN真阴性:%.3f,FP假阳性:%.3f,FN假阴性:%.3f,Accuracy准确率:%.3f,Precision精确率:%.3f,Sensitivity敏感度:%.3f,Specificity特异度:%.3f"%
              (group_list[i],result[0], result[1], result[2], result[3],result[4], result[5], result[6], result[7]))

    Sensitivity_Specificity_list = ['Sensitivity','Specificity','Accuracy']
    for sen_spe in Sensitivity_Specificity_list:
        # 灵敏度:卡方检验
        print('\n'*2)
        print('%s 差异性分析' % (sen_spe))
        if sen_spe == 'Sensitivity':
            label = 1
            Sensitivity_Specificity_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
                                              higherage_group_2, radiomics_group, label,group_list)
        # 特异度:卡方检验
        elif sen_spe == 'Specificity':
            label = 0
            Sensitivity_Specificity_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
                                      higherage_group_2, radiomics_group, label,group_list)
        else:
            # 准确率:卡方检验
            Accuracy_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
                                      higherage_group_2, radiomics_group, group_list)

if __name__ == '__main__':
    main()

###### 运行结果及报错内容 
lowerage1 量化指标:TP真阳性:67.000,TN真阴性:44.000,FP假阳性:18.000,FN假阴性:3.000,Accuracy准确率:0.841,Precision精确率:0.788,Sensitivity敏感度:0.957,Specificity特异度:0.710
lowerage2 量化指标:TP真阳性:66.000,TN真阴性:47.000,FP假阳性:15.000,FN假阴性:4.000,Accuracy准确率:0.856,Precision精确率:0.815,Sensitivity敏感度:0.943,Specificity特异度:0.758
higherage1 量化指标:TP真阳性:62.000,TN真阴性:54.000,FP假阳性:8.000,FN假阴性:8.000,Accuracy准确率:0.879,Precision精确率:0.886,Sensitivity敏感度:0.886,Specificity特异度:0.871
higherage2 量化指标:TP真阳性:69.000,TN真阴性:51.000,FP假阳性:11.000,FN假阴性:1.000,Accuracy准确率:0.909,Precision精确率:0.863,Sensitivity敏感度:0.986,Specificity特异度:0.823
radiomics 量化指标:TP真阳性:66.000,TN真阴性:48.000,FP假阳性:14.000,FN假阴性:4.000,Accuracy准确率:0.864,Precision精确率:0.825,Sensitivity敏感度:0.943,Specificity特异度:0.774



Sensitivity 差异性分析
############################
卡方检验:lowerage1-lowerage2
自由度1
卡方值20.63
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage1
自由度1
卡方值66.14
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage2
自由度1
卡方值16.16
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-radiomics
自由度1
卡方值48.37
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage1
自由度1
卡方值60.17
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage2
自由度1
卡方值13.71
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-radiomics
自由度1
卡方值33.11
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage1-higherage2
自由度1
卡方值5.05
p值 == 0.03<0.05,有统计学差异
############################
卡方检验:higherage1-radiomics
自由度1
卡方值20.28
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage2-radiomics
自由度1
卡方值84.12
p值 == 0.00<0.05,有统计学差异



Specificity 差异性分析
############################
卡方检验:lowerage1-lowerage2
自由度1
卡方值18.88
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage1
自由度1
卡方值22.73
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage2
自由度1
卡方值13.55
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-radiomics
自由度1
卡方值21.62
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage1
自由度1
卡方值4.25
p值 == 0.04<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage2
自由度1
卡方值18.39
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-radiomics
自由度1
卡方值15.35
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage1-higherage2
自由度1
卡方值30.12
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage1-radiomics
自由度1
卡方值43.12
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage2-radiomics
自由度1
卡方值39.28
p值 == 0.00<0.05,有统计学差异



Accuracy 差异性分析
############################
卡方检验:lowerage1-lowerage2
TP;TN;FP;FN:  [[95, 56], [0, 0]]
############################
卡方检验:lowerage1-higherage1
TP;TN;FP;FN:  [[85, 62], [0, 0]]
############################
卡方检验:lowerage1-higherage2
TP;TN;FP;FN:  [[96, 62], [0, 0]]
############################
卡方检验:lowerage1-radiomics
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14416/1769209816.py in 
    163 
    164 if __name__ == '__main__':
--> 165     main()

~\AppData\Local\Temp/ipykernel_14416/1769209816.py in main()
    159         else:
    160             # 准确率:卡方检验
--> 161             Accuracy_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
    162                                       higherage_group_2, radiomics_group, group_list)
    163 

~\AppData\Local\Temp/ipykernel_14416/1769209816.py in Accuracy_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1, higherage_group_2, radiomics_group, group_list)
    100             group_data_i = group_data[i].loc[intersect]
    101             group_data_j = group_data[j].loc[intersect]
--> 102             result_con = confusion_matrix_statistic(group_data_i, group_data_j)
    103             x = [[result_con[0], result_con[1]], [result_con[2], result_con[3]]]
    104             print('TP;TN;FP;FN: ',x)

~\AppData\Local\Temp/ipykernel_14416/1769209816.py in confusion_matrix_statistic(ytrue, pred)
     34 
     35 def confusion_matrix_statistic(ytrue,pred):
---> 36     conf_mat = confusion_matrix(ytrue, pred)
     37     TP = conf_mat[1, 1]
     38     TN = conf_mat[0, 0]

~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in confusion_matrix(y_true, y_pred, labels, sample_weight, normalize)
    297 
    298     """
--> 299     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    300     if y_type not in ("binary", "multiclass"):
    301         raise ValueError("%s is not supported" % y_type)

~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in _check_targets(y_true, y_pred)
     81     y_pred : array or indicator matrix
     82     """
---> 83     check_consistent_length(y_true, y_pred)
     84     type_true = type_of_target(y_true)
     85     type_pred = type_of_target(y_pred)

~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
    317     uniques = np.unique(lengths)
    318     if len(uniques) > 1:
--> 319         raise ValueError("Found input variables with inconsistent numbers of"
    320                          " samples: %r" % [int(l) for l in lengths])
    321 

ValueError: Found input variables with inconsistent numbers of samples: [158, 148]
###### 我的解答思路和尝试过的方法 
数据表格应该没问题,因为用SPSS都能正常得到结果。但是用python就报上面的错误
###### 我想要达到的结果
更换卡方检验方法为麦克尼马尔,并且不报错

#####数据表格内容##########
ID    REAL    lowerage1    lowerage2    higherage1    higherage2    radiomics
335485    1    1    1    1    1    1
338815    1    1    1    1    1    1
339100    0    0    0    0    0    0
339100    0    0    0    0    0    0
340519    0    1    0    0    0    0
340519    0    1    0    0    0    0
340521    0    0    0    0    0    0
340521    0    0    0    0    0    1
345586    1    1    1    1    1    1
346033    0    0    1    0    0    0
346033    0    0    1    0    0    0
348933    1    1    1    1    1    1
350839    1    1    1    1    1    1
360496    0    1    0    0    0    0
360953    1    1    1    1    1    1
362110    0    1    0    0    0    1
362110    0    1    0    0    0    0
362211    0    0    1    1    0    1
362211    0    0    1    1    0    1
363088    0    0    0    0    1    1
364015    1    1    1    1    1    1
364701    1    1    1    1    1    1
364701    1    1    1    1    1    1
364703    1    1    1    1    1    1
367447    1    1    1    1    1    1
369624    1    1    1    1    1    1
370931    1    1    1    1    1    1
371114    1    1    1    1    1    1
372345    1    1    1    1    1    1
378579    1    1    1    1    1    1
380837    1    1    1    1    1    1
384720    1    1    1    1    0    1
386946    1    1    1    1    1    1
387804    1    1    1    1    1    1
388916    0    1    1    0    0    0
389070    1    1    1    1    1    1
390879    1    1    1    0    1    1
390879    1    1    1    0    1    1
390879    1    1    1    0    1    1
391961    1    1    1    1    1    1
392082    1    0    0    0    1    1
392082    1    0    0    0    1    1
392616    1    1    1    1    1    1
392700    1    1    1    1    1    1
394244    0    0    1    0    0    0
394244    0    0    1    0    0    0
398945    0    0    1    0    0    1
398945    0    0    1    0    0    1
399379    1    1    1    1    1    1
400286    1    1    1    1    1    1
400446    0    1    1    0    1    1
404768    0    0    0    0    0    0
404856    0    0    1    1    1    0
404856    0    0    1    1    1    0
405396    1    1    1    0    1    1
406538    0    0    0    0    0    0
406538    0    0    0    0    0    0
406688    1    1    1    1    1    1
412344    1    1    1    1    1    1
412344    1    1    1    1    1    1
412344    1    1    1    1    1    1
412881    1    1    1    1    1    0
417171    1    1    1    1    1    1
417171    1    1    1    1    1    1
420066    0    0    0    0    0    0
420066    0    0    0    0    0    0
422671    1    1    1    1    1    1
423385    0    0    0    0    0    1
423928    1    1    1    1    1    1
423928    1    1    1    1    1    0
426476    0    1    1    0    0    0
427415    1    1    1    1    1    1
431308    1    1    1    1    1    1
431896    0    0    0    0    0    1
431896    0    0    0    0    0    0
432158    1    1    1    1    1    1
432158    1    1    1    1    1    1
432841    1    1    1    1    1    1
432846    1    1    1    1    1    0
433562    1    1    0    1    1    1
434988    1    1    1    1    1    1
435325    0    0    0    0    0    0
435325    0    0    0    0    0    1
438348    0    0    0    0    0    0
438348    0    0    0    0    0    1
440266    0    0    0    0    0    0
442287    0    0    0    0    0    0
442287    0    0    0    0    0    0
442639    1    1    1    1    1    1
442952    1    1    1    1    1    1
442952    1    1    1    1    1    1
443855    0    0    0    0    0    0
444427    1    1    1    1    1    1
444427    1    1    1    1    1    1
445200    1    1    1    1    1    1
445357    1    1    1    1    1    1
446436    1    1    1    1    1    1
446436    1    1    1    1    1    1
446593    0    0    0    0    0    0
448034    1    1    1    1    1    1
448034    1    1    1    1    1    1
451739    0    1    1    0    0    0
454929    1    1    1    1    1    1
456213    0    0    0    1    0    0
456962    1    1    1    1    1    1
456962    1    1    1    1    1    1
460020    1    1    1    1    1    1
464972    1    1    1    1    1    1
470013    1    1    0    1    1    0
485120    1    0    1    1    1    1
485139    0    0    0    0    0    0
490387    0    0    0    0    0    1
495067    0    0    0    1    0    1
499044    0    1    0    0    0    0
499044    0    1    0    0    0    0
507661    0    0    0    1    1    0
507661    0    0    0    1    1    0
517046    1    1    1    0    1    1
790103    0    1    1    0    0    0
790370    1    1    1    0    1    1
1807100559    0    1    0    0    1    0
1807100559    0    1    0    0    1    0
1902130389    0    1    0    0    0    0
1902130389    0    1    0    0    0    0
1906120711    0    0    0    0    0    0
1909111103    0    0    0    0    0    0
1909111103    0    0    0    0    0    0
2009250760    0    1    0    0    0    0
2009250760    0    1    0    0    0    0
2106180478    0    0    0    0    1    0
2106180478    0    0    0    0    1    0
2110070754    0    0    0    0    1    0

看看有没有用
https://b23.tv/Mi6blit

没看到你的报错信息,自然别人帮你写的,你直接问下帮你写的那个人会更好些

https://blog.csdn.net/lililinglingling/article/details/121207485