别人帮写的卡方检验代码,报下面的错误,
现在需要修改代码为麦克尼马尔检验,数据为配对样本二分类资料
```python
def foursquare_chi_test(cross_table):
chi2, pvalue, dof, expected = chi2_contingency(cross_table, correction=False)
chi2 = round(chi2, 3)
pvalue = round(pvalue, 3)
value = stats.chi2.ppf(0.95, df=dof) # 变量相关概率为0.95时对应的卡方值
print('自由度{}'.format(dof))
print('卡方值{:.2f}'.format(chi2))
# print('p值{:.2f}'.format(pvalue))
if pvalue<=0.05:
print('p值 == {:.2f}<0.05,有统计学差异'.format(pvalue))
else:
print('p值 == {:.2f}>0.05,无统计学差异'.format(pvalue))
return chi2, pvalue
def confusion_matrix_statistic(ytrue,pred):
conf_mat = confusion_matrix(ytrue, pred)
TP = conf_mat[1, 1]
TN = conf_mat[0, 0]
FP = conf_mat[0, 1]
FN = conf_mat[1, 0]
Accuracy = ((TP + TN) * 1.0) / (TN + TP + FN + FP)
Precision = (TP * 1.0) / (TP + FP)
Sensitivity = (TP * 1.0) / (TP + FN)
Specificity = (TN * 1.0) / (TN + FP)
return TP,TN,FP,FN,Accuracy,Precision,Sensitivity,Specificity
def Sensitivity_Specificity_Statistic(original_df,lowerage_group_1,lowerage_group_2,higherage_group_1,
higherage_group_2,radiomics_group,label,group_list):
real_group_index = original_df[original_df['REAL'] == label].index
lowerage_group_1_selected = lowerage_group_1.loc[real_group_index]
lowerage_group_2_selected = lowerage_group_2.loc[real_group_index]
higherage_group_1_selected = higherage_group_1.loc[real_group_index]
higherage_group_2_selected = higherage_group_2.loc[real_group_index]
radiomics_group_selected = radiomics_group.loc[real_group_index]
group_data = [lowerage_group_1_selected, lowerage_group_2_selected, higherage_group_1_selected,
higherage_group_2_selected, radiomics_group_selected]
for i in range(len(group_data)):
for j in range(i + 1, len(group_data)):
print("############################")
print('卡方检验:%s-%s' % (group_list[i], group_list[j]))
result_con = confusion_matrix_statistic(group_data[i], group_data[j])
x = [[result_con[0], result_con[1]], [result_con[2], result_con[3]]]
foursquare_chi_test(x)
def Accuracy_Statistic(original_df,lowerage_group_1,lowerage_group_2,higherage_group_1,
higherage_group_2,radiomics_group,group_list):
real_group = original_df['REAL']
temp = lowerage_group_1-real_group
real_group_index = temp[temp== 0].index
lowerage_group_1_selected = lowerage_group_1.loc[real_group_index]
temp = lowerage_group_2 - real_group
real_group_index = temp[temp == 0].index
lowerage_group_2_selected = lowerage_group_2.loc[real_group_index]
temp = higherage_group_1 - real_group
real_group_index = temp[temp == 0].index
higherage_group_1_selected = higherage_group_1.loc[real_group_index]
temp = higherage_group_2 - real_group
real_group_index = temp[temp == 0].index
higherage_group_2_selected = higherage_group_2.loc[real_group_index]
temp = radiomics_group - real_group
real_group_index = temp[temp == 0].index
radiomics_group_selected = radiomics_group.loc[real_group_index]
group_data = [lowerage_group_1_selected, lowerage_group_2_selected, higherage_group_1_selected,
higherage_group_2_selected, radiomics_group_selected]
for i in range(len(group_data)):
for j in range(i + 1, len(group_data)):
print("############################")
print('卡方检验:%s-%s' % (group_list[i], group_list[j]))
first_index = group_data[i].index
second_index = group_data[j].index
intersect = set(first_index).intersection(second_index)
# diff1 = set(first_index_array).difference(second_index_array)
group_data_i = group_data[i].loc[intersect]
group_data_j = group_data[j].loc[intersect]
result_con = confusion_matrix_statistic(group_data_i, group_data_j)
x = [[result_con[0], result_con[1]], [result_con[2], result_con[3]]]
print('TP;TN;FP;FN: ',x)
if result_con[2]==0 and result_con[3]==0:
continue
else:
foursquare_chi_test(x)
def main():
input_root = 'E:/research/data'
####merge.csv要自己去做,病例有改变就要去更新
input_path_data = 'E:/research/data/merge.csv'
# input_path_data = './data/merge.xlsx'
original_df = pd.read_csv(input_path_data, encoding='utf=8', index_col=0)
real_group = original_df['REAL']
lowerage_group_1 = original_df['lowerage1']
lowerage_group_2 = original_df['lowerage2']
higherage_group_1 = original_df['higherage1']
higherage_group_2 = original_df['higherage2']
radiomics_group = original_df['radiomics']
# index_positive = original_df[original_df["REAL"] == 1].index
# 低年资1,real
group_list = ['lowerage1', 'lowerage2', 'higherage1', 'higherage2', 'radiomics']
for i in range(5):
if i == 0:
result = confusion_matrix_statistic(real_group, lowerage_group_1)
elif i == 1:
result = confusion_matrix_statistic(real_group, lowerage_group_2)
elif i == 2:
result = confusion_matrix_statistic(real_group, higherage_group_1)
elif i == 3:
result = confusion_matrix_statistic(real_group, higherage_group_2)
else:
result = confusion_matrix_statistic(real_group, radiomics_group)
print("%s 量化指标:TP真阳性:%.3f,TN真阴性:%.3f,FP假阳性:%.3f,FN假阴性:%.3f,Accuracy准确率:%.3f,Precision精确率:%.3f,Sensitivity敏感度:%.3f,Specificity特异度:%.3f"%
(group_list[i],result[0], result[1], result[2], result[3],result[4], result[5], result[6], result[7]))
Sensitivity_Specificity_list = ['Sensitivity','Specificity','Accuracy']
for sen_spe in Sensitivity_Specificity_list:
# 灵敏度:卡方检验
print('\n'*2)
print('%s 差异性分析' % (sen_spe))
if sen_spe == 'Sensitivity':
label = 1
Sensitivity_Specificity_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
higherage_group_2, radiomics_group, label,group_list)
# 特异度:卡方检验
elif sen_spe == 'Specificity':
label = 0
Sensitivity_Specificity_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
higherage_group_2, radiomics_group, label,group_list)
else:
# 准确率:卡方检验
Accuracy_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
higherage_group_2, radiomics_group, group_list)
if __name__ == '__main__':
main()
###### 运行结果及报错内容
lowerage1 量化指标:TP真阳性:67.000,TN真阴性:44.000,FP假阳性:18.000,FN假阴性:3.000,Accuracy准确率:0.841,Precision精确率:0.788,Sensitivity敏感度:0.957,Specificity特异度:0.710
lowerage2 量化指标:TP真阳性:66.000,TN真阴性:47.000,FP假阳性:15.000,FN假阴性:4.000,Accuracy准确率:0.856,Precision精确率:0.815,Sensitivity敏感度:0.943,Specificity特异度:0.758
higherage1 量化指标:TP真阳性:62.000,TN真阴性:54.000,FP假阳性:8.000,FN假阴性:8.000,Accuracy准确率:0.879,Precision精确率:0.886,Sensitivity敏感度:0.886,Specificity特异度:0.871
higherage2 量化指标:TP真阳性:69.000,TN真阴性:51.000,FP假阳性:11.000,FN假阴性:1.000,Accuracy准确率:0.909,Precision精确率:0.863,Sensitivity敏感度:0.986,Specificity特异度:0.823
radiomics 量化指标:TP真阳性:66.000,TN真阴性:48.000,FP假阳性:14.000,FN假阴性:4.000,Accuracy准确率:0.864,Precision精确率:0.825,Sensitivity敏感度:0.943,Specificity特异度:0.774
Sensitivity 差异性分析
############################
卡方检验:lowerage1-lowerage2
自由度1
卡方值20.63
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage1
自由度1
卡方值66.14
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage2
自由度1
卡方值16.16
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-radiomics
自由度1
卡方值48.37
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage1
自由度1
卡方值60.17
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage2
自由度1
卡方值13.71
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-radiomics
自由度1
卡方值33.11
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage1-higherage2
自由度1
卡方值5.05
p值 == 0.03<0.05,有统计学差异
############################
卡方检验:higherage1-radiomics
自由度1
卡方值20.28
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage2-radiomics
自由度1
卡方值84.12
p值 == 0.00<0.05,有统计学差异
Specificity 差异性分析
############################
卡方检验:lowerage1-lowerage2
自由度1
卡方值18.88
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage1
自由度1
卡方值22.73
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-higherage2
自由度1
卡方值13.55
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage1-radiomics
自由度1
卡方值21.62
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage1
自由度1
卡方值4.25
p值 == 0.04<0.05,有统计学差异
############################
卡方检验:lowerage2-higherage2
自由度1
卡方值18.39
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:lowerage2-radiomics
自由度1
卡方值15.35
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage1-higherage2
自由度1
卡方值30.12
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage1-radiomics
自由度1
卡方值43.12
p值 == 0.00<0.05,有统计学差异
############################
卡方检验:higherage2-radiomics
自由度1
卡方值39.28
p值 == 0.00<0.05,有统计学差异
Accuracy 差异性分析
############################
卡方检验:lowerage1-lowerage2
TP;TN;FP;FN: [[95, 56], [0, 0]]
############################
卡方检验:lowerage1-higherage1
TP;TN;FP;FN: [[85, 62], [0, 0]]
############################
卡方检验:lowerage1-higherage2
TP;TN;FP;FN: [[96, 62], [0, 0]]
############################
卡方检验:lowerage1-radiomics
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14416/1769209816.py in
163
164 if __name__ == '__main__' :
--> 165 main()
~\AppData\Local\Temp/ipykernel_14416/1769209816.py in main()
159 else:
160 # 准确率:卡方检验
--> 161 Accuracy_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1,
162 higherage_group_2, radiomics_group, group_list)
163
~\AppData\Local\Temp/ipykernel_14416/1769209816.py in Accuracy_Statistic(original_df, lowerage_group_1, lowerage_group_2, higherage_group_1, higherage_group_2, radiomics_group, group_list)
100 group_data_i = group_data[i].loc[intersect]
101 group_data_j = group_data[j].loc[intersect]
--> 102 result_con = confusion_matrix_statistic(group_data_i, group_data_j)
103 x = [[result_con[0], result_con[1]], [result_con[2], result_con[3]]]
104 print('TP;TN;FP;FN: ',x)
~\AppData\Local\Temp/ipykernel_14416/1769209816.py in confusion_matrix_statistic(ytrue, pred)
34
35 def confusion_matrix_statistic(ytrue,pred):
---> 36 conf_mat = confusion_matrix(ytrue, pred)
37 TP = conf_mat[1, 1]
38 TN = conf_mat[0, 0]
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in confusion_matrix(y_true, y_pred, labels, sample_weight, normalize)
297
298 """
--> 299 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
300 if y_type not in ("binary", "multiclass"):
301 raise ValueError("%s is not supported" % y_type)
~\anaconda3\lib\site-packages\sklearn\metrics\_classification.py in _check_targets(y_true, y_pred)
81 y_pred : array or indicator matrix
82 """
---> 83 check_consistent_length(y_true, y_pred)
84 type_true = type_of_target(y_true)
85 type_pred = type_of_target(y_pred)
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
317 uniques = np.unique(lengths)
318 if len(uniques) > 1:
--> 319 raise ValueError("Found input variables with inconsistent numbers of"
320 " samples: %r" % [int(l) for l in lengths])
321
ValueError: Found input variables with inconsistent numbers of samples: [158, 148]
###### 我的解答思路和尝试过的方法
数据表格应该没问题,因为用SPSS都能正常得到结果。但是用python就报上面的错误
###### 我想要达到的结果
更换卡方检验方法为麦克尼马尔,并且不报错
#####数据表格内容##########
ID REAL lowerage1 lowerage2 higherage1 higherage2 radiomics
335485 1 1 1 1 1 1
338815 1 1 1 1 1 1
339100 0 0 0 0 0 0
339100 0 0 0 0 0 0
340519 0 1 0 0 0 0
340519 0 1 0 0 0 0
340521 0 0 0 0 0 0
340521 0 0 0 0 0 1
345586 1 1 1 1 1 1
346033 0 0 1 0 0 0
346033 0 0 1 0 0 0
348933 1 1 1 1 1 1
350839 1 1 1 1 1 1
360496 0 1 0 0 0 0
360953 1 1 1 1 1 1
362110 0 1 0 0 0 1
362110 0 1 0 0 0 0
362211 0 0 1 1 0 1
362211 0 0 1 1 0 1
363088 0 0 0 0 1 1
364015 1 1 1 1 1 1
364701 1 1 1 1 1 1
364701 1 1 1 1 1 1
364703 1 1 1 1 1 1
367447 1 1 1 1 1 1
369624 1 1 1 1 1 1
370931 1 1 1 1 1 1
371114 1 1 1 1 1 1
372345 1 1 1 1 1 1
378579 1 1 1 1 1 1
380837 1 1 1 1 1 1
384720 1 1 1 1 0 1
386946 1 1 1 1 1 1
387804 1 1 1 1 1 1
388916 0 1 1 0 0 0
389070 1 1 1 1 1 1
390879 1 1 1 0 1 1
390879 1 1 1 0 1 1
390879 1 1 1 0 1 1
391961 1 1 1 1 1 1
392082 1 0 0 0 1 1
392082 1 0 0 0 1 1
392616 1 1 1 1 1 1
392700 1 1 1 1 1 1
394244 0 0 1 0 0 0
394244 0 0 1 0 0 0
398945 0 0 1 0 0 1
398945 0 0 1 0 0 1
399379 1 1 1 1 1 1
400286 1 1 1 1 1 1
400446 0 1 1 0 1 1
404768 0 0 0 0 0 0
404856 0 0 1 1 1 0
404856 0 0 1 1 1 0
405396 1 1 1 0 1 1
406538 0 0 0 0 0 0
406538 0 0 0 0 0 0
406688 1 1 1 1 1 1
412344 1 1 1 1 1 1
412344 1 1 1 1 1 1
412344 1 1 1 1 1 1
412881 1 1 1 1 1 0
417171 1 1 1 1 1 1
417171 1 1 1 1 1 1
420066 0 0 0 0 0 0
420066 0 0 0 0 0 0
422671 1 1 1 1 1 1
423385 0 0 0 0 0 1
423928 1 1 1 1 1 1
423928 1 1 1 1 1 0
426476 0 1 1 0 0 0
427415 1 1 1 1 1 1
431308 1 1 1 1 1 1
431896 0 0 0 0 0 1
431896 0 0 0 0 0 0
432158 1 1 1 1 1 1
432158 1 1 1 1 1 1
432841 1 1 1 1 1 1
432846 1 1 1 1 1 0
433562 1 1 0 1 1 1
434988 1 1 1 1 1 1
435325 0 0 0 0 0 0
435325 0 0 0 0 0 1
438348 0 0 0 0 0 0
438348 0 0 0 0 0 1
440266 0 0 0 0 0 0
442287 0 0 0 0 0 0
442287 0 0 0 0 0 0
442639 1 1 1 1 1 1
442952 1 1 1 1 1 1
442952 1 1 1 1 1 1
443855 0 0 0 0 0 0
444427 1 1 1 1 1 1
444427 1 1 1 1 1 1
445200 1 1 1 1 1 1
445357 1 1 1 1 1 1
446436 1 1 1 1 1 1
446436 1 1 1 1 1 1
446593 0 0 0 0 0 0
448034 1 1 1 1 1 1
448034 1 1 1 1 1 1
451739 0 1 1 0 0 0
454929 1 1 1 1 1 1
456213 0 0 0 1 0 0
456962 1 1 1 1 1 1
456962 1 1 1 1 1 1
460020 1 1 1 1 1 1
464972 1 1 1 1 1 1
470013 1 1 0 1 1 0
485120 1 0 1 1 1 1
485139 0 0 0 0 0 0
490387 0 0 0 0 0 1
495067 0 0 0 1 0 1
499044 0 1 0 0 0 0
499044 0 1 0 0 0 0
507661 0 0 0 1 1 0
507661 0 0 0 1 1 0
517046 1 1 1 0 1 1
790103 0 1 1 0 0 0
790370 1 1 1 0 1 1
1807100559 0 1 0 0 1 0
1807100559 0 1 0 0 1 0
1902130389 0 1 0 0 0 0
1902130389 0 1 0 0 0 0
1906120711 0 0 0 0 0 0
1909111103 0 0 0 0 0 0
1909111103 0 0 0 0 0 0
2009250760 0 1 0 0 0 0
2009250760 0 1 0 0 0 0
2106180478 0 0 0 0 1 0
2106180478 0 0 0 0 1 0
2110070754 0 0 0 0 1 0
看看有没有用
https://b23.tv/Mi6blit
没看到你的报错信息,自然别人帮你写的,你直接问下帮你写的那个人会更好些
https://blog.csdn.net/lililinglingling/article/details/121207485