Python 中的 Kappa 系数与混淆矩阵(confusion matrix)

    科技2024-08-16  28

    使用鸢尾花作为例子:

    先用 KNN 模型输出分类精度。下面是一个完整的模型拟合过程;注意训练和测试使用的都是全体数据,因此得到的是训练集上的精度,并不能代表模型的泛化能力。

    # Fit a 5-nearest-neighbour classifier on the iris data and print its accuracy.
    # The model is trained and scored on the same full data set, so the printed
    # value is the accuracy on the training data.
    import numpy as np
    from sklearn import neighbors
    from sklearn.metrics import accuracy_score

    if __name__ == '__main__':
        # Every column except the last is used as a feature; the last column
        # is used as the class label.
        table = np.loadtxt('.\\iris.txt')
        features, target = table[:, :-1], table[:, -1]

        # fit() returns the estimator itself, so construction and training
        # can be chained into one expression.
        model = neighbors.KNeighborsClassifier(n_neighbors=5).fit(features, target)

        # accuracy_score takes (true labels, predicted labels).
        print(accuracy_score(target, model.predict(features)))

     

    下面是输出混淆矩阵的代码,并附带绘图部分的程序(绘图需要 matplotlib):

    # Fit a 5-nearest-neighbour classifier on the iris data, compute the
    # confusion matrix of its predictions on the same data, and show it as an image.
    import matplotlib.pyplot as plt  # was missing: `plt` is used below
    import numpy as np
    from sklearn import neighbors
    from sklearn.metrics import confusion_matrix

    if __name__ == '__main__':
        # Every column except the last is used as a feature; the last column
        # is used as the class label.
        data_all = np.loadtxt('.\\iris.txt')
        data = data_all[:, :-1]
        label = data_all[:, -1]
        # Number of distinct classes; used for the tick positions and labels.
        num_label = len(np.unique(label))

        knn = neighbors.KNeighborsClassifier(n_neighbors=5)
        knn.fit(data, label)
        pre_model = knn.predict(data)

        # confusion_matrix takes (true labels, predicted labels).
        test_confu = confusion_matrix(label, pre_model)

        # Optional: save the matrix as a text file (requires `import pandas as pd`).
        # test_confu_pd = pd.DataFrame(test_confu)
        # test_confu_pd.to_csv('混淆矩阵.txt', sep=' ', header=False,
        #                      index=False, columns=None, index_label=False)

        fig, ax = plt.subplots()
        im = ax.imshow(test_confu)

        # One tick per class on each axis.
        ax.set_xticks(np.arange(num_label))
        ax.set_yticks(np.arange(num_label))
        # Label each tick with its class index.
        ax.set_xticklabels(np.arange(num_label))
        ax.set_yticklabels(np.arange(num_label))

        # Optional: change the alignment of the x tick labels.
        # plt.setp(ax.get_xticklabels(), ha="right",
        #          rotation_mode="anchor")

        fig.colorbar(im, fraction=0.02)

        # Optional: write each count inside its cell.
        # for i in range(len(test_confu)):
        #     for j in range(len(test_confu)):
        #         text = ax.text(j, i, test_confu[i, j],
        #                        ha="center", va="center", color="w")

        # NOTE(review): the title is CJK text; if it renders as empty boxes,
        # a CJK-capable font probably needs to be configured in matplotlib.
        ax.set_title("混淆矩阵")
        fig.tight_layout()

        # Optional: save the figure to disk.
        # plt.savefig('混淆矩阵.png', dpi=300)
        plt.show()

    最后是计算 Kappa 系数(Cohen's kappa)的代码:

    # Fit a 5-nearest-neighbour classifier on the iris data and print Cohen's
    # kappa between the true labels and the predictions on the same data.
    import numpy as np
    from sklearn import neighbors
    from sklearn.metrics import cohen_kappa_score

    if __name__ == '__main__':
        # Raw string: the original literal contained the invalid escape `\原`,
        # which triggers a warning on current Python versions. The path value
        # itself is unchanged.
        data_all = np.loadtxt(r'E:\原始数据\iris.txt')
        # Every column except the last is used as a feature; the last column
        # is used as the class label.
        data = data_all[:, :-1]
        label = data_all[:, -1]

        knn = neighbors.KNeighborsClassifier(n_neighbors=5)
        knn.fit(data, label)
        pre_model = knn.predict(data)

        # cohen_kappa_score takes the two label sequences to compare
        # (here: true labels and predicted labels).
        kappa = cohen_kappa_score(label, pre_model)
        print(kappa)  # the kappa coefficient (not the accuracy)

     

    Processed: 0.010, SQL: 8