分类预测输出 precision、recall、accuracy、auc 以及 TP、TN、FP、FN 混淆矩阵。
本文的实验是一个二分类问题,输出 precision、recall、accuracy 和 auc。
# -*- coding: utf-8 -*-
#from sklearn.neighbors import
import numpy as np
from pandas import read_csv
import pandas as pd
import sys
import importlib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat


def metrics_result(actual, predict):
    """Print accuracy, precision, recall, F1 and AUC for binary predictions.

    actual  : 1-D array-like of true labels (0/1).
    predict : 1-D array-like of predicted labels, same length as ``actual``.

    Precision/recall/F1 use ``average='weighted'``; AUC is the binary
    ROC-AUC computed from the hard predictions.
    """
    print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
    print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict, average='weighted')))
    print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict, average='weighted')))
    print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict, average='weighted')))
    # BUG FIX: the original referenced the module-level global ``test_y``
    # here instead of the ``actual`` parameter, so the function silently
    # scored against whatever split happened to be in scope.
    print('auc:{0:.3f}'.format(metrics.roc_auc_score(actual, predict)))
输出混淆矩阵(将 sklearn 的结果重排为 [[TP, FP], [FN, TN]] 的布局):
# sklearn's confusion_matrix for labels 0/1 is laid out [[TN, FP], [FN, TP]];
# rebuild it as [[TP, FP], [FN, TN]] in a single expression before printing.
matr = mat(confusion_matrix(test_y, predict))
conf = np.matrix([[matr[1, 1], matr[0, 1]],
                  [matr[1, 0], matr[0, 0]]])
print(conf)

全代码:
# -*- coding: utf-8 -*-
#from sklearn.neighbors import
import numpy as np
from pandas import read_csv
import pandas as pd
import sys
import importlib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat


def metrics_result(actual, predict):
    """Print accuracy, precision, recall, F1 and AUC for binary predictions.

    actual  : 1-D array-like of true labels (0/1).
    predict : 1-D array-like of predicted labels, same length as ``actual``.
    """
    print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
    print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict, average='weighted')))
    print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict, average='weighted')))
    print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict, average='weighted')))
    # BUG FIX: the original referenced the module-level global ``test_y``
    # here instead of the ``actual`` parameter.
    print('auc:{0:.3f}'.format(metrics.roc_auc_score(actual, predict)))


# 分类0-1 (binary 0/1 classification)
# Candidate dataset files; ``root`` selects the one actually used below.
root1 = "D:/ProgramData/station3/10.csv"
root2 = "D:/ProgramData/station3/more+average2.csv"
root3 = "D:/ProgramData/station3/new_10.csv"
root4 = "D:/ProgramData/station3/more+remove.csv"
root5 = "D:/ProgramData/station3/new_10 2.csv"
root6 = "D:/ProgramData/station3/new10.csv"
root7 = "D:/ProgramData/station3/no_-999.csv"

root = root4

# Load the CSV and keep only the raw ndarray (features + label columns;
# the split code below treats the last column as the label).
data1 = read_csv(root)
data1 = data1.values
print(root)

# Number of resampling rounds, and one accumulator list per metric.
time = 1

accuracy = []
aucc = []
pre = []
recall = []
# Run ``time`` rounds of: split -> rebalance -> train XGBoost -> evaluate.
# Each round appends its metrics to the module-level accumulator lists.
for i in range(time):
    # Fresh 80/20 split each round; random_state=i keeps rounds reproducible.
    # NOTE(review): sklearn.cross_validation was removed in sklearn 0.20 —
    # modern code should import train_test_split from sklearn.model_selection.
    train, test = cross_validation.train_test_split(
        data1, test_size=0.2, random_state=i)
    test_x = test[:, :-1]
    test_y = test[:, -1]
    train_x = train[:, :-1]
    train_y = train[:, -1]

    # Rebalance the TRAINING data only; the test set stays untouched.
    # NOTE(review): fit_sample was renamed fit_resample in imbalanced-learn 0.4.
    X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)

    # Classifier under test. Alternatives previously tried: RandomForest,
    # LogisticRegression, SVC, DecisionTree, GradientBoosting, KNN, BernoulliNB.
    clf = XGBClassifier()
    print(clf)
    clf.fit(X_Train, Y_Train)
    predict = clf.predict(test_x)

    # Reorder sklearn's confusion matrix ([[TN, FP], [FN, TP]] for labels 0/1)
    # into [[TP, FP], [FN, TN]] and print it.
    matr = mat(confusion_matrix(test_y, predict))
    conf = np.matrix([[0, 0], [0, 0]])
    conf[0, 0] = matr[1, 1]
    conf[1, 0] = matr[1, 0]
    conf[0, 1] = matr[0, 1]
    conf[1, 1] = matr[0, 0]
    print(conf)

    # Per-round metrics. For precision/recall, average=None returns one score
    # per class; index [1] keeps the positive class only.
    accuracy.append(metrics.accuracy_score(test_y, predict))
    aucc.append(metrics.roc_auc_score(test_y, predict, average=None))
    pre.append(metrics.precision_score(test_y, predict, average=None)[1])
    recall.append(metrics.recall_score(test_y, predict, average=None)[1])

    # Dump predictions and ground truth for offline inspection.
    # BUG FIX: the original reused the outer loop variable ``i`` for these
    # inner loops and opened the files without closing them on error; use a
    # distinct index, ``with`` blocks, and raw strings for the Windows paths
    # (byte-identical to the originals — none of \P, \s, \p, \y are escapes).
    with open(r'D:\ProgramData\station3\predict.txt', 'w') as f:
        for j in range(len(predict)):
            f.write(str(predict[j]))
            f.write('\n')
        f.write("写好了")

    with open(r'D:\ProgramData\station3\y_.txt', 'w') as f:
        # NOTE(review): iterates len(predict) while writing test_y — the two
        # have equal length here, but iterating len(test_y) would be clearer.
        for j in range(len(predict)):
            f.write(str(test_y[j]))
            f.write('\n')
        f.write("写好了")
# Average each metric over the ``time`` resampling rounds and report.
# IDIOM FIX: the original accumulated each list with a manual index loop
# (sum_acc += accuracy[i], ...); the builtin sum() does this in one call.
acc1 = sum(accuracy) * 1.0 / time
auc1 = sum(aucc) * 1.0 / time
pre1 = sum(pre) * 1.0 / time
recall1 = sum(recall) * 1.0 / time

print("acc", acc1)
print("auc", auc1)
print("pre", pre1)
print("recall", recall1)
#
# data1 = read_csv(root2) #数据转化为数组
# data1=data1.values
#
#
# accuracy=[]
# auc=[]
# pre=[]
# recall=[]
# for i in range(30):
# train, test= cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
# test_x=test[:,:-1]
# test_y=test[:,-1]
# train_x=train[:,:-1]
# train_y=train[:,-1]
# X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)
#
# #clf = RandomForestClassifier() #82
# clf = LogisticRegression() #82
# #clf=svm.SVC()
# #clf= XGBClassifier()
# #from sklearn.ensemble import RandomForestClassifier #92%
# #clf = DecisionTreeClassifier()
# #clf = GradientBoostingClassifier()
#
# #clf=neighbors.KNeighborsClassifier() 65.25%
# #clf=BernoulliNB()
# clf.fit(X_Train, Y_Train)
# predict=clf.predict(test_x)
#
# '''accuracy'''
# aa=metrics.accuracy_score(test_y, predict)
# accuracy.append(aa)
#
# '''auc'''
# aa=metrics.roc_auc_score(test_y, predict)
# auc.append(aa)
#
# '''precision'''
# aa=metrics.precision_score(test_y, predict,average='weighted')
# pre.append(aa)
#
# '''recall'''
# aa=metrics.recall_score(test_y, predict,average='weighted')
# recall.append(aa)
#
#
# sum_acc=0
# sum_auc=0
# sum_pre=0
# sum_recall=0
# for i in range(30):
# sum_acc+=accuracy[i]
# sum_auc+=auc[i]
# sum_pre+=pre[i]
# sum_recall+=recall[i]
#
# acc1=sum_acc*1.0/30
# auc1=sum_auc*1.0/30
# pre1=sum_pre*1.0/30
# recall1=sum_recall*1.0/30
# print("more 的 acc:", acc1)
# print("more 的 auc:", auc1)
# print("more 的 precision:", pre1)
# print("more 的 recall:", recall1)
#
# =============================================================================
#X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_Train,Y_Train, test_size=0.2, random_state=i)
输出结果:

分类预测输出precision,recall,accuracy,auc和tp,tn,fp,fn矩阵的更多相关文章
- 目标检测的评价标准mAP, Precision, Recall, Accuracy
目录 metrics 评价方法 TP , FP , TN , FN 概念 计算流程 Accuracy , Precision ,Recall Average Precision PR曲线 AP计算 A ...
- 机器学习:评价分类结果(Precision - Recall 的平衡、P - R 曲线)
一.Precision - Recall 的平衡 1)基础理论 调整阈值的大小,可以调节精准率和召回率的比重: 阈值:threshold,分类边界值,score > threshold 时分类为 ...
- 机器学习基础梳理—(accuracy,precision,recall浅谈)
一.TP TN FP FN TP:标签为正例,预测为正例(P),即预测正确(T) TN:标签为负例,预测为负例(N),即预测正确(T) FP:标签为负例,预测为正例(P),即预测错误(F) FN:标签 ...
- Precision,Recall,F1的计算
Precision又叫查准率,Recall又叫查全率.这两个指标共同衡量才能评价模型输出结果. TP: 预测为1(Positive),实际也为1(Truth-预测对了) TN: 预测为0(Negati ...
- 评价指标整理:Precision, Recall, F-score, TPR, FPR, TNR, FNR, AUC, Accuracy
针对二分类的结果,对模型进行评估,通常有以下几种方法: Precision.Recall.F-score(F1-measure)TPR.FPR.TNR.FNR.AUCAccuracy 真实结果 1 ...
- 分类指标准确率(Precision)和正确率(Accuracy)的区别
http://www.cnblogs.com/fengfenggirl/p/classification_evaluate.html 一.引言 分类算法有很多,不同分类算法又用很多不同的变种.不同的分 ...
- Precision/Recall、ROC/AUC、AP/MAP等概念区分
1. Precision和Recall Precision,准确率/查准率.Recall,召回率/查全率.这两个指标分别以两个角度衡量分类系统的准确率. 例如,有一个池塘,里面共有1000条鱼,含10 ...
- 机器学习--如何理解Accuracy, Precision, Recall, F1 score
当我们在谈论一个模型好坏的时候,我们常常会听到准确率(Accuracy)这个词,我们也会听到"如何才能使模型的Accurcy更高".那么是不是准确率最高的模型就一定是最好的模型? 这篇博文会向大家解释 ...
- 通过Precision/Recall判断分类结果偏差极大时算法的性能
当我们对某些问题进行分类时,真实结果的分布会有明显偏差. 例如对是否患癌症进行分类,testing set 中可能只有0.5%的人患了癌症. 此时如果直接数误分类数的话,那么一个每次都预测人没有癌症的 ...
随机推荐
- 操作数据库的时候,使用自带的DbProviderFactory类 (涉及抽象工厂和工厂方法)
微软自带的DbProviderFactory https://msdn.microsoft.com/en-us/library/system.data.common.dbproviderfactory ...
- bzoj 1799: [Ahoi2009]self 同类分布 数位dp
1799: [Ahoi2009]self 同类分布 Time Limit: 50 Sec Memory Limit: 64 MB[Submit][Status][Discuss] Descripti ...
- CentOS6.4x86EngCustomize120g__20160307.rar
安装的镜像包: CentOS-6.4-i386-bin-DVD1to2(CentOS-6.4-i386-bin-DVD1.iso / CentOS-6.4-i386-bin-DVD2.iso) 1. ...
- Java回顾之多线程同步
在这篇文章里,我们关注线程同步的话题.这是比多线程更复杂,稍不留意,我们就会“掉到坑里”,而且和单线程程序不同,多线程的错误是否每次都出现,也是不固定的,这给调试也带来了很大的挑战. 在这篇文章里,我 ...
- angular的 表单
一般来讲表单可能遇到的问题:1.如何数据绑定.2.验证表单.3.显示出错信息.4.整个form的验证.5.避免提交没有验证通过的表单.6.防止多系提交. input属性:nameng-modelng- ...
- charles抓取https中出现unknow
http正常抓包,https则出现unknown 1.安装证书 Help->SSL Proxying->Install Charles Root Certificate 但是!!!装完并没 ...
- Hadoop异常
org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://xxx:49000/user/had ...
- bzoj3862
题解: 这一道题目和模板有不同的地方就是在于可以修改只有一条边和i相邻 于是我们还要记录与这个点相邻的点有没有改变 代码: #pragma GCC optimize(2) #include<bi ...
- Use appropriate component from RM component palette
--------------------------- 错误 --------------------------- Opened report contains the following non- ...
- 单链表是否有环的问题解决与讨论(java实现)
单链表是否有环的问题经常在面试中遇到,一般面试中会要求空间为O(1);再者求若有环,则求环产生时的起始位置. 下面采用java实现. //单链表class ListNode{ int val; Lis ...