分类预测输出precision,recall,accuracy,auc和tp,tn,fp,fn矩阵
此次我做的实验是二分类问题,输出precision,recall,accuracy,auc,并以tp,tn,fp,fn混淆矩阵的形式输出分类结果
# -*- coding: utf-8 -*-
#from sklearn.neighbors import
import numpy as np
from pandas import read_csv
import pandas as pd
import sys
import importlib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat


def metrics_result(actual, predict):
    """Print accuracy, precision, recall, f1-score and AUC for a binary
    classification result.

    Parameters
    ----------
    actual : array-like
        Ground-truth labels (0/1).
    predict : array-like
        Predicted labels (0/1), same length as ``actual``.
    """
    print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
    print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict, average='weighted')))
    print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict, average='weighted')))
    print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict, average='weighted')))
    # BUG FIX: the original passed the global `test_y` here instead of the
    # `actual` parameter, so AUC was wrong (or crashed) whenever the function
    # was called with anything other than the global test set.
    print('auc:{0:.3f}'.format(metrics.roc_auc_score(actual, predict)))
# Output the confusion matrix, rearranged as [[TP, FP], [FN, TN]].
# (sklearn's confusion_matrix for binary labels returns [[TN, FP], [FN, TP]].)
# NOTE: the original had the heading as bare prose, which is a syntax error;
# it is now a comment.
matr = confusion_matrix(test_y, predict)
matr = mat(matr)
conf = np.matrix([[0, 0], [0, 0]])
conf[0, 0] = matr[1, 1]  # TP
conf[1, 0] = matr[1, 0]  # FN
conf[0, 1] = matr[0, 1]  # FP
conf[1, 1] = matr[0, 0]  # TN
print(conf)

全代码:
# -*- coding: utf-8 -*-
#from sklearn.neighbors import
import numpy as np
from pandas import read_csv
import pandas as pd
import sys
import importlib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat


def metrics_result(actual, predict):
    """Print accuracy, precision, recall, f1-score and AUC for a binary
    classification result.

    Parameters
    ----------
    actual : array-like
        Ground-truth labels (0/1).
    predict : array-like
        Predicted labels (0/1), same length as ``actual``.
    """
    print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
    print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict, average='weighted')))
    print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict, average='weighted')))
    print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict, average='weighted')))
    # BUG FIX: the original passed the global `test_y` here instead of the
    # `actual` parameter, so AUC ignored the function's own argument.
    print('auc:{0:.3f}'.format(metrics.roc_auc_score(actual, predict)))


# --- binary classification 0-1 ---
# Candidate input CSV files; `root` selects which one is actually used.
root1 = "D:/ProgramData/station3/10.csv"
root2 = "D:/ProgramData/station3/more+average2.csv"
root3 = "D:/ProgramData/station3/new_10.csv"
root4 = "D:/ProgramData/station3/more+remove.csv"
root5 = "D:/ProgramData/station3/new_10 2.csv"
root6 = "D:/ProgramData/station3/new10.csv"
root7 = "D:/ProgramData/station3/no_-999.csv"
root = root4

data1 = read_csv(root)   # load the dataset
data1 = data1.values     # DataFrame -> numpy array (数据转化为数组)
print(root)

time = 1        # number of random train/test splits to average over
accuracy = []   # per-split metric accumulators, averaged after the loop
aucc = []
pre = []
recall = []
for i in range(time):
    # Random 80/20 split, reseeded per iteration; last column is the label.
    train, test = cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
    test_x = test[:, :-1]
    test_y = test[:, -1]
    train_x = train[:, :-1]
    train_y = train[:, -1]

    # Rebalance the training set: SMOTE oversampling + ENN cleaning.
    X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)

    # Classifier under test. Alternatives previously tried:
    # RandomForestClassifier, LogisticRegression, svm.SVC,
    # DecisionTreeClassifier, GradientBoostingClassifier,
    # KNeighborsClassifier, BernoulliNB.
    clf = XGBClassifier()
    print(clf)
    clf.fit(X_Train, Y_Train)
    predict = clf.predict(test_x)

    # Confusion matrix rearranged as [[TP, FP], [FN, TN]]
    # (sklearn returns [[TN, FP], [FN, TP]] for binary labels).
    matr = confusion_matrix(test_y, predict)
    matr = mat(matr)
    conf = np.matrix([[0, 0], [0, 0]])
    conf[0, 0] = matr[1, 1]  # TP
    conf[1, 0] = matr[1, 0]  # FN
    conf[0, 1] = matr[0, 1]  # FP
    conf[1, 1] = matr[0, 0]  # TN
    print(conf)

    # Per-split metrics. accuracy/auc are scalars; precision/recall with
    # average=None return one value per class, index 1 = positive class.
    aa = metrics.accuracy_score(test_y, predict)
    accuracy.append(aa)
    bb = metrics.roc_auc_score(test_y, predict, average=None)
    aucc.append(bb)
    cc = metrics.precision_score(test_y, predict, average=None)
    pre.append(cc[1])
    dd = metrics.recall_score(test_y, predict, average=None)
    recall.append(dd[1])

    # Dump predictions and ground truth to text files, one value per line.
    # FIX: use `with` so the files are closed even on error, and iterate the
    # sequences directly instead of re-using (and shadowing) the outer loop
    # variable `i` via range(len(...)).
    with open('D:\ProgramData\station3\predict.txt', 'w') as f:
        for p in predict:
            f.write(str(p))
            f.write('\n')
        f.write("写好了")

    with open('D:\ProgramData\station3\y_.txt', 'w') as f:
        for y in test_y:
            f.write(str(y))
            f.write('\n')
        f.write("写好了")
f.close() # =============================================================================
# f=open('D:/ProgramData/station3/predict.txt', 'w')
# for i in range(len(predict)):
# f.write(str(predict[i]))
# f.write('\n')
# f.write("写好了")
# f.close()
#
# f=open('D:/ProgramData/station3/y.txt', 'w')
# for i in range(len(test_y)):
# f.write(str(test_y[i]))
# f.write('\n')
# f.write("写好了")
# f.close()
#
# =============================================================================
# =============================================================================
# print('调用函数auc:', metrics.roc_auc_score(test_y, predict, average='micro'))
#
# fpr, tpr, thresholds = metrics.roc_curve(test_y.ravel(),predict.ravel())
# auc = metrics.auc(fpr, tpr)
# print('手动计算auc:', auc)
# #绘图
# mpl.rcParams['font.sans-serif'] = u'SimHei'
# mpl.rcParams['axes.unicode_minus'] = False
# #FPR就是横坐标,TPR就是纵坐标
# plt.plot(fpr, tpr, c = 'r', lw = 2, alpha = 0.7, label = u'AUC=%.3f' % auc)
# plt.plot((0, 1), (0, 1), c = '#808080', lw = 1, ls = '--', alpha = 0.7)
# plt.xlim((-0.01, 1.02))
# plt.ylim((-0.01, 1.02))
# plt.xticks(np.arange(0, 1.1, 0.1))
# plt.yticks(np.arange(0, 1.1, 0.1))
# plt.xlabel('False Positive Rate', fontsize=13)
# plt.ylabel('True Positive Rate', fontsize=13)
# plt.grid(b=True, ls=':')
# plt.legend(loc='lower right', fancybox=True, framealpha=0.8, fontsize=12)
# plt.title(u'大类问题一分类后的ROC和AUC', fontsize=17)
# plt.show()
# =============================================================================

# Average the per-split metrics over all `time` splits.
# FIX: the original accumulated each list with a manual counter loop;
# the builtin sum() does the same in one C-level pass.
acc1 = sum(accuracy) * 1.0 / time
auc1 = sum(aucc) * 1.0 / time
pre1 = sum(pre) * 1.0 / time
recall1 = sum(recall) * 1.0 / time
print("acc", acc1)
print("auc", auc1)
print("pre", pre1)
print("recall", recall1)

# =============================================================================
#
# data1 = read_csv(root2) #数据转化为数组
# data1=data1.values
#
#
# accuracy=[]
# auc=[]
# pre=[]
# recall=[]
# for i in range(30):
# train, test= cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
# test_x=test[:,:-1]
# test_y=test[:,-1]
# train_x=train[:,:-1]
# train_y=train[:,-1]
# X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)
#
# #clf = RandomForestClassifier() #82
# clf = LogisticRegression() #82
# #clf=svm.SVC()
# #clf= XGBClassifier()
# #from sklearn.ensemble import RandomForestClassifier #92%
# #clf = DecisionTreeClassifier()
# #clf = GradientBoostingClassifier()
#
# #clf=neighbors.KNeighborsClassifier() 65.25%
# #clf=BernoulliNB()
# clf.fit(X_Train, Y_Train)
# predict=clf.predict(test_x)
#
# '''accuracy'''
# aa=metrics.accuracy_score(test_y, predict)
# accuracy.append(aa)
#
# '''auc'''
# aa=metrics.roc_auc_score(test_y, predict)
# auc.append(aa)
#
# '''precision'''
# aa=metrics.precision_score(test_y, predict,average='weighted')
# pre.append(aa)
#
# '''recall'''
# aa=metrics.recall_score(test_y, predict,average='weighted')
# recall.append(aa)
#
#
# sum_acc=0
# sum_auc=0
# sum_pre=0
# sum_recall=0
# for i in range(30):
# sum_acc+=accuracy[i]
# sum_auc+=auc[i]
# sum_pre+=pre[i]
# sum_recall+=recall[i]
#
# acc1=sum_acc*1.0/30
# auc1=sum_auc*1.0/30
# pre1=sum_pre*1.0/30
# recall1=sum_recall*1.0/30
# print("more 的 acc:", acc1)
# print("more 的 auc:", auc1)
# print("more 的 precision:", pre1)
# print("more 的 recall:", recall1)
#
# =============================================================================
#X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_Train,Y_Train, test_size=0.2, random_state=i)
输出结果:

分类预测输出precision,recall,accuracy,auc和tp,tn,fp,fn矩阵的更多相关文章
- 目标检测的评价标准mAP, Precision, Recall, Accuracy
目录 metrics 评价方法 TP , FP , TN , FN 概念 计算流程 Accuracy , Precision ,Recall Average Precision PR曲线 AP计算 A ...
- 机器学习:评价分类结果(Precision - Recall 的平衡、P - R 曲线)
一.Precision - Recall 的平衡 1)基础理论 调整阈值的大小,可以调节精准率和召回率的比重: 阈值:threshold,分类边界值,score > threshold 时分类为 ...
- 机器学习基础梳理—(accuracy,precision,recall浅谈)
一.TP TN FP FN TP:标签为正例,预测为正例(P),即预测正确(T) TN:标签为负例,预测为负例(N),即预测正确(T) FP:标签为负例,预测为正例(P),即预测错误(F) FN:标签 ...
- Precision,Recall,F1的计算
Precision又叫查准率,Recall又叫查全率.这两个指标共同衡量才能评价模型输出结果. TP: 预测为1(Positive),实际也为1(Truth-预测对了) TN: 预测为0(Negati ...
- 评价指标整理:Precision, Recall, F-score, TPR, FPR, TNR, FNR, AUC, Accuracy
针对二分类的结果,对模型进行评估,通常有以下几种方法: Precision.Recall.F-score(F1-measure)TPR.FPR.TNR.FNR.AUCAccuracy 真实结果 1 ...
- 分类指标准确率(Precision)和正确率(Accuracy)的区别
http://www.cnblogs.com/fengfenggirl/p/classification_evaluate.html 一.引言 分类算法有很多,不同分类算法又用很多不同的变种.不同的分 ...
- Precision/Recall、ROC/AUC、AP/MAP等概念区分
1. Precision和Recall Precision,准确率/查准率.Recall,召回率/查全率.这两个指标分别以两个角度衡量分类系统的准确率. 例如,有一个池塘,里面共有1000条鱼,含10 ...
- 机器学习--如何理解Accuracy, Precision, Recall, F1 score
当我们在谈论一个模型好坏的时候,我们常常会听到准确率(Accuracy)这个词,我们也会听到"如何才能使模型的Accurcy更高".那么是不是准确率最高的模型就一定是最好的模型? 这篇博文会向大家解释 ...
- 通过Precision/Recall判断分类结果偏差极大时算法的性能
当我们对某些问题进行分类时,真实结果的分布会有明显偏差. 例如对是否患癌症进行分类,testing set 中可能只有0.5%的人患了癌症. 此时如果直接数误分类数的话,那么一个每次都预测人没有癌症的 ...
随机推荐
- POJ 2115 C Looooops(模线性方程)
http://poj.org/problem?id=2115 题意: 给你一个变量,变量初始值a,终止值b,每循环一遍加c,问一共循环几遍终止,结果mod2^k.如果无法终止则输出FOREVER. 思 ...
- shell 判断文件是否是可执行文件
测试变量指定的文件是否存在且是可执行文件.如果存在且是可执行文件,则执行该文件,否则通过chmod命令赋予该文件可执行权限. //test.sh #!/bin/bash echo "ente ...
- Redis并发竞争
Redis是一种单线程机制的nosql数据库,基于key-value,数据可持久化落盘.由于单线程所以Redis本身并没有锁的概念,多个客户端连接并不存在竞争关系,但是利用jedis等客户端对Redi ...
- 【测试设计】基于正交法的测试用例设计工具--PICT
前言 我们都知道成对组合覆盖是一种非常有效的测试用例设计方法,但是实际工作过程中当成对组合量太大,我们往往很难做到有效的用例覆盖. PICT是微软公司出品的一款成对组合命令行生成工具,它很好的解决了上 ...
- 【安全测试】Web应用安全之XSS跨站脚本攻击漏洞
前言 以前都只是在各类文档中见到过XSS,也进行过相关的学习,但是都是一知半解,过了一段时间就忘了. 前几天我们收到了了一份标题为<XX账号昵称参数中存在存储XSS漏洞>的报告文档,来源是 ...
- JPA使用指南 javax.persistence的注解配置
@SuppressWarnings("serial") @Entity @Table(name="T_X") public class X implements ...
- OOP的感悟
不要认为你关心的东西就是对象的全部或对象的核心,相对于对象的成员家族而言,它仅仅是其中的一个‘很小的成员而已’
- 玲珑杯 round18 A 计算几何瞎暴力
题目链接 : http://www.ifrog.cc/acm/problem/1143 当时没看到坐标的数据范围= =看到讨论才意识到,不同的坐标最多只有1k多个,完全可以暴力做法,不过也要一些技巧. ...
- 进程间通信IPC
body, table{font-family: 微软雅黑; font-size: 13.5pt} table{border-collapse: collapse; border: solid gra ...
- Double H3.0
Double H3.0 团队分工 组员 王熙航 杨艺勇 刘杰 郑沐榕 李冠锐 曾磊鑫 戴俊涵 聂寒冰 任务内容 分配任务,整理内容,审核修改 规格说明书汇总排版 ,记录其他组的提问 用例图,功能描述 ...