此次我做的实验是二分类问题,输出precision,recall,accuracy,auc

# -*- coding: utf-8 -*-
#from sklearn.neighbors import
import numpy as np
from pandas import read_csv
import pandas as pd
import sys
import importlib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat def metrics_result(actual, predict):
print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict,average='weighted')))
print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict,average='weighted')))
print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict,average='weighted')))
print('auc:{0:.3f}'.format(metrics.roc_auc_score(test_y, predict)))

输出混淆矩阵

    matr=confusion_matrix(test_y,predict)
matr=mat(matr)
conf=np.matrix([[0,0],[0,0]])
conf[0,0]=matr[1,1]
conf[1,0]=matr[1,0]
conf[0,1]=matr[0,1]
conf[1,1]=matr[0,0]
print(conf)

全代码:

# -*- coding: utf-8 -*-
#from sklearn.neighbors import
import numpy as np
from pandas import read_csv
import pandas as pd
import sys
import importlib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat def metrics_result(actual, predict):
print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict,average='weighted')))
print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict,average='weighted')))
print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict,average='weighted')))
print('auc:{0:.3f}'.format(metrics.roc_auc_score(test_y, predict))) '''分类0-1'''
root1="D:/ProgramData/station3/10.csv"
root2="D:/ProgramData/station3/more+average2.csv"
root3="D:/ProgramData/station3/new_10.csv"
root4="D:/ProgramData/station3/more+remove.csv"
root5="D:/ProgramData/station3/new_10 2.csv"
root6="D:/ProgramData/station3/new10.csv"
root7="D:/ProgramData/station3/no_-999.csv" root=root4
data1 = read_csv(root) #数据转化为数组
data1=data1.values
print(root)
time=1 accuracy=[]
aucc=[]
pre=[]
recall=[]
for i in range(time):
train, test= cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
test_x=test[:,:-1]
test_y=test[:,-1]
train_x=train[:,:-1]
train_y=train[:,-1]
# =============================================================================
# print(train_x.shape)
# print(train_y.shape)
# print(test_x.shape)
# print(test_y.shape)
# print(type(train_x))
# ============================================================================= #X_Train=train_x
#Y_Train=train_y X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y) #clf = RandomForestClassifier() #82
#clf = LogisticRegression() #82 #penalty=’l2’, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver=’liblinear’, max_iter=100, multi_class=’ovr’, verbose=0, warm_start=False, n_jobs=1
#clf=svm.SVC()
clf= XGBClassifier()
#from sklearn.ensemble import RandomForestClassifier #92%
#clf = DecisionTreeClassifier()
#clf = GradientBoostingClassifier() #clf=neighbors.KNeighborsClassifier()
#clf=BernoulliNB()
print(clf)
clf.fit(X_Train, Y_Train)
predict=clf.predict(test_x) matr=confusion_matrix(test_y,predict)
matr=mat(matr)
conf=np.matrix([[0,0],[0,0]])
conf[0,0]=matr[1,1]
conf[1,0]=matr[1,0]
conf[0,1]=matr[0,1]
conf[1,1]=matr[0,0]
print(conf)
#a=metrics_result(test_y, predict) #a=metrics_result(test_y,predict)
'''accuracy'''
aa=metrics.accuracy_score(test_y, predict) #print(metrics.accuracy_score(test_y, predict))
accuracy.append(aa) '''auc'''
bb=metrics.roc_auc_score(test_y, predict, average=None)
aucc.append(bb) '''precision'''
cc=metrics.precision_score(test_y, predict, average=None)
pre.append(cc[1]) # =============================================================================
# print('cc')
# print(type(cc))
# print(cc[1])
# print('cc')
# ============================================================================= '''recall'''
dd=metrics.recall_score(test_y, predict, average=None)
#print(metrics.recall_score(test_y, predict,average='weighted'))
recall.append(dd[1]) f=open('D:\ProgramData\station3\predict.txt', 'w')
for i in range(len(predict)):
f.write(str(predict[i]))
f.write('\n')
f.write("写好了")
f.close() f=open('D:\ProgramData\station3\y_.txt', 'w')
for i in range(len(predict)):
f.write(str(test_y[i]))
f.write('\n')
f.write("写好了")
f.close() # =============================================================================
# f=open('D:/ProgramData/station3/predict.txt', 'w')
# for i in range(len(predict)):
# f.write(str(predict[i]))
# f.write('\n')
# f.write("写好了")
# f.close()
#
# f=open('D:/ProgramData/station3/y.txt', 'w')
# for i in range(len(test_y)):
# f.write(str(test_y[i]))
# f.write('\n')
# f.write("写好了")
# f.close()
#
# =============================================================================
# =============================================================================
# print('调用函数auc:', metrics.roc_auc_score(test_y, predict, average='micro'))
#
# fpr, tpr, thresholds = metrics.roc_curve(test_y.ravel(),predict.ravel())
# auc = metrics.auc(fpr, tpr)
# print('手动计算auc:', auc)
# #绘图
# mpl.rcParams['font.sans-serif'] = u'SimHei'
# mpl.rcParams['axes.unicode_minus'] = False
# #FPR就是横坐标,TPR就是纵坐标
# plt.plot(fpr, tpr, c = 'r', lw = 2, alpha = 0.7, label = u'AUC=%.3f' % auc)
# plt.plot((0, 1), (0, 1), c = '#808080', lw = 1, ls = '--', alpha = 0.7)
# plt.xlim((-0.01, 1.02))
# plt.ylim((-0.01, 1.02))
# plt.xticks(np.arange(0, 1.1, 0.1))
# plt.yticks(np.arange(0, 1.1, 0.1))
# plt.xlabel('False Positive Rate', fontsize=13)
# plt.ylabel('True Positive Rate', fontsize=13)
# plt.grid(b=True, ls=':')
# plt.legend(loc='lower right', fancybox=True, framealpha=0.8, fontsize=12)
# plt.title(u'大类问题一分类后的ROC和AUC', fontsize=17)
# plt.show()
# ============================================================================= sum_acc=0
sum_auc=0
sum_pre=0
sum_recall=0
for i in range(time):
sum_acc+=accuracy[i]
sum_auc+=aucc[i]
sum_pre+=pre[i]
sum_recall+=recall[i] acc1=sum_acc*1.0/time
auc1=sum_auc*1.0/time
pre1=sum_pre*1.0/time
recall1=sum_recall*1.0/time
print("acc",acc1)
print("auc",auc1)
print("pre",pre1)
print("recall",recall1) # =============================================================================
#
# data1 = read_csv(root2) #数据转化为数组
# data1=data1.values
#
#
# accuracy=[]
# auc=[]
# pre=[]
# recall=[]
# for i in range(30):
# train, test= cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
# test_x=test[:,:-1]
# test_y=test[:,-1]
# train_x=train[:,:-1]
# train_y=train[:,-1]
# X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)
#
# #clf = RandomForestClassifier() #82
# clf = LogisticRegression() #82
# #clf=svm.SVC()
# #clf= XGBClassifier()
# #from sklearn.ensemble import RandomForestClassifier #92%
# #clf = DecisionTreeClassifier()
# #clf = GradientBoostingClassifier()
#
# #clf=neighbors.KNeighborsClassifier() 65.25%
# #clf=BernoulliNB()
# clf.fit(X_Train, Y_Train)
# predict=clf.predict(test_x)
#
# '''accuracy'''
# aa=metrics.accuracy_score(test_y, predict)
# accuracy.append(aa)
#
# '''auc'''
# aa=metrics.roc_auc_score(test_y, predict)
# auc.append(aa)
#
# '''precision'''
# aa=metrics.precision_score(test_y, predict,average='weighted')
# pre.append(aa)
#
# '''recall'''
# aa=metrics.recall_score(test_y, predict,average='weighted')
# recall.append(aa)
#
#
# sum_acc=0
# sum_auc=0
# sum_pre=0
# sum_recall=0
# for i in range(30):
# sum_acc+=accuracy[i]
# sum_auc+=auc[i]
# sum_pre+=pre[i]
# sum_recall+=recall[i]
#
# acc1=sum_acc*1.0/30
# auc1=sum_auc*1.0/30
# pre1=sum_pre*1.0/30
# recall1=sum_recall*1.0/30
# print("more 的 acc:", acc1)
# print("more 的 auc:", auc1)
# print("more 的 precision:", pre1)
# print("more 的 recall:", recall1)
#
# =============================================================================
#X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_Train,Y_Train, test_size=0.2, random_state=i)

输出结果:

分类预测输出precision,recall,accuracy,auc和tp,tn,fp,fn矩阵的更多相关文章

  1. 目标检测的评价标准mAP, Precision, Recall, Accuracy

    目录 metrics 评价方法 TP , FP , TN , FN 概念 计算流程 Accuracy , Precision ,Recall Average Precision PR曲线 AP计算 A ...

  2. 机器学习:评价分类结果(Precision - Recall 的平衡、P - R 曲线)

    一.Precision - Recall 的平衡 1)基础理论 调整阈值的大小,可以调节精准率和召回率的比重: 阈值:threshold,分类边界值,score > threshold 时分类为 ...

  3. 机器学习基础梳理—(accuracy,precision,recall浅谈)

    一.TP TN FP FN TP:标签为正例,预测为正例(P),即预测正确(T) TN:标签为负例,预测为负例(N),即预测正确(T) FP:标签为负例,预测为正例(P),即预测错误(F) FN:标签 ...

  4. Precision,Recall,F1的计算

    Precision又叫查准率,Recall又叫查全率.这两个指标共同衡量才能评价模型输出结果. TP: 预测为1(Positive),实际也为1(Truth-预测对了) TN: 预测为0(Negati ...

  5. 评价指标整理:Precision, Recall, F-score, TPR, FPR, TNR, FNR, AUC, Accuracy

    针对二分类的结果,对模型进行评估,通常有以下几种方法: Precision.Recall.F-score(F1-measure)TPR.FPR.TNR.FNR.AUCAccuracy   真实结果 1 ...

  6. 分类指标准确率(Precision)和正确率(Accuracy)的区别

    http://www.cnblogs.com/fengfenggirl/p/classification_evaluate.html 一.引言 分类算法有很多,不同分类算法又用很多不同的变种.不同的分 ...

  7. Precision/Recall、ROC/AUC、AP/MAP等概念区分

    1. Precision和Recall Precision,准确率/查准率.Recall,召回率/查全率.这两个指标分别以两个角度衡量分类系统的准确率. 例如,有一个池塘,里面共有1000条鱼,含10 ...

  8. 机器学习--如何理解Accuracy, Precision, Recall, F1 score

    当我们在谈论一个模型好坏的时候,我们常常会听到准确率(Accuracy)这个词,我们也会听到"如何才能使模型的Accurcy更高".那么是不是准确率最高的模型就一定是最好的模型? 这篇博文会向大家解释 ...

  9. 通过Precision/Recall判断分类结果偏差极大时算法的性能

    当我们对某些问题进行分类时,真实结果的分布会有明显偏差. 例如对是否患癌症进行分类,testing set 中可能只有0.5%的人患了癌症. 此时如果直接数误分类数的话,那么一个每次都预测人没有癌症的 ...

随机推荐

  1. Java的System.out.println()的解析

    Java的System.out.println()的解析 System 是java.lang中的一个类. System.out 中的out, 代表了System类中的静态对象PrintStream, ...

  2. activemq、rabbitmq、kafka原理和比较

    一.activemq 虽然是java写的消息队列,但是提供Java, C, C++, C#, Ruby, Perl, Python, PHP各种客户端,所以语言上是没什么问题的.配置和使用,基本上是j ...

  3. Oracle数据库空值操作

    空值操作: null表示空的意思. 一.情况: 1:表中的任何字段默认情况下都可以为null值. 2:not null表示非空,是一种约束 设置为非空约束的字段,必须有有效值,不能为空. 3:插入数据 ...

  4. git基本操作,一篇文章就够了!

    1. git简介 git的通用操作流程如下图(来源于网络) 主要涉及到四个关键点: 工作区:本地电脑存放项目文件的地方,比如learnGitProject文件夹: 暂存区(Index/Stage):在 ...

  5. hdu3549网络流之最大流

    Ford-Fulkerson方法:dfs实现 dfs  140ms #include<map> #include<set> #include<cmath> #inc ...

  6. IOS-CoreData(增删改查、表关联、分页和模糊查询、多个数据库)

    1>什么是CoreData Core Data是iOS5之后才出现的一个框架,它提供了对象-关系映射(ORM)的功能,即能够将OC对象转化成数据,保存在SQLite数据库文件中,也能够将保存在数 ...

  7. 【javascript基础】之【__lookupSetter__ 跟 __lookupSetter__】

    描述: 返回getter setter所定义的函数 语法: obj.__lookupGetter__(sprop) 参数: getter setter中定义的字符串属性 注意:这不是标准的方法,ecm ...

  8. EM算法及其应用(一)

    EM算法及其应用(一) EM算法及其应用(二): K-means 与 高斯混合模型 EM算法是期望最大化 (Expectation Maximization) 算法的简称,用于含有隐变量的情况下,概率 ...

  9. Python基础学习----异常

    ''' 异常: 程序在运行的时候,Python的解释器遇到一个错误会停止运行. 并且会提供错误的信息,这就是异常 抛出异常: 程序停止执行并且提示错误信息这个动作就是抛出异常(raise Except ...

  10. JSP Unable to compile class for JSP

    今天刚弄好MyEclipse环境,试了一下jsp的创建,然后就出现了一个很令人纠结的问题. 文档目录如下: Jsp代码如下: <%@page import="com.pd.Person ...