吴裕雄 python 机器学习——模型选择分类问题性能度量
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    fbeta_score,
    classification_report,
    confusion_matrix,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)

# Model selection, classification performance metrics: accuracy_score
def test_accuracy_score():
    """Print accuracy_score for a fixed ten-sample binary example.

    The metric is evaluated twice on the same label/prediction pair:
    once with ``normalize=True`` and once with ``normalize=False``.
    Results are written to stdout; nothing is returned.
    """
    truth = [1] * 5 + [0] * 5
    guess = [0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
    normalized = accuracy_score(truth, guess, normalize=True)
    print('Accuracy Score(normalize=True):', normalized)
    unnormalized = accuracy_score(truth, guess, normalize=False)
    print('Accuracy Score(normalize=False):', unnormalized)


# Run the accuracy_score demo.
test_accuracy_score()
#模型选择分类问题性能度量precision_score模型
def test_precision_score():
    """Print accuracy and precision for a fixed ten-sample binary example.

    Results are written to stdout; nothing is returned.
    """
    truth = [1] * 5 + [0] * 5
    guess = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
    accuracy = accuracy_score(truth, guess, normalize=True)
    print('Accuracy Score:', accuracy)
    precision = precision_score(truth, guess)
    print('Precision Score:', precision)


# Run the precision_score demo.
test_precision_score()
#模型选择分类问题性能度量recall_score模型
def test_recall_score():
    """Print accuracy, precision and recall for a fixed binary example.

    Results are written to stdout; nothing is returned.
    """
    truth = [1] * 5 + [0] * 5
    guess = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
    accuracy = accuracy_score(truth, guess, normalize=True)
    print('Accuracy Score:', accuracy)
    precision = precision_score(truth, guess)
    print('Precision Score:', precision)
    recall = recall_score(truth, guess)
    print('Recall Score:', recall)


# Run the recall_score demo.
test_recall_score()
#模型选择分类问题性能度量f1_score模型
def test_f1_score():
    """Print accuracy, precision, recall and F1 for a fixed binary example.

    Results are written to stdout; nothing is returned.
    """
    truth = [1] * 5 + [0] * 5
    guess = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
    accuracy = accuracy_score(truth, guess, normalize=True)
    print('Accuracy Score:', accuracy)
    precision = precision_score(truth, guess)
    print('Precision Score:', precision)
    recall = recall_score(truth, guess)
    print('Recall Score:', recall)
    f1 = f1_score(truth, guess)
    print('F1 Score:', f1)


# Run the f1_score demo.
test_f1_score()
#模型选择分类问题性能度量fbeta_score模型
def test_fbeta_score():
    """Print the basic metrics plus fbeta_score at several beta values.

    Accuracy, precision, recall and F1 are printed first, followed by
    fbeta_score for beta = 0.001, 1, 10 and 10000 (in that order), all on
    the same fixed ten-sample binary example. Nothing is returned.
    """
    truth = [1] * 5 + [0] * 5
    guess = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]

    print('Accuracy Score:', accuracy_score(truth, guess, normalize=True))
    print('Precision Score:', precision_score(truth, guess))
    print('Recall Score:', recall_score(truth, guess))
    print('F1 Score:', f1_score(truth, guess))

    # Caption / beta pairs, evaluated in the listed order.
    beta_rows = (
        ('Fbeta Score(beta=0.001):', 0.001),
        ('Fbeta Score(beta=1):', 1),
        ('Fbeta Score(beta=10):', 10),
        ('Fbeta Score(beta=10000):', 10000),
    )
    for caption, beta in beta_rows:
        print(caption, fbeta_score(truth, guess, beta=beta))


# Run the fbeta_score demo.
test_fbeta_score()
#模型选择分类问题性能度量classification_report模型
def test_classification_report():
    """Print the classification_report text for a fixed binary example.

    The two classes are labelled "class_0" and "class_1" in the report.
    The report is written to stdout; nothing is returned.
    """
    truth = [1] * 5 + [0] * 5
    guess = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
    class_names = ["class_0", "class_1"]
    report = classification_report(truth, guess, target_names=class_names)
    print('Classification Report:\n', report)


# Run the classification_report demo.
test_classification_report()
#模型选择分类问题性能度量confusion_matrix模型
def test_confusion_matrix():
    """Print the confusion matrix for a fixed ten-sample binary example.

    The matrix is computed with the explicit label order [0, 1] and
    written to stdout; nothing is returned.
    """
    truth = [1] * 5 + [0] * 5
    guess = [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
    label_order = [0, 1]
    matrix = confusion_matrix(truth, guess, labels=label_order)
    print('Confusion Matrix:\n', matrix)


# Run the confusion_matrix demo.
test_confusion_matrix()
#模型选择分类问题性能度量precision_recall_curve模型
def test_precision_recall_curve():
    """Plot one precision-recall curve per iris class.

    The iris labels are binarized into one indicator column per class, the
    feature matrix is widened with random-noise columns, and the data is
    split 50/50 into train and test sets. A one-vs-rest linear SVC is fitted
    on the training half; its decision-function scores on the test half are
    fed to precision_recall_curve, one class at a time, and each curve is
    drawn on a single matplotlib axes. The figure is displayed with
    plt.show(); nothing is returned.
    """
    # Load the data.
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Binarize the labels: one indicator column per class.
    y = label_binarize(y, classes=[0, 1, 2])
    n_classes = y.shape[1]

    # Add noise features (seeded so the run is reproducible).
    np.random.seed(0)
    n_samples, n_features = X.shape
    X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)

    # Train the model. Fit exactly once: the original fitted the same
    # estimator twice in a row, which only repeated the work.
    clf = OneVsRestClassifier(
        SVC(kernel='linear', probability=True, random_state=0))
    y_score = clf.fit(X_train, y_train).decision_function(X_test)

    # Compute and draw the P-R curve of every class.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for i in range(n_classes):
        precision, recall, _ = precision_recall_curve(
            y_test[:, i], y_score[:, i])
        ax.plot(recall, precision, label="target=%s" % i)
    ax.set_xlabel("Recall Score")
    ax.set_ylabel("Precision Score")
    ax.set_title("P-R")
    ax.legend(loc='best')
    ax.set_xlim(0, 1.1)
    ax.set_ylim(0, 1.1)
    ax.grid()
    plt.show()


# Run the precision_recall_curve demo.
test_precision_recall_curve()
#模型选择分类问题性能度量roc_curve、roc_auc_score模型
def test_roc_auc_score():
    """Plot one ROC curve per iris class, annotated with its AUC.

    The iris labels are binarized into one indicator column per class, the
    feature matrix is widened with random-noise columns, and the data is
    split 50/50 into train and test sets. A one-vs-rest linear SVC is fitted
    on the training half; its decision-function scores on the test half are
    used to compute roc_curve and roc_auc_score per class. Each curve is
    drawn on a single matplotlib axes together with the diagonal reference
    line. The figure is displayed with plt.show(); nothing is returned.
    """
    # Load the data.
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Binarize the labels: one indicator column per class.
    y = label_binarize(y, classes=[0, 1, 2])
    n_classes = y.shape[1]

    # Add noise features (seeded so the run is reproducible).
    np.random.seed(0)
    n_samples, n_features = X.shape
    X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)

    # Train the model. Fit exactly once: the original fitted the same
    # estimator twice in a row, which only repeated the work.
    clf = OneVsRestClassifier(
        SVC(kernel='linear', probability=True, random_state=0))
    y_score = clf.fit(X_train, y_train).decision_function(X_test)

    # Compute and draw the ROC curve of every class.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for i in range(n_classes):
        fpr, tpr, _ = roc_curve(y_test[:, i], y_score[:, i])
        # roc_auc_score takes (y_true, y_score), not the curve coordinates;
        # the original passed (fpr, tpr), which is not a valid label vector.
        roc_auc = roc_auc_score(y_test[:, i], y_score[:, i])
        ax.plot(fpr, tpr, label="target=%s,auc=%s" % (i, roc_auc))
    # Diagonal reference line from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    ax.set_title("ROC")
    ax.legend(loc="best")
    ax.set_xlim(0, 1.1)
    ax.set_ylim(0, 1.1)
    ax.grid()
    plt.show()


# Run the roc_curve / roc_auc_score demo.
test_roc_auc_score()
吴裕雄 python 机器学习——模型选择分类问题性能度量的更多相关文章
- 吴裕雄 python 机器学习——模型选择回归问题性能度量
from sklearn.metrics import mean_absolute_error,mean_squared_error #模型选择回归问题性能度量mean_absolute_error模 ...
- 吴裕雄 python 机器学习——模型选择数据集切分
import numpy as np from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveO ...
- 吴裕雄 python 机器学习——模型选择验证曲线validation_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
- 吴裕雄 python 机器学习——模型选择学习曲线learning_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
- 吴裕雄 python 机器学习——模型选择参数优化暴力搜索寻优GridSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
- 吴裕雄 python 机器学习——模型选择参数优化随机搜索寻优RandomizedSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
- 吴裕雄 python 机器学习——模型选择损失函数模型
from sklearn.metrics import zero_one_loss,log_loss def test_zero_one_loss(): y_true=[1,1,1,1,1,0,0,0 ...
- 吴裕雄 python 机器学习——支持向量机线性分类LinearSVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
- 吴裕雄 python 机器学习——分类决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
随机推荐
- excel用xlrd日期变成42631.0
datetime的解决办法混合数据的表中有个日期:2016/9/18 通过table.row_values(row_number)[1]读取时,显示的结果为:42631.0 查看row_values方 ...
- 题解【洛谷P1618】 三连击(升级版)
设三个数分别为n1、n2、n3,因为三个数的比为A:B:C,取一份量i,使得A·i=x,B·i=y,C·i=z(·是*的意思). 所以我们的代码只需要枚举i,并以此判断n1、n2、n3是否为三位数且包 ...
- Linux零碎002
1.if else就近原则: 2.指针位数与机器地址总线宽度一致: 3.数组即常量指针,用法和指针类似,在操作指针时:p与&p[0]含义一样: 4.编译器按照内存递减的方式来分配变量.
- Linux 虚拟机共享目录
1. 开启linux虚拟机 2. 菜单“虚拟机” -------“重新安装 Vm tools” 3. 桌面看到 VmTools 安装盘 4. 安装 5. 设置中添加共享目录 5. ...
- 载 js验证密码 必须由大小写字母、数字和特殊字符组成
转自:https://blog.csdn.net/weixin_43824935/article/details/93601064 密码长度8-16位 必须由大写字母,小写字母,数字,特殊符号组成 正 ...
- python3练习100题——017
原题链接:http://www.runoob.com/python/python-exercise-example17.html 题目:输入一行字符,分别统计出其中 英文字母、空格、数字和其它字符的个 ...
- android 获取webview内容真实高度(webview上下可滚动距离)
正常获取: mainWebView.getContentHeight()//获取html高度 mainWebView.getScale()//手机上网页缩放比例 mainWebView.getHeig ...
- (复习)父子组件传值使用v-model双向绑定,报错Avoid mutating a prop directly解决方案
报错:Avoid mutating a prop directly since the value will be overwritten whenever the parent component. ...
- 1.4 面试问题整理: ATM机取款
流程图:
- php/js将 CST时间转成格式化时间
PHP :比较简单 $str = 'Wed Jul 24 11:24:33 CST 2019'; echo date('Y-m-d H:i:s', strtotime($str)); echo dat ...