吴裕雄 python 机器学习——分类决策树模型
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


def load_data():
    """Load the iris data set and split it for a classification task.

    The data comes from the iris data set bundled with scikit-learn.

    Returns:
        The result of ``train_test_split``, which callers unpack in the
        order ``X_train, X_test, y_train, y_test``. A quarter of the
        samples is held out for testing and ``random_state=0`` fixes the
        shuffle.
    """
    iris = datasets.load_iris()
    features = iris.data
    labels = iris.target
    # Stratify on the labels so that, per the scikit-learn documentation,
    # both splits keep the class proportions of the full data set.
    return train_test_split(
        features, labels, test_size=0.25, random_state=0, stratify=labels
    )


# Decision tree classification: the DecisionTreeClassifier model
def test_DecisionTreeClassifier(*data):
    """Fit a default DecisionTreeClassifier and print its scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``. Any other count raises
            ``ValueError`` when the tuple is unpacked.
    """
    X_train, X_test, y_train, y_test = data
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    # ``:f`` reproduces the six-decimal output of the former "%f" formatting.
    print(f"Training score:{clf.score(X_train, y_train):f}")
    print(f"Testing score:{clf.score(X_test, y_test):f}")


# Build the data set used for the classification experiments
X_train, X_test, y_train, y_test = load_data()
# Run the baseline DecisionTreeClassifier experiment on that split
test_DecisionTreeClassifier(X_train, X_test, y_train, y_test)

def test_DecisionTreeClassifier_criterion(*data):
    """Show how the ``criterion`` parameter affects DecisionTreeClassifier.

    Fits one classifier per criterion ('gini', then 'entropy') and prints
    the criterion name followed by the training and testing scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    for criterion in ("gini", "entropy"):
        clf = DecisionTreeClassifier(criterion=criterion)
        clf.fit(X_train, y_train)
        print(f"criterion:{criterion}")
        print(f"Training score:{clf.score(X_train, y_train):f}")
        print(f"Testing score:{clf.score(X_test, y_test):f}")


# Run test_DecisionTreeClassifier_criterion
test_DecisionTreeClassifier_criterion(X_train, X_test, y_train, y_test)

def test_DecisionTreeClassifier_splitter(*data):
    """Show how the ``splitter`` parameter affects DecisionTreeClassifier.

    Fits one classifier per splitter ('best', then 'random') and prints
    the splitter name followed by the training and testing scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    for splitter in ("best", "random"):
        clf = DecisionTreeClassifier(splitter=splitter)
        clf.fit(X_train, y_train)
        print(f"splitter:{splitter}")
        print(f"Training score:{clf.score(X_train, y_train):f}")
        print(f"Testing score:{clf.score(X_test, y_test):f}")


# Run test_DecisionTreeClassifier_splitter
test_DecisionTreeClassifier_splitter(X_train, X_test, y_train, y_test)

def test_DecisionTreeClassifier_depth(*data, maxdepth):
    """Plot DecisionTreeClassifier scores against the ``max_depth`` setting.

    Fits one classifier per candidate depth, records its training and
    testing scores, then shows both curves in a single matplotlib figure.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
        maxdepth: Keyword-only, exclusive upper bound of the depths tried;
            the depths evaluated are ``1, 2, ..., maxdepth - 1``.
    """
    X_train, X_test, y_train, y_test = data
    # np.arange excludes its stop value, so ``maxdepth`` itself is not tried.
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        clf = DecisionTreeClassifier(max_depth=depth)
        clf.fit(X_train, y_train)
        training_scores.append(clf.score(X_train, y_train))
        testing_scores.append(clf.score(X_test, y_test))

    # Plot both score curves on one set of axes.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # The legend label previously read "traing score" (typo).
    ax.plot(depths, training_scores, label="training score", marker='o')
    ax.plot(depths, testing_scores, label="testing score", marker='*')
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Classification")
    ax.legend(framealpha=0.5, loc='best')
    plt.show()


# Run test_DecisionTreeClassifier_depth
test_DecisionTreeClassifier_depth(
    X_train, X_test, y_train, y_test, maxdepth=100
)

import os
from io import StringIO

import pydotplus
from sklearn.tree import export_graphviz
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

# Fit a default tree on a fresh split and export it with export_graphviz
# (Graphviz DOT output, per the scikit-learn documentation).
X_train, X_test, y_train, y_test = load_data()
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
# NOTE(review): the output path is hard-coded to drive F: and will fail on
# machines without that drive; confirm the intended location before running.
export_graphviz(clf, out_file="F://out")



吴裕雄 python 机器学习——分类决策树模型的更多相关文章
- 吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——核化PCAKernelPCA模型
# -*- coding: utf-8 -*- import numpy as np import matplotlib.pyplot as plt from sklearn import datas ...
- 吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
- 吴裕雄 python 机器学习——支持向量机SVM非线性分类SVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
- 吴裕雄 python 机器学习——支持向量机线性分类LinearSVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
- 吴裕雄 python 机器学习——集成学习梯度提升决策树GradientBoostingRegressor回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
- 吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
- 吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
- 吴裕雄 python 机器学习——模型选择分类问题性能度量
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import SVC from sklearn.datasets ...
随机推荐
- 剑指offer 7. 递归和循环 斐波那契数列
题目描述 大家都知道斐波那契数列,现在要求输入一个整数n,请你输出斐波那契数列的第n项(从0开始,第0项为0). n<=39 简简单单 废话不多说,直接上代码: public class Sol ...
- Ubuntu 下配置 PHP 和 PostgreSQL
安装完成后,检查PHP扩展. php -m | grep pdo_pgsql php -m 和phpinfo应该是不同的配置文件, 你在php -m 中能看到的话, 说明你只在php -i|grep ...
- Android使用okhttp 响应Post请求 使用线程
1.在libs中导入okhttp-2.7.5.jar和okio-1.11.0.jar. 2.post请求 public void getData(){ new Thread(new Runnable( ...
- 执行代码出现ImportError:attempted relative import with no known parent package
前言 在这篇文章中,我将会解析 ImportError: attempted relative import with no known parent package 这个异常的原因.当你在运行的py ...
- activiti中的查询sql
<?xml version="1.0" encoding="UTF-8" ?> <!DOCTYPE mapper PUBLIC "- ...
- python基本语法汇总
From: https://www.cnblogs.com/yunguoxiaoqiao/p/7640040.html 1.常用列表的操作 D = {} D = {'spam': 2, 'tol': ...
- 在思科模拟器上配置AAA认证
1.实验拓扑 2.检测用户之间连通性 PC2 ping PC-A PC-C ping PC-A 3.路由及服务器配置 R1:在路由器R1上配置一个本地用户账号并且利用本地AAA通过console ...
- docker镜像的常用操作
获取镜像 比如说我们可以这样操作 当然把这个镜像拉过来时间非常长. 查看镜像列表 命令: docker images 说明: 使用docker images命令可以列出本地主机上已有的镜像. 信息 ...
- python中令人惊艳的小众数据科学库
Python是门很神奇的语言,历经时间和实践检验,受到开发者和数据科学家一致好评,目前已经是全世界发展最好的编程语言之一.简单易用,完整而庞大的第三方库生态圈,使得Python成为编程小白和高级工程师 ...
- 【Python学习】Python3 环境搭建
参考地址:http://www.runoob.com/python3/python3-install.html Python3 环境搭建 本章节我们将向大家介绍如何在本地搭建 Python3 开发环境 ...