吴裕雄 python 机器学习——分类决策树模型
import numpy as np
import matplotlib.pyplot as plt from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor def load_data():
'''
加载用于分类问题的数据集。数据集采用 scikit-learn 自带的 iris 数据集
'''
# scikit-learn 自带的 iris 数据集
iris=datasets.load_iris()
X_train=iris.data
y_train=iris.target
return train_test_split(X_train, y_train,test_size=0.25,random_state=0,stratify=y_train) #分类决策树DecisionTreeClassifier模型
def test_DecisionTreeClassifier(*data):
X_train,X_test,y_train,y_test=data
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
print("Training score:%f"%(clf.score(X_train,y_train)))
print("Testing score:%f"%(clf.score(X_test,y_test))) # 产生用于分类问题的数据集
X_train,X_test,y_train,y_test=load_data()
# 调用 test_DecisionTreeClassifier
test_DecisionTreeClassifier(X_train,X_test,y_train,y_test)
def test_DecisionTreeClassifier_criterion(*data):
'''
测试 DecisionTreeClassifier 的预测性能随 criterion 参数的影响
'''
X_train,X_test,y_train,y_test=data
criterions=['gini','entropy']
for criterion in criterions:
clf = DecisionTreeClassifier(criterion=criterion)
clf.fit(X_train, y_train)
print("criterion:%s"%criterion)
print("Training score:%f"%(clf.score(X_train,y_train)))
print("Testing score:%f"%(clf.score(X_test,y_test))) # 调用 test_DecisionTreeClassifier_criterion
test_DecisionTreeClassifier_criterion(X_train,X_test,y_train,y_test)
def test_DecisionTreeClassifier_splitter(*data):
'''
测试 DecisionTreeClassifier 的预测性能随划分类型的影响
'''
X_train,X_test,y_train,y_test=data
splitters=['best','random']
for splitter in splitters:
clf = DecisionTreeClassifier(splitter=splitter)
clf.fit(X_train, y_train)
print("splitter:%s"%splitter)
print("Training score:%f"%(clf.score(X_train,y_train)))
print("Testing score:%f"%(clf.score(X_test,y_test))) # 调用 test_DecisionTreeClassifier_splitter
test_DecisionTreeClassifier_splitter(X_train,X_test,y_train,y_test)
def test_DecisionTreeClassifier_depth(*data,maxdepth):
'''
测试 DecisionTreeClassifier 的预测性能随 max_depth 参数的影响
'''
X_train,X_test,y_train,y_test=data
depths=np.arange(1,maxdepth)
training_scores=[]
testing_scores=[]
for depth in depths:
clf = DecisionTreeClassifier(max_depth=depth)
clf.fit(X_train, y_train)
training_scores.append(clf.score(X_train,y_train))
testing_scores.append(clf.score(X_test,y_test)) ## 绘图
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
ax.plot(depths,training_scores,label="traing score",marker='o')
ax.plot(depths,testing_scores,label="testing score",marker='*')
ax.set_xlabel("maxdepth")
ax.set_ylabel("score")
ax.set_title("Decision Tree Classification")
ax.legend(framealpha=0.5,loc='best')
plt.show() # 调用 test_DecisionTreeClassifier_depth
test_DecisionTreeClassifier_depth(X_train,X_test,y_train,y_test,maxdepth=100)
import os
import pydotplus from io import StringIO
from sklearn.tree import export_graphviz
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor X_train,X_test,y_train,y_test=load_data()
clf = DecisionTreeClassifier()
clf.fit(X_train,y_train)
export_graphviz(clf,"F://out")
吴裕雄 python 机器学习——分类决策树模型的更多相关文章
- 吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——核化PCAKernelPCA模型
# -*- coding: utf-8 -*- import numpy as np import matplotlib.pyplot as plt from sklearn import datas ...
- 吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
- 吴裕雄 python 机器学习——支持向量机SVM非线性分类SVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
- 吴裕雄 python 机器学习——支持向量机线性分类LinearSVC模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...
- 吴裕雄 python 机器学习——集成学习梯度提升决策树GradientBoostingRegressor回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
- 吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
- 吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
- 吴裕雄 python 机器学习——模型选择分类问题性能度量
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import SVC from sklearn.datasets ...
随机推荐
- Spark分布式编程之全局变量专题【共享变量】
转载自:http://www.aboutyun.com/thread-19652-1-1.html 问题导读 1.spark共享变量的作用是什么?2.什么情况下使用共享变量?3.如何在程序中使用共享变 ...
- Kettle解决方案: 第五章 ETL相关知识
早期, ETL知识作为BI系统的一部分来介绍. 后来在The Data Warehouse ETL Tooket一书中, 系统性的整理了ETL的相关内容, 形成了一篇"ETL里的34个子系统 ...
- linux远程windows桌面
rdesktop,例子如下,-f为全屏,-a为颜色设置 rdesktop -f -a 32 192.168.88.235
- Bacnet协议IP采集开发 总结
一.开发准备 a.模拟器 VTS和BACnetDeviceSimulator b.主站 BACnetScan c.参考文档 http://wenku.baidu.com/view/3052 ...
- Java读取Excel并与SqlServer库中的数据比较
先说说需求.在SQL server数据库中的表里存在一些数据,现在整理的Excel文档中也存在一些数据,现在需要通过根据比较某个字段值(唯一)来判断出,在库中有但excel中没有的数据. 大概的思路就 ...
- cefsharp插入自定义JS
string script_1 = "document.getElementsByTagName('head')[0].appendChild(document.createEleme ...
- C++日常应用-定时器
定时器的使用:分为有句柄 无句柄两类 有句柄情况下的使用:头文件: 1.添加映射 BEGIN_MSG_MAP(类名) MESSAGE_HANDLER(WM_TIMER, OnTimer) END_MS ...
- nginx+多个tomcat
学习nginx的时候遇到的问题:nginx怎么部署两台tomcat? upstream 在网上找的资源,我在nginx配置文件(nginx.conf)中添加了两个server.结果只显示第一个se ...
- CSS——Flex
任何一个容器都可以指定为Flexbox布局 .flex-container { display: -webkit-flex; /* Safari */ display: flex; } 行内元素可以指 ...
- 【BZOJ2054】疯狂的馒头(并查集)
/* 经典思路, 倒序并查集处理即可 */ #include<cstdio> #include<algorithm> #include<cstring> #incl ...