import numpy as np
import matplotlib.pyplot as plt from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor def creat_data(n):
np.random.seed(0)
X = 5 * np.random.rand(n, 1)
y = np.sin(X).ravel()
noise_num=(int)(n/5)
# 每第5个样本,就在该样本的值上添加噪音
y[::5] += 3 * (0.5 - np.random.rand(noise_num))
return train_test_split(X, y,test_size=0.25,random_state=1) #决策树DecisionTreeRegressor模型
def test_DecisionTreeRegressor(*data):
X_train,X_test,y_train,y_test=data
regr = DecisionTreeRegressor()
regr.fit(X_train, y_train)
print("Training score:%f"%(regr.score(X_train,y_train)))
print("Testing score:%f"%(regr.score(X_test,y_test)))
#绘图
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
Y = regr.predict(X)
ax.scatter(X_train, y_train, label="train sample",c='g')
ax.scatter(X_test, y_test, label="test sample",c='r')
ax.plot(X, Y, label="predict_value", linewidth=2,alpha=0.5)
ax.set_xlabel("data")
ax.set_ylabel("target")
ax.set_title("Decision Tree Regression")
ax.legend(framealpha=0.5)
plt.show() # 产生用于回归问题的数据集
X_train,X_test,y_train,y_test=creat_data(100)
# 调用 test_DecisionTreeRegressor
test_DecisionTreeRegressor(X_train,X_test,y_train,y_test)

def test_DecisionTreeRegressor_splitter(*data):
'''
测试 DecisionTreeRegressor 预测性能随划分类型的影响
'''
X_train,X_test,y_train,y_test=data
splitters=['best','random']
for splitter in splitters:
regr = DecisionTreeRegressor(splitter=splitter)
regr.fit(X_train, y_train)
print("Splitter %s"%splitter)
print("Training score:%f"%(regr.score(X_train,y_train)))
print("Testing score:%f"%(regr.score(X_test,y_test))) # 调用 test_DecisionTreeRegressor_splitter
test_DecisionTreeRegressor_splitter(X_train,X_test,y_train,y_test)

def test_DecisionTreeRegressor_depth(*data,maxdepth):
'''
测试 DecisionTreeRegressor 预测性能随 max_depth 的影响
'''
X_train,X_test,y_train,y_test=data
depths=np.arange(1,maxdepth)
training_scores=[]
testing_scores=[]
for depth in depths:
regr = DecisionTreeRegressor(max_depth=depth)
regr.fit(X_train, y_train)
training_scores.append(regr.score(X_train,y_train))
testing_scores.append(regr.score(X_test,y_test))
# 绘图
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
ax.plot(depths,training_scores,label="traing score")
ax.plot(depths,testing_scores,label="testing score")
ax.set_xlabel("maxdepth")
ax.set_ylabel("score")
ax.set_title("Decision Tree Regression")
ax.legend(framealpha=0.5)
plt.show() # 调用 test_DecisionTreeRegressor_depth
test_DecisionTreeRegressor_depth(X_train,X_test,y_train,y_test,maxdepth=20)

吴裕雄 python 机器学习——回归决策树模型的更多相关文章

  1. 吴裕雄 python 机器学习——分类决策树模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

  2. 吴裕雄 python 机器学习——核化PCAKernelPCA模型

    # -*- coding: utf-8 -*- import numpy as np import matplotlib.pyplot as plt from sklearn import datas ...

  3. 吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...

  4. 吴裕雄 python 机器学习——集成学习梯度提升决策树GradientBoostingRegressor回归模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...

  5. 吴裕雄 python 机器学习——集成学习随机森林RandomForestRegressor回归模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...

  6. 吴裕雄 python 机器学习——集成学习AdaBoost算法回归模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...

  7. 吴裕雄 python 机器学习——模型选择回归问题性能度量

    from sklearn.metrics import mean_absolute_error,mean_squared_error #模型选择回归问题性能度量mean_absolute_error模 ...

  8. 吴裕雄 python 机器学习——线性回归模型

    import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...

  9. 吴裕雄 python 机器学习——支持向量机非线性回归SVR模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model,svm fr ...

随机推荐

  1. celery+Rabbit MQ实战记录

    基于以前的一篇文章,celery+Rabbit MQ的安装和使用, 本文更加详细的介绍如何安装和使用celey, Rabbit MQ. 并记录在使用celery时遇到的一些问题. 1.安装 Rabbi ...

  2. 图形化SVN管理搭建 subversion edge自行修改密码

    参考文章: https://blog.csdn.net/buyaore_wo/article/details/84313467 安装版本: Subversion Edge 5.2.3 (Linux 6 ...

  3. Centos 7 安装图形化环境

    安装系统时,使用了最小化安装,当需要用到图形界面的时候需要安装图形界面支持 环境是centos7.5最小化安装 1,先更新系统 yum -y upgrade 这里说明一下upgrade和update的 ...

  4. redis 缓存击穿 看一篇成高手系列3

    什么是缓存击穿 在谈论缓存击穿之前,我们先来回忆下从缓存中加载数据的逻辑,如下图所示 因此,如果黑客每次故意查询一个在缓存内必然不存在的数据,导致每次请求都要去存储层去查询,这样缓存就失去了意义.如果 ...

  5. CentOS下Redis的安装(转)

    目录 CentOS下Redis的安装 前言 下载安装包 解压安装包并安装 启动和停止Redis 启动Redis 停止Redis 参考资料 CentOS下Redis的安装 前言 安装Redis需要知道自 ...

  6. Anaconda下安装OpenCV

    安装命令:conda install -c https://conda.binstar.org/menpo opencvwin10+Anaconda3+python3.5.2,最终cv版本为3.3.1 ...

  7. CentOS 7.x 如何关闭 numa

    CentOS7.x中发现 numactl --interleave=all 执行失败. CentOS7.x中可以通过下面的方式关闭 numa: 1. 编辑 /etc/default/grub 文件,如 ...

  8. 弹框时(如大于body的高度),锁死body,使其不能滚动

    if(flag){ document.body.style.height = '100vh' document.body.style['overflow-y'] = 'hidden' }else{ d ...

  9. c#调用python代码

    c#调用python的方法比较多,比如ironpython,尽管不用安装python环境,可是不兼容python众多的包,也只更新到了python2,通过创建python进程这种方式可以很好的解决兼容 ...

  10. kmeans

    K均值(K-means)算法 ).setSeed(1L) val model=kmeans.fit(dataset) //Make predictions val predictions=model. ...