import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split

def load_data_regression():
    '''
    Load the dataset for the regression problem.
    '''
    # Use the diabetes dataset that ships with scikit-learn
    diabetes = datasets.load_diabetes()
    # Split into training and test sets; the test set is 1/4 of the original data
    return train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)

# Ensemble learning: gradient boosted decision tree regression model (GradientBoostingRegressor)
def test_GradientBoostingRegressor(*data):
    X_train, X_test, y_train, y_test = data
    regr = ensemble.GradientBoostingRegressor()
    regr.fit(X_train, y_train)
    print("Training score:%f" % regr.score(X_train, y_train))
    print("Testing score:%f" % regr.score(X_test, y_test))

# Load the regression data
X_train, X_test, y_train, y_test = load_data_regression()
# Call test_GradientBoostingRegressor
test_GradientBoostingRegressor(X_train, X_test, y_train, y_test)
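Here regr.score reports the coefficient of determination R² on the given data. As a quick check of what those numbers mean, the same quantities can be computed explicitly from the predictions with sklearn.metrics; this is only a minimal sketch alongside the code above.

from sklearn.metrics import mean_squared_error, r2_score

# Fit the default model once and inspect its test-set predictions explicitly.
regr = ensemble.GradientBoostingRegressor()
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)
print("Test R^2: %f" % r2_score(y_test, y_pred))           # matches regr.score(X_test, y_test)
print("Test MSE: %f" % mean_squared_error(y_test, y_pred))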

def test_GradientBoostingRegressor_num(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the n_estimators parameter.
    '''
    X_train, X_test, y_train, y_test = data
    nums = np.arange(1, 200, step=2)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for num in nums:
        regr = ensemble.GradientBoostingRegressor(n_estimators=num)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(nums, training_scores, label="Training Score")
    ax.plot(nums, testing_scores, label="Testing Score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_num
test_GradientBoostingRegressor_num(X_train, X_test, y_train, y_test)
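The sweep above refits a model from scratch for every value of n_estimators. Because boosting is additive, the same training/testing curves can be recovered from a single fit with staged_predict, which yields the prediction after each boosting stage; a rough sketch, reusing the split from load_data_regression():

from sklearn.metrics import r2_score

# Fit once with the largest number of stages, then score every intermediate stage.
regr = ensemble.GradientBoostingRegressor(n_estimators=200)
regr.fit(X_train, y_train)
staged_train_scores = [r2_score(y_train, y_pred) for y_pred in regr.staged_predict(X_train)]
staged_test_scores = [r2_score(y_test, y_pred) for y_pred in regr.staged_predict(X_test)]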

def test_GradientBoostingRegressor_maxdepth(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the max_depth parameter.
    '''
    X_train, X_test, y_train, y_test = data
    maxdepths = np.arange(1, 20)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for maxdepth in maxdepths:
        # max_leaf_nodes=None (the default) so that max_depth alone limits tree growth
        regr = ensemble.GradientBoostingRegressor(max_depth=maxdepth, max_leaf_nodes=None)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(maxdepths, training_scores, label="Training Score")
    ax.plot(maxdepths, testing_scores, label="Testing Score")
    ax.set_xlabel("max_depth")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_maxdepth
test_GradientBoostingRegressor_maxdepth(X_train, X_test, y_train, y_test)

def test_GradientBoostingRegressor_learning(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the learning_rate parameter.
    '''
    X_train, X_test, y_train, y_test = data
    learnings = np.linspace(0.01, 1.0)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for learning in learnings:
        regr = ensemble.GradientBoostingRegressor(learning_rate=learning)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(learnings, training_scores, label="Training Score")
    ax.plot(learnings, testing_scores, label="Testing Score")
    ax.set_xlabel("learning_rate")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_learning
test_GradientBoostingRegressor_learning(X_train, X_test, y_train, y_test)
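learning_rate scales the contribution of each tree, so it trades off against n_estimators: a smaller step size generally needs more boosting stages to reach a comparable fit. A small comparison sketch; the specific settings below are illustrative rather than tuned:

# Few strong steps vs. many weak steps (values chosen only for illustration).
for lr, n in [(1.0, 50), (0.1, 500)]:
    regr = ensemble.GradientBoostingRegressor(learning_rate=lr, n_estimators=n)
    regr.fit(X_train, y_train)
    print("learning_rate=%.2f, n_estimators=%d -> Testing score: %f"
          % (lr, n, regr.score(X_test, y_test)))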

def test_GradientBoostingRegressor_subsample(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the subsample parameter.
    '''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    subsamples = np.linspace(0.01, 1.0, num=20)
    testing_scores = []
    training_scores = []
    for subsample in subsamples:
        regr = ensemble.GradientBoostingRegressor(subsample=subsample)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(subsamples, training_scores, label="Training Score")
    ax.plot(subsamples, testing_scores, label="Testing Score")
    ax.set_xlabel("subsample")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_subsample
test_GradientBoostingRegressor_subsample(X_train, X_test, y_train, y_test)
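With subsample < 1.0 the model becomes stochastic gradient boosting, and scikit-learn records the out-of-bag improvement in loss at each stage in the oob_improvement_ attribute (only available when subsample < 1.0). A minimal sketch of using it to judge how many stages still help; the subsample value is just an example:

regr = ensemble.GradientBoostingRegressor(n_estimators=200, subsample=0.5)
regr.fit(X_train, y_train)
# Cumulative OOB improvement; where the curve flattens, extra stages add little.
cum_oob = np.cumsum(regr.oob_improvement_)
print("Stage with best cumulative OOB improvement: %d" % (int(np.argmax(cum_oob)) + 1))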

def test_GradientBoostingRegressor_loss(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with different loss functions and the alpha parameter.
    '''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    nums = np.arange(1, 200, step=2)
    ########## plot huber ##########
    ax = fig.add_subplot(2, 1, 1)
    alphas = np.linspace(0.01, 1.0, endpoint=False, num=5)
    for alpha in alphas:
        testing_scores = []
        training_scores = []
        for num in nums:
            regr = ensemble.GradientBoostingRegressor(n_estimators=num, loss='huber', alpha=alpha)
            regr.fit(X_train, y_train)
            training_scores.append(regr.score(X_train, y_train))
            testing_scores.append(regr.score(X_test, y_test))
        ax.plot(nums, training_scores, label="Training Score:alpha=%f" % alpha)
        ax.plot(nums, testing_scores, label="Testing Score:alpha=%f" % alpha)
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right", framealpha=0.4)
    ax.set_ylim(0, 1.05)
    ax.set_title("loss=huber")
    plt.suptitle("GradientBoostingRegressor")
    ########## plot ls and lad ##########
    ax = fig.add_subplot(2, 1, 2)
    for loss in ['ls', 'lad']:
        testing_scores = []
        training_scores = []
        for num in nums:
            regr = ensemble.GradientBoostingRegressor(n_estimators=num, loss=loss)
            regr.fit(X_train, y_train)
            training_scores.append(regr.score(X_train, y_train))
            testing_scores.append(regr.score(X_test, y_test))
        ax.plot(nums, training_scores, label="Training Score:loss=%s" % loss)
        ax.plot(nums, testing_scores, label="Testing Score:loss=%s" % loss)
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right", framealpha=0.4)
    ax.set_ylim(0, 1.05)
    ax.set_title("loss=ls,lad")
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_loss
test_GradientBoostingRegressor_loss(X_train, X_test, y_train, y_test)
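The loss names used here follow older scikit-learn releases: in scikit-learn 1.0 'ls' and 'lad' were deprecated in favor of 'squared_error' and 'absolute_error', and the old spellings were removed later. On a recent release a small compatibility shim like the following can be used; the version check is deliberately rough:

import sklearn

# 'ls'/'lad' were renamed in scikit-learn 1.0 ('huber' is unchanged).
major = int(sklearn.__version__.split(".")[0])
loss_names = ['squared_error', 'absolute_error'] if major >= 1 else ['ls', 'lad']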

def test_GradientBoostingRegressor_max_features(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the max_features parameter.
    '''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    max_features = np.linspace(0.01, 1.0)
    testing_scores = []
    training_scores = []
    for features in max_features:
        regr = ensemble.GradientBoostingRegressor(max_features=features)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(max_features, training_scores, label="Training Score")
    ax.plot(max_features, testing_scores, label="Testing Score")
    ax.set_xlabel("max_features")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_max_features
test_GradientBoostingRegressor_max_features(X_train, X_test, y_train, y_test)
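The experiments above vary one hyperparameter at a time. To search several of them jointly, scikit-learn's GridSearchCV can be applied to GradientBoostingRegressor; the grid below is only a small illustrative sketch, not a recommended setting:

from sklearn.model_selection import GridSearchCV

param_grid = {
    "n_estimators": [50, 100, 200],
    "learning_rate": [0.05, 0.1, 0.5],
    "max_depth": [2, 3, 5],
    "subsample": [0.5, 1.0],
}
search = GridSearchCV(ensemble.GradientBoostingRegressor(), param_grid, cv=5)
search.fit(X_train, y_train)
print("Best params:", search.best_params_)
print("Testing score: %f" % search.score(X_test, y_test))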
