import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split

def load_data_regression():
    '''
    Load the dataset for the regression problem.
    '''
    # Use the diabetes dataset that ships with scikit-learn
    diabetes = datasets.load_diabetes()
    # Split into training and test sets; the test set is 1/4 of the original data
    return train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)

# Ensemble learning: gradient boosted decision tree regression model (GradientBoostingRegressor)
def test_GradientBoostingRegressor(*data):
    X_train, X_test, y_train, y_test = data
    regr = ensemble.GradientBoostingRegressor()
    regr.fit(X_train, y_train)
    print("Training score:%f" % regr.score(X_train, y_train))
    print("Testing score:%f" % regr.score(X_test, y_test))

# Load the regression data
X_train, X_test, y_train, y_test = load_data_regression()
# Call test_GradientBoostingRegressor
test_GradientBoostingRegressor(X_train, X_test, y_train, y_test)
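Here regr.score reports the coefficient of determination R² on the given data. As a quick check of what those numbers mean, the same quantities can be computed explicitly from the predictions with sklearn.metrics; this is only a minimal sketch alongside the code above.

from sklearn.metrics import mean_squared_error, r2_score

# Fit the default model once and inspect its test-set predictions explicitly.
regr = ensemble.GradientBoostingRegressor()
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)
print("Test R^2: %f" % r2_score(y_test, y_pred))           # matches regr.score(X_test, y_test)
print("Test MSE: %f" % mean_squared_error(y_test, y_pred))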

def test_GradientBoostingRegressor_num(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the n_estimators parameter.
    '''
    X_train, X_test, y_train, y_test = data
    nums = np.arange(1, 200, step=2)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for num in nums:
        regr = ensemble.GradientBoostingRegressor(n_estimators=num)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(nums, training_scores, label="Training Score")
    ax.plot(nums, testing_scores, label="Testing Score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_num
test_GradientBoostingRegressor_num(X_train, X_test, y_train, y_test)
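The sweep above refits a model from scratch for every value of n_estimators. Because boosting is additive, the same training/testing curves can be recovered from a single fit with staged_predict, which yields the prediction after each boosting stage; a rough sketch, reusing the split from load_data_regression():

from sklearn.metrics import r2_score

# Fit once with the largest number of stages, then score every intermediate stage.
regr = ensemble.GradientBoostingRegressor(n_estimators=200)
regr.fit(X_train, y_train)
staged_train_scores = [r2_score(y_train, y_pred) for y_pred in regr.staged_predict(X_train)]
staged_test_scores = [r2_score(y_test, y_pred) for y_pred in regr.staged_predict(X_test)]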

def test_GradientBoostingRegressor_maxdepth(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the max_depth parameter.
    '''
    X_train, X_test, y_train, y_test = data
    maxdepths = np.arange(1, 20)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for maxdepth in maxdepths:
        # max_leaf_nodes=None (the default) so that max_depth alone limits tree growth
        regr = ensemble.GradientBoostingRegressor(max_depth=maxdepth, max_leaf_nodes=None)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(maxdepths, training_scores, label="Training Score")
    ax.plot(maxdepths, testing_scores, label="Testing Score")
    ax.set_xlabel("max_depth")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_maxdepth
test_GradientBoostingRegressor_maxdepth(X_train, X_test, y_train, y_test)

def test_GradientBoostingRegressor_learning(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the learning_rate parameter.
    '''
    X_train, X_test, y_train, y_test = data
    learnings = np.linspace(0.01, 1.0)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for learning in learnings:
        regr = ensemble.GradientBoostingRegressor(learning_rate=learning)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(learnings, training_scores, label="Training Score")
    ax.plot(learnings, testing_scores, label="Testing Score")
    ax.set_xlabel("learning_rate")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_learning
test_GradientBoostingRegressor_learning(X_train, X_test, y_train, y_test)
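learning_rate scales the contribution of each tree, so it trades off against n_estimators: a smaller step size generally needs more boosting stages to reach a comparable fit. A small comparison sketch; the specific settings below are illustrative rather than tuned:

# Few strong steps vs. many weak steps (values chosen only for illustration).
for lr, n in [(1.0, 50), (0.1, 500)]:
    regr = ensemble.GradientBoostingRegressor(learning_rate=lr, n_estimators=n)
    regr.fit(X_train, y_train)
    print("learning_rate=%.2f, n_estimators=%d -> Testing score: %f"
          % (lr, n, regr.score(X_test, y_test)))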

def test_GradientBoostingRegressor_subsample(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the subsample parameter.
    '''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    subsamples = np.linspace(0.01, 1.0, num=20)
    testing_scores = []
    training_scores = []
    for subsample in subsamples:
        regr = ensemble.GradientBoostingRegressor(subsample=subsample)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(subsamples, training_scores, label="Training Score")
    ax.plot(subsamples, testing_scores, label="Testing Score")
    ax.set_xlabel("subsample")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_subsample
test_GradientBoostingRegressor_subsample(X_train, X_test, y_train, y_test)
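With subsample < 1.0 the model becomes stochastic gradient boosting, and scikit-learn records the out-of-bag improvement in loss at each stage in the oob_improvement_ attribute (only available when subsample < 1.0). A minimal sketch of using it to judge how many stages still help; the subsample value is just an example:

regr = ensemble.GradientBoostingRegressor(n_estimators=200, subsample=0.5)
regr.fit(X_train, y_train)
# Cumulative OOB improvement; where the curve flattens, extra stages add little.
cum_oob = np.cumsum(regr.oob_improvement_)
print("Stage with best cumulative OOB improvement: %d" % (int(np.argmax(cum_oob)) + 1))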

def test_GradientBoostingRegressor_loss(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with different loss functions and the alpha parameter.
    '''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    nums = np.arange(1, 200, step=2)
    ########## plot huber ##########
    ax = fig.add_subplot(2, 1, 1)
    alphas = np.linspace(0.01, 1.0, endpoint=False, num=5)
    for alpha in alphas:
        testing_scores = []
        training_scores = []
        for num in nums:
            regr = ensemble.GradientBoostingRegressor(n_estimators=num, loss='huber', alpha=alpha)
            regr.fit(X_train, y_train)
            training_scores.append(regr.score(X_train, y_train))
            testing_scores.append(regr.score(X_test, y_test))
        ax.plot(nums, training_scores, label="Training Score:alpha=%f" % alpha)
        ax.plot(nums, testing_scores, label="Testing Score:alpha=%f" % alpha)
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right", framealpha=0.4)
    ax.set_ylim(0, 1.05)
    ax.set_title("loss=huber")
    plt.suptitle("GradientBoostingRegressor")
    ########## plot ls and lad ##########
    ax = fig.add_subplot(2, 1, 2)
    for loss in ['ls', 'lad']:
        testing_scores = []
        training_scores = []
        for num in nums:
            regr = ensemble.GradientBoostingRegressor(n_estimators=num, loss=loss)
            regr.fit(X_train, y_train)
            training_scores.append(regr.score(X_train, y_train))
            testing_scores.append(regr.score(X_test, y_test))
        ax.plot(nums, training_scores, label="Training Score:loss=%s" % loss)
        ax.plot(nums, testing_scores, label="Testing Score:loss=%s" % loss)
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right", framealpha=0.4)
    ax.set_ylim(0, 1.05)
    ax.set_title("loss=ls,lad")
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_loss
test_GradientBoostingRegressor_loss(X_train, X_test, y_train, y_test)
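The loss names used here follow older scikit-learn releases: in scikit-learn 1.0 'ls' and 'lad' were deprecated in favor of 'squared_error' and 'absolute_error', and the old spellings were removed later. On a recent release a small compatibility shim like the following can be used; the version check is deliberately rough:

import sklearn

# 'ls'/'lad' were renamed in scikit-learn 1.0 ('huber' is unchanged).
major = int(sklearn.__version__.split(".")[0])
loss_names = ['squared_error', 'absolute_error'] if major >= 1 else ['ls', 'lad']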

def test_GradientBoostingRegressor_max_features(*data):
    '''
    Test how the predictive performance of GradientBoostingRegressor varies with the max_features parameter.
    '''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    max_features = np.linspace(0.01, 1.0)
    testing_scores = []
    training_scores = []
    for features in max_features:
        regr = ensemble.GradientBoostingRegressor(max_features=features)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(max_features, training_scores, label="Training Score")
    ax.plot(max_features, testing_scores, label="Testing Score")
    ax.set_xlabel("max_features")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1.05)
    plt.suptitle("GradientBoostingRegressor")
    plt.show()

# Call test_GradientBoostingRegressor_max_features
test_GradientBoostingRegressor_max_features(X_train, X_test, y_train, y_test)
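The experiments above vary one hyperparameter at a time. To search several of them jointly, scikit-learn's GridSearchCV can be applied to GradientBoostingRegressor; the grid below is only a small illustrative sketch, not a recommended setting:

from sklearn.model_selection import GridSearchCV

param_grid = {
    "n_estimators": [50, 100, 200],
    "learning_rate": [0.05, 0.1, 0.5],
    "max_depth": [2, 3, 5],
    "subsample": [0.5, 1.0],
}
search = GridSearchCV(ensemble.GradientBoostingRegressor(), param_grid, cv=5)
search.fit(X_train, y_train)
print("Best params:", search.best_params_)
print("Testing score: %f" % search.score(X_test, y_test))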
