吴裕雄 python 机器学习——集成学习随机森林RandomForestRegressor回归模型

import numpy as np

import matplotlib.pyplot as plt

from sklearn import datasets,ensemble

from sklearn.model_selection import train_test_split

def load_data_regression(test_size=0.25, random_state=0):
    """Load the diabetes dataset and split it into training and test sets.

    The data comes from the diabetes dataset bundled with scikit-learn.

    Args:
        test_size: Forwarded to ``train_test_split``. The default of 0.25
            holds out a quarter of the samples for testing.
        random_state: Forwarded to ``train_test_split``. The default of 0
            keeps the split reproducible between runs.

    Returns:
        The result of ``train_test_split`` on the features and targets,
        which callers in this file unpack as
        ``X_train, X_test, y_train, y_test``.
    """
    diabetes = datasets.load_diabetes()
    return train_test_split(
        diabetes.data,
        diabetes.target,
        test_size=test_size,
        random_state=random_state,
    )

# Ensemble learning: random forest regression model (RandomForestRegressor)
def test_RandomForestRegressor(*data):
    """Fit a default RandomForestRegressor and print its train/test scores.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If ``data`` does not unpack into exactly four values.
    """
    X_train, X_test, y_train, y_test = data
    regr = ensemble.RandomForestRegressor()
    regr.fit(X_train, y_train)
    # The label previously read "Traing Score"; the typo is fixed here.
    print("Training Score:%f" % regr.score(X_train, y_train))
    print("Testing Score:%f" % regr.score(X_test, y_test))

# Load the regression data set, already split into training and test parts

X_train,X_test,y_train,y_test=load_data_regression()

# Run test_RandomForestRegressor on that split

test_RandomForestRegressor(X_train,X_test,y_train,y_test)

def test_RandomForestRegressor_num(*data):
    """Plot how RandomForestRegressor scores vary with ``n_estimators``.

    One forest is fitted for every odd estimator count from 1 to 99. The
    training and testing scores are drawn as two curves on a single axes
    and the figure is shown with ``plt.show()``.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    estimator_counts = np.arange(1, 100, step=2)
    fig, ax = plt.subplots()

    # Fit one forest per estimator count and record both scores.
    train_curve, test_curve = [], []
    for count in estimator_counts:
        forest = ensemble.RandomForestRegressor(n_estimators=count)
        forest.fit(X_train, y_train)
        train_curve.append(forest.score(X_train, y_train))
        test_curve.append(forest.score(X_test, y_test))

    curves = ((train_curve, "Training Score"), (test_curve, "Testing Score"))
    for curve, label in curves:
        ax.plot(estimator_counts, curve, label=label)

    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1)
    fig.suptitle("RandomForestRegressor")
    plt.show()

# Run test_RandomForestRegressor_num on the train/test split

test_RandomForestRegressor_num(X_train,X_test,y_train,y_test)

def test_RandomForestRegressor_max_depth(*data):
    """Plot how RandomForestRegressor scores vary with ``max_depth``.

    One forest is fitted for each depth limit from 1 to 19. The training
    and testing scores are drawn as two curves on a single axes and the
    figure is shown with ``plt.show()``.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    depth_limits = range(1, 20)
    fig, ax = plt.subplots()

    # Fit one forest per depth limit and record both scores.
    train_curve = []
    test_curve = []
    for depth in depth_limits:
        forest = ensemble.RandomForestRegressor(max_depth=depth)
        forest.fit(X_train, y_train)
        train_curve.append(forest.score(X_train, y_train))
        test_curve.append(forest.score(X_test, y_test))

    ax.plot(depth_limits, train_curve, label="Training Score")
    ax.plot(depth_limits, test_curve, label="Testing Score")
    ax.set_xlabel("max_depth")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1.05)
    fig.suptitle("RandomForestRegressor")
    plt.show()

# Run test_RandomForestRegressor_max_depth on the train/test split

test_RandomForestRegressor_max_depth(X_train,X_test,y_train,y_test)

def test_RandomForestRegressor_max_features(*data):
    """Plot how RandomForestRegressor scores vary with ``max_features``.

    One forest is fitted for each value in ``np.linspace(0.01, 1.0)``,
    passed as the ``max_features`` argument. The training and testing
    scores are drawn as two curves on a single axes and the figure is
    shown with ``plt.show()``.

    Args:
        *data: Exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    feature_fractions = np.linspace(0.01, 1.0)
    fig, ax = plt.subplots()

    # Fit one forest per max_features value and record both scores.
    train_curve, test_curve = [], []
    for fraction in feature_fractions:
        forest = ensemble.RandomForestRegressor(max_features=fraction)
        forest.fit(X_train, y_train)
        train_curve.append(forest.score(X_train, y_train))
        test_curve.append(forest.score(X_test, y_test))

    curves = ((train_curve, "Training Score"), (test_curve, "Testing Score"))
    for curve, label in curves:
        ax.plot(feature_fractions, curve, label=label)

    ax.set_xlabel("max_feature")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1.05)
    fig.suptitle("RandomForestRegressor")
    plt.show()

# Run test_RandomForestRegressor_max_features on the train/test split

test_RandomForestRegressor_max_features(X_train,X_test,y_train,y_test)

吴裕雄 python 机器学习——集成学习随机森林RandomForestRegressor回归模型的更多相关文章

吴裕雄 python 机器学习——集成学习随机森林RandomForestClassifier分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习梯度提升决策树GradientBoostingRegressor回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习AdaBoost算法回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
机器学习:集成学习:随机森林.GBDT
集成学习(Ensemble Learning) 集成学习的思想是将若干个学习器(分类器&回归器)组合之后产生一个新学习器.弱分类器(weak learner)指那些分类准确率只稍微好于随机猜测 ...
吴裕雄 python 机器学习——伯努利贝叶斯BernoulliNB模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,naive_bayes from skl ...
吴裕雄 python 机器学习——数据预处理过滤式特征选取SelectPercentile模型
from sklearn.feature_selection import SelectPercentile,f_classif #数据预处理过滤式特征选取SelectPercentile模型 def ...
吴裕雄 python 机器学习——数据预处理过滤式特征选取VarianceThreshold模型
from sklearn.feature_selection import VarianceThreshold #数据预处理过滤式特征选取VarianceThreshold模型 def test_Va ...
吴裕雄 python 机器学习——数据预处理字典学习模型
from sklearn.decomposition import DictionaryLearning #数据预处理字典学习DictionaryLearning模型 def test_Diction ...

随机推荐

-bash: mysqld: command not found
网址:https://blog.csdn.net/zq199692288/article/details/78863737
python 中 if __name__ == '__main__' 判断的作用
假设这样一个a.py文件 def fun1(): ........ def fun2(): ......... if __name__=='__main__': ......#执行的一些语句当你执行 ...
163.扩展User模型-一对一方式扩展
一对一外键如果你对用户验证方法authenticate没有更多的要求,就是使用username和password就可以完成用户的登录验证工作,但是想要在原来的模型的基础上添加新的字段,那么就可以使用 ...
对malloc和free和数据结构和算法的一些感触
当年2013.9.大一学c程序设计,因为当时还没有学数据结构,只学了程序设计,大学上的课真的是承上启下的不好,刚学到这里,就断了弦一样,对这个malloc和free一直很迷惑,这些狗玩意是干嘛,因为用 ...
Hibernate项目的基本步骤和一些错误提示
以数据库中有一张user表为例: 1.编写POJO持久化类User.javaPOJO(Plain Old Java Objects),简单的Java对象.一个POJO类不用继承任何类,也无须实现任何接 ...
QT5.1+中文乱码问题
原文链接:https://blog.csdn.net/liyuanbhu/article/details/72596952 QT中规定 QString 的 const char* 构造函数是调用 fr ...
js或者jq的string类型或者number类型的相互转换及json对象与字符串的转换
1.将值乘以1,将string类型转为number类型 //算合计价值function summoney(money) { var zijin = $("#main_xm_dam09" ...
mysql数据库函数之left()、right()、substring()、substring_index()
在实际的项目开发中有时会有对数据库某字段截取部分的需求,这种场景有时直接通过数据库操作来实现比通过代码实现要更方便快捷些,mysql有很多字符串函数可以用来处理这些需求,如Mysql字符串截取总结:l ...
c数据结构绪论
四种逻辑结构:1:集合结构结构中的数据元素除了同属于同一个集合的关系外,无任何其他关系2:线性结构结构中的数据元素之间存在着一对一的线性关系3:树形结构结构中的数据元素之间存在着一对多的层次关系 ...
JS高级---函数声明和函数表达式的区别
函数声明和函数表达式的区别多用函数表达式 var ff=function(){}; //函数声明 // // if(true){ // function f1() { // console.log( ...

吴裕雄 python 机器学习——集成学习随机森林RandomForestRegressor回归模型

吴裕雄 python 机器学习——集成学习随机森林RandomForestRegressor回归模型的更多相关文章

随机推荐

热门专题