吴裕雄 python 机器学习——模型选择数据集切分

import  numpy as np

from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut,cross_val_score

# Model selection: splitting a dataset with train_test_split
def test_train_test_split():
    """Print a plain and a stratified 60/40 split of an 8-sample toy dataset.

    Both splits use test_size=0.4 and random_state=0; the second one also
    passes the labels as ``stratify``.
    """
    # Rows are [1..4], [11..14], ..., [71..74].
    features = [[10 * row + col for col in range(1, 5)] for row in range(8)]
    labels = [1, 1, 0, 0] * 2

    def show(prefix, parts):
        # parts is (X_train, X_test, y_train, y_test) as returned by train_test_split.
        names = ("X_train=", "X_test=", "y_train=", "y_test=")
        for name, part in zip(names, parts):
            print(prefix + name, part)

    # Plain split: the test set is 40% of the original dataset.
    show("", train_test_split(features, labels, test_size=0.4, random_state=0))

    # Stratified split: the test set is 40% of the original dataset.
    show(
        "Stratify:",
        train_test_split(
            features, labels, test_size=0.4, random_state=0, stratify=labels
        ),
    )

# Run the train_test_split demo.
test_train_test_split()

# Model selection: splitting a dataset with KFold
def test_KFold():
    """Print the 3-fold KFold splits of a 9-sample toy dataset.

    The folds are printed twice: first without shuffling, then with
    shuffling seeded by random_state=0.
    """
    X = np.array([[1, 2, 3, 4],
                  [11, 12, 13, 14],
                  [21, 22, 23, 24],
                  [31, 32, 33, 34],
                  [41, 42, 43, 44],
                  [51, 52, 53, 54],
                  [61, 62, 63, 64],
                  [71, 72, 73, 74],
                  [81, 82, 83, 84]])
    y = np.array([1, 1, 0, 0, 1, 1, 0, 0, 1])

    # Do not shuffle the dataset before splitting.
    # random_state is intentionally omitted: it has no effect when
    # shuffle=False, and scikit-learn 0.24+ raises ValueError if it is set.
    folder = KFold(n_splits=3, shuffle=False)
    for train_index, test_index in folder.split(X, y):
        print("Train Index:", train_index)
        print("Test Index:", test_index)
        print("X_train:", X[train_index])
        print("X_test:", X[test_index])
        print("")

    # Shuffle the dataset before splitting (seeded for reproducibility).
    shuffle_folder = KFold(n_splits=3, random_state=0, shuffle=True)
    for train_index, test_index in shuffle_folder.split(X, y):
        print("Shuffled Train Index:", train_index)
        print("Shuffled Test Index:", test_index)
        print("Shuffled X_train:", X[train_index])
        print("Shuffled X_test:", X[test_index])
        print("")

# Run the KFold demo.
test_KFold()

# Model selection: splitting a dataset with StratifiedKFold
def test_StratifiedKFold():
    """Print 4-fold KFold and StratifiedKFold splits of an 8-sample toy dataset.

    The labels of each train/test fold are printed for both splitters so
    their outputs can be compared side by side.
    """
    X = np.array([[1, 2, 3, 4],
                  [11, 12, 13, 14],
                  [21, 22, 23, 24],
                  [31, 32, 33, 34],
                  [41, 42, 43, 44],
                  [51, 52, 53, 54],
                  [61, 62, 63, 64],
                  [71, 72, 73, 74]])
    y = np.array([1, 1, 0, 0, 1, 1, 0, 0])

    # random_state is intentionally omitted from both splitters: it has no
    # effect when shuffle=False, and scikit-learn 0.24+ raises ValueError
    # if it is set.
    folder = KFold(n_splits=4, shuffle=False)
    stratified_folder = StratifiedKFold(n_splits=4, shuffle=False)

    for train_index, test_index in folder.split(X, y):
        print("Train Index:", train_index)
        print("Test Index:", test_index)
        print("y_train:", y[train_index])
        print("y_test:", y[test_index])
        print("")

    for train_index, test_index in stratified_folder.split(X, y):
        print("Stratified Train Index:", train_index)
        print("Stratified Test Index:", test_index)
        print("Stratified y_train:", y[train_index])
        print("Stratified y_test:", y[test_index])
        print("")

# Run the StratifiedKFold demo.
test_StratifiedKFold()

# Model selection: splitting a dataset with LeaveOneOut
def test_LeaveOneOut():
    """Print every LeaveOneOut train/test split of a 4-sample toy dataset."""
    X = np.array([[1, 2, 3, 4],
                  [11, 12, 13, 14],
                  [21, 22, 23, 24],
                  [31, 32, 33, 34]])

    # Only X is passed to split(), so no label array is built here.
    lo = LeaveOneOut()
    for train_index, test_index in lo.split(X):
        print("Train Index:", train_index)
        print("Test Index:", test_index)
        print("X_train:", X[train_index])
        print("X_test:", X[test_index])
        print("")

# Run the LeaveOneOut demo.
test_LeaveOneOut()

# Model selection: scoring a model with cross_val_score
def test_cross_val_score():
    """Print the cross_val_score result (cv=10) of a LinearSVC on the digits dataset."""
    from sklearn.datasets import load_digits
    from sklearn.svm import LinearSVC

    # Load a dataset meant for classification problems.
    dataset = load_digits()
    samples, targets = dataset.data, dataset.target

    # Use LinearSVC as the classifier.
    classifier = LinearSVC()
    scores = cross_val_score(classifier, samples, targets, cv=10)
    print("Cross Val Score is:", scores)

# Run the cross_val_score demo.
test_cross_val_score()

吴裕雄 python 机器学习——模型选择数据集切分的更多相关文章

吴裕雄 python 机器学习——模型选择验证曲线validation_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
吴裕雄 python 机器学习——模型选择学习曲线learning_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
吴裕雄 python 机器学习——模型选择回归问题性能度量
from sklearn.metrics import mean_absolute_error,mean_squared_error #模型选择回归问题性能度量mean_absolute_error模 ...
吴裕雄 python 机器学习——模型选择分类问题性能度量
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import SVC from sklearn.datasets ...
吴裕雄 python 机器学习——模型选择参数优化暴力搜索寻优GridSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
吴裕雄 python 机器学习——模型选择参数优化随机搜索寻优RandomizedSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
吴裕雄 python 机器学习——模型选择损失函数模型
from sklearn.metrics import zero_one_loss,log_loss def test_zero_one_loss(): y_true=[1,1,1,1,1,0,0,0 ...
吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...

随机推荐

echarts 设置默认选中，单选
默认选中和不选中传送门
C++类几种初始化的顺序
首先给段代码: class A{ public: ; A():x(){cout<<"A(): x="<<x<<endl;} A(int a):x ...
pybind11简介
python调用C/C++有不少的方法,如boost.python, swig, ctypes, pybind11等,这些方法有繁有简,而pybind11的优点是对C++ 11支持很好,API比较简单 ...
ts中接口的用法
ts中的接口主要的作用是: 对“对象”进行约束描述; 对“类”的一部分行为进行抽象. 一.属性接口: 接口中可定义确定属性、可选属性、任意属性、只读属性. 1.确定属性 interface UserInfo ...
(转)json格式转换成javaBean对象的方法
把json格式转换成javaBean才可以.于是查了一下资料,网上最多的资料就是下面的这种方式: Java code? 1 2 3 4 5 6 7 8 9 String str = "[{\ ...
MODBUS TCP/IP协议规范详细介绍
1．该规范的发展概况原始版本1997年9月3日作为公共评论的草案. 再版1999年3月29日,即修订版1.0. ...
python之路面向对象2
一.利用反射查看面向对象成员的归属 二.利用反射导入模块、查找类、创建对象、查找对象中的字段 三.静态字段: 静态字段存在类中,把对象每个都有的存在类中就行了,只存一份 四.静态方法: 静态方法中没有se ...
codeforces Codeforces Round #597 (Div. 2) D. Shichikuji and Power Grid
#include<bits/stdc++.h> using namespace std ; int n; struct City { int id; long long x,y; //坐标 ...
es 6.x scroll用法
我们可以使用from +size来获取所有数据,但是,如果数据量大的时候,这样的操作开销很大,这时候可以使用scroll操作 1.第一步发起一个scroll 的post请求,带上参数scroll=1m ...
bzoj3744: Gty的妹子序列（BIT && 分块）
强制在线的区间询问逆序对数。如果不是强制在线，就是可以用莫队乱搞啦；强制在线的话，用 f[i][j] 记录第 i 块到第 j 个点之间的逆序对数，用 s[i][j] 记录前 i 块中小于等于 j 的数字个数，离散化一下 BI ...

吴裕雄 python 机器学习——模型选择数据集切分

吴裕雄 python 机器学习——模型选择数据集切分的更多相关文章

随机推荐

热门专题