原理介绍

K折交叉验证：

KFold，GroupKFold，StratifiedKFold，

留一法：

LeaveOneGroupOut，LeavePGroupsOut，LeaveOneOut，LeavePOut，

随机划分法：

ShuffleSplit，GroupShuffleSplit，StratifiedShuffleSplit，

代码实现

流程：

实例化划分器（splitter） -> 调用 .split() 得到迭代器，每次迭代产生一组（训练集索引，测试集索引）

KFold(n_splits=2)

# KFold demo: partition six samples into n_splits folds and print the
# train/test index arrays yielded for each fold.
import numpy as np
from sklearn.model_selection import KFold

X = np.arange(1, 13).reshape(6, 2)  # six samples, two features each
y = np.arange(1, 7)                 # one label per sample

kf = KFold(n_splits=2)  # number of folds to split into
# kf.get_n_splits(X)    # would report how many folds are produced
print(kf)

for train_index, test_index in kf.split(X):
    print("Train Index:", train_index, ",Test Index:", test_index)
    # Index the arrays with the yielded positions to build each subset.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    # print(X_train, X_test, y_train, y_test)

GroupKFold(n_splits=2)

# GroupKFold demo: a K-fold variant that also takes a `groups` array.
# Per the scikit-learn documentation, samples sharing a group label are
# never placed in both the training and the test set of one split.
import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(1, 13).reshape(6, 2)  # six samples, two features each
y = np.arange(1, 7)
groups = np.arange(1, 7)  # each sample is its own group in this toy example

group_kfold = GroupKFold(n_splits=2)
group_kfold.get_n_splits(X, y, groups)  # fold count; the result is not used
print(group_kfold)

for train_index, test_index in group_kfold.split(X, y, groups):
    print("Train Index:", train_index, ",Test Index:", test_index)
    # Index the arrays with the yielded positions to build each subset.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    # print(X_train, X_test, y_train, y_test)

#GroupKFold(n_splits=2)

#Train Index: [0 2 4] ,Test Index: [1 3 5]

#Train Index: [1 3 5] ,Test Index: [0 2 4]

StratifiedKFold(n_splits=3)

# StratifiedKFold demo: K-fold splitting that tries to keep the share of
# every class the same across the folds (as far as possible).
import numpy as np
from sklearn.model_selection import StratifiedKFold

X = np.arange(1, 13).reshape(6, 2)  # six samples, two features each
y = np.repeat([1, 2], 3)            # class labels: 1, 1, 1, 2, 2, 2

skf = StratifiedKFold(n_splits=3)
skf.get_n_splits(X, y)  # fold count; the result is not used
print(skf)

for train_index, test_index in skf.split(X, y):
    print("Train Index:", train_index, ",Test Index:", test_index)
    # Index the arrays with the yielded positions to build each subset.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    # print(X_train, X_test, y_train, y_test)

#StratifiedKFold(n_splits=3, random_state=None, shuffle=False)

#Train Index: [1 2 4 5] ,Test Index: [0 3]

#Train Index: [0 2 3 5] ,Test Index: [1 4]

#Train Index: [0 1 3 4] ,Test Index: [2 5]

LeaveOneOut()

# LeaveOneOut demo: every split holds out exactly one sample as the test
# set and trains on all the others.
import numpy as np
from sklearn.model_selection import LeaveOneOut

X = np.arange(1, 13).reshape(6, 2)  # six samples, two features each
y = np.arange(1, 7)

loo = LeaveOneOut()
loo.get_n_splits(X)  # split count; the result is not used
print(loo)

for train_index, test_index in loo.split(X, y):
    print("Train Index:", train_index, ",Test Index:", test_index)
    # Index the arrays with the yielded positions to build each subset.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    # print(X_train, X_test, y_train, y_test)

#LeaveOneOut()

#Train Index: [1 2 3 4 5] ,Test Index: [0]

#Train Index: [0 2 3 4 5] ,Test Index: [1]

#Train Index: [0 1 3 4 5] ,Test Index: [2]

#Train Index: [0 1 2 4 5] ,Test Index: [3]

#Train Index: [0 1 2 3 5] ,Test Index: [4]

#Train Index: [0 1 2 3 4] ,Test Index: [5]

LeavePOut(p=3)

# LeavePOut demo: every split holds out p samples as the test set and
# trains on the remaining ones; all combinations of p samples are visited.
# (This line was originally a comment missing its leading '#', which made
# the snippet a SyntaxError.)
import numpy as np
from sklearn.model_selection import LeavePOut

X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])
y = np.array([1, 2, 3, 4, 5, 6])

lpo = LeavePOut(p=3)  # size of each held-out test set
lpo.get_n_splits(X)   # split count; the result is not used
print(lpo)

for train_index, test_index in lpo.split(X, y):
    print("Train Index:", train_index, ",Test Index:", test_index)
    # Index the arrays with the yielded positions to build each subset.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # print(X_train, X_test, y_train, y_test)

#LeavePOut(p=3)

#Train Index: [3 4 5] ,Test Index: [0 1 2]

#Train Index: [2 4 5] ,Test Index: [0 1 3]

#Train Index: [2 3 5] ,Test Index: [0 1 4]

#Train Index: [2 3 4] ,Test Index: [0 1 5]

#Train Index: [1 4 5] ,Test Index: [0 2 3]

#Train Index: [1 3 5] ,Test Index: [0 2 4]

#Train Index: [1 3 4] ,Test Index: [0 2 5]

#Train Index: [1 2 5] ,Test Index: [0 3 4]

#Train Index: [1 2 4] ,Test Index: [0 3 5]

#Train Index: [1 2 3] ,Test Index: [0 4 5]

#Train Index: [0 4 5] ,Test Index: [1 2 3]

#Train Index: [0 3 5] ,Test Index: [1 2 4]

#Train Index: [0 3 4] ,Test Index: [1 2 5]

#Train Index: [0 2 5] ,Test Index: [1 3 4]

#Train Index: [0 2 4] ,Test Index: [1 3 5]

#Train Index: [0 2 3] ,Test Index: [1 4 5]

#Train Index: [0 1 5] ,Test Index: [2 3 4]

#Train Index: [0 1 4] ,Test Index: [2 3 5]

#Train Index: [0 1 3] ,Test Index: [2 4 5]

#Train Index: [0 1 2] ,Test Index: [3 4 5]

ShuffleSplit(n_splits=3,test_size=.25,random_state=0)

# ShuffleSplit demo: shuffle the samples, then draw a training and a test
# subset whose sizes are set by train_size / test_size.
# The two fractions may add up to less than 1 (second run: 0.5 + 0.25).
import numpy as np
from sklearn.model_selection import ShuffleSplit

X = np.arange(1, 13).reshape(6, 2)  # six samples, two features each
y = np.arange(1, 7)

# First run: only the test fraction is given.
rs = ShuffleSplit(n_splits=3, test_size=.25, random_state=0)
rs.get_n_splits(X)  # split count; the result is not used
print(rs)

for train_index, test_index in rs.split(X, y):
    print("Train Index:", train_index, ",Test Index:", test_index)
    # Index the arrays with the yielded positions to build each subset.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    # print(X_train, X_test, y_train, y_test)

print("==============================")

# Second run: both fractions are given and together cover less than the
# whole data set.
rs = ShuffleSplit(n_splits=3, train_size=.5, test_size=.25, random_state=0)
rs.get_n_splits(X)
print(rs)

for train_index, test_index in rs.split(X, y):
    print("Train Index:", train_index, ",Test Index:", test_index)

#ShuffleSplit(n_splits=3, random_state=0, test_size=0.25, train_size=None)

#Train Index: [1 3 0 4] ,Test Index: [5 2]

#Train Index: [4 0 2 5] ,Test Index: [1 3]

#Train Index: [1 2 4 0] ,Test Index: [3 5]

#==============================

#ShuffleSplit(n_splits=3, random_state=0, test_size=0.25, train_size=0.5)

#Train Index: [1 3 0] ,Test Index: [5 2]

#Train Index: [4 0 2] ,Test Index: [1 3]

#Train Index: [1 2 4] ,Test Index: [3 5]

StratifiedShuffleSplit(n_splits=3,test_size=.5,random_state=0)

# StratifiedShuffleSplit demo: shuffled train/test splits sized by
# train_size / test_size (the fractions may add up to less than 1), with
# the extra aim of keeping each class's share the same in the subsets.
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

X = np.arange(1, 13).reshape(6, 2)  # six samples, two features each
y = np.tile([1, 2], 3)              # class labels: 1, 2, 1, 2, 1, 2

sss = StratifiedShuffleSplit(n_splits=3, test_size=.5, random_state=0)
sss.get_n_splits(X, y)  # split count; the result is not used
print(sss)

for train_index, test_index in sss.split(X, y):
    print("Train Index:", train_index, ",Test Index:", test_index)
    # Index the arrays with the yielded positions to build each subset.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    # print(X_train, X_test, y_train, y_test)

#StratifiedShuffleSplit(n_splits=3, random_state=0, test_size=0.5,train_size=None)

#Train Index: [5 4 1] ,Test Index: [3 2 0]

#Train Index: [5 2 3] ,Test Index: [0 4 1]

#Train Index: [5 0 4] ,Test Index: [3 1 2]

『Sklearn』数据划分方法的更多相关文章

『Sklearn』特征向量化处理
『Kaggle』分类任务_决策树&集成模型&DataFrame向量化操作 1 2 3 4 5 6 7 8 9 '''特征提取器''' from sklearn.feature_extr ...
『Sklearn』框架自带数据集接口
自带数据集类型如下: # 自带小型数据集# sklearn.datasets.load_<name># 在线下载数据集# sklearn.datasets.fetch_<name> ...
JS 中通过对象关联实现『继承』
JS 中继承其实是种委托,而不是传统面向对象中的复制父类到子类,只是通过原型链将要做的事委托给父类. 下面介绍通过对象关联来实现『继承』的方法: Foo = { // 需要提供一个 init 方法来初 ...
『Python』__getattr__()特殊方法
self的认识 & __getattr__()特殊方法将字典调用方式改为通过属性查询的一个小class, class Dict(dict): def __init__(self, **kw) ...
『TensorFlow』模型保存和载入方法汇总
『TensorFlow』第七弹_保存&载入会话_霸王回马一.TensorFlow常规模型加载方法保存模型 tf.train.Saver()类,.save(sess, ckpt文件目录)方法 ...
『转载』hadoop2.x常用端口、定义方法及默认端口
『转载』hadoop2.x常用端口.定义方法及默认端口 1.问题导读 DataNode的http服务的端口.ipc服务的端口分别是哪个? NameNode的http服务的端口.ipc服务的端口分别是哪 ...
『计算机视觉』Mask-RCNN_推断网络终篇：使用detect方法进行推断
一.detect和build 前面多节中我们花了大量笔墨介绍build方法的inference分支,这节我们看看它是如何被调用的. 在dimo.ipynb中,涉及model的操作我们简单进行一下汇总, ...
『TensorFlow』读书笔记_降噪自编码器
『TensorFlow』降噪自编码器设计之前学习过的代码,又敲了一遍,新的收获也还是有的,因为这次注释写的比较详尽,所以再次记录一下,具体的相关知识查阅之前写的文章即可(见上面链接). # Aut ...
『AngularJS』$location 服务
项目中关于 $location的用法简介 $location服务解析在浏览器地址栏中的URL(基于window.location)并且让URL在你的应用中可用.改变在地址栏中的URL会作用到$loc ...

随机推荐

文件系统、服务、防火墙、SELINUX——安全四大金刚
一提到安全,大家都会想到防火墙,和文件系统权限.而实际工作环境中,我们在Linux的安全配置,会涉及到四个级别.我们思考一个场景,你要在百度盘中存放一个文件,这个动作需要考虑下面四个权限. 1 fir ...
01: git & github
目录:GIT其他篇 01: git & github 02: git分支管理目录: 1.1 常见版本管理工具介绍及版本工具作用 1.2 git.GitHub和SVN比较 1.3 本地gi ...
ArcThemALL！5.1：解压、脱壳、压缩样样精通
原文链接:http://www.ithome.com/html/soft/57033.htm ArcThemALL!软件主要功能: 1.支持压缩和解压功能,支持常用的7z.zip.cab.iso.ra ...
面向对象初调用：foolish 电梯
本周我们完成的任务是傻瓜电梯的调度,对于那十分十分详细的指导书,我感觉想要说明白题目要求,是做不到的,所以就把指导书贴出来给大家看了,,由于在下还不会网页制作,只能通过百度网盘了,https://pa ...
Beetl模板引擎入门教程
最近项目中有个邮件发送的需求,不过要求发送的HTML格式的邮件.由于Beetl对java语言的良好支持和很好的性能,我们决定使用Beetl作为我们的模板引擎. Beetl官网已经有了很详细的教程,所以 ...
C# 将 Stream 写入文件
public void StreamToFile(Stream stream,string fileName) { // 把 Stream 转换成 byte[] byte[] bytes = new ...
SQL Over
与over函数结合的几个函数 create table #tab(A varchar(), B varchar()) insert into #tab select 'A1', 'B1' union ...
Spring资源加载基础ClassLoader
1 ClassLoader工作机制 1.1 ClassLoader作用寻找类字节码文件并构造出类在JVM内部表示的组件.负责运行时查找和装入Class字节码文件 1.2 装载步骤 1.2.1 装载 ...
【Coursera】Fourth Week(2)
Netscape JavaScript and Firefox 当Microsoft收购Netscape失败之后: JavaScript 创造并用于与 Visual Basic 竞争(1995). N ...
编译安装lamp （php）
用户账号及权限管理用户账号:'user'@'host' user: 用户名 host: 此用户访问mysqld服务时允许通过哪些主机远程创建连接: host类型:IP.网络地址.主机名.通配符(%和 ...

『Sklearn』数据划分方法

原理介绍

K折交叉验证：

留一法：

随机划分法：

代码实现

KFold(n_splits=2)

GroupKFold(n_splits=2)

StratifiedKFold(n_splits=3)

LeaveOneOut()

LeavePOut(p=3)

ShuffleSplit(n_splits=3,test_size=.25,random_state=0)

StratifiedShuffleSplit(n_splits=3,test_size=.5,random_state=0)

『Sklearn』数据划分方法的更多相关文章

随机推荐

热门专题