吴裕雄 python 机器学习——模型选择数据集切分
import numpy as np
from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut,cross_val_score #模型选择数据集切分train_test_split模型
def test_train_test_split():
X=[[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34],
[41,42,43,44],
[51,52,53,54],
[61,62,63,64],
[71,72,73,74]]
y=[1,1,0,0,1,1,0,0]
# 切分,测试集大小为原始数据集大小的 40%
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=0)
print("X_train=",X_train)
print("X_test=",X_test)
print("y_train=",y_train)
print("y_test=",y_test)
# 分层采样切分,测试集大小为原始数据集大小的 40%
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4,random_state=0,stratify=y)
print("Stratify:X_train=",X_train)
print("Stratify:X_test=",X_test)
print("Stratify:y_train=",y_train)
print("Stratify:y_test=",y_test) test_train_test_split()

#模型选择数据集切分KFold模型
def test_KFold():
X=np.array([[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34],
[41,42,43,44],
[51,52,53,54],
[61,62,63,64],
[71,72,73,74],
[81,82,83,84]])
y=np.array([1,1,0,0,1,1,0,0,1])
# 切分之前不混洗数据集
folder=KFold(n_splits=3,random_state=0,shuffle=False)
for train_index,test_index in folder.split(X,y):
print("Train Index:",train_index)
print("Test Index:",test_index)
print("X_train:",X[train_index])
print("X_test:",X[test_index])
print("")
# 切分之前混洗数据集
shuffle_folder=KFold(n_splits=3,random_state=0,shuffle=True)
for train_index,test_index in shuffle_folder.split(X,y):
print("Shuffled Train Index:",train_index)
print("Shuffled Test Index:",test_index)
print("Shuffled X_train:",X[train_index])
print("Shuffled X_test:",X[test_index])
print("") test_KFold()

#模型选择数据集切分StratifiedKFold模型
def test_StratifiedKFold():
X=np.array([[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34],
[41,42,43,44],
[51,52,53,54],
[61,62,63,64],
[71,72,73,74]]) y=np.array([1,1,0,0,1,1,0,0]) folder=KFold(n_splits=4,random_state=0,shuffle=False)
stratified_folder=StratifiedKFold(n_splits=4,random_state=0,shuffle=False)
for train_index,test_index in folder.split(X,y):
print("Train Index:",train_index)
print("Test Index:",test_index)
print("y_train:",y[train_index])
print("y_test:",y[test_index])
print("") for train_index,test_index in stratified_folder.split(X,y):
print("Stratified Train Index:",train_index)
print("Stratified Test Index:",test_index)
print("Stratified y_train:",y[train_index])
print("Stratified y_test:",y[test_index])
print("") test_StratifiedKFold()

#模型选择数据集切分LeaveOneOut模型
def test_LeaveOneOut():
X=np.array([[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34]])
y=np.array([1,1,0,0])
lo=LeaveOneOut()
for train_index,test_index in lo.split(X):
print("Train Index:",train_index)
print("Test Index:",test_index)
print("X_train:",X[train_index])
print("X_test:",X[test_index])
print("") test_LeaveOneOut()

#模型选择数据集切分cross_val_score模型
def test_cross_val_score():
from sklearn.datasets import load_digits
from sklearn.svm import LinearSVC
digits=load_digits() # 加载用于分类问题的数据集
X=digits.data
y=digits.target
# 使用 LinearSVC 作为分类器
result=cross_val_score(LinearSVC(),X,y,cv=10)
print("Cross Val Score is:",result) test_cross_val_score()

吴裕雄 python 机器学习——模型选择数据集切分的更多相关文章
- 吴裕雄 python 机器学习——模型选择验证曲线validation_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
- 吴裕雄 python 机器学习——模型选择学习曲线learning_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
- 吴裕雄 python 机器学习——模型选择回归问题性能度量
from sklearn.metrics import mean_absolute_error,mean_squared_error #模型选择回归问题性能度量mean_absolute_error模 ...
- 吴裕雄 python 机器学习——模型选择分类问题性能度量
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import SVC from sklearn.datasets ...
- 吴裕雄 python 机器学习——模型选择参数优化暴力搜索寻优GridSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
- 吴裕雄 python 机器学习——模型选择参数优化随机搜索寻优RandomizedSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
- 吴裕雄 python 机器学习——模型选择损失函数模型
from sklearn.metrics import zero_one_loss,log_loss def test_zero_one_loss(): y_true=[1,1,1,1,1,0,0,0 ...
- 吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
- 吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
随机推荐
- Execl导入系统
文件导入功能 前台代码: Content\JS\jquery.ajaxfileupload.js<script src="~/Content/JS/jquery.ajaxfileupl ...
- Supervision meeting notes 2019/11/29
topic 分支: 1. subgraph/subsequence mining Wang Jin, routine behavior/ motif. Philippe Fournier Viger ...
- Java基本语法--变量
本篇博客主要介绍了Java基本语法中变量(variable)d的使用,变量是指内存中的一个存储区域,用于在内存中保存数据,在该区域的数据可以在同一类型范围内不断变化.变量是程序中最基本的存储单元.包含 ...
- 从零开始教你做高保真原型图+UI 设计规范
编者按:<从零开始设计App>系列到这篇已经是第三期了,上期是低保真原型图,这期@Sophia的玲珑阁 聊聊如何从零开始制作高保真原型图和UI 设计规范. 往期回顾: <设计师怎样从 ...
- 关于testbench
区别与verilog HDL代码,主要留意以下内容: 1,语言本身支持的特征和可综合的代码是两回事,不是所有verilog语言都可以转化为硬件的. 2,testbench作为top module,不需 ...
- 根据wsdl生成soap请求格式
本文链接:https://blog.csdn.net/a_Little_pumpkin/article/details/84725118根据wsdl文件如何生成soap请求的格式呢?使用最方便的工具S ...
- D. Easy Problem dp(有衔接关系的dp(类似于分类讨论) )
D. Easy Problem dp(有衔接关系的dp(类似于分类讨论) ) 题意 给出一个串 给出删除每一个字符的代价问使得串里面没有hard的子序列需要付出的最小代价(子序列不连续也行) 思路 要 ...
- LED Decorative Light Manufacturer Introduction: LED Metal Table Light
Nowadays, when many people choose the desk light, they are worried that it will not be used for a lo ...
- html代码分享
贴图:<img src="图片URL"> 加入连接:<a href="所要连接的相关URL">写上你想写的字</a> 在新窗 ...
- 刷题3. Longest Substring Without Repeating Characters
一.题目 Longest Substring Without Repeating Characters,具体请自行搜索. 这个题目,我看了一下,经过一番思考,我觉得实现起来不是很复杂. 但要做到bug ...