吴裕雄 python 机器学习——模型选择数据集切分
import numpy as np
from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut,cross_val_score #模型选择数据集切分train_test_split模型
def test_train_test_split():
X=[[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34],
[41,42,43,44],
[51,52,53,54],
[61,62,63,64],
[71,72,73,74]]
y=[1,1,0,0,1,1,0,0]
# 切分,测试集大小为原始数据集大小的 40%
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=0)
print("X_train=",X_train)
print("X_test=",X_test)
print("y_train=",y_train)
print("y_test=",y_test)
# 分层采样切分,测试集大小为原始数据集大小的 40%
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4,random_state=0,stratify=y)
print("Stratify:X_train=",X_train)
print("Stratify:X_test=",X_test)
print("Stratify:y_train=",y_train)
print("Stratify:y_test=",y_test) test_train_test_split()
#模型选择数据集切分KFold模型
def test_KFold():
X=np.array([[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34],
[41,42,43,44],
[51,52,53,54],
[61,62,63,64],
[71,72,73,74],
[81,82,83,84]])
y=np.array([1,1,0,0,1,1,0,0,1])
# 切分之前不混洗数据集
folder=KFold(n_splits=3,random_state=0,shuffle=False)
for train_index,test_index in folder.split(X,y):
print("Train Index:",train_index)
print("Test Index:",test_index)
print("X_train:",X[train_index])
print("X_test:",X[test_index])
print("")
# 切分之前混洗数据集
shuffle_folder=KFold(n_splits=3,random_state=0,shuffle=True)
for train_index,test_index in shuffle_folder.split(X,y):
print("Shuffled Train Index:",train_index)
print("Shuffled Test Index:",test_index)
print("Shuffled X_train:",X[train_index])
print("Shuffled X_test:",X[test_index])
print("") test_KFold()
#模型选择数据集切分StratifiedKFold模型
def test_StratifiedKFold():
X=np.array([[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34],
[41,42,43,44],
[51,52,53,54],
[61,62,63,64],
[71,72,73,74]]) y=np.array([1,1,0,0,1,1,0,0]) folder=KFold(n_splits=4,random_state=0,shuffle=False)
stratified_folder=StratifiedKFold(n_splits=4,random_state=0,shuffle=False)
for train_index,test_index in folder.split(X,y):
print("Train Index:",train_index)
print("Test Index:",test_index)
print("y_train:",y[train_index])
print("y_test:",y[test_index])
print("") for train_index,test_index in stratified_folder.split(X,y):
print("Stratified Train Index:",train_index)
print("Stratified Test Index:",test_index)
print("Stratified y_train:",y[train_index])
print("Stratified y_test:",y[test_index])
print("") test_StratifiedKFold()
#模型选择数据集切分LeaveOneOut模型
def test_LeaveOneOut():
X=np.array([[1,2,3,4],
[11,12,13,14],
[21,22,23,24],
[31,32,33,34]])
y=np.array([1,1,0,0])
lo=LeaveOneOut()
for train_index,test_index in lo.split(X):
print("Train Index:",train_index)
print("Test Index:",test_index)
print("X_train:",X[train_index])
print("X_test:",X[test_index])
print("") test_LeaveOneOut()
#模型选择数据集切分cross_val_score模型
def test_cross_val_score():
from sklearn.datasets import load_digits
from sklearn.svm import LinearSVC
digits=load_digits() # 加载用于分类问题的数据集
X=digits.data
y=digits.target
# 使用 LinearSVC 作为分类器
result=cross_val_score(LinearSVC(),X,y,cv=10)
print("Cross Val Score is:",result) test_cross_val_score()
吴裕雄 python 机器学习——模型选择数据集切分的更多相关文章
- 吴裕雄 python 机器学习——模型选择验证曲线validation_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
- 吴裕雄 python 机器学习——模型选择学习曲线learning_curve模型
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import LinearSVC from sklearn.da ...
- 吴裕雄 python 机器学习——模型选择回归问题性能度量
from sklearn.metrics import mean_absolute_error,mean_squared_error #模型选择回归问题性能度量mean_absolute_error模 ...
- 吴裕雄 python 机器学习——模型选择分类问题性能度量
import numpy as np import matplotlib.pyplot as plt from sklearn.svm import SVC from sklearn.datasets ...
- 吴裕雄 python 机器学习——模型选择参数优化暴力搜索寻优GridSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
- 吴裕雄 python 机器学习——模型选择参数优化随机搜索寻优RandomizedSearchCV模型
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_rep ...
- 吴裕雄 python 机器学习——模型选择损失函数模型
from sklearn.metrics import zero_one_loss,log_loss def test_zero_one_loss(): y_true=[1,1,1,1,1,0,0,0 ...
- 吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
- 吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
随机推荐
- 2019牛客竞赛第六场D Move 宏观单调,部分不单调
Move 题意 有k个体积相同的箱子,有个憨憨有固定的装箱策略,每次都只装可以装的重量中最大的东西,求箱子的最小提及 分析 看起来可以二分,但由于他的装箱策略有点蠢,所以只在宏观上满足单调性,在特别小 ...
- adb shell 杀进程以及端口占用,adbserver服务重启失败
linux: adb shell ps |grep netease 杀进程: adb shell kill [PID] //杀死进程 C:\Users\chenquan>adb shell ...
- unity命令行参数
Typically, Unity will be launched by double-clicking its icon from the desktop but it is also possib ...
- 慎用--skip-grant-tables命令
该命令作用是跳过授权表,也就是说谁都能进入mysql看到所有数据表,输入任意字符账号密码都可以 当忘记账号密码时可以使用改命令修改密码,但是要随用随关,重启mysql,不然服务器上会有很大的风险. 介 ...
- js中float失精
https://juejin.im/post/5aa1395c6fb9a028df223516 把小数转为整数,然后计算 https://www.html.cn/archives/7340
- 阻塞队列BlockingQueue之ASynchronousQueue
一.SynchronousQueue简介 Java 6的并发编程包中的SynchronousQueue是一个没有数据缓冲的BlockingQueue,生产者线程对其的插入操作put必须等待消费者的移除 ...
- Atcoder Beginner Contest 155D(二分,尺取法,细节模拟)
二分,尺取法,细节模拟,尤其是要注意a[i]被计算到和a[i]成对的a[j]里时 #define HAVE_STRUCT_TIMESPEC #include<bits/stdc++.h> ...
- sourcetree(mac)设置代理
Mercurial: edit ~/.hgrcas shown here: http://www.selenic.com/mercurial/hgrc.5.html#http-proxy Git: e ...
- lminus
lminus是Synopsy自带的tcl list 操作command. 顾名思义,可以将两个list相减,即过滤掉两个list中相同的element,生成一个新的list,其实是用lsearch与l ...
- Vue-移动端开发全家桶
内容:node.js,vue-cli,vuex,axios,postcss-pxtorem,lib-flexible,vant ,babel-plugin-import 1.安装脚手架工具: npm ...