原文地址：https://www.jianshu.com/p/3f7d4aa6a7cf

问题描述

程序实现

# coding: utf-8

import numpy as np

import math

import matplotlib.pyplot as plt

def sign(x):
    """Map a number to a binary label.

    Returns 1 when ``x`` is greater than or equal to zero and -1 otherwise,
    so zero is treated as positive.
    """
    return 1 if x >= 0 else -1

def read_data(dataFile):
    """Load a whitespace-separated numeric file as a design matrix and labels.

    Every non-blank line is split on whitespace and each field is converted
    to ``float``. A constant ``1.0`` is prepended to each row (the bias
    column), the last field of the line becomes the label and the remaining
    fields become the features.

    Blank lines (including a trailing empty line) are skipped; previously
    they produced a one-element row, which made the rows ragged and broke
    the conversion to a 2-D array.

    Args:
        dataFile: path of the text file to read.

    Returns:
        A tuple ``(dataX, dataY)`` where ``dataX`` has shape
        ``(num_data, num_fields)`` with a leading column of ones and
        ``dataY`` has shape ``(num_data, 1)``.

    Raises:
        ValueError: if a field cannot be parsed as a float, or if the file
            contains no data rows at all.
    """
    data_list = []
    with open(dataFile, 'r') as f:
        # Iterate the file lazily instead of materialising all lines first.
        for line in f:
            fields = line.split()
            if not fields:
                continue
            data_list.append([1.0] + [float(v) for v in fields])

    if not data_list:
        raise ValueError("no data rows found in %s" % dataFile)

    dataArray = np.array(data_list)
    num_data = dataArray.shape[0]
    # Columns of dataArray are [bias, features..., label]; dropping the label
    # leaves shape[1] - 1 columns for the design matrix.
    num_dim = dataArray.shape[1] - 1
    dataX = dataArray[:, :-1].reshape((num_data, num_dim))
    dataY = dataArray[:, -1].reshape((num_data, 1))
    return dataX, dataY

def w_reg(dataX,dataY,namuta):
    """Return the regularized (ridge) least-squares weights.

    Solves ``(X^T X + namuta * I) w = X^T y`` for ``w``.

    The linear system is solved directly with ``np.linalg.solve`` rather than
    forming the explicit inverse and multiplying: the result is the same
    solution, but it is computed more accurately when the regularized Gram
    matrix is poorly conditioned (e.g. for very small ``namuta``).

    Args:
        dataX: 2-D array of inputs, one row per example.
        dataY: array of targets with as many rows as ``dataX``.
        namuta: regularization strength (lambda).

    Returns:
        The weight array ``w``; it has one row per column of ``dataX`` and
        the same trailing shape as ``dataY``.

    Raises:
        numpy.linalg.LinAlgError: if the regularized Gram matrix is singular.
    """
    num_dim=dataX.shape[1]
    dataX_T=np.transpose(dataX)
    regularized_gram=np.dot(dataX_T,dataX)+namuta*np.eye(num_dim)
    return np.linalg.solve(regularized_gram,np.dot(dataX_T,dataY))

def pred(wREG,dataX):
    """Predict +1/-1 labels for ``dataX`` with the linear weights ``wREG``.

    The scores ``dataX . wREG`` are thresholded at zero: scores greater than
    or equal to zero become 1, all others become -1 (so a score of exactly
    zero is labelled 1).

    The thresholding is vectorised and applied to every element of the
    score array, so it works for a column-vector ``wREG`` of shape
    ``(d, 1)`` as well as a flat ``(d,)`` weight vector. The previous
    implementation looped over rows in Python and only handled (and only
    thresholded) column 0 of a 2-D score array.

    Args:
        wREG: weight array whose first dimension matches the number of
            columns of ``dataX``.
        dataX: 2-D array of inputs, one row per example.

    Returns:
        An array with the shape and dtype of ``np.dot(dataX, wREG)`` holding
        1 or -1 for each score.
    """
    scores=np.dot(dataX,wREG)
    # Cast back so the labels keep the dtype of the scores (float for float
    # inputs), as the in-place assignment in the loop version did.
    return np.where(scores>=0,1,-1).astype(scores.dtype)

def zero_one_cost(pred,dataY):
    """Return the 0/1 error of ``pred`` against ``dataY``.

    Counts the element-wise mismatches between the two arrays and divides
    that count by the length of ``dataY`` along its first axis.
    """
    mismatches = (pred != dataY)
    num_data = dataY.shape[0]
    return np.sum(mismatches) / num_data

def _save_error_plot(lambdas, errors, ylabel, filename):
    """Plot ``errors`` against ``lambdas`` and save the figure to ``filename``."""
    plt.figure()
    plt.plot(lambdas, errors)
    # The lambda grid spans many orders of magnitude; on a linear axis all
    # but the largest few values collapse onto zero, so use a log scale.
    plt.xscale("log")
    plt.xlabel("namuta")
    plt.xlim((lambdas[0], lambdas[-1]))
    plt.ylabel(ylabel)
    plt.savefig(filename)
    plt.close()


def _error(wREG, dataX, dataY):
    """Return the 0/1 error of the weights ``wREG`` on ``(dataX, dataY)``."""
    return zero_one_cost(pred(wREG, dataX), dataY)


def _first_argmin(values):
    """Return the index of the first occurrence of the minimum of ``values``."""
    return values.index(min(values))


def main():
    """Run the experiments printed under the headers 13 to 20.

    Reads ``hw4_train.dat`` and ``hw4_test.dat`` from the working directory,
    prints the errors for each experiment and writes the plots ``14.png``,
    ``15.png``, ``16.png``, ``17.png`` and ``19.png``.
    """
    # Exponents of 10 for the lambda grid, largest first. Because the argmin
    # helper returns the first minimum, ties resolve to the largest lambda.
    l = [2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10]
    # Build the grid as floats once. Raising an integer NumPy array to
    # negative integer powers (as the plotting code used to do) raises
    # "Integers to negative integer powers are not allowed".
    lambdas = [math.pow(10, i) for i in l]

    # train
    dataX, dataY = read_data("hw4_train.dat")
    print("\n13")
    wREG = w_reg(dataX, dataY, namuta=10)
    Ein = _error(wREG, dataX, dataY)
    print("the Ein on the train set: ", Ein)

    # test
    testX, testY = read_data("hw4_test.dat")
    Eout = _error(wREG, testX, testY)
    print("the Eout on the test set: ", Eout)

    # Sweep lambda, fitting on the full training set.
    print("\n14")
    Ein_list = []
    Eout_list = []
    for namuta in lambdas:
        wREG = w_reg(dataX, dataY, namuta)
        Ein_list.append(_error(wREG, dataX, dataY))
        Eout_list.append(_error(wREG, testX, testY))
    id_in = _first_argmin(Ein_list)
    _save_error_plot(lambdas, Ein_list, "Ein", "14.png")
    print("the namuta with the minimun Ein: ", lambdas[id_in])
    print("the Eout on such namuta: ", Eout_list[id_in])

    print("\n15")
    id_out = _first_argmin(Eout_list)
    _save_error_plot(lambdas, Eout_list, "Eout", "15.png")
    print("the namuta with the minimun Eout: ", lambdas[id_out])

    # validation: first 120 rows for fitting, the remainder held out.
    trainX = dataX[:120]
    trainY = dataY[:120]
    validX = dataX[120:]
    validY = dataY[120:]

    print("\n16")
    Ein_list = []
    Eout_list = []
    Eval_list = []
    for namuta in lambdas:
        wREG = w_reg(trainX, trainY, namuta)
        Ein_list.append(_error(wREG, trainX, trainY))
        Eout_list.append(_error(wREG, testX, testY))
        Eval_list.append(_error(wREG, validX, validY))
    id_in = _first_argmin(Ein_list)
    _save_error_plot(lambdas, Ein_list, "Ein", "16.png")
    print("the namuta with the minimun Ein: ", lambdas[id_in])
    print("the Eout on such namuta: ", Eout_list[id_in])

    print("\n17")
    id_val = _first_argmin(Eval_list)
    _save_error_plot(lambdas, Eval_list, "Eval", "17.png")
    print("the namuta with the minimun Eval: ", lambdas[id_val])
    print("the Eout on such namuta: ", Eout_list[id_val])

    # Refit on the full training set with the lambda chosen by validation.
    print("\n18")
    wREG = w_reg(dataX, dataY, namuta=lambdas[id_val])
    Ein = _error(wREG, dataX, dataY)
    Eout = _error(wREG, testX, testY)
    print("Ein: ", Ein)
    print("Eout: ", Eout)

    # 5-fold cross validation
    print("\n19")
    num_folds = 5
    # np.split requires the number of rows to be divisible by num_folds.
    splX = np.split(dataX, num_folds, axis=0)
    splY = np.split(dataY, num_folds, axis=0)
    Ecv_list = []
    for namuta in lambdas:
        Ecv = 0
        for i in range(num_folds):
            # Fit on every fold except fold i, then score on fold i.
            others = [k for k in range(num_folds) if k != i]
            foldX = np.concatenate([splX[k] for k in others], axis=0)
            foldY = np.concatenate([splY[k] for k in others], axis=0)
            wREG = w_reg(foldX, foldY, namuta)
            Ecv += _error(wREG, splX[i], splY[i]) / num_folds
        Ecv_list.append(Ecv)
    id_val = _first_argmin(Ecv_list)
    _save_error_plot(lambdas, Ecv_list, "Ecv", "19.png")
    print("the namuta with the minimun Ecv: ", lambdas[id_val])

    # Refit on the full training set with the lambda chosen by cross validation.
    print("\n20")
    wREG = w_reg(dataX, dataY, namuta=lambdas[id_val])
    Ein = _error(wREG, dataX, dataY)
    Eout = _error(wREG, testX, testY)
    print("Ein: ", Ein)
    print("Eout: ", Eout)


if __name__=="__main__":
    main()

运行结果

13

14

15

16

17

18

19

20

机器学习基石笔记：Homework #4 Regularization&Validation相关习题的更多相关文章

机器学习基石笔记：14 Regularization
一.正则化的假设集合通过从高次多项式的H退回到低次多项式的H来降低模型复杂度, 以降低过拟合的可能性, 如何退回? 通过加约束条件: 如果加了严格的约束条件, 没有必要从H10退回到H2, 直接使用 ...
机器学习基石笔记：Homework #1 PLA&PA相关习题
原文地址:http://www.jianshu.com/p/5b4a64874650 问题描述程序实现 # coding: utf-8 import numpy as np import matpl ...
机器学习基石笔记：Homework #2 decision stump相关习题
原文地址:http://www.jianshu.com/p/4bc01760ac20 问题描述程序实现 17-18 # coding: utf-8 import numpy as np import ...
机器学习基石笔记：Homework #3 LinReg&LogReg相关习题
原文地址:http://www.jianshu.com/p/311141f2047d 问题描述程序实现 13-15 # coding: utf-8 import numpy as np import ...
机器学习基石笔记：15 Validation
一.模型选择问题如何选择? 视觉上 NO 不是所有资料都能可视化;人脑模型复杂度也得算上. 通过Ein NO 容易过拟合;泛化能力差. 通过Etest NO 能保证好的泛化,不过往往没法提前获得测试 ...
机器学习基石：Homework #0 SVD相关&常用矩阵求导公式
机器学习基石笔记：13 Hazard of Overfitting
泛化能力差和过拟合: 引起过拟合的原因: 1)过度VC维(模型复杂度高)------确定性噪声: 2)随机噪声: 3)有限的样本数量N. 具体实验来看模型复杂度Qf/确定性噪声.随机噪声sigma2. ...
【原】Coursera—Andrew Ng机器学习—课程笔记 Lecture 7 Regularization 正则化
Lecture7 Regularization 正则化 7.1 过拟合问题 The Problem of Overfitting7.2 代价函数 Cost Function7.3 正则化线性回归 R ...
林轩田机器学习基石笔记1—The Learning Problem
机器学习分为四步: When Can Machine Learn? Why Can Machine Learn? How Can Machine Learn? How Can Machine Lear ...

随机推荐

Java Software Engineer Skill Map
# Java Software Engineer Skill Map## Basic### Core Java- Java The Complete Reference Ninth Edition.p ...
python singleton 4种单例
def singleton(cls, *args, **kwargs): instances = {} def inner(cls, *args, **kwargs): if cls not in i ...
share memory cache across multi web application
Single instance of a MemoryCache across multiple application pools on the same server [duplicate] Yo ...
windows系统查看端口占用
netstat -ano #列出所用端口使用情况 netstat -aon|findstr "端口号" #查询指定端口 tasklist|findstr "PID" ...
shell 从函数文件中调用函数的方法
你可以把所有的函数存储在一个函数文件中你可以把所有的文件函数加载到当前脚本或命令行加载函数文件中所有函数的方法: source xxx.sh
leetcode python翻转字符串里的单词
# Leetcode 151 翻转字符串里的单词### 题目描述给定一个字符串,逐个翻转字符串中的每个单词. **示例1:** 输入: "the sky is blue" 输出: ...
洛谷 P2024 [NOI2001]食物链——带权值的并查集维护
先上一波题目 https://www.luogu.org/problem/P2024 通过这道题复习了一波并查集,学习了一波带权值操作首先我们观察到所有的环都是以A->B->C-> ...
Sql Server 表结构相关
1.库表列信息 --取所有库 SELECT Name FROM Master..SysDatabases ORDER BY Name --查询所有表 select name from 库名..syso ...
Python 如何debug
一.常见错误: 1.漏了末尾的冒号,如 if语句,循环语句,定义函数 2.缩进错误,该缩进的时候没有缩进 3.把英文符号写成中文符号,如: ' ' () , 4.字符串拼接,把字符串和数字拼接一起 ...
stdio - 标准输入输出库函数
SYNOPSIS 总览 #include <stdio.h> FILE *stdin; FILE *stdout; FILE *stderr; DESCRIPTION 描述标注 I/O ...

机器学习基石笔记：Homework #4 Regularization&Validation相关习题

问题描述

程序实现

运行结果

13

14

15

16

17

18

19

20

机器学习基石笔记：Homework #4 Regularization&Validation相关习题的更多相关文章

随机推荐

热门专题