原文地址：https://www.jianshu.com/p/6bf801bdc644

特征变换

问题描述

程序实现

# coding: utf-8

import numpy as np

from cvxopt import matrix, solvers

from sklearn import svm

def gen_data():
    """Build the fixed seven-point toy data set used by the exercise.

    Returns:
        A pair ``(X, y)``: ``X`` is the ``(7, 2)`` array of input points and
        ``y`` is the length-7 array of their ``+1``/``-1`` labels.
    """
    points = np.array([
        [1, 0],
        [0, 1],
        [0, -1],
        [-1, 0],
        [0, 2],
        [0, -2],
        [-2, 0],
    ])
    labels = np.array([-1, -1, -1, 1, 1, 1, 1])
    # One label per point, and every point must be two-dimensional.
    assert points.shape[0] == labels.shape[0] and points.shape[1] == 2, "wrong data shape!"
    return points, labels

def explict_transform(X):
    """Apply the explicit quadratic feature map to 2-D points.

    Each row ``(x1, x2)`` of ``X`` is mapped to
    ``(x2**2 - 2*x1 + 3, x1**2 - 2*x2 - 3)``.

    Args:
        X: Array of shape ``(n, 2)``.

    Returns:
        Array of shape ``(n, 2)`` holding the transformed points.
    """
    assert X.shape[1] == 2, "wrong shape of X!"
    # Range slices keep both columns 2-D, shape (n, 1), so they can be joined
    # side by side without any reshaping.
    first = X[:, 0:1]
    second = X[:, 1:2]
    z1 = second ** 2 - 2 * first + 3
    z2 = first ** 2 - 2 * second - 3
    return np.concatenate((z1, z2), axis=1)

def svm_hard_linear(X, y):
    """Solve the primal hard-margin linear SVM as a quadratic program.

    The unknown vector is ``u = (b, w)``. The objective is ``0.5 * u^T P u``
    where ``P`` is the identity with a zero in the ``b`` slot, so only ``w``
    is penalised. The margin constraints ``y_n * (b + w^T x_n) >= 1`` are
    passed to cvxopt in its ``G u <= h`` form.

    Args:
        X: Array of shape ``(num, dim)`` with one sample per row.
        y: Labels in ``{-1, +1}``, of shape ``(num,)`` or ``(num, 1)``.

    Returns:
        The ``'x'`` entry of the cvxopt solution: a ``(1 + dim) x 1`` cvxopt
        matrix whose first element is ``b`` and whose remaining elements
        are ``w``.
    """
    X = np.asarray(X, dtype=float)
    num, dim = X.shape
    # Force a column vector so each label scales its own row of [1, X];
    # a flat (num,) vector would not broadcast row-wise.
    y = np.asarray(y, dtype=float).reshape(num, 1)

    # Quadratic term: zero for b, identity for w.
    quad = np.zeros((1 + dim, 1 + dim))
    quad[1:, 1:] = np.eye(dim)
    P = matrix(quad, tc='d')
    q = matrix(np.zeros((1 + dim, 1)), tc='d')

    # `np.float` was removed in NumPy 1.24; the builtin `float` is the same
    # dtype and works on every NumPy version.
    augmented = np.concatenate((np.ones((num, 1), dtype=float), X), axis=1)
    G = matrix(-y * augmented, tc='d')
    h = matrix(-np.ones((num, 1)), tc='d')

    sol = solvers.qp(P, q, G, h)
    return sol['x']

def implicit_transform(X):
    """Map 2-D points to the explicit feature space of ``(1 + x.x')**2``.

    Each row ``(x1, x2)`` becomes
    ``(1, sqrt(2)*x1, sqrt(2)*x2, x1**2, x2**2, sqrt(2)*x1*x2)``; the inner
    product of two such vectors equals ``(1 + x.x')**2``.

    Args:
        X: Array of shape ``(n, 2)``.

    Returns:
        Float array of shape ``(n, 6)`` with the features in the order above.
    """
    assert X.shape[1] == 2, "wrong shape of X!"
    n_rows = X.shape[0]
    root_two = 2 ** (0.5)
    # Range slices keep the columns 2-D, shape (n, 1), ready to be joined.
    x1 = X[:, 0:1]
    x2 = X[:, 1:2]
    features = (
        np.ones((n_rows, 1)),
        root_two * x1,
        root_two * x2,
        x1 ** 2,
        x2 ** 2,
        root_two * x1 * x2,
    )
    return np.concatenate(features, axis=1)

if __name__ == "__main__":
    np.set_printoptions(precision=6, suppress=True)
    X, y = gen_data()

    # explicit
    # 2: solve the hard-margin primal QP on the explicitly transformed points.
    exp_X = explict_transform(X)
    column_y = y.reshape(y.shape[0], 1)
    u = np.array(svm_hard_linear(exp_X, column_y))
    # The solution vector is laid out as (b, w).
    b = u[0, :]
    w = u[1:, :]
    print("b:\n", b)
    print("w:\n", w)

    # implicit
    # Degree-2 polynomial kernel (1 + x.x')**2 with a very large C
    # (presumably to approximate a hard margin).
    clf = svm.SVC(C=1000000, kernel='poly', degree=2, gamma=1, coef0=1)
    clf.fit(X, y)

    # 3: recover alpha by dividing each dual coefficient (alpha_n * y_n) by
    # the label of its support vector.
    alpha_y = clf.dual_coef_
    alpha_y = alpha_y.reshape((alpha_y.shape[1],))
    sv_ID = clf.support_
    sv_y = [y[idx] for idx in sv_ID]
    alpha = [coef / label for coef, label in zip(alpha_y, sv_y)]
    print("alpha*y:\n", alpha_y)
    print("alpha:\n", alpha)
    sv_X = clf.support_vectors_
    print("support vectors:\n", sv_X)

    # 4: rebuild w in the 6-D feature space as sum_n alpha_n * y_n * phi(x_n).
    b = clf.intercept_
    print("b:\n", b)
    w = np.dot(alpha_y, implicit_transform(sv_X)).reshape((6, 1))
    print("w:\n", w)

运行结果

Soft-Margin SVM

问题描述

程序实现

# coding: utf-8

import numpy as np

from sklearn import svm

import matplotlib.pyplot as plt

def read_data(dataFile):
    """Load a whitespace-separated numeric data file.

    Each non-blank line is one sample: the first field is the label and the
    remaining fields are the features.

    Args:
        dataFile: Path of the text file to read.

    Returns:
        A pair ``(dataX, dataY)``: ``dataX`` has shape
        ``(num_data, num_dim)`` and ``dataY`` has shape ``(num_data,)``.

    Raises:
        ValueError: If the file contains no data rows, or a field cannot be
            parsed as a float.
    """
    rows = []
    with open(dataFile, 'r') as f:
        # Iterate the file lazily instead of materialising every line first.
        for line in f:
            fields = line.split()
            # Blank or whitespace-only lines (e.g. a trailing empty line)
            # would otherwise add an empty row and break the 2-D array.
            if fields:
                rows.append([float(field) for field in fields])
    if not rows:
        raise ValueError("no data rows found in %s" % dataFile)
    dataArray = np.array(rows)
    # Column 0 is the label; everything after it is the feature vector.
    dataX = dataArray[:, 1:]
    dataY = dataArray[:, 0]
    return dataX, dataY

# Load the training and test sets once, at module level; the question*
# functions below read these arrays (data_X, data_Y, test_X, test_Y) directly.
data_X,data_Y=read_data("features.train")

test_X,test_Y=read_data("features.test")

def convert_label(dataY,chosen_class):
    """Turn multi-class labels into one-versus-rest ``+1``/``-1`` labels.

    Args:
        dataY: 1-D array of class labels.
        chosen_class: The label value that becomes the positive class.

    Returns:
        A new array with the same shape and dtype as ``dataY``: ``1`` where
        the label equals ``chosen_class`` and ``-1`` elsewhere. ``dataY`` is
        not modified.
    """
    # Start from all -1, then flip the chosen class to +1 through a mask.
    binary = -np.ones_like(dataY)
    binary[dataY == chosen_class] = 1
    return binary

def zero_one_cost(pred,Y):
    """Return the 0/1 error: the fraction of predictions that differ from Y.

    Args:
        pred: Array of predicted labels.
        Y: Array of true labels, with the same shape as ``pred``.

    Returns:
        The number of mismatching entries divided by ``Y.shape[0]``.
    """
    assert pred.shape==Y.shape,"wrong shape of pred and Y!"
    mismatched = pred != Y
    return mismatched.sum() / Y.shape[0]

def question15():
    """Plot ||w|| of a linear hinge-loss SVM against log10(C); save 15.png.

    Trains ``LinearSVC`` on the training set with class 0 as the positive
    class, once for each C in 10**-6, 10**-4, ..., 10**2.
    """
    c_list = [-6, -4, -2, 0, 2]
    new_Y = convert_label(data_Y, 0)

    def weight_norm(exponent):
        # Fit one model and return the Euclidean norm of its weight vector.
        clf = svm.LinearSVC(loss="hinge", C=10 ** exponent)
        clf.fit(data_X, new_Y)
        return np.sqrt(np.sum(clf.coef_ ** 2))

    w_list = [weight_norm(exponent) for exponent in c_list]

    plt.figure(figsize=(10, 6))
    # Draw the curve twice: a blue line, then red markers on the points.
    plt.plot(c_list, w_list, 'b')
    plt.plot(c_list, w_list, 'ro')
    # Label each marker with its value, nudged right to avoid overlap.
    for exponent, norm in zip(c_list, w_list):
        plt.text(exponent + 0.1, norm, str(round(norm, 4)))
    plt.xlabel("log10(C)")
    plt.ylabel("||w||")
    plt.xlim(-8, 4)
    plt.title("||w|| versus log10(C)")
    plt.savefig("15.png")

def question16and17():
    """Plot Ein (16.png) and the sum of alpha (17.png) against log10(C).

    Trains a degree-2 polynomial-kernel SVM on the training set with class 8
    as the positive class, once for each C in 10**-6, 10**-4, ..., 10**2.
    """
    c_list = [-6, -4, -2, 0, 2]
    new_Y = convert_label(data_Y, 8)

    Ein_list = []
    alpha_sum_list = []
    for exponent in c_list:
        clf = svm.SVC(C=10 ** exponent, kernel='poly', degree=2, gamma=1, coef0=1)
        clf.fit(data_X, new_Y)
        # 16: in-sample 0/1 error.
        Ein_list.append(zero_one_cost(clf.predict(data_X), new_Y))
        # 17: dual_coef_ holds alpha_n * y_n for the support vectors, so the
        # absolute values add up to the sum of alpha.
        alpha_sum_list.append(np.sum(np.abs(clf.dual_coef_)))

    # Both figures share one layout; only the data, label precision, axis
    # label, title and output file differ.
    figures = (
        (Ein_list, 4, "Ein", "Ein versus log10(C)", "16.png"),
        (alpha_sum_list, 6, "sum of alpha", "sum of alpha versus log10(C)", "17.png"),
    )
    for values, digits, y_label, title, filename in figures:
        plt.figure(figsize=(10, 6))
        plt.plot(c_list, values, 'b')
        plt.plot(c_list, values, 'ro')
        for exponent, value in zip(c_list, values):
            plt.text(exponent + 0.1, value, str(round(value, digits)))
        plt.xlabel("log10(C)")
        plt.ylabel(y_label)
        plt.xlim(-8, 4)
        plt.title(title)
        plt.savefig(filename)

def question18():
    """Plot y * f(x) of the first support vector against log10(C); save 18.png.

    Trains an RBF-kernel SVM (gamma=100) on the training set with class 0 as
    the positive class, once for each C in 10**-3 ... 10**1.
    """
    c_list = [-3, -2, -1, 0, 1]
    new_Y = convert_label(data_Y, 0)

    dis_list = []
    for exponent in c_list:
        clf = svm.SVC(C=10 ** exponent, kernel='rbf', gamma=100)
        clf.fit(data_X, new_Y)
        # NOTE(review): this takes the FIRST support vector; nothing here
        # checks that it is a free one (0 < alpha < C) -- confirm that this
        # matches what the plot labels claim.
        first_sv = clf.support_[0]
        scores = clf.decision_function(data_X)
        dis_list.append(new_Y[first_sv] * scores[first_sv])

    plt.figure(figsize=(10, 6))
    # Blue line first, then red markers on the same points.
    plt.plot(c_list, dis_list, 'b')
    plt.plot(c_list, dis_list, 'ro')
    for exponent, margin in zip(c_list, dis_list):
        plt.text(exponent + 0.1, margin, str(round(margin, 4)))
    plt.xlabel("log10(C)")
    plt.ylabel("free sv's function distance to hyperplane")
    plt.xlim(-5, 3)
    plt.ylim(ymax=1.01)
    plt.title("free sv's function distance to hyperplane versus log10(C)")
    plt.savefig("18.png")

def question19():
    """Plot Eout of an RBF-kernel SVM against log10(gamma); save 19.png.

    Trains with C=0.1 on the training set (class 0 as the positive class)
    for gamma in 10**0 ... 10**4 and measures the 0/1 error on the test set.
    """
    new_Y = convert_label(data_Y, 0)
    new_test_Y = convert_label(test_Y, 0)
    gamma_list = [0, 1, 2, 3, 4]

    Eout_list = []
    for exponent in gamma_list:
        clf = svm.SVC(C=0.1, kernel='rbf', gamma=10 ** exponent)
        clf.fit(data_X, new_Y)
        pred = clf.predict(test_X)
        Eout_list.append(zero_one_cost(pred, new_test_Y))

    plt.figure(figsize=(10, 6))
    # Blue line first, then red markers on the same points.
    plt.plot(gamma_list, Eout_list, 'b')
    plt.plot(gamma_list, Eout_list, 'ro')
    for exponent, err in zip(gamma_list, Eout_list):
        plt.text(exponent + 0.1, err, str(round(err, 4)))
    plt.xlabel("log10(gamma)")
    plt.ylabel("Eout")
    plt.xlim(-1, 5)
    plt.ylim(ymax=0.19)
    # The swept quantity is gamma (C is fixed at 0.1), so the title names
    # gamma to agree with the x-axis label.
    plt.title("Eout versus log10(gamma)")
    plt.savefig("19.png")

def question20():
    """Count how often each gamma wins on a random validation split; save 20.png.

    Repeats 100 times: hold out 1000 randomly chosen training examples as a
    validation set, train an RBF-kernel SVM (C=0.1, class 0 as the positive
    class) on the rest for gamma in 10**0 ... 10**4, and record the gamma
    with the lowest validation error (ties go to the smallest gamma). The
    bar chart shows how many times each gamma was selected.
    """
    new_Y = convert_label(data_Y, 0)
    gamma_list = [0, 1, 2, 3, 4]
    num_data = data_X.shape[0]

    chosen_gamma = []
    for t in range(100):
        np.random.seed(t)  # one fixed seed per round keeps the run repeatable
        # Draw WITHOUT replacement: `randint` can repeat indices, which
        # leaves fewer than 1000 distinct validation examples.
        val_ids = np.random.permutation(num_data)[:1000]
        # A boolean mask splits the data in one pass and keeps the original
        # row order in both parts.
        is_val = np.zeros(num_data, dtype=bool)
        is_val[val_ids] = True
        train_X, train_Y = data_X[~is_val], new_Y[~is_val]
        val_X, val_Y = data_X[is_val], new_Y[is_val]

        Eval_list = []
        for g in gamma_list:
            clf = svm.SVC(C=0.1, kernel='rbf', gamma=10 ** g)
            clf.fit(train_X, train_Y)
            pred = clf.predict(val_X)
            Eval_list.append(zero_one_cost(pred, val_Y))
        # list.index returns the first minimum, so ties pick the smallest gamma.
        chosen_gamma.append(gamma_list[Eval_list.index(min(Eval_list))])

    times = [chosen_gamma.count(g) for g in gamma_list]

    plt.figure(figsize=(10, 6))
    # Pass positions and heights positionally: the old `left=` keyword was
    # renamed to `x` and is rejected by current Matplotlib.
    plt.bar(gamma_list, times, width=1, align="center", yerr=0.000001)
    for (c, w) in zip(gamma_list, times):
        plt.text(c, w * 1.03, str(round(w, 4)))
    plt.xlabel("log10(gamma)")
    plt.ylabel("the number of chosen times")
    plt.xlim(-1, 5)
    plt.ylim(0, 80)
    plt.title("the number of chosen times for gamma")
    plt.savefig("20.png")

if __name__=="__main__":
    # Regenerate every homework figure, in question order.
    for run_question in (question15, question16and17, question18, question19, question20):
        run_question()

运行结果

机器学习技法笔记：Homework #5 特征变换&Soft-Margin SVM相关习题的更多相关文章

机器学习技法笔记(2)-Linear SVM
从这一节开始学习机器学习技法课程中的SVM, 这一节主要介绍标准形式的SVM: Linear SVM 引入SVM 首先回顾Perceptron Learning Algorithm(感知器算法PLA)是 ...
机器学习：SVM（目标函数推导：Hard Margin SVM、Soft Margin SVM）
一.Hard Margin SVM SVM 的思想,最终用数学表达出来,就是在优化一个有条件的目标函数: 此为 Hard Margin SVM,一切的前提都是样本类型线性可分: 1)思想 SVM 算法 ...
Jordan Lecture Note-7: Soft Margin SVM
Soft Margin SVM (1)Recall 之前分析到SVM的模型为: \begin{align}\mathop{\min}&\quad \frac{1}{2}w^\prime w\ ...
SVM3 Soft Margin SVM
之前分为两部分讨论过SVM.第一部分讨论了线性SVM,并且针对线性不可分的数据,把原始的问题转化为对偶的SVM求解.http://www.cnblogs.com/futurehau/p/6143178 ...
机器学习技法笔记：Homework #8 kNN&RBF&k-Means相关习题
原文地址:https://www.jianshu.com/p/1db700f866ee 问题描述程序实现 # kNN_RBFN.py # coding:utf-8 import numpy as n ...
机器学习技法笔记：Homework #7 Decision Tree&Random Forest相关习题
原文地址:https://www.jianshu.com/p/7ff6fd6fc99f 问题描述程序实现 13-15 # coding:utf-8 # decision_tree.py import ...
机器学习技法笔记：Homework #6 AdaBoost&Kernel Ridge Regression相关习题
原文地址:http://www.jianshu.com/p/9bf9e2add795 AdaBoost 问题描述程序实现 # coding:utf-8 import math import nump ...
spark机器学习从0到1特征变换-标签和索引的转化（十六）
一.原理在机器学习处理过程中,为了方便相关算法的实现,经常需要把标签数据(一般是字符串)转化成整数索引,或是在计算结束后将整数索引还原为相应的标签. Spark ML 包中提供了几个相关的转换器 ...
机器学习技法笔记：05 Kernel Logistic Regression
Roadmap Soft-Margin SVM as Regularized Model SVM versus Logistic Regression SVM for Soft Binary Clas ...

随机推荐

NGINX-二级域名
先给二级域名添加到 DNS 解析再配置 nginx server { #侦听80端口 listen 80; #定义使用 www.nginx.cn访问 server_name ~^(?<subdo ...
Windows环境下Oracle数据库的自动备份脚本自动删除30天前的备份
@echo off echo ================================================ echo Windows环境下Oracle数据库的自动备份脚本 echo ...
nginx 配置反向代理和静态资源
https://unit.nginx.org/integration/ 与NGINX集成在NGINX后面安装单元将NGINX配置为静态Web服务器,并在Unit前面配置反向代理. NGINX直接从 ...
Eureka 系列（03）Spring Cloud 自动装配原理
Eureka 系列(03)Spring Cloud 自动装配原理 [TOC] 0. Spring Cloud 系列目录 - Eureka 篇本文主要是分析 Spring Cloud 是如何整合 Eu ...
Vertical-Align，你应该知道的一切
我们聊聊vertical-align.这个属性主要目的用于将相邻的文本与元素对齐.而实际上,vertical-align可以在不同上下文中灵活地对齐元素,以及进行细粒度的控制,不必知道元素的大小.元素 ...
python- 粘包 struct,socketserver
黏包黏包现象让我们基于tcp先制作一个远程执行命令的程序(命令ls -l ; lllllll ; pwd) res=subprocess.Popen(cmd.decode('utf-8'), sh ...
Python可以用于客户端Web开发吗？
N00b在Python,但我有大量的PHP经验,并希望扩展我的技能. 我知道Python在服务器端执行方面很出色,只是想知道客户端. 解决方案你看过skulpt吗? http://www.skulp ...
redis 入门之列表
lpush 将一个或多个值 value 插入到列表 key 的表头如果有多个 value 值,那么各个 value 值按从左到右的顺序依次插入到表头: 比如说,对空列表 mylist 执行命令 LPU ...
[轉]udp_sendmsg空指针漏洞分析 by wzt
udp_sendmsg空指针漏洞分析 by wzt 漏洞描述: 由于Linux ipv4协议栈中udp_sendmsg()函数设计上存在缺陷, 导致struct rtable *rt以空指针形式 ...
Java技术专区-虚拟机系列-虚拟机参数（常用）
基础参数系类(内存分配) -server:一定要作为第一个参数,在多个CPU时性能佳 -Xmn:young generation的heap大小,一般设置为Xmx的3.4分之一-Xms:初始Heap大小 ...

机器学习技法笔记：Homework #5 特征变换&Soft-Margin SVM相关习题

特征变换

问题描述

程序实现

运行结果

Soft-Margin SVM

问题描述

程序实现

运行结果

机器学习技法笔记：Homework #5 特征变换&Soft-Margin SVM相关习题的更多相关文章

随机推荐

热门专题