机器学习技法笔记:Homework #5 特征变换&Soft-Margin SVM相关习题
原文地址:https://www.jianshu.com/p/6bf801bdc644
特征变换
问题描述


程序实现
# coding: utf-8
import numpy as np
from cvxopt import matrix, solvers
from sklearn import svm
def gen_data():
    """Build the seven-point 2-D toy data set used by the feature-transform questions.

    Returns:
        A tuple ``(X, y)`` where ``X`` is an integer array of shape (7, 2)
        and ``y`` is a length-7 integer array of -1/+1 labels.
    """
    points = np.array([
        [1, 0], [0, 1], [0, -1],             # the three negative examples
        [-1, 0], [0, 2], [0, -2], [-2, 0],   # the four positive examples
    ])
    labels = np.array([-1] * 3 + [1] * 4)
    n_rows, n_cols = points.shape
    assert n_rows == labels.shape[0] and n_cols == 2, "wrong data shape!"
    return points, labels
def explict_transform(X):
    """Apply the explicit 2-D -> 2-D feature transform to every row of ``X``.

    Each point (x1, x2) is mapped to
    (x2**2 - 2*x1 + 3, x1**2 - 2*x2 - 3).

    Args:
        X: array of shape (num, 2).

    Returns:
        Array of shape (num, 2) holding the transformed points.
    """
    assert X.shape[1] == 2, "wrong shape of X!"
    first, second = X[:, 0], X[:, 1]
    z1 = second ** 2 - 2 * first + 3
    z2 = first ** 2 - 2 * second - 3
    # Turn the two 1-D feature vectors into columns and glue them side by side.
    return np.concatenate((z1[:, np.newaxis], z2[:, np.newaxis]), axis=1)
def svm_hard_linear(X, y):
    """Solve the hard-margin linear SVM primal problem as a QP via cvxopt.

    The optimisation variable is u = [b, w]. The problem solved is
        minimise    1/2 * w^T w
        subject to  y_n * (b + w^T x_n) >= 1   for every example n,
    expressed in cvxopt's form  min 1/2 u^T P u + q^T u  s.t.  G u <= h.

    Args:
        X: array of shape (num, dim) with one example per row.
        y: labels in {-1, +1}; shape (num,) or (num, 1).

    Returns:
        The ``'x'`` entry of the dict returned by ``solvers.qp``
        (the solution u, with b first and then the dim weights).
    """
    X = np.asarray(X, dtype=float)
    num, dim = X.shape
    # Force a column vector so the row-wise scaling below broadcasts correctly
    # whether the caller passed a flat label vector or a (num, 1) column.
    y = np.asarray(y, dtype=float).reshape(num, 1)

    # P is the identity with a zero in the top-left corner: b is not penalised.
    quad = np.eye(1 + dim)
    quad[0, 0] = 0.0
    P = matrix(quad, tc='d')
    q = matrix(np.zeros((1 + dim, 1)), tc='d')

    # y_n (b + w^T x_n) >= 1   <=>   -y_n [1, x_n] u <= -1
    # (the builtin ``float`` replaces the ``np.float`` alias removed from NumPy).
    G = matrix(-y * np.concatenate((np.ones((num, 1), dtype=float), X), axis=1), tc='d')
    h = matrix(-np.ones((num, 1)), tc='d')

    sol = solvers.qp(P, q, G, h)
    return sol['x']
def implicit_transform(X):
    """Expand 2-D points into the 6-D feature space of the kernel (1 + x.x')**2.

    Each point (x1, x2) becomes
    (1, sqrt(2)*x1, sqrt(2)*x2, x1**2, x2**2, sqrt(2)*x1*x2).

    Args:
        X: array of shape (num, 2).

    Returns:
        Float array of shape (num, 6).
    """
    assert X.shape[1] == 2, "wrong shape of X!"
    n = X.shape[0]
    a, b = X[:, 0], X[:, 1]
    root2 = 2 ** (0.5)
    features = (
        np.ones((n, 1)),   # constant term
        root2 * a,
        root2 * b,
        a ** 2,
        b ** 2,
        root2 * a * b,     # cross term
    )
    return np.concatenate([col.reshape(n, 1) for col in features], axis=1)
if __name__ == "__main__":
    np.set_printoptions(precision=6, suppress=True)
    X, y = gen_data()

    # --- Explicit transform (question 2) ---
    # Solve the hard-margin primal in the transformed space; the solution is [b; w].
    exp_X = explict_transform(X)
    u = np.array(svm_hard_linear(exp_X, y.reshape(y.shape[0], 1)))
    b, w = u[0, :], u[1:, :]
    print("b:\n", b)
    print("w:\n", w)

    # --- Implicit transform via the second-order polynomial kernel ---
    # A very large C is used with the soft-margin solver here.
    clf = svm.SVC(C=1000000, kernel='poly', degree=2, gamma=1, coef0=1)
    clf.fit(X, y)

    # Question 3: dual_coef_ is divided by each support vector's label to get alpha.
    alpha_y = clf.dual_coef_
    alpha_y = alpha_y.reshape((alpha_y.shape[1],))
    sv_ID = clf.support_
    sv_y = [y[idx] for idx in sv_ID]
    alpha = [coef / label for coef, label in zip(alpha_y, sv_y)]
    print("alpha*y:\n", alpha_y)
    print("alpha:\n", alpha)
    sv_X = clf.support_vectors_
    print("support vectors:\n", sv_X)

    # Question 4: rebuild w in the 6-D feature space from the support vectors.
    b = clf.intercept_
    print("b:\n", b)
    w = np.dot(alpha_y, implicit_transform(sv_X)).reshape((6, 1))
    print("w:\n", w)
运行结果

Soft-Margin SVM
问题描述



程序实现
# coding: utf-8
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt
def read_data(dataFile):
    """Load a whitespace-separated numeric data file.

    Every non-blank line is one example: the first field is the label and the
    remaining fields are the features.

    Args:
        dataFile: path of the text file to read.

    Returns:
        A tuple ``(dataX, dataY)`` where ``dataX`` has shape
        (num_data, num_dim) and ``dataY`` has shape (num_data,).

    Raises:
        ValueError: if the file has no data rows or a field is not a number.
    """
    data_list = []
    with open(dataFile, 'r') as f:
        # Stream the file rather than loading every line up front.
        for line in f:
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line would otherwise add an empty row
                # and make the array ragged.
                continue
            data_list.append([float(tok) for tok in fields])
    if not data_list:
        raise ValueError("no data rows found in %r" % (dataFile,))
    dataArray = np.array(data_list)
    num_data = dataArray.shape[0]
    num_dim = dataArray.shape[1] - 1
    dataX = dataArray[:, 1:].reshape((num_data, num_dim))
    dataY = dataArray[:, 0].reshape((num_data,))
    return dataX, dataY
# Load both splits once at import time; the question*() functions below read
# these module-level arrays directly.
data_X, data_Y = read_data("features.train")
test_X, test_Y = read_data("features.test")
def convert_label(dataY, chosen_class):
    """Turn multi-class labels into one-versus-rest binary labels.

    Args:
        dataY: 1-D array of class labels.
        chosen_class: the class to be treated as positive.

    Returns:
        A new array with the same shape and dtype as ``dataY`` holding +1
        where the label equals ``chosen_class`` and -1 elsewhere. The input
        is not modified.
    """
    binary = -np.ones_like(dataY)
    binary[dataY == chosen_class] = 1
    return binary
def zero_one_cost(pred, Y):
    """Return the 0/1 error: the fraction of entries where ``pred`` differs from ``Y``.

    Args:
        pred: array of predicted labels.
        Y: array of true labels, same shape as ``pred``.

    Raises:
        AssertionError: if the two arrays do not have the same shape.
    """
    assert pred.shape == Y.shape, "wrong shape of pred and Y!"
    mismatches = np.not_equal(pred, Y)
    n_wrong = np.sum(mismatches)
    return n_wrong / Y.shape[0]
def question15():
    """Plot ||w|| of a linear soft-margin SVM ("0" versus rest) against log10(C).

    Trains one ``LinearSVC`` with hinge loss per exponent in
    {-6, -4, -2, 0, 2} on the module-level training data and saves the
    curve to ``15.png``.
    """
    new_Y = convert_label(data_Y, 0)
    c_list = [-6, -4, -2, 0, 2]

    def _weight_norm(exponent):
        # Fit at C = 10**exponent and return the Euclidean norm of the weights.
        model = svm.LinearSVC(loss="hinge", C=10 ** exponent)
        model.fit(data_X, new_Y)
        return np.sqrt(np.sum(model.coef_ ** 2))

    w_list = [_weight_norm(exponent) for exponent in c_list]

    plt.figure(figsize=(10, 6))
    plt.plot(c_list, w_list, 'b')    # connecting line
    plt.plot(c_list, w_list, 'ro')   # markers on top of the line
    for log_c, norm in zip(c_list, w_list):
        # Label each marker with its value, nudged right so it stays readable.
        plt.text(log_c + 0.1, norm, str(round(norm, 4)))
    plt.xlabel("log10(C)")
    plt.ylabel("||w||")
    plt.xlim(-8, 4)
    plt.title("||w|| versus log10(C)")
    plt.savefig("15.png")
def question16and17():
    """Plot Ein and the sum of alphas against log10(C) for "8" versus rest.

    Uses a second-order polynomial kernel (gamma=1, coef0=1) on the
    module-level training data. Saves the in-sample error curve to
    ``16.png`` and the sum-of-alpha curve to ``17.png``.
    """
    c_list = [-6, -4, -2, 0, 2]
    new_Y = convert_label(data_Y, 8)

    Ein_list = []
    alpha_sum_list = []
    for exponent in c_list:
        model = svm.SVC(C=10 ** exponent, kernel='poly', degree=2, gamma=1, coef0=1)
        model.fit(data_X, new_Y)
        Ein_list.append(zero_one_cost(model.predict(data_X), new_Y))
        # dual_coef_ stores alpha_n * y_n, so absolute values recover the alphas.
        alpha_sum_list.append(np.sum(np.abs(model.dual_coef_)))

    def _save_curve(values, digits, y_label, title, filename):
        # Shared plotting recipe: blue line, red markers, a value label per point.
        plt.figure(figsize=(10, 6))
        plt.plot(c_list, values, 'b')
        plt.plot(c_list, values, 'ro')
        for log_c, value in zip(c_list, values):
            plt.text(log_c + 0.1, value, str(round(value, digits)))
        plt.xlabel("log10(C)")
        plt.ylabel(y_label)
        plt.xlim(-8, 4)
        plt.title(title)
        plt.savefig(filename)

    # Question 16
    _save_curve(Ein_list, 4, "Ein", "Ein versus log10(C)", "16.png")
    # Question 17
    _save_curve(alpha_sum_list, 6, "sum of alpha",
                "sum of alpha versus log10(C)", "17.png")
def question18():
    """Plot a free support vector's functional margin against log10(C).

    Trains an RBF-kernel SVM (gamma=100, "0" versus rest) for each
    C = 10**i with i in {-3, -2, -1, 0, 1} on the module-level training
    data. For each model it evaluates y * decision_function(x) at one free
    support vector and saves the curve to ``18.png``.

    NOTE(review): this is the functional margin, not the geometric distance
    to the hyperplane; the latter would also need dividing by ||w||.
    """
    c_list = [-3, -2, -1, 0, 1]
    dis_list = []
    new_Y = convert_label(data_Y, 0)
    for i in c_list:
        C = 10 ** i
        clf = svm.SVC(C=C, kernel='rbf', gamma=100)
        clf.fit(data_X, new_Y)

        # A *free* support vector has 0 < alpha < C, and |dual_coef_| is alpha.
        # The first entry of support_ may be a bounded SV (alpha == C), which
        # is allowed to sit inside the margin, so look for a free one.
        # Fall back to the first SV only if every SV is bounded.
        bound = C * (1 - 1e-8)
        pos = next((k for k, coef in enumerate(clf.dual_coef_[0])
                    if abs(coef) < bound), 0)
        sv = clf.support_[pos]

        # Score only the selected row instead of the whole training set.
        score = clf.decision_function(data_X[sv:sv + 1])[0]
        dis_list.append(new_Y[sv] * score)

    plt.figure(figsize=(10, 6))
    plt.plot(c_list, dis_list, 'b')
    plt.plot(c_list, dis_list, 'ro')
    for (c, w) in zip(c_list, dis_list):
        plt.text(c + 0.1, w, str(round(w, 4)))
    plt.xlabel("log10(C)")
    plt.ylabel("free sv's function distance to hyperplane")
    plt.xlim(-5, 3)
    plt.ylim(ymax=1.01)
    plt.title("free sv's function distance to hyperplane versus log10(C)")
    plt.savefig("18.png")
def question19():
    """Plot Eout of an RBF-kernel SVM ("0" versus rest) against log10(gamma).

    Trains with C=0.1 and gamma = 10**i for i in {0, 1, 2, 3, 4} on the
    module-level training data, measures the 0/1 error on the module-level
    test data, and saves the curve to ``19.png``.
    """
    new_Y = convert_label(data_Y, 0)
    new_test_Y = convert_label(test_Y, 0)
    gamma_list = [0, 1, 2, 3, 4]
    Eout_list = []
    for i in gamma_list:
        clf = svm.SVC(C=0.1, kernel='rbf', gamma=10 ** i)
        clf.fit(data_X, new_Y)
        pred = clf.predict(test_X)
        Eout_list.append(zero_one_cost(pred, new_test_Y))

    plt.figure(figsize=(10, 6))
    plt.plot(gamma_list, Eout_list, 'b')
    plt.plot(gamma_list, Eout_list, 'ro')
    for (g, e) in zip(gamma_list, Eout_list):
        plt.text(g + 0.1, e, str(round(e, 4)))
    plt.xlabel("log10(gamma)")
    plt.ylabel("Eout")
    plt.xlim(-1, 5)
    plt.ylim(ymax=0.19)
    # The swept quantity is gamma (C is fixed at 0.1), so the title says gamma.
    plt.title("Eout versus log10(gamma)")
    plt.savefig("19.png")
def question20():
    """Count how often each gamma wins on a random 1000-example validation split.

    Repeats 100 times with seeds 0..99: hold out 1000 distinct training
    examples for validation, train an RBF-kernel SVM (C=0.1, "0" versus
    rest) for gamma = 10**g with g in {0, 1, 2, 3, 4} on the rest, and
    record the g with the lowest validation error (ties go to the smaller
    g). Saves a bar chart of the win counts to ``20.png``.

    Raises:
        ValueError: from ``np.random.choice`` if the training set has fewer
            than 1000 examples.
    """
    new_Y = convert_label(data_Y, 0)
    gamma_list = [0, 1, 2, 3, 4]
    num = data_X.shape[0]
    chosen_gamma = []
    for t in range(100):
        np.random.seed(t)
        # Sample WITHOUT replacement so the validation set has exactly 1000
        # distinct examples; randint could draw the same index repeatedly.
        chosenID = np.random.choice(num, 1000, replace=False)

        # A boolean mask splits the data in one pass and keeps row order.
        val_mask = np.zeros(num, dtype=bool)
        val_mask[chosenID] = True
        train_X = data_X[~val_mask]
        train_Y = new_Y[~val_mask]
        val_X = data_X[val_mask]
        val_Y = new_Y[val_mask]

        Eval_list = []
        for g in gamma_list:
            clf = svm.SVC(C=0.1, kernel='rbf', gamma=10 ** g)
            clf.fit(train_X, train_Y)
            pred = clf.predict(val_X)
            Eval_list.append(zero_one_cost(pred, val_Y))
        # list.index returns the first minimum, so ties pick the smaller gamma.
        chosen_gamma.append(gamma_list[Eval_list.index(min(Eval_list))])

    times = [chosen_gamma.count(g) for g in gamma_list]

    plt.figure(figsize=(10, 6))
    # Bar positions are passed positionally: the old ``left=`` keyword is no
    # longer accepted by Matplotlib.
    plt.bar(gamma_list, times, width=1, align="center", yerr=0.000001)
    for (c, w) in zip(gamma_list, times):
        plt.text(c, w * 1.03, str(round(w, 4)))
    plt.xlabel("log10(gamma)")
    plt.ylabel("the number of chosen times")
    plt.xlim(-1, 5)
    plt.ylim(0, 80)
    plt.title("the number of chosen times for gamma")
    plt.savefig("20.png")
if __name__ == "__main__":
    # Run every experiment in question order; each one saves its own figure.
    for run_question in (question15, question16and17, question18,
                         question19, question20):
        run_question()
运行结果






机器学习技法笔记:Homework #5 特征变换&Soft-Margin SVM相关习题的更多相关文章
- 机器学习技法笔记(2)-Linear SVM
从这一节开始学习机器学习技法课程中的SVM, 这一节主要介绍标准形式的SVM: Linear SVM 引入SVM 首先回顾Perceptron Learning Algorithm(感知器算法PLA)是 ...
- 机器学习:SVM(目标函数推导:Hard Margin SVM、Soft Margin SVM)
一.Hard Margin SVM SVM 的思想,最终用数学表达出来,就是在优化一个有条件的目标函数: 此为 Hard Margin SVM,一切的前提都是样本类型线性可分: 1)思想 SVM 算法 ...
- Jordan Lecture Note-7: Soft Margin SVM
Soft Margin SVM (1)Recall 之前分析到SVM的模型为: \begin{align}\mathop{\min}&\quad \frac{1}{2}w^\prime w\ ...
- SVM3 Soft Margin SVM
之前分为两部分讨论过SVM.第一部分讨论了线性SVM,并且针对线性不可分的数据,把原始的问题转化为对偶的SVM求解.http://www.cnblogs.com/futurehau/p/6143178 ...
- 机器学习技法笔记:Homework #8 kNN&RBF&k-Means相关习题
原文地址:https://www.jianshu.com/p/1db700f866ee 问题描述 程序实现 # kNN_RBFN.py # coding:utf-8 import numpy as n ...
- 机器学习技法笔记:Homework #7 Decision Tree&Random Forest相关习题
原文地址:https://www.jianshu.com/p/7ff6fd6fc99f 问题描述 程序实现 13-15 # coding:utf-8 # decision_tree.py import ...
- 机器学习技法笔记:Homework #6 AdaBoost&Kernel Ridge Regression相关习题
原文地址:http://www.jianshu.com/p/9bf9e2add795 AdaBoost 问题描述 程序实现 # coding:utf-8 import math import nump ...
- spark机器学习从0到1特征变换-标签和索引的转化(十六)
一.原理 在机器学习处理过程中,为了方便相关算法的实现,经常需要把标签数据(一般是字符串)转化成整数索引,或是在计算结束后将整数索引还原为相应的标签. Spark ML 包中提供了几个相关的转换器 ...
- 机器学习技法笔记:05 Kernel Logistic Regression
Roadmap Soft-Margin SVM as Regularized Model SVM versus Logistic Regression SVM for Soft Binary Clas ...
随机推荐
- windows下Mysql5.7表名不区分大小写问题
前言 Windows文件系统本身是不区分大小写的,但是Linux文件系统是区分大小写的.于是安装在Linux下的Mysql导出到Windows下可能因为大小写问题导致错误,因此要开启Windows下My ...
- Git 设置和取消代理(SOCKS5代理)
设置代理 git config --global http.proxy 'socks5://127.0.0.1:1080' git config --global https.proxy 'socks ...
- Linux执行Java文件
最近学习shell脚本,写个简单java类让linux去执行 java类没别的东西,就引了一个fastjson的jar,写了个main方法 序列化一个User对象 打印 package com.lws ...
- 一道面试题:js返回函数, 函数名后带多个括号的用法及join()的注意事项
博客搬迁,给你带来的不便,敬请谅解! http://www.suanliutudousi.com/2017/11/13/js%E8%BF%94%E5%9B%9E%E5%87%BD%E6%95%B0%E ...
- MATLAB图像uint8,uint16,double, rgb转灰度解释
1.uint8,uint16与double 为了节省存储空间,matlab为图像提供了特殊的数据类型uint8(8位无符号整数),以此方式存储的图像称作8位图像.matlab读入图像的数据是uint8 ...
- 2019-9-2-win10-uwp-Markdown
title author date CreateTime categories win10 uwp Markdown lindexi 2019-09-02 12:57:38 +0800 2018-2- ...
- Linux面试基础(一)
Linux基本命令 man rm 查看命令帮助 tar cf 压缩文件 tar xf 解压缩 ping 查看是否能连接网络 rm 删除数据 find 搜索数 ...
- 视频专家之路【二】:ffmpeg工具的使用
本文是听了雷宵骅大神的课之后的总结,部分内容借用了其PPT的内容,如有侵权请告知删除. 雷宵骅大神的博客为:https://blog.csdn.net/leixiaohua1020 要学习汽修,那么首 ...
- 前端学习(十二)js数据类型(笔记)
选项卡: for循环 for(初始值,条件,自增){} for(var i=0; i<9;i++){} 几个按钮对应相同个内容!!! -------------------- ...
- NDK笔记(二)-在Android Studio中使用ndk-build(转)
前面一篇我们接触了CMake,这一篇写写关于ndk-build的使用过程.刚刚用到,想到哪儿写哪儿. 环境背景 Android开发IDE版本:AndroidStudio 2.2以上版本(目前已经升级到 ...