原文地址:https://www.jianshu.com/p/6bf801bdc644

特征变换

问题描述



程序实现

# coding: utf-8

import numpy as np
from cvxopt import matrix, solvers
from sklearn import svm


def gen_data():
    """Return the seven 2-D training points and their +1/-1 labels.

    Returns:
        X: integer array of shape (7, 2), one example per row.
        y: integer array of shape (7,) holding the labels.
    """
    X = np.array([[1, 0], [0, 1], [0, -1], [-1, 0], [0, 2], [0, -2], [-2, 0]])
    y = np.array([-1, -1, -1, 1, 1, 1, 1])
    assert X.shape[0] == y.shape[0] and X.shape[1] == 2, "wrong data shape!"
    return X, y


def explict_transform(X):
    """Apply the explicit feature map to every row of X.

    Each row (x1, x2) becomes (x2**2 - 2*x1 + 3, x1**2 - 2*x2 - 3).

    Args:
        X: array of shape (n, 2).

    Returns:
        Array of shape (n, 2) with the transformed features.
    """
    assert X.shape[1] == 2, "wrong shape of X!"
    x1 = X[:, 0]
    x2 = X[:, 1]
    return np.column_stack((x2 ** 2 - 2 * x1 + 3, x1 ** 2 - 2 * x2 - 3))


def svm_hard_linear(X, y):
    """Solve the hard-margin linear SVM primal with cvxopt's QP solver.

    The optimisation variable is u = (b, w). The objective is 0.5 * ||w||^2
    (b is not penalised, hence the zero first row/column of P) and the
    constraints are y_n * (b + w . x_n) >= 1, written as G u <= h.

    Args:
        X: array of shape (n, d) with one example per row.
        y: labels in {-1, +1}; any shape with n elements is accepted.

    Returns:
        The cvxopt solution vector of length d + 1: b first, then w.
    """
    num, dim = X.shape
    # Force a float column so that y broadcasts row-wise against [1, X];
    # a 1-D y would otherwise broadcast along the wrong axis.
    y_col = np.asarray(y, dtype=float).reshape(num, 1)

    P = np.zeros((1 + dim, 1 + dim))
    P[1:, 1:] = np.eye(dim)
    q = np.zeros((1 + dim, 1))
    # The builtin float default of np.ones replaces the removed np.float alias.
    G = -y_col * np.hstack((np.ones((num, 1)), X))
    h = -np.ones((num, 1))

    sol = solvers.qp(matrix(P, tc='d'), matrix(q, tc='d'),
                     matrix(G, tc='d'), matrix(h, tc='d'))
    return sol['x']


def implicit_transform(X):
    """Expand 2-D inputs into the six features of the kernel (1 + x.x')**2.

    Each row (x1, x2) becomes
    (1, sqrt(2)*x1, sqrt(2)*x2, x1**2, x2**2, sqrt(2)*x1*x2).

    Args:
        X: array of shape (n, 2).

    Returns:
        Float array of shape (n, 6).
    """
    assert X.shape[1] == 2, "wrong shape of X!"
    x1 = X[:, 0]
    x2 = X[:, 1]
    root2 = 2 ** 0.5
    return np.column_stack((
        np.ones(X.shape[0]),
        root2 * x1,
        root2 * x2,
        x1 ** 2,
        x2 ** 2,
        root2 * x1 * x2,
    ))


if __name__ == "__main__":
    np.set_printoptions(precision=6, suppress=True)
    X, y = gen_data()

    # Explicit transform (question 2): hard-margin SVM in the transformed space.
    exp_X = explict_transform(X)
    u = np.array(svm_hard_linear(exp_X, y.reshape(y.shape[0], 1)))
    b = u[0, :]
    w = u[1:, :]
    print("b:\n", b)
    print("w:\n", w)

    # Implicit transform: a very large C approximates the hard margin.
    clf = svm.SVC(C=1000000, kernel='poly', degree=2, gamma=1, coef0=1)
    clf.fit(X, y)

    # Question 3: dual_coef_ stores alpha_n * y_n for each support vector,
    # so dividing by the label recovers alpha_n.
    alpha_y = clf.dual_coef_.reshape(-1)
    sv_ID = clf.support_
    alpha = alpha_y / y[sv_ID]
    print("alpha*y:\n", alpha_y)
    print("alpha:\n", alpha)
    sv_X = clf.support_vectors_
    print("support vectors:\n", sv_X)

    # Question 4: rebuild w in the six-dimensional feature space.
    b = clf.intercept_
    print("b:\n", b)
    w = np.dot(alpha_y, implicit_transform(sv_X)).reshape((6, 1))
    print("w:\n", w)

运行结果

Soft-Margin SVM

问题描述





程序实现

# coding: utf-8

import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt


def read_data(dataFile):
    """Load a whitespace-separated numeric file.

    The first column of every row is the label and the remaining columns
    are the features. Blank lines are skipped.

    Args:
        dataFile: path of the text file to read.

    Returns:
        (dataX, dataY): features of shape (n, d) and labels of shape (n,).
    """
    with open(dataFile, 'r') as f:
        rows = [[float(tok) for tok in line.split()]
                for line in f if line.strip()]
    dataArray = np.array(rows)
    num_data = dataArray.shape[0]
    num_dim = dataArray.shape[1] - 1
    dataX = dataArray[:, 1:].reshape((num_data, num_dim))
    dataY = dataArray[:, 0].reshape((num_data,))
    return dataX, dataY


# Loaded at import time; every questionNN() below reads these globals.
data_X, data_Y = read_data("features.train")
test_X, test_Y = read_data("features.test")


def convert_label(dataY, chosen_class):
    """Build one-versus-all labels: +1 where dataY == chosen_class, else -1.

    The result has the same shape and dtype as dataY.
    """
    return np.where(dataY == chosen_class, 1, -1).astype(dataY.dtype)


def zero_one_cost(pred, Y):
    """Return the fraction of positions where pred and Y differ."""
    assert pred.shape == Y.shape, "wrong shape of pred and Y!"
    return np.mean(np.not_equal(pred, Y))


def _plot_curve(xs, ys, xlabel, ylabel, title, filename, xlim,
                ytop=None, digits=4):
    """Draw a blue line with red markers, label each point, save to filename.

    Args:
        xs, ys: point coordinates.
        xlabel, ylabel, title: axis labels and figure title.
        filename: output image path passed to plt.savefig.
        xlim: (left, right) x-axis limits.
        ytop: optional upper y-axis limit.
        digits: number of decimals used for the point annotations.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(xs, ys, 'b')
    plt.plot(xs, ys, 'ro')
    for x, y in zip(xs, ys):
        plt.text(x + 0.1, y, str(round(y, digits)))
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xlim(*xlim)
    if ytop is not None:
        # `top` is the supported keyword; the old `ymax` alias was removed.
        plt.ylim(top=ytop)
    plt.title(title)
    plt.savefig(filename)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close()


def question15():
    """Plot ||w|| of a hinge-loss LinearSVC ("0" versus rest) against log10(C)."""
    c_list = [-6, -4, -2, 0, 2]
    w_list = []
    new_Y = convert_label(data_Y, 0)
    for exp in c_list:
        clf = svm.LinearSVC(loss="hinge", C=10.0 ** exp)
        clf.fit(data_X, new_Y)
        w_list.append(np.linalg.norm(clf.coef_))
    _plot_curve(c_list, w_list, "log10(C)", "||w||",
                "||w|| versus log10(C)", "15.png", (-8, 4))


def question16and17():
    """Plot Ein and the sum of alphas against log10(C) ("8" versus rest).

    Uses the degree-2 polynomial kernel (1 + x.x')**2.
    """
    c_list = [-6, -4, -2, 0, 2]
    Ein_list = []
    alpha_sum_list = []
    new_Y = convert_label(data_Y, 8)
    for exp in c_list:
        clf = svm.SVC(C=10.0 ** exp, kernel='poly', degree=2, gamma=1, coef0=1)
        clf.fit(data_X, new_Y)
        pred = clf.predict(data_X)
        Ein_list.append(zero_one_cost(pred, new_Y))
        # dual_coef_ holds alpha_n * y_n, so absolute values give alpha_n.
        alpha_sum_list.append(np.sum(np.abs(clf.dual_coef_)))
    # Question 16
    _plot_curve(c_list, Ein_list, "log10(C)", "Ein",
                "Ein versus log10(C)", "16.png", (-8, 4))
    # Question 17
    _plot_curve(c_list, alpha_sum_list, "log10(C)", "sum of alpha",
                "sum of alpha versus log10(C)", "17.png", (-8, 4), digits=6)


def question18():
    """Plot y * decision_function of a free support vector against log10(C).

    A free support vector has 0 < alpha < C. The first such vector is used;
    if the model has none, the first support vector is used instead.
    """
    c_list = [-3, -2, -1, 0, 1]
    dis_list = []
    new_Y = convert_label(data_Y, 0)
    for exp in c_list:
        C = 10.0 ** exp
        clf = svm.SVC(C=C, kernel='rbf', gamma=100)
        clf.fit(data_X, new_Y)
        sv_ID = clf.support_
        alphas = np.abs(clf.dual_coef_[0])
        # Bounded support vectors sit at alpha == C; the small relative
        # tolerance keeps them out of the "free" set.
        free = np.flatnonzero(alphas < C * (1 - 1e-8))
        pick = sv_ID[free[0]] if free.size else sv_ID[0]
        # Score only the chosen row instead of the whole training set.
        score = clf.decision_function(data_X[pick:pick + 1])[0]
        dis_list.append(new_Y[pick] * score)
    _plot_curve(c_list, dis_list, "log10(C)",
                "free sv's function distance to hyperplane",
                "free sv's function distance to hyperplane versus log10(C)",
                "18.png", (-5, 3), ytop=1.01)


def question19():
    """Plot Eout of an RBF SVM (C=0.1, "0" versus rest) against log10(gamma)."""
    new_Y = convert_label(data_Y, 0)
    new_test_Y = convert_label(test_Y, 0)
    gamma_list = [0, 1, 2, 3, 4]
    Eout_list = []
    for exp in gamma_list:
        clf = svm.SVC(C=0.1, kernel='rbf', gamma=10.0 ** exp)
        clf.fit(data_X, new_Y)
        pred = clf.predict(test_X)
        Eout_list.append(zero_one_cost(pred, new_test_Y))
    # The x-axis is gamma, so the title says gamma (it previously said C).
    _plot_curve(gamma_list, Eout_list, "log10(gamma)", "Eout",
                "Eout versus log10(gamma)", "19.png", (-1, 5), ytop=0.19)


def question20():
    """Count how often each gamma wins on a random validation split.

    For each of 100 seeded runs, 1000 distinct training examples are held
    out for validation, an RBF SVM (C=0.1) is fit per gamma on the rest,
    and the gamma with the lowest validation error is recorded (the
    smallest gamma wins ties). The counts are saved as a bar chart.

    Raises:
        ValueError: from np.random.choice if fewer than 1000 examples exist.
    """
    new_Y = convert_label(data_Y, 0)
    gamma_list = [0, 1, 2, 3, 4]
    chosen_gamma = []
    num_data = data_X.shape[0]
    for t in range(100):
        np.random.seed(t)
        # Sample without replacement so the validation set really has
        # 1000 distinct examples.
        val_idx = np.random.choice(num_data, 1000, replace=False)
        is_val = np.zeros(num_data, dtype=bool)
        is_val[val_idx] = True
        train_X, train_Y = data_X[~is_val], new_Y[~is_val]
        val_X, val_Y = data_X[is_val], new_Y[is_val]

        Eval_list = []
        for g in gamma_list:
            clf = svm.SVC(C=0.1, kernel='rbf', gamma=10.0 ** g)
            clf.fit(train_X, train_Y)
            pred = clf.predict(val_X)
            Eval_list.append(zero_one_cost(pred, val_Y))
        # argmin returns the first minimum, i.e. the smallest gamma on ties.
        chosen_gamma.append(gamma_list[int(np.argmin(Eval_list))])

    times = [chosen_gamma.count(g) for g in gamma_list]
    plt.figure(figsize=(10, 6))
    # Bar positions are passed positionally: the `left` keyword no longer exists.
    plt.bar(gamma_list, times, width=1, align="center", yerr=0.000001)
    for g, count in zip(gamma_list, times):
        plt.text(g, count * 1.03, str(count))
    plt.xlabel("log10(gamma)")
    plt.ylabel("the number of chosen times")
    plt.xlim(-1, 5)
    plt.ylim(0, 80)
    plt.title("the number of chosen times for gamma")
    plt.savefig("20.png")
    plt.close()


if __name__ == "__main__":
    question15()
    question16and17()
    question18()
    question19()
    question20()

运行结果











机器学习技法笔记:Homework #5 特征变换&Soft-Margin SVM相关习题的更多相关文章

  1. 机器学习技法笔记(2)-Linear SVM

    从这一节开始学习机器学习技法课程中的SVM, 这一节主要介绍标准形式的SVM: Linear SVM 引入SVM 首先回顾Perceptron Learning Algorithm(感知器算法PLA)是 ...

  2. 机器学习:SVM(目标函数推导:Hard Margin SVM、Soft Margin SVM)

    一.Hard Margin SVM SVM 的思想,最终用数学表达出来,就是在优化一个有条件的目标函数: 此为 Hard Margin SVM,一切的前提都是样本类型线性可分: 1)思想 SVM 算法 ...

  3. Jordan Lecture Note-7: Soft Margin SVM

    Soft Margin SVM  (1)Recall 之前分析到SVM的模型为: \begin{align}\mathop{\min}&\quad \frac{1}{2}w^\prime w\ ...

  4. SVM3 Soft Margin SVM

    之前分为两部分讨论过SVM.第一部分讨论了线性SVM,并且针对线性不可分的数据,把原始的问题转化为对偶的SVM求解.http://www.cnblogs.com/futurehau/p/6143178 ...

  5. 机器学习技法笔记:Homework #8 kNN&RBF&k-Means相关习题

    原文地址:https://www.jianshu.com/p/1db700f866ee 问题描述 程序实现 # kNN_RBFN.py # coding:utf-8 import numpy as n ...

  6. 机器学习技法笔记:Homework #7 Decision Tree&Random Forest相关习题

    原文地址:https://www.jianshu.com/p/7ff6fd6fc99f 问题描述 程序实现 13-15 # coding:utf-8 # decision_tree.py import ...

  7. 机器学习技法笔记:Homework #6 AdaBoost&Kernel Ridge Regression相关习题

    原文地址:http://www.jianshu.com/p/9bf9e2add795 AdaBoost 问题描述 程序实现 # coding:utf-8 import math import nump ...

  8. spark机器学习从0到1特征变换-标签和索引的转化(十六)

      一.原理 在机器学习处理过程中,为了方便相关算法的实现,经常需要把标签数据(一般是字符串)转化成整数索引,或是在计算结束后将整数索引还原为相应的标签. Spark ML 包中提供了几个相关的转换器 ...

  9. 机器学习技法笔记:05 Kernel Logistic Regression

    Roadmap Soft-Margin SVM as Regularized Model SVM versus Logistic Regression SVM for Soft Binary Clas ...

随机推荐

  1. ROS的使用

    1.输入roscore时出现错误:Unable to contact my own server at 修改: 在.bashrc文件中添加以下内容: export ROS_HOSTNAME=local ...

  2. Windows-右键菜单添加选项

    新建 add.reg 输入选项名和选项对应程序路径 Windows Registry Editor Version 5.00 [HKEY_CLASSES_ROOT\*\shell\选项名] [HKEY ...

  3. sqlserver2012分页注意事项

    SELECT orderid, orderdate, custid, empid FROM Sales.Orders ORDER BY orderdate, orderid OFFSET 600 RO ...

  4. HTTP响应状态码大全

    100 Continue 初始的请求已经接受,客户应当继续发送请求的其余部分 101 Switching Protocols 服务器将遵从客户的请求转换到另外一种协议 200 OK 一切正常,对GET ...

  5. JAVA中一个汉字占多少个字符(转载)

    1.先说重点: 不同的编码格式占字节数是不同的,UTF-8编码下一个中文所占字节也是不确定的,可能是2个.3个.4个字节: 2.以下是源码: 1 @Test 2 public void test1() ...

  6. JavaBean详解

    JavaBean详解 我们对JavaBean的理解可以从java和bean两个方面来理解,对于Java我们都很清楚,而bean在计算机中的含义为可重用组件.(ps:对Spring中的bean是不是有了 ...

  7. 转 MySQL乐观锁和悲观锁

    悲观锁(Pessimistic Lock) 顾名思义,就是很悲观,每次去拿数据的时候都认为别人会修改,所以每次在拿数据的时候都会上锁,这样别人想拿这个数据就会block直到它拿到锁.传统的关系型数据库 ...

  8. maven命名

    <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpcl ...

  9. 27-python基础-python3-异常处理(try except)

    到目前为止,在 Python 程序中遇到错误,或“异常”,意味着整个程序崩溃.不希望这发生在真实世界的程序中. 相反,希望程序能检测错误,处理它们,然后继续运行.   实例1: 当试图用一个数除以零时 ...

  10. CSS和jQuery分别实现图片无缝滚动效果

    一.效果图 二.使用CSS实现 <!DOCTYPE html> <html> <head> <meta charset="utf-8"&g ...