原文地址:http://www.jianshu.com/p/9bf9e2add795

AdaBoost

问题描述



程序实现

# coding:utf-8

import math
import numpy as np

# Lower bound applied to the weighted error before computing the AdaBoost
# scaling factor, so that a perfect stump (error 0) does not divide by zero.
_EPSILON_FLOOR = 1e-12


def ReadData(dataFile):
    """Load a whitespace-separated numeric text file into a 2-D float array.

    Each non-blank line becomes one row; every field is parsed with
    ``float``.  Blank lines are skipped so they cannot produce a ragged
    array.

    Args:
        dataFile: Path of the text file to read.

    Returns:
        ``np.ndarray`` built from the parsed rows.
    """
    data_list = []
    with open(dataFile, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate empty / trailing blank lines
            data_list.append([float(v) for v in fields])
    return np.array(data_list)


def sign(n):
    """Return 1 when ``n >= 0`` and -1 otherwise (zero maps to +1)."""
    return 1 if n >= 0 else -1


def _sign_array(values):
    """Element-wise version of ``sign`` returning a float array of +1.0/-1.0."""
    return np.where(np.asarray(values) >= 0, 1.0, -1.0)


def GetSortedArray(dataArray, i):
    """Return the rows of ``dataArray`` sorted in ascending order of column ``i``.

    A stable sort is used, so rows with equal keys keep their input order.
    """
    order = np.argsort(dataArray[:, i], kind="stable")
    return dataArray[order]


def GetUZeroOneError(pred, dataY, u):
    """Return the ``u``-weighted 0/1 error, normalised by ``sum(u)``."""
    return np.sum(u * np.not_equal(pred, dataY)) / np.sum(u)


def GetZeroOneError(pred, dataY):
    """Return the fraction of positions where ``pred`` differs from ``dataY``."""
    return np.sum(np.not_equal(pred, dataY)) / dataY.shape[0]


def decision_stump(dataArray, u):
    """Find the decision stump with the smallest ``u``-weighted 0/1 error.

    A stump predicts ``s * sign(x[d] - theta)``.  For every feature column
    ``d`` the candidate thresholds are ``-inf`` plus the midpoints between
    adjacent distinct sorted values; both directions ``s`` in ``(-1, +1)``
    are tried.  Ties are resolved in favour of the first candidate seen
    (feature order, then ``s = -1`` before ``s = +1``, then ascending theta).

    Args:
        dataArray: 2-D array whose last column is the label and whose other
            columns are features.
        u: 1-D array of per-sample weights, one per row of ``dataArray``.

    Returns:
        Tuple ``(s, d, theta, pred, err)``: the chosen direction, feature
        index, threshold, the stump's predictions on ``dataArray`` and its
        weighted error.  When no candidate improves on ``inf`` the scalar
        entries stay ``np.inf`` and ``pred`` is all zeros.
    """
    num_data = dataArray.shape[0]
    num_dim = dataArray.shape[1] - 1
    labels = dataArray[:, -1]

    min_e = np.inf
    min_s = np.inf
    min_d = np.inf
    min_theta = np.inf
    min_pred = np.zeros((num_data,))

    for d in range(num_dim):
        column = dataArray[:, d]
        # Sorting guarantees that midpoints of adjacent distinct values are
        # the only thresholds that can change the prediction vector.
        sorted_col = GetSortedArray(dataArray, d)[:, d]
        thresholds = [-np.inf]
        thresholds.extend(
            (lo + hi) / 2
            for lo, hi in zip(sorted_col[:-1], sorted_col[1:])
            if lo != hi
        )
        for s in (-1.0, 1.0):
            for theta in thresholds:
                pred = s * _sign_array(column - theta)
                now_e = GetUZeroOneError(pred, labels, u)
                # Strict '<' keeps the first best candidate on ties.
                if now_e < min_e:
                    min_e = now_e
                    min_s = s
                    min_d = d
                    min_theta = theta
                    min_pred = pred
    return min_s, min_d, min_theta, min_pred, min_e


def Pred(paraList, dataX):
    """Apply one decision stump to every row of ``dataX``.

    Args:
        paraList: ``[s, d, theta]`` as produced by ``decision_stump``.
        dataX: 2-D feature array.

    Returns:
        1-D float array of ``s * sign(dataX[:, d] - theta)``.
    """
    s, d, theta = paraList
    return s * _sign_array(dataX[:, d] - theta)


def GetAggregatedErrors(alpha_list, pred_list, dataY):
    """Return the 0/1 error of the weighted vote after each round.

    Entry ``t`` is the error of ``sign(sum_{k<=t} alpha_k * pred_k)``, i.e.
    the ensemble that *includes* the hypothesis of round ``t``.  The vote is
    accumulated incrementally instead of being re-summed for every round.

    Args:
        alpha_list: Sequence of hypothesis weights.
        pred_list: Sequence of 1-D prediction arrays, aligned with
            ``alpha_list``.
        dataY: 1-D array of true labels.

    Returns:
        List of error rates, one per round.
    """
    score = np.zeros((dataY.shape[0],))
    errors = []
    for alpha, pred in zip(alpha_list, pred_list):
        score = score + alpha * pred
        errors.append(GetZeroOneError(_sign_array(score), dataY))
    return errors


def TrainAdaBoost(dataArray, num_rounds=300):
    """Run AdaBoost with decision stumps and record per-round statistics.

    Args:
        dataArray: 2-D array whose last column is the label.
        num_rounds: Number of boosting rounds.

    Returns:
        Dict with the lists ``g_list`` (``[s, d, theta]`` per round),
        ``alpha_list``, ``min_pred_list`` (training predictions of each
        stump), ``ein_g_list``, ``ein_G_list``, ``u_sum_list`` (sum of the
        weights *before* each round) and ``epi_list`` (weighted error of
        each stump).
    """
    dataY = dataArray[:, -1]
    num_data = dataArray.shape[0]
    u = np.full(shape=(num_data,), fill_value=1.0 / num_data)

    g_list = []
    alpha_list = []
    min_pred_list = []
    ein_g_list = []
    u_sum_list = []
    epi_list = []

    for _ in range(num_rounds):
        u_sum_list.append(np.sum(u))
        min_s, min_d, min_theta, min_pred, epi = decision_stump(dataArray, u)
        g_list.append([min_s, min_d, min_theta])
        min_pred_list.append(min_pred)
        ein_g_list.append(GetZeroOneError(min_pred, dataY))
        epi_list.append(epi)

        # Keep the ratio finite when the stump is perfect (epi == 0).
        safe_epi = min(max(epi, _EPSILON_FLOOR), 1.0 - _EPSILON_FLOOR)
        para = math.sqrt((1 - safe_epi) / safe_epi)
        alpha_list.append(math.log(para))

        # Shrink the weights of correct samples, grow the incorrect ones.
        u = np.where(min_pred == dataY, u / para, u * para)

    return {
        "g_list": g_list,
        "alpha_list": alpha_list,
        "min_pred_list": min_pred_list,
        "ein_g_list": ein_g_list,
        "ein_G_list": GetAggregatedErrors(alpha_list, min_pred_list, dataY),
        "u_sum_list": u_sum_list,
        "epi_list": epi_list,
    }


def plot_line_chart(X=None, Y=None, nameX="t", nameY="Ein(gt)", saveName="12.png"):
    """Plot ``Y`` against ``X`` as a blue line with red markers and save it.

    Points whose x value is a multiple of 10 are annotated with the y value
    rounded to 4 decimals.  The figure is closed after saving so repeated
    calls do not accumulate open figures.

    Args:
        X: x coordinates; defaults to ``0 .. len(Y) - 1``.
        Y: y coordinates; defaults to ``0 .. 299``.
        nameX: x-axis label.
        nameY: y-axis label, also used in the title.
        saveName: Output image path passed to ``savefig``.
    """
    # Imported lazily so the numeric helpers in this script can be used
    # without matplotlib being importable.
    import matplotlib.pyplot as plt

    if Y is None:
        Y = list(range(300))
    if X is None:
        X = list(range(len(Y)))

    fig = plt.figure(figsize=(30, 12))
    try:
        plt.plot(X, Y, 'b')
        plt.plot(X, Y, 'ro')
        if len(X) > 0:
            plt.xlim((X[0] - 1, X[-1] + 1))
        for x, y in zip(X, Y):
            if x % 10 == 0:
                plt.text(x + 0.1, y, str(round(y, 4)))
        plt.xlabel(nameX)
        plt.ylabel(nameY)
        plt.title(nameY + " versus " + nameX)
        plt.savefig(saveName)
    finally:
        plt.close(fig)
    return


def main():
    """Train on the homework data, then plot and print the statistics."""
    dataArray = ReadData("hw2_adaboost_train.dat")
    result = TrainAdaBoost(dataArray, 300)
    g_list = result["g_list"]
    alpha_list = result["alpha_list"]
    ein_g_list = result["ein_g_list"]
    ein_G_list = result["ein_G_list"]
    u_sum_list = result["u_sum_list"]
    epi_list = result["epi_list"]

    # The numeric tags below are presumably homework question numbers.
    # 12
    plot_line_chart(Y=ein_g_list)
    print("Ein(g1):", ein_g_list[0])
    print("alpha1:", alpha_list[0])
    # 14
    plot_line_chart(Y=ein_G_list, nameY="Ein(Gt)", saveName="14.png")
    print("Ein(G):", ein_G_list[-1])
    # 15
    plot_line_chart(Y=u_sum_list, nameY="Ut", saveName="15.png")
    print("U2:", u_sum_list[1])
    print("UT:", u_sum_list[-1])
    # 16
    plot_line_chart(Y=epi_list, nameY="epsilon_t", saveName="16.png")
    print("the minimum value of epsilon_t:", min(epi_list))

    testArray = ReadData("hw2_adaboost_test.dat")
    testX = testArray[:, :-1]
    testY = testArray[:, -1]
    pred_g_list = [Pred(g, testX) for g in g_list]
    eout_g_list = [GetZeroOneError(pred_g, testY) for pred_g in pred_g_list]
    eout_G_list = GetAggregatedErrors(alpha_list, pred_g_list, testY)
    # 17
    plot_line_chart(Y=eout_g_list, nameY="Eout(gt)", saveName="17.png")
    print("Eout(g1):", eout_g_list[0])
    # 18
    plot_line_chart(Y=eout_G_list, nameY="Eout(Gt)", saveName="18.png")
    print("Eout(G):", eout_G_list[-1])


if __name__ == "__main__":
    main()

运行结果













Kernel Ridge Regression

问题描述

程序实现

# coding:utf-8

import numpy as np
import math


def ReadData(dataFile):
    """Load a whitespace-separated numeric text file, prepending 1.0 to each row.

    Each non-blank line becomes one row of ``[1.0, field_0, field_1, ...]``
    with every field parsed by ``float``.  Blank lines are skipped so they
    cannot produce a ragged array.

    Args:
        dataFile: Path of the text file to read.

    Returns:
        ``np.ndarray`` built from the parsed rows.
    """
    data_list = []
    with open(dataFile, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate empty / trailing blank lines
            data_list.append([1.0] + [float(v) for v in fields])
    return np.array(data_list)


def sign(n):
    """Return 1 when ``n >= 0`` and -1 otherwise (zero maps to +1)."""
    return 1 if n >= 0 else -1


def RBFKernel(X1, X2, gamma):
    """Return ``exp(-gamma * ||X1 - X2||^2)`` for two vectors."""
    return math.exp(-gamma * np.sum(np.square(X1 - X2)))


def GetKernelMatrix(trainX, dataX, gamma):
    """Return the RBF kernel matrix between the rows of two arrays.

    Entry ``[i, j]`` equals ``exp(-gamma * ||trainX[i] - dataX[j]||^2)``.
    All pairwise differences are formed in one broadcast operation, which
    allocates a temporary of shape ``(num_train, num_data, num_features)``.

    Args:
        trainX: 2-D array, one sample per row.
        dataX: 2-D array with the same number of columns as ``trainX``.
        gamma: RBF width parameter.

    Returns:
        Array of shape ``(trainX.shape[0], dataX.shape[0])``.
    """
    trainX = np.asarray(trainX, dtype=float)
    dataX = np.asarray(dataX, dtype=float)
    diff = trainX[:, np.newaxis, :] - dataX[np.newaxis, :, :]
    sq_dist = np.sum(np.square(diff), axis=2)
    return np.exp(-gamma * sq_dist)


def GetZeroOneError(pred, dataY):
    """Return the fraction of positions where ``pred`` differs from ``dataY``."""
    return np.sum(np.not_equal(pred, dataY)) / dataY.shape[0]


def KernelRidgeRegression(trainArray, lamb, gamma):
    """Solve ``(lamb * I + K) beta = y`` for the kernel ridge coefficients.

    Args:
        trainArray: 2-D array whose last column is the target and whose
            other columns are features.
        lamb: Regularisation strength added to the kernel's diagonal.
        gamma: RBF width parameter passed to ``GetKernelMatrix``.

    Returns:
        ``beta`` as an array of shape ``(num_train, 1)``.

    Raises:
        numpy.linalg.LinAlgError: If ``lamb * I + K`` is singular.
    """
    num_train = trainArray.shape[0]
    trainX = trainArray[:, :-1]
    trainY = trainArray[:, -1].reshape((num_train, 1))
    K = GetKernelMatrix(trainX, trainX, gamma)
    # Solving the linear system avoids forming an explicit inverse.
    beta = np.linalg.solve(lamb * np.eye(num_train) + K, trainY)
    return beta


def Predict(trainX, dataX, beta, gamma):
    """Return +1/-1 predictions for ``dataX``.

    Computes ``sign(K(trainX, dataX)^T beta)`` where a score of exactly zero
    maps to +1.

    Args:
        trainX: Training features used to fit ``beta``.
        dataX: Features to classify.
        beta: Coefficients of shape ``(num_train, 1)``.
        gamma: RBF width parameter; must match the one used for ``beta``.

    Returns:
        1-D float array of length ``dataX.shape[0]`` holding +1.0 / -1.0.
    """
    num_data = dataX.shape[0]
    K = GetKernelMatrix(trainX, dataX, gamma)
    scores = np.dot(K.transpose(), beta).reshape((num_data,))
    return np.where(scores >= 0, 1.0, -1.0)


def main():
    """Grid-search gamma/lambda on the homework data and print the best errors."""
    dataArray = ReadData("hw2_lssvm_all.dat")
    trainArray = dataArray[:400, :]
    testArray = dataArray[400:, :]
    gammaList = [32, 2, 0.125]
    lambdaList = [0.001, 1, 1000]

    trainX, trainY = trainArray[:, :-1], trainArray[:, -1]
    testX, testY = testArray[:, :-1], testArray[:, -1]

    # Keep the parameters next to each score so that the best combination
    # can be looked up without assuming the length of either list.
    combos = [(l, g) for l in lambdaList for g in gammaList]
    ein_list = []
    eout_list = []
    for l, g in combos:
        beta = KernelRidgeRegression(trainArray, l, g)
        ein_list.append(GetZeroOneError(Predict(trainX, trainX, beta, g), trainY))
        eout_list.append(GetZeroOneError(Predict(trainX, testX, beta, g), testY))

    min_ein = min(ein_list)
    ein_lambda, ein_gamma = combos[ein_list.index(min_ein)]
    min_eout = min(eout_list)
    eout_lambda, eout_gamma = combos[eout_list.index(min_eout)]

    # The numeric tags below are presumably homework question numbers.
    # 19
    print("the minimum Ein(g):", min_ein, ",the corresponding parameter combinations: gamma=", ein_gamma, ",lambda=", ein_lambda)
    # 20
    print("the minimum Eout(g):", min_eout, ",the corresponding parameter combinations: gamma=", eout_gamma, ",lambda=", eout_lambda)


if __name__ == "__main__":
    main()

运行结果

机器学习技法笔记:Homework #6 AdaBoost&Kernel Ridge Regression相关习题的更多相关文章

  1. 机器学习技法笔记(2)-Linear SVM

    从这一节开始学习机器学习技法课程中的SVM, 这一节主要介绍标准形式的SVM: Linear SVM 引入SVM 首先回顾Perceptron Learning Algorithm(感知器算法PLA)是 ...

  2. 机器学习技法笔记:06 Support Vector Regression

    Roadmap Kernel Ridge Regression Support Vector Regression Primal Support Vector Regression Dual Summ ...

  3. support vector regression与 kernel ridge regression

    前一篇,我们将SVM与logistic regression联系起来,这一次我们将SVM与ridge regression(之前的linear regression)联系起来. (一)kernel r ...

  4. Kernel ridge regression(KRR)

    作者:桂. 时间:2017-05-23  15:52:51 链接:http://www.cnblogs.com/xingshansi/p/6895710.html 一.理论描述 Kernel ridg ...

  5. 机器学习技法笔记:05 Kernel Logistic Regression

    Roadmap Soft-Margin SVM as Regularized Model SVM versus Logistic Regression SVM for Soft Binary Clas ...

  6. 机器学习技法笔记:03 Kernel Support Vector Machine

    Roadmap Kernel Trick Polynomial Kernel Gaussian Kernel Comparison of Kernels Summary

  7. 机器学习技法笔记:Homework #5 特征变换&Soft-Margin SVM相关习题

    原文地址:https://www.jianshu.com/p/6bf801bdc644 特征变换 问题描述 程序实现 # coding: utf-8 import numpy as np from c ...

  8. 机器学习技法笔记:Homework #8 kNN&RBF&k-Means相关习题

    原文地址:https://www.jianshu.com/p/1db700f866ee 问题描述 程序实现 # kNN_RBFN.py # coding:utf-8 import numpy as n ...

  9. 机器学习技法笔记:Homework #7 Decision Tree&Random Forest相关习题

    原文地址:https://www.jianshu.com/p/7ff6fd6fc99f 问题描述 程序实现 13-15 # coding:utf-8 # decision_tree.py import ...

随机推荐

  1. Windows-WAMP搭建与配置

    使用 WampServer 整合软件包进行 WAMP 环境搭建 WampServer 是一款由法国人开发的 Apache Web 服务器.PHP 解释器以及 MySQL 数据库的整合软件包.免去了开发 ...

  2. 76、python学习第二篇

    生成随机数的测试数据 ''' Created on 2017年4月8日 @author: weizhen #to create data for testing ''' import random f ...

  3. java %d %n \n

    Java中,%d和%f分别用来表示输出时,替换整型输出和浮点型输出的占位符. 如: int a=28; float b = 13.0f; System.out.printf("整数是:%d% ...

  4. ROS编程: 重要的代码优化知识点记录(1)

    订阅多个话题并对其进行同步处理 本小节针对在ROS节点中需要订阅两个及两个以上的话题时,需要保持对这两个话题数据的同步,且需要同时接收数据一起处理然后当做参数传入到另一个函数中: 研究背景:reals ...

  5. Pandas DataFrame操作

    DataFrame的创建 >>> import pandas as pd >>> from pandas import DataFrame #define a di ...

  6. jQuery选择器中空格的问题再探究

    jQuery选择器的空格问题,看似很小,但是差之毫厘谬以千里,让人很是恼火,<锋利的jQuery>书中有个经典的例子,我这里也拷贝下来,再加点自己的想法 <html> < ...

  7. vue自定义指令clickoutside实现点击其他元素才会触发

    clickoutside.js // 代码内容 const clickoutsideContext = '@@clickoutsideContext'; export default { bind(e ...

  8. 14-vim-替换命令-01-替换

    命令 英文 功能 工作模式 r replace 替换当前字符 命令模式 R replace 进入替换模式 替换模式 R命令进入替换模式,输入新字符替换当前光标所在位置的字符,替换完成后,按下ESC可以 ...

  9. UVALive 4794 Sharing Chocolate

    Sharing Chocolate Chocolate in its many forms is enjoyed by millions of people around the world ever ...

  10. postgresql 取出分组中最大的几条数据

    WITH Name AS ( SELECT * FROM ( SELECT xzqdm, , ) xzdm, COUNT (*) sl FROM sddltb_qc WHERE xzqdm ') GR ...