Simple Machine Learning Code (LR, Kmeans, NN, RNN)
Logistic Regression
Pay particular attention to how exp and log are used.
The raw sigmoid expression is 1 / (1 + exp(-z)), but plugging in z = -710 directly overflows, since exp(710) exceeds the float64 range. So for z < 0 we rewrite it as exp(z) / (1 + exp(z)); exp(-710) then merely underflows to 0 and causes no trouble. This is exactly what the expit function in scipy does.
log_logistic = log(sigmoid); like expit, it splits on the sign of z so that each branch stays numerically stable.
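The case split can also be written out by hand. Below is a minimal scalar sketch; the helper names stable_sigmoid and stable_log_sigmoid are my own, while the actual code that follows simply calls expit and log_logistic.

import numpy as np

def stable_sigmoid(z):
    # for z >= 0, exp(-z) <= 1, so 1 / (1 + exp(-z)) cannot overflow
    if z >= 0:
        return 1.0 / (1.0 + np.exp(-z))
    # for z < 0, rewrite as exp(z) / (1 + exp(z)); exp(z) can only underflow to 0
    ez = np.exp(z)
    return ez / (1.0 + ez)

def stable_log_sigmoid(z):
    # log(sigmoid(z)) with the same case split
    if z >= 0:
        return -np.log1p(np.exp(-z))
    return z - np.log1p(np.exp(z))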
import numpy as np
from scipy.special import expit
from sklearn.utils.extmath import log_logistic

def predict(theta, x):
    return expit(x.dot(theta))

def compute_loss(y, yz):
    # yz is the signed margin y*z with y in {-1, +1}; log_logistic(yz) = log(sigmoid(yz))
    return - np.sum(log_logistic(yz))

def gradientdescent(x, y, theta, iterations=2000, lr=0.01):
    m = x.shape[0]  # number of samples; labels y are assumed to be in {0, 1}
    loss_list = []
    for i in range(iterations):
        yhat = predict(theta, x)
        delta = x.T.dot(yhat - y) / m
        # map {0, 1} labels to {-1, +1} so that -log_logistic gives the usual cross-entropy
        loss = compute_loss(y, (2 * y - 1) * x.dot(theta))
        loss_list.append(loss)
        theta = theta - lr * delta
    return theta, loss_list

# X: (m, n) design matrix, y: (m, 1) labels in {0, 1}
theta, loss_list = gradientdescent(X, y, np.zeros((n, 1)))
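The call above assumes X, y and n are already defined. A small synthetic setup (purely illustrative, not part of the original code) could look like this:

import numpy as np

np.random.seed(0)
m, n = 200, 2
X = np.vstack([np.random.randn(m // 2, n) + 2,    # positive class
               np.random.randn(m // 2, n) - 2])   # negative class
y = np.vstack([np.ones((m // 2, 1)), np.zeros((m // 2, 1))])

theta, loss_list = gradientdescent(X, y, np.zeros((n, 1)))
acc = np.mean((predict(theta, X) > 0.5) == y)
print('final loss %.4f, accuracy %.2f' % (loss_list[-1], acc))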
Kmeans
Kmeans is essentially the EM algorithm, except that it makes hard assignments instead of soft ones. First initialize K centers; in the E step, assign each sample to its nearest center; in the M step, recompute each center to minimize the within-cluster distance. The implementation and a small usage sketch follow below.
import numpy as np

def calc_dist(x1, x2):
    return sum([(x1[i] - x2[i])**2 for i in range(len(x1))])

# Assign samples to given centers
def E_step(X, cents):
    cent_dict = dict(zip(cents, [[] for _ in range(len(cents))]))
    for row in X:
        min_dist, best_cent = 1e10, None
        for cent in cent_dict:
            dist = calc_dist(row, cent)
            if dist < min_dist:
                min_dist = dist
                best_cent = cent
        cent_dict[best_cent] += [row.tolist()]
    return cent_dict

# Compute new centers
def M_step(cent_dict):
    new_cents = []
    for cent in cent_dict:
        new_cent = np.mean(np.array(cent_dict[cent]), axis=0)
        new_cents.append(tuple(new_cent))
    return new_cents

def Kmeans(X, K=3, max_iter=10):
    np.random.seed(1)
    # sample K distinct rows as the initial centers
    inds = np.random.choice(len(X), K, replace=False)
    init_cents = [tuple(X[i]) for i in inds]
    cents = init_cents
    for k in range(max_iter):
        cent_dict = E_step(X, cents)
        new_cents = M_step(cent_dict)
        # total squared movement of the centers; stop when they barely move
        move = sum([calc_dist(c1, c2) for c1, c2 in zip(cents, new_cents)])
        if move < 0.1:
            print('Converged in %s steps' % k)
            break
        cents = new_cents
    return cent_dict
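A quick usage sketch on synthetic 2-D blobs (illustrative only, not from the original post):

import numpy as np

np.random.seed(0)
X = np.vstack([np.random.randn(50, 2) + c for c in ([0, 0], [5, 5], [0, 5])])

cent_dict = Kmeans(X, K=3, max_iter=10)
for cent, members in cent_dict.items():
    print('center %s has %d points' % (np.round(cent, 2), len(members)))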
Neural Network
Note the softmax computation: exp can overflow, so the numerator and denominator of the softmax are both multiplied by a constant C with log(C) = -max(z); this is what shift_scores does below.
scipy's logsumexp is used here for the same reason as in LR: logsumexp(z) = log(sum(exp(z))), computed in a numerically stable way.
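A quick numeric check of both tricks (the numbers are purely illustrative):

import numpy as np
from scipy.special import logsumexp

z = np.array([[1000.0, 1000.0, 1000.0]])
naive = np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)            # exp overflows -> nan
shift = z - np.max(z, axis=1, keepdims=True)                            # log(C) = -max(z)
stable = np.exp(shift) / np.sum(np.exp(shift), axis=1, keepdims=True)   # [[1/3, 1/3, 1/3]]
log_softmax = shift - logsumexp(shift, axis=1, keepdims=True)           # used in the loss below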
from scipy.special import logsumexp
import numpy as np

class Neural_Network:

    def __init__(self, n, h, c, std=1e-4):
        # n: input dim, h: hidden dim, c: number of classes
        W1 = np.random.randn(n, h) * std
        b1 = np.zeros(h)
        W2 = np.random.randn(h, c) * std
        b2 = np.zeros(c)
        self.params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    def forward_backward_prop(self, X, y):
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        # forward prop
        hidden = X.dot(W1) + b1
        relu = np.maximum(0, hidden)
        scores = relu.dot(W2) + b2
        shift_scores = scores - np.max(scores, axis=1, keepdims=True)
        softmax = np.exp(shift_scores) / np.sum(np.exp(shift_scores), axis=1, keepdims=True)
        # cross-entropy with one-hot y, computed via log-softmax for stability
        loss = - np.sum(y * (shift_scores - logsumexp(shift_scores, axis=1, keepdims=True))) / X.shape[0]
        # backward prop
        dscores = (softmax - y) / X.shape[0]
        drelu = dscores.dot(W2.T)
        dW2 = relu.T.dot(dscores)
        db2 = np.sum(dscores, axis=0)
        dhidden = (hidden > 0) * drelu
        dW1 = X.T.dot(dhidden)
        db1 = np.sum(dhidden, axis=0)
        grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}
        return loss, grads

    def train(self, X, y, lr=0.01, decay=0.95, iters=5000):
        loss_list, acc_list = [], []
        for it in range(iters):
            loss, grads = self.forward_backward_prop(X, y)
            loss_list.append(loss)
            self.params['W1'] -= lr * grads['dW1']
            self.params['b1'] -= lr * grads['db1']
            self.params['W2'] -= lr * grads['dW2']
            self.params['b2'] -= lr * grads['db2']
            if it % 100 == 0:
                yhat = self.predict(X)
                acc = np.sum(np.argmax(y, axis=1) == yhat) / X.shape[0]
                acc_list.append(acc)
                lr *= decay
        return loss_list, acc_list

    def predict(self, X):
        hidden = X.dot(self.params['W1']) + self.params['b1']
        relu = np.maximum(0, hidden)
        scores = relu.dot(self.params['W2']) + self.params['b2']
        yhat = np.argmax(scores, axis=1)
        return yhat
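A small usage sketch on random data (shapes and hyper-parameters are purely illustrative; y is assumed to be one-hot, matching the loss above):

import numpy as np

np.random.seed(0)
m, n, h, c = 100, 4, 10, 3                 # samples, input dim, hidden dim, classes
X = np.random.randn(m, n)
y = np.eye(c)[np.random.randint(0, c, m)]  # one-hot targets, shape (m, c)

net = Neural_Network(n, h, c)
loss_list, acc_list = net.train(X, y, lr=0.1, iters=1000)
print('final loss %.4f, train accuracy %.2f' % (loss_list[-1], acc_list[-1]))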
Recurrent Neural Network
import numpy as np

def tanh(x):
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

def softmax(x):
    ex = np.exp(x - np.max(x))
    return ex / ex.sum(axis=0)

class RNN:

    def __init__(self, na, nx, ny, m, seed=1):
        np.random.seed(seed)
        Waa = np.random.randn(na, na)
        Wax = np.random.randn(na, nx)
        Wya = np.random.randn(ny, na)
        ba = np.random.randn(na, 1)
        by = np.random.randn(ny, 1)
        self.a0 = np.random.randn(na, m)
        self.params = {'Waa': Waa, 'Wax': Wax, 'Wya': Wya, 'ba': ba, 'by': by}

    def RNN_cell_forward(self, xt, a_prev):
        """
        Inputs:
        xt -- Current input data, of shape (nx, m)
        a_prev -- Previous hidden state, of shape (na, m)

        Outputs:
        at -- Current hidden state, of shape (na, m)
        yt -- Current prediction, of shape (ny, m)
        """
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']
        at = tanh(Waa.dot(a_prev) + Wax.dot(xt) + ba)
        score = Wya.dot(at) + by
        yt = softmax(score)
        return at, yt

    def RNN_forward(self, X, y):
        """
        Inputs:
        X -- Input data for every time step, of shape (nx, m, Tx)
        y -- Target for every time step, of shape (ny, m, Tx)

        Outputs:
        loss -- Total cross-entropy loss over all time steps
        cache -- Tuple (a, yhat): hidden states of shape (na, m, Tx) and predictions of shape (ny, m, Tx)
        """
        a_prev = self.a0
        na, m = a_prev.shape
        ny = y.shape[0]
        Tx = X.shape[2]
        a = np.zeros((na, m, Tx))
        yhat = np.zeros((ny, m, Tx))
        loss = 0
        for t in range(Tx):
            a_next, yt = self.RNN_cell_forward(X[:, :, t], a_prev)
            yhat[:, :, t] = yt
            a[:, :, t] = a_next
            # cross-entropy: sum over samples of -log(predicted probability of the true class)
            loss -= np.sum(np.log(np.sum(yt * y[:, :, t], axis=0)))
            a_prev = a_next
        cache = (a, yhat)
        return loss, cache

    def RNN_cell_backward(self, dz, grads, cache):
        """
        Inputs:
        dz -- Gradient of loss with respect to score
        grads -- Dictionary containing all gradients
        cache -- Tuple containing xt, a_next, a_prev

        Outputs:
        grads -- Dictionary containing all gradients
        """
        xt, a_next, a_prev = cache
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']
        grads['dWya'] += dz.dot(a_next.T)
        grads['dby'] += np.sum(dz, axis=1, keepdims=True)
        da_y = Wya.T.dot(dz)
        da_a = grads['da_prev']
        da_next = da_y + da_a  # da flows in along two paths: from the output (da_y) and from the next time step (da_a)
        dtanh = (1 - a_next**2) * da_next
        grads['dWaa'] += dtanh.dot(a_prev.T)
        grads['da_prev'] = Waa.T.dot(dtanh)
        grads['dWax'] += dtanh.dot(xt.T)
        grads['dba'] += np.sum(dtanh, axis=1, keepdims=True)
        return grads

    def RNN_backward(self, X, y, cache):
        """
        Inputs:
        X -- Input data for every time step, of shape (nx, m, Tx)
        y -- Target for every time step, of shape (ny, m, Tx)
        cache -- Tuple from RNN_forward, containing a, yhat

        Outputs:
        grads -- Dictionary containing all gradients
        a -- Hidden states for every time step, of shape (na, m, Tx)
        """
        a, yhat = cache
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']
        Tx = X.shape[2]
        grads = {}
        grads['dWya'], grads['dby'] = np.zeros_like(Wya), np.zeros_like(by)
        grads['dWaa'], grads['da_prev'] = np.zeros_like(Waa), np.zeros_like(self.a0)
        grads['dWax'], grads['dba'] = np.zeros_like(Wax), np.zeros_like(ba)
        for t in reversed(range(Tx)):
            # gradient of the loss with respect to the score (softmax + cross-entropy)
            dz = yhat[:, :, t] - y[:, :, t]
            # at t = 0 the previous hidden state is a0, not a[:, :, -1]
            a_prev = a[:, :, t-1] if t > 0 else self.a0
            cell_cache = X[:, :, t], a[:, :, t], a_prev
            grads = self.RNN_cell_backward(dz, grads, cell_cache)
        return grads, a

    def update_parameters(self, grads, lr):
        self.params['Wax'] -= lr * grads['dWax']
        self.params['Waa'] -= lr * grads['dWaa']
        self.params['Wya'] -= lr * grads['dWya']
        self.params['ba'] -= lr * grads['dba']
        self.params['by'] -= lr * grads['dby']

    def clip(self, grads, maxValue):
        for key in ['dWax', 'dWaa', 'dWya', 'dba', 'dby']:
            gradient = grads[key]
            grads[key] = np.clip(gradient, -maxValue, maxValue, out=gradient)
        return grads

    def train(self, X, y, lr, iters=1):
        loss_list = []
        for it in range(iters):
            loss, cache = self.RNN_forward(X, y)
            grads, a = self.RNN_backward(X, y, cache)
            # Clip gradients to [-5, 5]
            grads = self.clip(grads, 5)
            self.update_parameters(grads, lr)
            loss_list.append(loss)
        return loss, grads, a
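A minimal usage sketch with random one-hot inputs and targets (shapes are purely illustrative):

import numpy as np

na, nx, ny, m, Tx = 8, 5, 5, 4, 10   # hidden size, input dim, output dim, batch size, time steps
rnn = RNN(na, nx, ny, m)

np.random.seed(0)
X = np.eye(nx)[np.random.randint(0, nx, (m, Tx))].transpose(2, 0, 1)   # shape (nx, m, Tx)
y = np.eye(ny)[np.random.randint(0, ny, (m, Tx))].transpose(2, 0, 1)   # shape (ny, m, Tx)

loss, grads, a = rnn.train(X, y, lr=0.01, iters=10)
print('loss after 10 iterations: %.4f' % loss)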