A simple implementation of the BP algorithm on the MNIST dataset
Data: http://yann.lecun.com/exdb/mnist/
References: blog, blog2, blog3, tensorflow
Basic implementation
import struct
from math import sqrt

import numpy as np


class Data:
    def __init__(self):
        print('parameter initializing...')
        self.num_train = 50000      # training samples
        self.num_confirm = 10000    # confirmation (validation) samples
        self.num_test = 10000       # test samples
        self.node_in = 28 * 28      # input nodes: one per pixel
        self.node_out = 10          # output nodes: one per digit class
        # hyper-parameters that need tuning; some observed results:
        # epoch:8  hide_node:39  accuracy:0.9613
        # epoch:8  hide_node:44  accuracy:0.9612
        # epoch:8  hide_node:48  accuracy:0.9624
        # epoch:9  hide_node:48  accuracy:0.9648
        # epoch:10 hide_node:200 accuracy:0.9724
        self.epoch = 15
        self.node_hide = 30
        self.study_rate = 0.05
        self.error_limit = 1e-2
    def read_train_image(self, filename):
        print('reading train-image data...')
        binfile = open(filename, 'rb')
        buffer = binfile.read()
        index = 0
        # header: magic number, image count, rows, columns ('>' = big-endian, 'I' = unsigned int)
        magic, num, rows, columns = struct.unpack_from('>IIII', buffer, index)
        index += struct.calcsize('>IIII')
        for i in range(self.num_train):
            im = struct.unpack_from('784B', buffer, index)  # 28*28 = 784 unsigned bytes per image
            index += struct.calcsize('784B')
            im = np.array(im)
            im = im.reshape(1, 784) / 255.0  # flatten 28x28 to a 784-vector, normalize to [0, 1]
            self.train_imag_list[i, :] = im
        # the remaining images form the confirmation (validation) set
        j = 0
        for i in range(self.num_train, self.num_train + self.num_confirm):
            im = struct.unpack_from('784B', buffer, index)
            index += struct.calcsize('784B')
            im = np.array(im)
            im = im.reshape(1, 784) / 255.0
            self.confirm_imag_list[j, :] = im
            j = j + 1
    def read_train_label(self, filename):
        print('reading train-label data...')
        binfile = open(filename, 'rb')
        buffer = binfile.read()
        index = 0
        magic, num = struct.unpack_from('>II', buffer, index)  # header: magic number, label count
        index += struct.calcsize('>II')
        for i in range(self.num_train):
            lb = struct.unpack_from('B', buffer, index)
            index += struct.calcsize('B')
            lb = int(lb[0])
            self.train_label_list[i, :] = lb
        j = 0
        for i in range(self.num_train, self.num_train + self.num_confirm):
            lb = struct.unpack_from('B', buffer, index)
            index += struct.calcsize('B')
            lb = int(lb[0])
            self.confirm_label_list[j, :] = lb
            j = j + 1
    def read_test_image(self, filename):
        print('reading test-image data...')
        binfile = open(filename, 'rb')
        buffer = binfile.read()
        index = 0
        magic, num, rows, columns = struct.unpack_from('>IIII', buffer, index)
        index += struct.calcsize('>IIII')
        for i in range(self.num_test):
            im = struct.unpack_from('784B', buffer, index)
            index += struct.calcsize('784B')
            im = np.array(im)
            im = im.reshape(1, 784) / 255.0  # normalize the same way as the training images
            self.test_imag_list[i, :] = im
    def read_test_label(self, filename):
        print('reading test-label data...')
        binfile = open(filename, 'rb')
        buffer = binfile.read()
        index = 0
        magic, num = struct.unpack_from('>II', buffer, index)
        index += struct.calcsize('>II')
        for i in range(self.num_test):
            lb = struct.unpack_from('B', buffer, index)
            index += struct.calcsize('B')
            lb = int(lb[0])
            self.test_label_list[i, :] = lb
    def init_network(self):
        print('network initializing...')
        self.train_imag_list = np.zeros((self.num_train, self.node_in))
        self.train_label_list = np.zeros((self.num_train, 1))
        self.confirm_imag_list = np.zeros((self.num_confirm, self.node_in))
        self.confirm_label_list = np.zeros((self.num_confirm, 1))
        self.test_imag_list = np.zeros((self.num_test, self.node_in))
        self.test_label_list = np.zeros((self.num_test, 1))
        self.read_train_image('train-images.idx3-ubyte')
        self.read_train_label('train-labels.idx1-ubyte')
        self.read_test_image('t10k-images.idx3-ubyte')
        self.read_test_label('t10k-labels.idx1-ubyte')
        # weights drawn uniformly from [-1/sqrt(fan_in), 1/sqrt(fan_in)]
        self.wjk = (np.random.rand(self.node_hide, self.node_out) - 0.5) * 2 / sqrt(self.node_hide)
        self.wj0 = (np.random.rand(self.node_out) - 0.5) * 2 / sqrt(self.node_hide)
        self.wij = (np.random.rand(self.node_in, self.node_hide) - 0.5) * 2 / sqrt(self.node_in)
        self.wi0 = (np.random.rand(self.node_hide) - 0.5) * 2 / sqrt(self.node_in)
    def sigmode(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def calc_yjzk(self, sample_i, imag_list):
        # forward pass: input -> hidden activations yj -> output activations zk
        self.netj = np.dot(imag_list[sample_i], self.wij) + self.wi0
        self.yj = self.sigmode(self.netj)
        self.netk = np.dot(self.yj, self.wjk) + self.wj0
        self.zk = self.sigmode(self.netk)

    def calc_error(self):
        # squared error over the confirmation (validation) set
        ans = 0.0
        for sample_i in range(self.num_confirm):
            self.calc_yjzk(sample_i, self.confirm_imag_list)
            label_tmp = np.zeros(self.node_out)
            label_tmp[int(self.confirm_label_list[sample_i])] = 1
            ans = ans + sum(np.square(label_tmp - self.zk) / 2.0)
        return ans
    def training(self):
        print('training model...')
        for epoch_i in range(self.epoch):
            for circle in range(self.num_train):
                # pick a random training sample (stochastic gradient descent)
                sample_i = np.random.randint(0, self.num_train)
                # optional (slow) convergence check against the confirmation set:
                # error_before = self.calc_error()
                self.calc_yjzk(sample_i, self.train_imag_list)
                # one-hot target vector for this sample
                tmp_label = np.zeros(self.node_out)
                tmp_label[int(self.train_label_list[sample_i])] = 1
                # output-layer delta and hidden->output weight update
                delta_k = (self.zk - tmp_label) * self.zk * (1 - self.zk)
                self.yj.shape = (self.node_hide, 1)
                delta_k.shape = (1, self.node_out)
                self.wjk = self.wjk - self.study_rate * np.dot(self.yj, delta_k)
                # hidden-layer delta and input->hidden weight update
                # (note: delta_j uses the already-updated wjk; strict backpropagation
                # would use the pre-update weights, but the per-step difference is tiny)
                self.yj = self.yj.T
                delta_j = np.dot(delta_k, self.wjk.T) * self.yj * (1 - self.yj)
                tmp_imag = self.train_imag_list[sample_i]
                tmp_imag.shape = (self.node_in, 1)
                self.wij = self.wij - self.study_rate * np.dot(tmp_imag, delta_j)
                # (the biases wj0 and wi0 keep their initial values in this simple version)
                # optional early stop once the error change falls below error_limit:
                # if np.abs(error_before - self.calc_error()) < self.error_limit:
                #     break
            # print('error %d %.2f' % (epoch_i, self.calc_error()))
    def testing(self):
        print('testing...')
        num_right = 0.0
        for sample_i in range(self.num_test):
            self.calc_yjzk(sample_i, self.test_imag_list)
            ans = self.zk.argmax()  # predicted digit = output node with the largest activation
            if ans == int(self.test_label_list[sample_i]):
                num_right = num_right + 1
        self.accuracy = num_right / self.num_test
        print('accuracy: %.4f%%' % (self.accuracy * 100))


def main():
    data = Data()
    data.init_network()
    data.training()
    data.testing()


if __name__ == '__main__':
    main()
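For reference, the weight updates performed in training() are the standard delta rule for a two-layer sigmoid network trained on the squared error used in calc_error(); in symbols (this only restates what the code computes):

$$\delta_k = (z_k - t_k)\,z_k(1 - z_k), \qquad \delta_j = y_j(1 - y_j)\sum_k \delta_k\, w_{jk},$$
$$w_{jk} \leftarrow w_{jk} - \eta\, y_j\, \delta_k, \qquad w_{ij} \leftarrow w_{ij} - \eta\, x_i\, \delta_j,$$

where $x_i$ is an input pixel, $y_j$ a hidden activation, $z_k$ an output activation, $t_k$ the one-hot target, and $\eta$ the learning rate study_rate.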
Notes
- Mind the data encoding format, which is described at the bottom of the dataset page; the page also lists error rates of classic machine-learning models on MNIST for reference
- Initialize the weights sensibly; the number of epochs, the learning rate, and the number of hidden nodes can follow empirical values
- Normalize the data to keep the sigmoid out of its saturated/overflow range (see the short sketch after this list)
- Make sure matrix dimensions are compatible when multiplying
- Choose the number of epochs to match the learning rate: a small learning rate tolerates more epochs, a large one calls for fewer
- Once a suitable number of epochs has been confirmed, the confirmation (validation) set can be dropped and the model trained on all the samples
- More hidden nodes are basically better, within the sizes tried here
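A minimal sketch of the normalization point above (the numbers are illustrative assumptions, not taken from the experiments): with raw 0-255 pixel values the pre-activation of a hidden node easily lands in the saturated region of the sigmoid, whereas scaling by 255.0 keeps it in the responsive range.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

raw = np.random.randint(0, 256, size=784).astype(float)    # one fake image of raw pixel values
w = (np.random.rand(784) - 0.5) * 2 / np.sqrt(784)          # same init scale as the code above

print(sigmoid(np.dot(raw, w)))          # typically pinned near 0 or 1 (saturated)
print(sigmoid(np.dot(raw / 255.0, w)))  # stays in the responsive middle range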
Parameter-tuning script

import ann  # assumes the BP implementation above is saved as ann.py

f = open('best_parameter', 'a+')
for e in range(10, 40):
    for node in range(10, 50):
        data = ann.Data()
        data.node_hide = node
        data.epoch = e
        data.init_network()
        data.training()
        data.testing()
        ans = 'circling to get best parameter----->epoch:%d hide_node:%d accuracy:%.4f\n' % (e, node, data.accuracy)
        print(ans)
        f.write(ans)
f.close()
This loops over the number of epochs and the number of hidden nodes to measure their effect on accuracy. The rough pattern is that with a learning rate of 0.05, 10-15 epochs work well and 30 or more hidden nodes are preferable.
Some experimental results:
circling to get best parameter----->epoch:14 hide_node:43 accuracy:0.9656
circling to get best parameter----->epoch:14 hide_node:44 accuracy:0.9651
circling to get best parameter----->epoch:14 hide_node:45 accuracy:0.9638
circling to get best parameter----->epoch:14 hide_node:46 accuracy:0.9641
circling to get best parameter----->epoch:14 hide_node:47 accuracy:0.9649
circling to get best parameter----->epoch:14 hide_node:48 accuracy:0.9651
circling to get best parameter----->epoch:14 hide_node:49 accuracy:0.9671
circling to get best parameter----->epoch:15 hide_node:46 accuracy:0.9661
circling to get best parameter----->epoch:15 hide_node:47 accuracy:0.9660
circling to get best parameter----->epoch:15 hide_node:48 accuracy:0.9650
circling to get best parameter----->epoch:15 hide_node:49 accuracy:0.9655
circling to get best parameter----->epoch:10 hide_node:100 accuracy:0.9685
circling to get best parameter----->epoch:10 hide_node:200 accuracy:0.9724
circling to get best parameter----->epoch:10 hide_node:300 accuracy:0.9718
circling to get best parameter----->epoch:10 hide_node:1000 accuracy:0.9568
TensorFlow implementation
import argparse

# Import data
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

FLAGS = None


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


def add_layer(inputs, in_size, out_size, activation_function=None):
    # add a fully connected layer
    Weights = weight_variable([in_size, out_size])
    biases = bias_variable([out_size])
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs


def main(_):
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    # reshape the input to have batch size, width, height, channel size
    x = tf.placeholder(tf.float32, [None, 784])
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    # 5*5 patch size, input channel is 1, output channel is 32
    W_conv1 = weight_variable([5, 5, 1, 32])
    # bias, same size as the output channel
    b_conv1 = bias_variable([32])
    # the first convolutional layer with a max pooling layer
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    # after pooling, we have a tensor with shape [-1, 14, 14, 32]
    # the weights and bias for the second layer, which produces 64 channels
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    # the second convolutional layer with a max pooling layer
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)
    # after pooling, we have a tensor with shape [-1, 7, 7, 64]
    # add a fully connected layer with 1024 neurons and relu activation
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = add_layer(h_pool2_flat, 7 * 7 * 64, 1024, tf.nn.relu)
    # dropout on the fully connected layer to reduce overfitting
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    # finally, the output layer
    y_conv = add_layer(h_fc1_drop, 1024, 10, None)
    # loss function, optimizer, and accuracy
    y_ = tf.placeholder(tf.float32, [None, 10])
    cross_entropy = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # start training, evaluating the model every 100 steps
    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())
    for i in range(10000):
        batch = mnist.train.next_batch(100)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
            print("step %d, training accuracy %g, test accuracy %g" % (i, train_accuracy, test_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # modify the dir path to your own dataset
    parser.add_argument('--data_dir', type=str, default='/tmp/mnist',
                        help='Directory for storing data')
    FLAGS = parser.parse_args()
    tf.app.run()
This requires a working TensorFlow and Python 3.x environment.
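One compatibility note (an assumption about newer TensorFlow 1.x releases, not something stated in the post): tf.initialize_all_variables() is deprecated there, so on a recent 1.x install you would likely swap the session-initialization line for:

sess.run(tf.global_variables_initializer())  # replaces the deprecated tf.initialize_all_variables()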
The results are as follows:
step 0, training accuracy 0.06, test accuracy 0.0892
step 100, training accuracy 0.86, test accuracy 0.8692
step 200, training accuracy 0.97, test accuracy 0.9207
step 300, training accuracy 0.92, test accuracy 0.9403
step 400, training accuracy 0.95, test accuracy 0.9485
step 500, training accuracy 0.91, test accuracy 0.9522
step 600, training accuracy 0.97, test accuracy 0.9565
step 700, training accuracy 0.97, test accuracy 0.9622
step 800, training accuracy 0.96, test accuracy 0.9638
step 900, training accuracy 0.98, test accuracy 0.9687
step 1000, training accuracy 0.97, test accuracy 0.9703
If you run into any environment-setup problems, feel free to get in touch; corrections are welcome.