A Theano-based deep convolutional neural network

The network stacks two convolutional-pooling layers, one fully connected layer, and a softmax classifier, and reaches 99.22% accuracy on the MNIST test set. The code is adapted from Michael Nielsen's neural-networks-and-deep-learning (network3.py) and targets Python 2 with a legacy Theano release (hence cPickle, xrange, and theano.tensor.signal.downsample).
#coding:utf8
import cPickle

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import sigmoid
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

def ReLU(z): return T.maximum(0.0, z)

def load_data_shared():
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()

    def shared(data):
        # wrap the data in shared variables so Theano can place them on the GPU
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")

    return [shared(training_data), shared(validation_data), shared(test_data)]
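load_data_shared expects an mnist.pkl file in the working directory. Assuming it is the standard MNIST pickle from deeplearning.net (mnist.pkl.gz, unzipped), each of the three splits is an (images, labels) pair; a minimal sketch to check this, under that assumption:

# a minimal sketch, assuming mnist.pkl is the standard deeplearning.net pickle
import cPickle
with open('mnist.pkl', 'rb') as f:
    train, valid, test = cPickle.load(f)
print train[0].shape, train[1].shape   # expected: (50000, 784) (50000,)
print valid[0].shape, test[0].shape    # expected: (10000, 784) (10000, 784)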
class Network(object):

    def __init__(self, layers, mini_batch_size):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]  # every layer's w and b, flattened into one list
        self.x = T.matrix("x")
        self.y = T.ivector("y")  # one-dimensional vector of integer labels
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]  # feed layer j-1's output into layer j
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size

        # L2-regularized cost: the data cost plus a weight-decay term
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)  # gradients follow from the cost via T.grad; no hand-coded derivative ("prime") functions
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })

        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}, cost={2}".format(
                        epoch, validation_accuracy, cost_ij))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
class ConvPoolLayer(object):
    # a convolutional layer followed by max-pooling

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=ReLU):
        self.filter_shape = filter_shape  # e.g. (20, 1, 5, 5): 20 filters over 1 input feature map, 5x5 kernels
        self.image_shape = image_shape    # e.g. (10, 1, 28, 28); the second entry must match filter_shape[1]
        self.poolsize = poolsize          # e.g. (2, 2)
        self.activation_fn = activation_fn  # defaults to ReLU; sigmoid is another option (see the results at the bottom)
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))  # 20*5*5/(2*2) = 125
        self.w = theano.shared(  # weights, shape (20, 1, 5, 5)
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(  # one bias per filter
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)  # (10, 1, 28, 28)
        conv_out = conv.conv2d(  # 'valid' convolution: 28-5+1=24, shape (10, 20, 24, 24)
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(  # 24/2=12, shape (10, 20, 12, 12)
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(  # (10, 20, 12, 12) + (1, 20, 1, 1) broadcasts elementwise
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))  # reshape b to (1, 20, 1, 1)
        self.output_dropout = self.output  # no dropout in the convolutional layers
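The shape bookkeeping in the comments above follows one rule: a 'valid' convolution shrinks each spatial dimension by filter_size-1, then a non-overlapping 2x2 max-pool halves it. A tiny helper makes the arithmetic explicit (conv_pool_output is a hypothetical name for illustration, not part of the network code):

def conv_pool_output(size, filter_size=5, pool=2):
    # 'valid' convolution, then non-overlapping max-pooling
    return (size - filter_size + 1) // pool

print conv_pool_output(28)  # -> 12, output of the first ConvPoolLayer
print conv_pool_output(12)  # -> 4,  output of the second; hence n_in=40*4*4 below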
class FullyConnectedLayer(object):

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)  # scale by (1-p_dropout) when not dropping out
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))
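The cost method of SoftmaxLayer below picks out, for each sample in the batch, the log of the probability assigned to its true class, via the indexing expression T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y]. A small numpy sketch of the same indexing trick, with made-up probabilities:

# numpy illustration of the negative log-likelihood cost used by SoftmaxLayer
probs = np.array([[0.7, 0.2, 0.1],   # softmax output for sample 0
                  [0.1, 0.8, 0.1]])  # softmax output for sample 1
y = np.array([0, 1])                 # true labels
picked = probs[np.arange(2), y]      # -> [0.7, 0.8]
nll = -np.mean(np.log(picked))       # the quantity the network minimizes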
class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        self.w = theano.shared(  # zero-initialized, unlike the other layers
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)  # theano.tensor.nnet.softmax
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))

def size(data):  # number of samples in a shared dataset
    return len(data[0].get_value())

def dropout_layer(layer, p_dropout):  # randomly zero a fraction p_dropout of the layer's units
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
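dropout_layer builds a random binary mask with the same (symbolic) shape as its input and multiplies it in. A minimal sketch of it in isolation, reusing theano, T, and np from the listing (a hypothetical check, not part of the network):

# a minimal sketch: dropout_layer applied to a vector of ones
v = T.vector('v')
f = theano.function([v], dropout_layer(v, 0.5))
print f(np.ones(8, dtype=theano.config.floatX))  # roughly half the entries zeroed, the rest unchanged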
if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5),
                      poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    net.SGD(training_data, 30, mini_batch_size, 0.1,
            validation_data, test_data)

# Results with sigmoid ConvPoolLayers:
# Epoch 29: validation accuracy 98.96%, cost=9.70275432337e-05
# This is the best validation accuracy to date.
# The corresponding test accuracy is 98.86%
#
# Results with ReLU ConvPoolLayers:
# Epoch 29: validation accuracy 99.06%, cost=4.11269593315e-06
# This is the best validation accuracy to date.
# The corresponding test accuracy is 99.22%
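Once SGD has run, the test_mb_predictions function compiled inside it remains attached to the net object, so individual mini-batch predictions can be inspected; a short usage sketch:

# predicted digit labels for the first test mini-batch (10 images)
preds = net.test_mb_predictions(0)
print preds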