A Theano-based deep convolutional neural network

The network stacks two convolutional-pooling layers, one fully connected layer, and a softmax classifier, and reaches 99.22% accuracy on the MNIST test set. The code is adapted from Michael Nielsen's neural-networks-and-deep-learning (network3.py) and targets Python 2 with a legacy Theano release (hence cPickle, xrange, and theano.tensor.signal.downsample).
#coding:utf8
import cPickle

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import sigmoid
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

def ReLU(z): return T.maximum(0.0, z)

def load_data_shared():
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()

    def shared(data):
        # wrap the data in shared variables so Theano can place them on the GPU
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")

    return [shared(training_data), shared(validation_data), shared(test_data)]
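load_data_shared expects an mnist.pkl file in the working directory. Assuming it is the standard MNIST pickle from deeplearning.net (mnist.pkl.gz, unzipped), each of the three splits is an (images, labels) pair; a minimal sketch to check this, under that assumption:

# a minimal sketch, assuming mnist.pkl is the standard deeplearning.net pickle
import cPickle
with open('mnist.pkl', 'rb') as f:
    train, valid, test = cPickle.load(f)
print train[0].shape, train[1].shape   # expected: (50000, 784) (50000,)
print valid[0].shape, test[0].shape    # expected: (10000, 784) (10000, 784)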
class Network(object):

    def __init__(self, layers, mini_batch_size):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]  # every layer's w and b, flattened into one list
        self.x = T.matrix("x")
        self.y = T.ivector("y")  # one-dimensional vector of integer labels
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]  # feed layer j-1's output into layer j
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size

        # L2-regularized cost: the data cost plus a weight-decay term
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)  # gradients follow from the cost via T.grad; no hand-coded derivative ("prime") functions
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })

        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}, cost={2}".format(
                        epoch, validation_accuracy, cost_ij))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
class ConvPoolLayer(object):
    # a convolutional layer followed by max-pooling

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=ReLU):
        self.filter_shape = filter_shape  # e.g. (20, 1, 5, 5): 20 filters over 1 input feature map, 5x5 kernels
        self.image_shape = image_shape    # e.g. (10, 1, 28, 28); the second entry must match filter_shape[1]
        self.poolsize = poolsize          # e.g. (2, 2)
        self.activation_fn = activation_fn  # defaults to ReLU; sigmoid is another option (see the results at the bottom)
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))  # 20*5*5/(2*2) = 125
        self.w = theano.shared(  # weights, shape (20, 1, 5, 5)
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(  # one bias per filter
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)  # (10, 1, 28, 28)
        conv_out = conv.conv2d(  # 'valid' convolution: 28-5+1=24, shape (10, 20, 24, 24)
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(  # 24/2=12, shape (10, 20, 12, 12)
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(  # (10, 20, 12, 12) + (1, 20, 1, 1) broadcasts elementwise
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))  # reshape b to (1, 20, 1, 1)
        self.output_dropout = self.output  # no dropout in the convolutional layers
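The shape bookkeeping in the comments above follows one rule: a 'valid' convolution shrinks each spatial dimension by filter_size-1, then a non-overlapping 2x2 max-pool halves it. A tiny helper makes the arithmetic explicit (conv_pool_output is a hypothetical name for illustration, not part of the network code):

def conv_pool_output(size, filter_size=5, pool=2):
    # 'valid' convolution, then non-overlapping max-pooling
    return (size - filter_size + 1) // pool

print conv_pool_output(28)  # -> 12, output of the first ConvPoolLayer
print conv_pool_output(12)  # -> 4,  output of the second; hence n_in=40*4*4 below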
class FullyConnectedLayer(object):

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)  # scale by (1-p_dropout) when not dropping out
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))
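The cost method of SoftmaxLayer below picks out, for each sample in the batch, the log of the probability assigned to its true class, via the indexing expression T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y]. A small numpy sketch of the same indexing trick, with made-up probabilities:

# numpy illustration of the negative log-likelihood cost used by SoftmaxLayer
probs = np.array([[0.7, 0.2, 0.1],   # softmax output for sample 0
                  [0.1, 0.8, 0.1]])  # softmax output for sample 1
y = np.array([0, 1])                 # true labels
picked = probs[np.arange(2), y]      # -> [0.7, 0.8]
nll = -np.mean(np.log(picked))       # the quantity the network minimizes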
class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        self.w = theano.shared(  # zero-initialized, unlike the other layers
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)  # theano.tensor.nnet.softmax
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))

def size(data):  # number of samples in a shared dataset
    return len(data[0].get_value())

def dropout_layer(layer, p_dropout):  # randomly zero a fraction p_dropout of the layer's units
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
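dropout_layer builds a random binary mask with the same (symbolic) shape as its input and multiplies it in. A minimal sketch of it in isolation, reusing theano, T, and np from the listing (a hypothetical check, not part of the network):

# a minimal sketch: dropout_layer applied to a vector of ones
v = T.vector('v')
f = theano.function([v], dropout_layer(v, 0.5))
print f(np.ones(8, dtype=theano.config.floatX))  # roughly half the entries zeroed, the rest unchanged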
if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5),
                      poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    net.SGD(training_data, 30, mini_batch_size, 0.1,
            validation_data, test_data)

# Results with sigmoid ConvPoolLayers:
# Epoch 29: validation accuracy 98.96%, cost=9.70275432337e-05
# This is the best validation accuracy to date.
# The corresponding test accuracy is 98.86%
#
# Results with ReLU ConvPoolLayers:
# Epoch 29: validation accuracy 99.06%, cost=4.11269593315e-06
# This is the best validation accuracy to date.
# The corresponding test accuracy is 99.22%
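Once SGD has run, the test_mb_predictions function compiled inside it remains attached to the net object, so individual mini-batch predictions can be inspected; a short usage sketch:

# predicted digit labels for the first test mini-batch (10 images)
preds = net.test_mb_predictions(0)
print preds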