fc_net.py cs231n
如果有错误,欢迎指出,不胜感激。
import numpy as np

from cs231n.layers import *
from cs231n.layer_utils import *


class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network with ReLU nonlinearity and
    softmax loss that uses a modular layer design. We assume an input dimension
    of D, a hidden dimension of H, and perform classification over C classes.

    The architecture is affine - relu - affine - softmax.

    Note that this class does not implement gradient descent; instead, it
    will interact with a separate Solver object that is responsible for running
    optimization.

    The learnable parameters of the model are stored in the dictionary
    self.params that maps parameter names to numpy arrays.
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """
        Initialize a new network.

        Inputs:
        - input_dim: An integer giving the size of the input
        - hidden_dim: An integer giving the size of the hidden layer
        - num_classes: An integer giving the number of classes to classify
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - reg: Scalar giving L2 regularization strength.
        """
        self.params = {}
        self.reg = reg

        # Weights are drawn from a zero-mean Gaussian scaled by weight_scale;
        # biases start at zero.
        self.params['W1'] = np.random.randn(input_dim, hidden_dim) * weight_scale
        self.params['b1'] = np.zeros((hidden_dim,))
        self.params['W2'] = np.random.randn(hidden_dim, num_classes) * weight_scale
        self.params['b2'] = np.zeros((num_classes,))

    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass: affine - relu - affine.
        hidden, hidden_cache = affine_relu_forward(X, W1, b1)
        scores, scores_cache = affine_forward(hidden, W2, b2)

        # Without labels we are in test mode, so just return the scores.
        if y is None:
            return scores

        # Data loss plus L2 penalty on the weights (biases are not regularized).
        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(W2 * W2) + np.sum(W1 * W1))

        # Backward pass, in reverse layer order.
        grads = {}
        dhidden, grads['W2'], grads['b2'] = affine_backward(dscores, scores_cache)
        # The gradient with respect to the input X is not needed.
        _, grads['W1'], grads['b1'] = affine_relu_backward(dhidden, hidden_cache)

        # Gradient of the L2 penalty.
        grads['W2'] += W2 * self.reg
        grads['W1'] += W1 * self.reg

        return loss, grads
class FullyConnectedNet(object):
    """
    A fully-connected neural network with an arbitrary number of hidden layers,
    ReLU nonlinearities, and a softmax loss function. This also implements
    dropout and batch normalization as options. For a network with L layers,
    the architecture is

    {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

    where batch normalization and dropout are optional, and the {...} block is
    repeated L - 1 times.

    Similar to the TwoLayerNet above, learnable parameters are stored in the
    self.params dictionary and will be learned using the Solver class.
    """

    # NOTE: the list default for hidden_dims is kept for interface
    # compatibility; it is only read, never mutated.
    def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
                 dropout=0, use_batchnorm=False, reg=0.0,
                 weight_scale=1e-2, dtype=np.float32, seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A list of integers giving the size of each hidden layer.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0
          then the network does not use dropout at all.
        - use_batchnorm: Whether or not the network should use batch
          normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy datatype object; all parameters are cast to this
          datatype. float32 is faster but less accurate, so you should use
          float64 for numeric gradient checking.
        - seed: If not None, then pass this random seed to the dropout layers.
          This makes the dropout layers deterministic so we can gradient check
          the model.
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        self.dtype = dtype
        self.params = {}

        # In this implementation num_layers counts the HIDDEN layers only; the
        # final affine layer is stored under index num_layers + 1.
        self.num_layers = len(hidden_dims)
        num_layers = self.num_layers

        # Hidden layers: W_i / b_i, plus gamma_i / beta_i when batch
        # normalization is enabled. Layer indices are 1-based.
        last_dims = input_dim
        for i, hidden_dim in enumerate(hidden_dims, 1):
            self.params['W%d' % i] = (
                np.random.randn(last_dims, hidden_dim) * weight_scale)
            self.params['b%d' % i] = np.zeros(hidden_dim)
            if self.use_batchnorm:
                self.params['beta%d' % i] = np.zeros(hidden_dim)
                self.params['gamma%d' % i] = np.ones(hidden_dim)
            last_dims = hidden_dim

        # Output layer.
        self.params['W%d' % (num_layers + 1)] = (
            np.random.randn(last_dims, num_classes) * weight_scale)
        self.params['b%d' % (num_layers + 1)] = np.zeros(num_classes)

        # When using dropout we need to pass a dropout_param dictionary to each
        # dropout layer so that the layer knows the dropout probability and the
        # mode (train / test). The same dropout_param is passed to every
        # dropout layer.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # With batch normalization we need to keep track of running means and
        # variances, so each batch normalization layer gets its own bn_param
        # dictionary: self.bn_params[0] for the first layer, and so on.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for _ in range(self.num_layers)]

        # Cast all parameters to the correct datatype. Only values of existing
        # keys are replaced, so iterating over items() is safe.
        for k, v in self.params.items():
            self.params[k] = v.astype(dtype)

    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Inputs:
        - X: Array of input data; it is cast to self.dtype before use.
        - y: Array of labels, or None to run a test-time forward pass.

        Returns:
        If y is None, the classification scores produced by the final affine
        layer. Otherwise a tuple (loss, grads) where loss is the softmax loss
        plus L2 regularization and grads maps each parameter name in
        self.params to the gradient of the loss with respect to it.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.dropout_param is not None:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                # The key must be the literal 'mode'; indexing by the value of
                # `mode` would leave every layer stuck in its initial mode.
                bn_param['mode'] = mode

        params = self.params
        num_layers = self.num_layers
        last = num_layers + 1
        cache = {}

        # Forward pass through the hidden layers:
        # affine - [batch norm] - relu - [dropout].
        out = X
        for i in range(1, last):
            out, cache['cache%d' % i] = affine_forward(
                out, params['W%d' % i], params['b%d' % i])
            if self.use_batchnorm:
                out, cache['cachebn%d' % i] = batchnorm_forward(
                    out, params['gamma%d' % i], params['beta%d' % i],
                    self.bn_params[i - 1])
            out, cache['cacher%d' % i] = relu_forward(out)
            if self.use_dropout:
                out, cache['cached%d' % i] = dropout_forward(
                    out, self.dropout_param)

        # Final affine layer produces the class scores.
        scores, cache['cache%d' % last] = affine_forward(
            out, params['W%d' % last], params['b%d' % last])

        # If test mode return early.
        if mode == 'test':
            return scores

        grads = {}
        loss, dscores = softmax_loss(scores, y)

        # L2 penalty over every weight matrix (biases, gammas and betas are
        # not regularized).
        for i in range(1, last + 1):
            loss += np.sum(params['W%d' % i] ** 2) * 0.5 * self.reg

        # Backward pass through the output layer.
        dout, grads['W%d' % last], grads['b%d' % last] = affine_backward(
            dscores, cache['cache%d' % last])
        grads['W%d' % last] += params['W%d' % last] * self.reg

        # Backward pass through the hidden layers, last to first, undoing each
        # sub-layer in the reverse of the forward order.
        for i in range(num_layers, 0, -1):
            if self.use_dropout:
                dout = dropout_backward(dout, cache['cached%d' % i])
            dout = relu_backward(dout, cache['cacher%d' % i])
            if self.use_batchnorm:
                dout, grads['gamma%d' % i], grads['beta%d' % i] = (
                    batchnorm_backward_alt(dout, cache['cachebn%d' % i]))
            dout, grads['W%d' % i], grads['b%d' % i] = affine_backward(
                dout, cache['cache%d' % i])
            grads['W%d' % i] += params['W%d' % i] * self.reg

        return loss, grads
n
fc_net.py cs231n的更多相关文章
- cnn.py cs231n
n import numpy as np from cs231n.layers import * from cs231n.fast_layers import * from cs231n.layer_ ...
- layers.py cs231n
如果有错误,欢迎指出,不胜感激. import numpy as np def affine_forward(x, w, b): 第一个最简单的 affine_forward简单的前向传递,返回 ou ...
- optim.py cs231n
如果有错误,欢迎指出,不胜感激 import numpy as np """ This file implements various first-order upda ...
- 深度学习原理与框架-神经网络-cifar10分类(代码) 1.np.concatenate(进行数据串接) 2.np.hstack(将数据横着排列) 3.hasattr(判断.py文件的函数是否存在) 4.reshape(维度重构) 5.transpose(维度位置变化) 6.pickle.load(f文件读入) 7.np.argmax(获得最大值索引) 8.np.maximum(阈值比较)
横1. np.concatenate(list, axis=0) 将数据进行串接,这里主要是可以将列表进行x轴获得y轴的串接 参数说明:list表示需要串接的列表,axis=0,表示从上到下进行串接 ...
- 『cs231n』通过代码理解风格迁移
『cs231n』卷积神经网络的可视化应用 文件目录 vgg16.py import os import numpy as np import tensorflow as tf from downloa ...
- 转:深度学习斯坦福cs231n 课程笔记
http://blog.csdn.net/dinosoft/article/details/51813615 前言 对于深度学习,新手我推荐先看UFLDL,不做assignment的话,一两个晚上就可 ...
- 笔记:CS231n+assignment2(作业二)(一)
第二个作业难度很高,但做(抄)完之后收获还是很大的.... 一.Fully-Connected Neural Nets 首先是对之前的神经网络的程序进行重构,目的是可以构建任意大小的全连接的neura ...
- CS231n 2016 通关 第五、六章 Fully-Connected Neural Nets 作业
要求:实现任意层数的NN. 每一层结构包含: 1.前向传播和反向传播函数:2.每一层计算的相关数值 cell 1 依旧是显示的初始设置 # As usual, a bit of setup impor ...
- CS231n 2016 通关 第四章-NN 作业
cell 1 显示设置初始化 # A bit of setup import numpy as np import matplotlib.pyplot as plt from cs231n.class ...
随机推荐
- 从0开始学习ssh之搭建环境
ssh即struts+spring+Hibernate,从头开始学习这个框架. struts环境配置,首先在apps目录下找到struts2-blank-xxx.war这个文件,这是已经发布好的war ...
- 02.Hibernate配置文件之映射配置文件
映射文件,即xxx.hbm.xml的配置文件 <class>标签:用来将类与数据库表建立映射关系 属性: name:类中的全路径 table:表名(如果类与表名一致,那么table属性可以 ...
- Android开发 Camera2开发_2_预览分辨率或拍照分辨率的计算
前言 不管在Camera1或者Camera2在适配不同手机/不同使用场景的情况下都需要计算摄像头里提供的分辨率列表中最合适的那一个分辨率.所以在需要大量机型适配的app,是不建议不经过计算直接自定义分 ...
- Extjs4 的一些语法 持续更新中
一.给GridPanel增加成两行toolbar tbar: { xtype: 'container', layout: 'anchor', defaults: {anchor: '0'}, defa ...
- 机器学习之五 正则化的线性回归-岭回归与Lasso回归
机器学习之五 正则化的线性回归-岭回归与Lasso回归 注:正则化是用来防止过拟合的方法.在最开始学习机器学习的课程时,只是觉得这个方法就像某种魔法一样非常神奇的改变了模型的参数.但是一直也无法对其基 ...
- Redis学习笔记03-持久化
redis是一个内存型数据库,这就意味着,当主机重启或者宕机时,内存中的数据会被清空,redis可能会丢失数据.为了保存数据,实现数据持久化就必须要有一种机制,可以将redis数据库的数据保留在硬盘上 ...
- U盘安装Linux CentOS 6.8 系统
1.插入U盘在服务器中的USB接口: 2.选择启动的U盘进入装系统的页面: 3.选择English,按ok: 4.选UEFI:SanDisk Cruzer Edge 1.26 5.选 us 按 ok ...
- CentOS 7 忘记root密码的修改方法
1.开机按esc 2.选择CentOS Linux (3.10.0-693.......) 按 e 键: 3.光标移动到 linux 16 开头的行,找到 ro 改为 rw init=sysr ...
- 用Python的requests库作接口测试——上传文件
POST一个多部分编码(Multipart-Encoded)的文件 Requests使得上传多部分编码文件变得很简单: >>> url = 'http://httpbin.org/p ...
- 时序数据库连载系列: 时序数据库一哥InfluxDB之存储机制解析
InfluxDB 的存储机制解析 本文介绍了InfluxDB对于时序数据的存储/索引的设计.由于InfluxDB的集群版已在0.12版就不再开源,因此如无特殊说明,本文的介绍对象都是指 InfluxD ...