如果有错误,欢迎指出,不胜感激。

import numpy as np

from cs231n.layers import *
from cs231n.layer_utils import *


class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network with ReLU nonlinearity and
    softmax loss that uses a modular layer design. We assume an input
    dimension of D, a hidden dimension of H, and perform classification over
    C classes.

    The architecture should be affine - relu - affine - softmax.

    Note that this class does not implement gradient descent; instead, it
    will interact with a separate Solver object that is responsible for
    running optimization.

    The learnable parameters of the model are stored in the dictionary
    self.params that maps parameter names to numpy arrays.
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """
        Initialize a new network.

        Inputs:
        - input_dim: An integer giving the size of the input
        - hidden_dim: An integer giving the size of the hidden layer
        - num_classes: An integer giving the number of classes to classify
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - reg: Scalar giving L2 regularization strength.
        """
        self.params = {}
        self.reg = reg

        # Weights are drawn from N(0, weight_scale^2); biases start at zero.
        # W1 is drawn before W2 so a fixed numpy seed reproduces the same
        # initialization as before.
        self.params['W1'] = (
            np.random.randn(input_dim, hidden_dim) * weight_scale)
        self.params['b1'] = np.zeros((hidden_dim,))
        self.params['W2'] = (
            np.random.randn(hidden_dim, num_classes) * weight_scale)
        self.params['b2'] = np.zeros((num_classes,))

    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and
        return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass
        and return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss with respect to those
          parameters.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass: affine - relu - affine.
        hidden, hidden_cache = affine_relu_forward(X, W1, b1)
        scores, scores_cache = affine_forward(hidden, W2, b2)

        # Test mode: no labels, so only the scores are needed.
        if y is None:
            return scores

        # Data loss plus L2 penalty on the weights (biases are not
        # regularized).
        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(W2 * W2) + np.sum(W1 * W1))

        # Backward pass, mirroring the forward pass in reverse order.
        grads = {}
        dhidden, grads['W2'], grads['b2'] = affine_backward(
            dscores, scores_cache)
        _, grads['W1'], grads['b1'] = affine_relu_backward(
            dhidden, hidden_cache)

        # Gradient of the 0.5 * reg * sum(W^2) term.
        grads['W2'] += W2 * self.reg
        grads['W1'] += W1 * self.reg

        return loss, grads


class FullyConnectedNet(object):
    """
    A fully-connected neural network with an arbitrary number of hidden
    layers, ReLU nonlinearities, and a softmax loss function. This will also
    implement dropout and batch normalization as options. For a network with
    L layers, the architecture will be

    {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

    where batch normalization and dropout are optional, and the {...} block
    is repeated L - 1 times.

    Similar to the TwoLayerNet above, learnable parameters are stored in the
    self.params dictionary and will be learned using the Solver class.

    NOTE: in this implementation self.num_layers stores the number of
    *hidden* layers, i.e. len(hidden_dims); the final affine layer uses
    parameter index self.num_layers + 1.
    """

    def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
                 dropout=0, use_batchnorm=False, reg=0.0,
                 weight_scale=1e-2, dtype=np.float32, seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A list of integers giving the size of each hidden
          layer. The default list is never mutated here.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If
          dropout=0 then the network should not use dropout at all.
        - use_batchnorm: Whether or not the network should use batch
          normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy datatype object; all computations will be performed
          using this datatype. float32 is faster but less accurate, so you
          should use float64 for numeric gradient checking.
        - seed: If not None, then pass this random seed to the dropout
          layers. This will make the dropout layers deterministic so we can
          gradient check the model.
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        # Number of hidden layers. The original code first assigned
        # 1 + len(hidden_dims) and immediately overwrote it with this value;
        # only the effective assignment is kept.
        self.num_layers = len(hidden_dims)
        self.dtype = dtype
        self.params = {}

        # Hidden layers are numbered from 1; each layer's input width is the
        # previous layer's output width.
        fan_in = input_dim
        for idx, width in enumerate(hidden_dims, 1):
            self.params['W%d' % idx] = (
                np.random.randn(fan_in, width) * weight_scale)
            self.params['b%d' % idx] = np.zeros(width)
            if self.use_batchnorm:
                # Identity transform at initialization: shift 0, scale 1.
                self.params['beta%d' % idx] = np.zeros(width)
                self.params['gamma%d' % idx] = np.ones(width)
            fan_in = width

        # Final affine layer mapping the last hidden layer to class scores.
        out_idx = self.num_layers + 1
        self.params['W%d' % out_idx] = (
            np.random.randn(fan_in, num_classes) * weight_scale)
        self.params['b%d' % out_idx] = np.zeros(num_classes)

        # When using dropout we need to pass a dropout_param dictionary to
        # each dropout layer so that the layer knows the dropout probability
        # and the mode (train / test). The same dropout_param is passed to
        # every dropout layer.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # With batch normalization we need to keep track of running means
        # and variances, so each batch normalization layer gets its own
        # bn_param dictionary: self.bn_params[0] for the first layer,
        # self.bn_params[1] for the second, etc.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'}
                              for _ in range(self.num_layers)]

        # Cast all parameters to the correct datatype. Iterate over a
        # snapshot so the dict can be updated safely on Python 2 and 3.
        for name, value in list(self.params.items()):
            self.params[name] = value.astype(dtype)

    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since
        # they behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                # Fixed: the original wrote bn_param[mode] = mode, which
                # created a 'train'/'test' key and never updated 'mode'.
                bn_param['mode'] = mode

        num_hidden = self.num_layers
        out_idx = num_hidden + 1

        # Forward pass. Per-layer caches are stored by name so the backward
        # pass can fetch them in reverse order.
        cache = {}
        out = X
        for i in range(1, num_hidden + 1):
            out, cache['cache%d' % i] = affine_forward(
                out, self.params['W%d' % i], self.params['b%d' % i])
            if self.use_batchnorm:
                out, cache['cachebn%d' % i] = batchnorm_forward(
                    out, self.params['gamma%d' % i],
                    self.params['beta%d' % i], self.bn_params[i - 1])
            out, cache['cacher%d' % i] = relu_forward(out)
            if self.use_dropout:
                out, cache['cached%d' % i] = dropout_forward(
                    out, self.dropout_param)

        scores, cache['cache%d' % out_idx] = affine_forward(
            out, self.params['W%d' % out_idx], self.params['b%d' % out_idx])

        # If test mode return early.
        if mode == 'test':
            return scores

        # Data loss plus L2 penalty over every weight matrix (hidden layers
        # and the output layer); biases, gamma and beta are not regularized.
        grads = {}
        loss, dscores = softmax_loss(scores, y)
        for i in range(1, out_idx + 1):
            loss += np.sum(self.params['W%d' % i] ** 2) * 0.5 * self.reg

        # Backward pass through the output affine layer.
        dout, grads['W%d' % out_idx], grads['b%d' % out_idx] = (
            affine_backward(dscores, cache['cache%d' % out_idx]))
        grads['W%d' % out_idx] += self.params['W%d' % out_idx] * self.reg

        # Backward pass through the hidden blocks, last block first, undoing
        # each block's stages in reverse: dropout, relu, batchnorm, affine.
        for i in range(num_hidden, 0, -1):
            if self.use_dropout:
                dout = dropout_backward(dout, cache['cached%d' % i])
            dout = relu_backward(dout, cache['cacher%d' % i])
            if self.use_batchnorm:
                dout, grads['gamma%d' % i], grads['beta%d' % i] = (
                    batchnorm_backward_alt(dout, cache['cachebn%d' % i]))
            dout, grads['W%d' % i], grads['b%d' % i] = affine_backward(
                dout, cache['cache%d' % i])
            grads['W%d' % i] += self.params['W%d' % i] * self.reg

        return loss, grads

  

fc_net.py cs231n的更多相关文章

  1. cnn.py cs231n

    import numpy as np from cs231n.layers import * from cs231n.fast_layers import * from cs231n.layer_ ...

  2. layers.py cs231n

    如果有错误,欢迎指出,不胜感激. import numpy as np def affine_forward(x, w, b): 第一个最简单的 affine_forward简单的前向传递,返回 ou ...

  3. optim.py cs231n

    如果有错误,欢迎指出,不胜感激 import numpy as np """ This file implements various first-order upda ...

  4. 深度学习原理与框架-神经网络-cifar10分类(代码) 1.np.concatenate(进行数据串接) 2.np.hstack(将数据横着排列) 3.hasattr(判断.py文件的函数是否存在) 4.reshape(维度重构) 5.tanspose(维度位置变化) 6.pickle.load(f文件读入) 7.np.argmax(获得最大值索引) 8.np.maximum(阈值比较)

    横1. np.concatenate(list, axis=0) 将数据进行串接,这里主要是可以将列表进行x轴获得y轴的串接 参数说明:list表示需要串接的列表,axis=0,表示从上到下进行串接 ...

  5. 『cs231n』通过代码理解风格迁移

    『cs231n』卷积神经网络的可视化应用 文件目录 vgg16.py import os import numpy as np import tensorflow as tf from downloa ...

  6. 转:深度学习斯坦福cs231n 课程笔记

    http://blog.csdn.net/dinosoft/article/details/51813615 前言 对于深度学习,新手我推荐先看UFLDL,不做assignment的话,一两个晚上就可 ...

  7. 笔记:CS231n+assignment2(作业二)(一)

    第二个作业难度很高,但做(抄)完之后收获还是很大的.... 一.Fully-Connected Neural Nets 首先是对之前的神经网络的程序进行重构,目的是可以构建任意大小的全连接的neura ...

  8. CS231n 2016 通关 第五、六章 Fully-Connected Neural Nets 作业

    要求:实现任意层数的NN. 每一层结构包含: 1.前向传播和反向传播函数:2.每一层计算的相关数值 cell 1 依旧是显示的初始设置 # As usual, a bit of setup impor ...

  9. CS231n 2016 通关 第四章-NN 作业

    cell 1 显示设置初始化 # A bit of setup import numpy as np import matplotlib.pyplot as plt from cs231n.class ...

随机推荐

  1. Codeforces 500D. New Year Santa Network

    题目大意 给你一颗有\(n\)个点的树\(T\),边上有边权. 规定,\(d(i,j)\)表示点i到点j路径上的边权之和. 给你\(q\)次询问,每次询问格式为\(i, j\),表示将按输入顺序排序的 ...

  2. WPF 导出Excel 导出图片

    /// <summary> /// 导出Excel /// </summary> private void ExportExcel(DataTable ExcelDt) { / ...

  3. charles for https

    To remotely capture http or https traffic with charles you will need to do the following: HOST - Mac ...

  4. 44个 Javascript 变态题解析 (上)

    原题来自: javascript-puzzlers(http://javascript-puzzlers.herokuapp.com/) 读者可以先去做一下感受感受. 当初笔者的成绩是 21/44… ...

  5. 新闻内页 上一篇写一篇问题,ID不连续,不用链表

    y要什么链表? 用sql查询上一篇 SELECT id,title FROM t_article WHERE id<10 ORDER BY id DESC LIMIT 1; 用sql查下一篇 S ...

  6. java类增强方式

    我理解的增强类即是对类进行功能性扩展,除了网上常规的3种方法( 1.继承或者实现接口:特点是被增强对象不能变,增强的内容不能变. 2.装饰着模式:特点是被增强对象可变,但增强内容不可变. 3.动态代理 ...

  7. Python实现十大经典排序算法(史上最简单)

    十大排序算法(Python实现)一. 算法介绍及相关概念解读 算法分类十种常见排序算法可以分为两大类: 非线性时间比较类排序:通过比较来决定元素间的相对次序,由于其时间复杂度不能突破O(nlogn), ...

  8. 单例模式(Singleton)(单一实例)

    单例模式基本要点: 用于确保一个类只有一个实例,并且这个实例易于被访问. 让类自身负责保存他的唯一实例.这个类可以保证没有其他实例创建,并且他可以提供一个访问实例的方法,来实现单例模式. (1)把构造 ...

  9. mavenjar 一些拉取不下来问题

    http://search.maven.org/这里找相近版本替换试试.拉取不下来是因为官方版本不足或者网络问题.

  10. Tensorflow技巧

    1.尽量控制图片大小在1024以内,不然显存会爆炸. 2.尽量使用多GPU并行工作,训练下降速度快. 3.当需要被检测的单张图片里物体太多时,记得修改Region_proposals的个数 4.测试的 ...