fc_net.py cs231n

如果有错误，欢迎指出，不胜感激

import numpy as np

from cs231n.layers import *

from cs231n.layer_utils import *

class TwoLayerNet(object):
    """
    Two-layer fully-connected classifier built from modular layers.

    The pipeline is affine - relu - affine - softmax, for inputs of dimension
    D, a hidden layer of width H and C output classes.

    The class only evaluates the loss and its gradients; running the actual
    optimization is left to a separate Solver object.

    All learnable parameters live in the dictionary self.params, keyed by
    parameter name ('W1', 'b1', 'W2', 'b2') and holding numpy arrays.
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """
        Build the network and initialize its parameters.

        Inputs:
        - input_dim: Integer size of the (flattened) input.
        - hidden_dim: Integer width of the hidden layer.
        - num_classes: Integer number of classes to score.
        - weight_scale: Standard deviation of the zero-mean Gaussian used to
          draw the initial weights.
        - reg: Scalar L2 regularization strength.
        """
        self.reg = reg

        # Draw W1 before W2 so a seeded RNG yields the same weights as before.
        first_w = np.random.randn(input_dim, hidden_dim) * weight_scale
        second_w = np.random.randn(hidden_dim, num_classes) * weight_scale

        # Biases start at zero.
        self.params = {
            'W1': first_w,
            'b1': np.zeros((hidden_dim,)),
            'W2': second_w,
            'b2': np.zeros((num_classes,)),
        }

    def loss(self, X, y=None):
        """
        Evaluate the network on a minibatch.

        Inputs:
        - X: Input data of shape (N, d_1, ..., d_k).
        - y: Labels of shape (N,), where y[i] is the label of X[i]; pass None
          to run a test-time forward pass only.

        Returns:
        When y is None:
        - scores: Array of shape (N, C); scores[i, c] is the score of class c
          for X[i].

        Otherwise a tuple of:
        - loss: Scalar softmax loss plus the L2 penalty on W1 and W2.
        - grads: Dictionary with the same keys as self.params holding the
          gradient of the loss with respect to each parameter.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass: hidden activations, then class scores.
        hidden, hidden_cache = affine_relu_forward(X, W1, b1)
        scores, scores_cache = affine_forward(hidden, W2, b2)

        # Test mode: nothing to differentiate, hand back the scores.
        if y is None:
            return scores

        data_loss, dscores = softmax_loss(scores, y)
        # Only the weight matrices are regularized, not the biases.
        l2_penalty = 0.5 * self.reg * (np.sum(W2 * W2) + np.sum(W1 * W1))
        loss = data_loss + l2_penalty

        # Backward pass, last layer first. The gradient with respect to the
        # input X is not needed and is discarded.
        grads = {}
        dhidden, grads['W2'], grads['b2'] = affine_backward(dscores, scores_cache)
        _, grads['W1'], grads['b1'] = affine_relu_backward(dhidden, hidden_cache)

        # Gradient of the L2 penalty.
        grads['W2'] += self.reg * W2
        grads['W1'] += self.reg * W1

        return loss, grads

class FullyConnectedNet(object):
    """
    A fully-connected neural network with an arbitrary number of hidden layers,
    ReLU nonlinearities, and a softmax loss function. Dropout and batch
    normalization are available as options. For a network with L layers,
    the architecture is

    {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

    where batch normalization and dropout are optional, and the {...} block is
    repeated L - 1 times.

    Similar to the TwoLayerNet above, learnable parameters are stored in the
    self.params dictionary and are meant to be learned using the Solver class.

    Parameter names are 'W<i>' / 'b<i>' for every affine layer (1-based), plus
    'gamma<i>' / 'beta<i>' for every hidden layer when batch normalization is
    enabled.
    """

    def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
                 dropout=0, use_batchnorm=False, reg=0.0,
                 weight_scale=1e-2, dtype=np.float32, seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A sequence of integers giving the size of each hidden
          layer. It is only read, never modified.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0
          then the network does not use dropout at all.
        - use_batchnorm: Whether or not the network should use batch
          normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy datatype object; all parameters are cast to this
          datatype. float32 is faster but less accurate, so you should use
          float64 for numeric gradient checking.
        - seed: If not None, then pass this random seed to the dropout layers.
          This makes the dropout layers deterministic so the model can be
          gradient checked.
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        self.dtype = dtype

        # NOTE: in this implementation num_layers counts the HIDDEN layers
        # only; the final affine layer is layer number num_layers + 1.
        self.num_layers = len(hidden_dims)

        # Sizes of every activation from input to output; consecutive pairs
        # give the (fan_in, fan_out) of each affine layer.
        layer_dims = [input_dim] + list(hidden_dims) + [num_classes]

        params = {}
        for idx in range(1, len(layer_dims)):
            fan_in, fan_out = layer_dims[idx - 1], layer_dims[idx]
            params['W%d' % idx] = np.random.randn(fan_in, fan_out) * weight_scale
            params['b%d' % idx] = np.zeros(fan_out)
            # Batch-norm scale/shift exist for hidden layers only, not for the
            # output layer.
            if self.use_batchnorm and idx <= self.num_layers:
                params['beta%d' % idx] = np.zeros(fan_out)
                params['gamma%d' % idx] = np.ones(fan_out)

        # Cast all parameters to the requested datatype.
        self.params = {name: value.astype(dtype)
                       for name, value in params.items()}

        # When using dropout we need to pass a dropout_param dictionary to each
        # dropout layer so that the layer knows the dropout probability and the
        # mode (train / test). The same dictionary is shared by every dropout
        # layer.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # With batch normalization each layer needs its own bn_param
        # dictionary: self.bn_params[0] goes to the first batch normalization
        # layer, self.bn_params[1] to the second, and so on.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for _ in range(self.num_layers)]

    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k).
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].
          Pass None to run a test-time forward pass.

        Returns:
        If y is None:
        - scores: Array of shape (N, C) giving classification scores.

        Otherwise a tuple of:
        - loss: Scalar softmax loss plus the L2 penalty on every weight matrix.
        - grads: Dictionary with the same keys as self.params, mapping
          parameter names to gradients of the loss.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                # The key must be the literal 'mode'; indexing with the
                # variable would add a 'train'/'test' key and leave the layer
                # stuck in its initial mode.
                bn_param['mode'] = mode

        num_hidden = self.num_layers
        last = num_hidden + 1

        # Per-layer caches for the backward pass, keyed by 1-based layer index.
        affine_caches, bn_caches, relu_caches, dropout_caches = {}, {}, {}, {}

        # Forward pass through the hidden blocks.
        out = X
        for idx in range(1, last):
            out, affine_caches[idx] = affine_forward(
                out, self.params['W%d' % idx], self.params['b%d' % idx])
            if self.use_batchnorm:
                out, bn_caches[idx] = batchnorm_forward(
                    out, self.params['gamma%d' % idx],
                    self.params['beta%d' % idx], self.bn_params[idx - 1])
            out, relu_caches[idx] = relu_forward(out)
            if self.use_dropout:
                out, dropout_caches[idx] = dropout_forward(out, self.dropout_param)

        # Final affine layer produces the class scores.
        scores, affine_caches[last] = affine_forward(
            out, self.params['W%d' % last], self.params['b%d' % last])

        # If test mode return early
        if mode == 'test':
            return scores

        grads = {}
        loss, dout = softmax_loss(scores, y)

        # L2 penalty over every weight matrix (biases and batch-norm
        # parameters are not regularized).
        for idx in range(1, last + 1):
            loss += np.sum(self.params['W%d' % idx] ** 2) * 0.5 * self.reg

        # Backward pass: output layer first ...
        dout, grads['W%d' % last], grads['b%d' % last] = affine_backward(
            dout, affine_caches[last])
        grads['W%d' % last] += self.params['W%d' % last] * self.reg

        # ... then the hidden blocks in reverse, undoing each stage in the
        # opposite order to the forward pass.
        for idx in range(num_hidden, 0, -1):
            if self.use_dropout:
                dout = dropout_backward(dout, dropout_caches[idx])
            dout = relu_backward(dout, relu_caches[idx])
            if self.use_batchnorm:
                dout, grads['gamma%d' % idx], grads['beta%d' % idx] = \
                    batchnorm_backward_alt(dout, bn_caches[idx])
            dout, grads['W%d' % idx], grads['b%d' % idx] = affine_backward(
                dout, affine_caches[idx])
            grads['W%d' % idx] += self.params['W%d' % idx] * self.reg

        return loss, grads

fc_net.py cs231n的更多相关文章

cnn.py cs231n
import numpy as np from cs231n.layers import * from cs231n.fast_layers import * from cs231n.layer_ ...
layers.py cs231n
如果有错误,欢迎指出,不胜感激. import numpy as np def affine_forward(x, w, b): 第一个最简单的 affine_forward简单的前向传递,返回 ou ...
optim.py cs231n
如果有错误,欢迎指出,不胜感激 import numpy as np """ This file implements various first-order upda ...
深度学习原理与框架-神经网络-cifar10分类(代码) 1.np.concatenate(进行数据串接) 2.np.hstack(将数据横着排列) 3.hasattr(判断.py文件的函数是否存在) 4.reshape(维度重构) 5.transpose(维度位置变化) 6.pickle.load(f文件读入) 7.np.argmax(获得最大值索引) 8.np.maximum(阈值比较)
横1. np.concatenate(list, axis=0) 将数据进行串接,这里主要是可以将列表进行x轴获得y轴的串接参数说明:list表示需要串接的列表,axis=0,表示从上到下进行串接 ...
『cs231n』通过代码理解风格迁移
『cs231n』卷积神经网络的可视化应用文件目录 vgg16.py import os import numpy as np import tensorflow as tf from downloa ...
转：深度学习斯坦福cs231n 课程笔记
http://blog.csdn.net/dinosoft/article/details/51813615 前言对于深度学习,新手我推荐先看UFLDL,不做assignment的话,一两个晚上就可 ...
笔记：CS231n+assignment2（作业二）（一）
第二个作业难度很高,但做(抄)完之后收获还是很大的.... 一.Fully-Connected Neural Nets 首先是对之前的神经网络的程序进行重构,目的是可以构建任意大小的全连接的neura ...
CS231n 2016 通关第五、六章 Fully-Connected Neural Nets 作业
要求:实现任意层数的NN. 每一层结构包含: 1.前向传播和反向传播函数:2.每一层计算的相关数值 cell 1 依旧是显示的初始设置 # As usual, a bit of setup impor ...
CS231n 2016 通关第四章-NN 作业
cell 1 显示设置初始化 # A bit of setup import numpy as np import matplotlib.pyplot as plt from cs231n.class ...

随机推荐

java笔试之求int型正整数在内存中存储时1的个数
输入一个int型的正整数,计算出该int型数据在内存中存储时1的个数. 关键点:n与二进制的1相与:判断最末位是否为1:向右移位. 类似题目是查找输入整数二进制中1的个数. package test; ...
第一次个人项目【词频统计】——PSP表格
PSP2.1 任务内容计划完成需要的时间(min) 实际完成需要的时间(min) Planning 计划 45 40 Estimate 估计这个任务需要多少时间,并规划大致工作步骤 30 20 De ...
延迟对象deferred
Twisted 官方称,“Twisted is event-based, asynchronous framework ”.这个“异步”功能的代表就是 deferred. deferred 的作用类似 ...
SQL的特点
1.综合统一 2.高度非过程化 3.面向集合的操作方式 4.一同一种语法结构提供两种使用5.语言简洁易学易用
CentOS 6.5 Apache、MySQL、PHP环境配置(LAMP)
yum -y install httpd mysql-server php #安装apache.mysql和PHP yum -y install php-mysql php-gd php-mbstri ...
用py3的nonlocal来打破局部变量间的作用域
nonlocal:用于局部变量,找上层中离当前函数最近一层的局部变量,找到为止,如果在全局找到或找不到,报错. 使用场景:内层函数对外层数据修改/处理
[转]SQLserver字符串分割函数
一.按指定符号分割字符串,返回分割后的元素个数,方法很简单,就是看字符串中存在多少个分隔符号,然后再加一,就是要求的结果. CREATE function Get_StrArrayLength ( ) ...
Vue. 之 Element dialog 拖拽
Vue. 之 Element dialog 拖拽默认情况下,在使用Element的Dialog模块时,弹出框是不能移动的,且一旦点击遮罩层区域,弹框就会消失. 解决方案: 1 在 utils 中新 ...
垂直对齐：vertical-align属性——使用中注意事项
1.vertical-align(垂直对齐),只对行内元素和单元格元素有效,例如属性为inline和inline-block的元素以及图片.输入表单等都是行内元素; 2.元素默认的垂直对齐方式为基线对 ...
nfs服务安装配置
一.准备阶段配置解析主机检查版本及内核二.服务端安装 1) 配置yum把下载好的软件留着,下次备用,不用再下载 cachedir=/var/cache/yum/$basearch/$releas ...

fc_net.py cs231n

fc_net.py cs231n的更多相关文章

随机推荐

热门专题