softmax_loss.cu 和 softmax_loss.cpp源码

  #include <algorithm>

  #include <cfloat>

  #include <vector>

  #include "caffe/layers/softmax_loss_layer.hpp"

  #include "caffe/util/math_functions.hpp"

  namespace caffe {

  // Per-position softmax loss kernel.
  //
  // For every index in [0, nthreads) the index is split into
  //   n = index / spatial_dim   (outer item)
  //   s = index % spatial_dim   (position inside the item)
  // and the label stored at label[n * spatial_dim + s] is read.
  //   * If has_ignore_label_ is set and the label equals ignore_label_, the
  //     position contributes nothing: loss[index] = 0 and counts[index] = 0.
  //   * Otherwise loss[index] = -log(p), where p is the probability stored at
  //     prob_data[n * dim + label * spatial_dim + s], clamped from below by
  //     FLT_MIN so that log() never sees zero; counts[index] = 1.
  //
  // loss and counts must each hold at least nthreads elements. The `num`
  // argument is not read by the kernel.
  // NOTE(review): label_value is not range-checked before it is used to index
  // prob_data; a label outside the channel range reads out of bounds.
  // CUDA_KERNEL_LOOP is defined outside this file (presumably a grid-stride
  // loop over [0, nthreads) -- confirm against its definition).
  template <typename Dtype>
  __global__ void SoftmaxLossForwardGPU(const int nthreads,
            const Dtype* prob_data, const Dtype* label, Dtype* loss,
            const int num, const int dim, const int spatial_dim,
            const bool has_ignore_label_, const int ignore_label_,
            Dtype* counts) {
    CUDA_KERNEL_LOOP(index, nthreads) {
      const int n = index / spatial_dim;
      const int s = index % spatial_dim;
      const int label_value = static_cast<int>(label[n * spatial_dim + s]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        // Ignored position: no loss and not counted as a valid output.
        loss[index] = 0;
        counts[index] = 0;
      } else {
        loss[index] = -log(max(prob_data[n * dim + label_value * spatial_dim + s],
                        Dtype(FLT_MIN)));
        counts[index] = 1;
      }
    }
  }

  // GPU forward pass of the softmax loss layer.
  //
  // Runs the internal softmax layer, launches SoftmaxLossForwardGPU to get one
  // loss value (and one valid/ignored flag) per position, sums them with
  // caffe_gpu_asum and writes the normalized scalar loss to top[0].
  //
  // bottom[0]: predictions; its diff buffer is borrowed as scratch space for
  //            the per-position losses.
  // bottom[1]: labels, one per (outer item, inner position).
  // top[0]:    scalar loss (written through mutable_cpu_data()).
  // top[1]:    optional; when present it shares the data of prob_.
  template <typename Dtype>
  void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
      const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
    softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
    const Dtype* prob_data = prob_.gpu_data();
    const Dtype* label = bottom[1]->gpu_data();
    const int dim = prob_.count() / outer_num_;
    const int nthreads = outer_num_ * inner_num_;
    // Since this memory is not used for anything until it is overwritten
    // on the backward pass, we use it here to avoid having to allocate new GPU
    // memory to accumulate intermediate results in the kernel.
    Dtype* loss_data = bottom[0]->mutable_gpu_diff();
    // Similarly, this memory is never used elsewhere, and thus we can use it
    // to avoid having to allocate additional GPU memory.
    Dtype* counts = prob_.mutable_gpu_diff();
    // NOLINT_NEXT_LINE(whitespace/operators)
    SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
        CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data,
        outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
    Dtype loss;
    caffe_gpu_asum(nthreads, loss_data, &loss);
    // -1 marks "valid count not computed". get_normalizer is defined outside
    // this file; presumably it falls back to a default count for -1 --
    // confirm against its definition.
    Dtype valid_count = -1;
    // Only launch another CUDA kernel if we actually need the count of valid
    // outputs.
    if (normalization_ == LossParameter_NormalizationMode_VALID &&
        has_ignore_label_) {
      caffe_gpu_asum(nthreads, counts, &valid_count);
    }
    top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_,
                                                          valid_count);
    if (top.size() == 2) {
      // Expose the softmax probabilities as the second output.
      top[1]->ShareData(prob_);
    }
  }

  // Per-position softmax loss gradient kernel.
  //
  // bottom_diff is read and modified in place (the caller in this file fills
  // it with the softmax probabilities before the launch). For every index in
  // [0, nthreads), with n = index / spatial_dim and s = index % spatial_dim:
  //   * If has_ignore_label_ is set and the label equals ignore_label_, the
  //     entries of all `channels` (= dim / spatial_dim) channels at (n, s) are
  //     set to 0 and counts[index] = 0.
  //   * Otherwise 1 is subtracted from the entry of the labelled channel and
  //     counts[index] = 1.
  //
  // counts must hold at least nthreads elements. The `top` and `num`
  // arguments are not read by the kernel.
  // NOTE(review): label_value is not range-checked before it is used to index
  // bottom_diff; a label outside the channel range writes out of bounds.
  // CUDA_KERNEL_LOOP is defined outside this file (presumably a grid-stride
  // loop over [0, nthreads) -- confirm against its definition).
  template <typename Dtype>
  __global__ void SoftmaxLossBackwardGPU(const int nthreads, const Dtype* top,
            const Dtype* label, Dtype* bottom_diff, const int num, const int dim,
            const int spatial_dim, const bool has_ignore_label_,
            const int ignore_label_, Dtype* counts) {
    const int channels = dim / spatial_dim;

    CUDA_KERNEL_LOOP(index, nthreads) {
      const int n = index / spatial_dim;
      const int s = index % spatial_dim;
      const int label_value = static_cast<int>(label[n * spatial_dim + s]);

      if (has_ignore_label_ && label_value == ignore_label_) {
        // Ignored position: no gradient flows to any channel.
        for (int c = 0; c < channels; ++c) {
          bottom_diff[n * dim + c * spatial_dim + s] = 0;
        }
        counts[index] = 0;
      } else {
        bottom_diff[n * dim + label_value * spatial_dim + s] -= 1;
        counts[index] = 1;
      }
    }
  }

  template <typename Dtype>

  void SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,

      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

    if (propagate_down[]) {

      LOG(FATAL) << this->type()

                 << " Layer cannot backpropagate to label inputs.";

    }

    if (propagate_down[]) {

      Dtype* bottom_diff = bottom[]->mutable_gpu_diff();

      const Dtype* prob_data = prob_.gpu_data();

     const Dtype* top_data = top[]->gpu_data();

     caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff);

     const Dtype* label = bottom[]->gpu_data();

     const int dim = prob_.count() / outer_num_;

     const int nthreads = outer_num_ * inner_num_;

     // Since this memory is never used for anything else,

     // we use to to avoid allocating new GPU memory.

     Dtype* counts = prob_.mutable_gpu_diff();

     // NOLINT_NEXT_LINE(whitespace/operators)

     SoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),

         CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff,

         outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);

     Dtype valid_count = -;

     // Only launch another CUDA kernel if we actually need the count of valid

     // outputs.

     if (normalization_ == LossParameter_NormalizationMode_VALID &&

         has_ignore_label_) {

       caffe_gpu_asum(nthreads, counts, &valid_count);

     }

     const Dtype loss_weight = top[]->cpu_diff()[] /

         (get_normalizer(normalization_, valid_count) * Caffe::getThreadNum());

     caffe_gpu_scal(prob_.count(), loss_weight , bottom_diff);

   }

 }

 // Macro defined outside this file; presumably it emits the explicit template
 // instantiations of Forward_gpu/Backward_gpu for this layer, leaving out the
 // FP16 variant as its name suggests -- TODO confirm against the macro.
 INSTANTIATE_LAYER_GPU_FUNCS_DISABLE_FP16(SoftmaxWithLossLayer);

 }  // namespace caffe

outer_num_：相当于batch_size

dim: c*w*h

spatial_dim(inner_num_):w*h

softmax_loss.cpp的代码:

outer_num_ = bottom[0]->count(0, softmax_axis_);

inner_num_ = bottom[0]->count(softmax_axis_ + 1);

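其实可以看出来，count(start, end) 取的是左闭右开区间：包含起始轴，不包含结束轴。例如当 softmax_axis_ 为 1 时，count(0, softmax_axis_) 只统计了第 0 个轴（即 batch 维度），而 count(softmax_axis_ + 1) 则统计 softmax_axis_ 之后所有轴的乘积（即 w*h）。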

softmax_loss.cu 和 softmax_loss.cpp源码的更多相关文章

Mavlink_main.cpp源码学习
int mavlink_main(int argc, char *argv[]) { if (argc < 2) { usage(); ...
Caffe源码-SGDSolver类
SGDSolver类简介 Solver类用于网络参数的更新,而SGDSolver类实现了优化方法中的随机梯度下降法(stochastic gradient descent),此外还具备缩放.正则化梯度 ...
android后台截屏实现(2)--screencap源码修改
首先找到screencap类在Android源码中的位置,/442/frameworks/base/cmds/screencap/screencap.cpp 源码如下: /* * Copyright ...
【精解】EOS标准货币体系与源码实现分析
EOS智能合约中包含一个exchange合约,它支持用户创建一笔交易,是任何两个基本货币类型之间的交易.这个合约的作用是跨不同币种(都是EOS上的标准货币类型)的,通过各自与EOS主链价值进行锚定,然 ...
duilib 使用图片素材或者算法给窗体增加阴影（源码和demo）
转载请说明原出处,谢谢:http://blog.csdn.net/zhuhongshu/article/details/42580877 之前我写的程序使用阴影时,一直是使用codeproject网站 ...
Caffe源码-Solver类
Solver类简介 Net类中实现了网络的前向/反向计算和参数更新,而Solver类中则是对此进行进一步封装,包含可用于逐次训练网络的Step()函数,和用于求解网络的优化解的Solve()函数,同时 ...
Caffe源码-InsertSplits()函数
InsertSplits()函数在Net初始化的过程中,存在一个特殊的修改网络结构的操作,那就是当某层的输出blob对应多个其他层的输入blob时,会在输出blob所在层的后面插入一个新的Split ...
Caffe源码-Blob类
Blob类简介 Blob是caffe中的数据传递的一个基本类,网络各层的输入输出数据以及网络层中的可学习参数(learnable parameters,如卷积层的权重和偏置参数)都是Blob类型.Bl ...
Caffe源码-SyncedMemory类
SyncedMemory类简介最近在阅读caffe源码,代码来自BVLC/caffe,基本是参照网络上比较推荐的 Blob-->Layer-->Net-->Solver 的顺序来分 ...

随机推荐

DP简单问题联系--最长递增子序列+最长公共子序列等
今天重温了一下dp问题,发现自己两个礼拜不写题目就什么都不会了...心态爆炸,感觉去考试怕是要gg了... 不过今天总结一下写的题目,全部都是基础的dp问题第一个是求最长不下降子序列的长度第一行 ...
Codeforces714C【映射】
题意: T次操作: +的话就是往 multiset 塞进一个: -的话就是往 multiset 去除一个: ?操作思路: +和-操作就是处理字符串直接实现一个原字符串改成"01" ...
[Xcode 实际操作]八、网络与多线程-(20)时间控件Timer定时功能
目录:[Swift]Xcode实际操作本文将演示时间控件Timer定时功能的使用. 在项目导航区,打开视图控制器的代码文件[ViewController.swift] import UIKit cl ...
Node.js 自定义模块
Node.js内置多个模块,也可以使用第三方模块,今天学习一下如何使用自己定义的模块在同级目录下定义两个js文件第一个:custom1.js "use strict"; fun ...
IT兄弟连 JavaWeb教程 Servlet会话跟踪设置Session存活时长
方式一:修改所有的session默认时长,修改tomcat目录下的conf文件夹下的web.xml文件. <session-config> <session-timeout>希 ...
关于web常见的安全问题
一. Web攻击动机: 1.恶作剧: 2.关闭Web站点,拒绝正常服务: 3.篡改Web网页,损害企业名誉; 4.免费浏览收费内容; 5.盗窃用户隐私信息,例如手机号.Email等个人信息; 6.以用 ...
iOS UITableView 解决估算行高和指定行高的矛盾
喜欢交朋友的加:微信号 dwjluck2013 1.一般来说在iOS 中若UITableViewCell 固定行高, 会通过 - (CGFloat)tableView:(UITableView *) ...
使用Hexo 搭建自己的博客
使用Hexo 搭建自己的博客最近一直都想着如何去搭建属于自己的博客,有空的时候就写写文章什么的. 本人对该博客系统的要求是: 博文的编写要采用现在流行的MarkDown来进行编写. 本人还不想去注册 ...
HTML超链接的使用
基本语法 <a href="" target="打开方式" name="页面锚点名称">链接文字或图片</a> 属性 ...
Spark Mllib里如何将预测结果如0或1，转换为文字描述来显示预测结果输出（图文详解）
不多说,直接上干货! 具体,见 Hadoop+Spark大数据巨量分析与机器学习整合开发实战的第13章使用决策树二元分类算法来预测分类StumbleUpon数据集

softmax_loss.cu 和 softmax_loss.cpp源码

softmax_loss.cu 和 softmax_loss.cpp源码的更多相关文章

随机推荐

热门专题