#include <algorithm>
#include <cfloat>
#include <vector> #include "caffe/layers/softmax_loss_layer.hpp"
#include "caffe/util/math_functions.hpp" namespace caffe { template <typename Dtype>
__global__ void SoftmaxLossForwardGPU(const int nthreads,
const Dtype* prob_data, const Dtype* label, Dtype* loss,
const int num, const int dim, const int spatial_dim,
const bool has_ignore_label_, const int ignore_label_,
Dtype* counts) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int n = index / spatial_dim;
const int s = index % spatial_dim;
const int label_value = static_cast<int>(label[n * spatial_dim + s]);
if (has_ignore_label_ && label_value == ignore_label_) {
loss[index] = ;
counts[index] = ;
} else {
loss[index] = -log(max(prob_data[n * dim + label_value * spatial_dim + s],
Dtype(FLT_MIN)));
counts[index] = ;
}
}
} template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
const Dtype* prob_data = prob_.gpu_data();
const Dtype* label = bottom[]->gpu_data();
const int dim = prob_.count() / outer_num_;
const int nthreads = outer_num_ * inner_num_;
// Since this memory is not used for anything until it is overwritten
// on the backward pass, we use it here to avoid having to allocate new GPU
// memory to accumulate intermediate results in the kernel.
Dtype* loss_data = bottom[]->mutable_gpu_diff();
// Similarly, this memory is never used elsewhere, and thus we can use it
// to avoid having to allocate additional GPU memory.
Dtype* counts = prob_.mutable_gpu_diff();
// NOLINT_NEXT_LINE(whitespace/operators)
SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data,
outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
Dtype loss;
caffe_gpu_asum(nthreads, loss_data, &loss);
Dtype valid_count = -;
// Only launch another CUDA kernel if we actually need the count of valid
// outputs.
if (normalization_ == LossParameter_NormalizationMode_VALID &&
has_ignore_label_) {
caffe_gpu_asum(nthreads, counts, &valid_count);
}
top[]->mutable_cpu_data()[] = loss / get_normalizer(normalization_,
valid_count);
if (top.size() == ) {
top[]->ShareData(prob_);
}
} template <typename Dtype>
__global__ void SoftmaxLossBackwardGPU(const int nthreads, const Dtype* top,
const Dtype* label, Dtype* bottom_diff, const int num, const int dim,
const int spatial_dim, const bool has_ignore_label_,
const int ignore_label_, Dtype* counts) {
const int channels = dim / spatial_dim; CUDA_KERNEL_LOOP(index, nthreads) {
const int n = index / spatial_dim;
const int s = index % spatial_dim;
const int label_value = static_cast<int>(label[n * spatial_dim + s]); if (has_ignore_label_ && label_value == ignore_label_) {
for (int c = ; c < channels; ++c) {
bottom_diff[n * dim + c * spatial_dim + s] = ;
}
counts[index] = ;
} else {
bottom_diff[n * dim + label_value * spatial_dim + s] -= ;
counts[index] = ;
}
}
} template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[]) {
LOG(FATAL) << this->type()
<< " Layer cannot backpropagate to label inputs.";
}
if (propagate_down[]) {
Dtype* bottom_diff = bottom[]->mutable_gpu_diff();
const Dtype* prob_data = prob_.gpu_data();
const Dtype* top_data = top[]->gpu_data();
caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff);
const Dtype* label = bottom[]->gpu_data();
const int dim = prob_.count() / outer_num_;
const int nthreads = outer_num_ * inner_num_;
// Since this memory is never used for anything else,
// we use to to avoid allocating new GPU memory.
Dtype* counts = prob_.mutable_gpu_diff();
// NOLINT_NEXT_LINE(whitespace/operators)
SoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff,
outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); Dtype valid_count = -;
// Only launch another CUDA kernel if we actually need the count of valid
// outputs.
if (normalization_ == LossParameter_NormalizationMode_VALID &&
has_ignore_label_) {
caffe_gpu_asum(nthreads, counts, &valid_count);
}
const Dtype loss_weight = top[]->cpu_diff()[] /
(get_normalizer(normalization_, valid_count) * Caffe::getThreadNum());
caffe_gpu_scal(prob_.count(), loss_weight , bottom_diff);
}
} INSTANTIATE_LAYER_GPU_FUNCS_DISABLE_FP16(SoftmaxWithLossLayer); } // namespace caffe

outer_num_:相当于batch_size

dim: c*w*h

spatial_dim(inner_num_):w*h

softmax_loss.cpp的代码:

outer_num_ = bottom[]->count(, softmax_axis_);
inner_num_ = bottom[]->count(softmax_axis_ + );

其实可以看出来count的只取前,不取后,(0, softmax_axis_)只取了0这一个轴

softmax_loss.cu 和 softmax_loss.cpp源码的更多相关文章

  1. Mavlink_main.cpp源码学习

    int mavlink_main(int argc, char *argv[]) { if (argc < 2) { usage();                               ...

  2. Caffe源码-SGDSolver类

    SGDSolver类简介 Solver类用于网络参数的更新,而SGDSolver类实现了优化方法中的随机梯度下降法(stochastic gradient descent),此外还具备缩放.正则化梯度 ...

  3. android后台截屏实现(2)--screencap源码修改

    首先找到screencap类在Android源码中的位置,/442/frameworks/base/cmds/screencap/screencap.cpp 源码如下: /* * Copyright ...

  4. 【精解】EOS标准货币体系与源码实现分析

    EOS智能合约中包含一个exchange合约,它支持用户创建一笔交易,是任何两个基本货币类型之间的交易.这个合约的作用是跨不同币种(都是EOS上的标准货币类型)的,通过各自与EOS主链价值进行锚定,然 ...

  5. duilib 使用图片素材或者算法给窗体增加阴影(源码和demo)

    转载请说明原出处,谢谢:http://blog.csdn.net/zhuhongshu/article/details/42580877 之前我写的程序使用阴影时,一直是使用codeproject网站 ...

  6. Caffe源码-Solver类

    Solver类简介 Net类中实现了网络的前向/反向计算和参数更新,而Solver类中则是对此进行进一步封装,包含可用于逐次训练网络的Step()函数,和用于求解网络的优化解的Solve()函数,同时 ...

  7. Caffe源码-InsertSplits()函数

    InsertSplits()函数 在Net初始化的过程中,存在一个特殊的修改网络结构的操作,那就是当某层的输出blob对应多个其他层的输入blob时,会在输出blob所在层的后面插入一个新的Split ...

  8. Caffe源码-Blob类

    Blob类简介 Blob是caffe中的数据传递的一个基本类,网络各层的输入输出数据以及网络层中的可学习参数(learnable parameters,如卷积层的权重和偏置参数)都是Blob类型.Bl ...

  9. Caffe源码-SyncedMemory类

    SyncedMemory类简介 最近在阅读caffe源码,代码来自BVLC/caffe,基本是参照网络上比较推荐的 Blob-->Layer-->Net-->Solver 的顺序来分 ...

随机推荐

  1. IsPostBack深入探讨

    1IsPostBack介绍 IsPostBack是Page类有一个bool类型的属性,用来判断针对当前Form的请求是第一次还是非第一次请求.当IsPostBack=true时表示非第一次请求,我们称 ...

  2. Spring Boot + Vue 前后端分离开发,前端网络请求封装与配置

    前端网络访问,主流方案就是 Ajax,Vue 也不例外,在 Vue2.0 之前,网络访问较多的采用 vue-resources,Vue2.0 之后,官方不再建议使用 vue-resources ,这个 ...

  3. ionic4+angular7+cordova开发入门

    前言 ionic是一个垮平台开发框架,可通过web技术开发出多平台的应用.但只建议开发简单应用.复杂的应用需要用到许多cordova插件,而cordova插件的更新或者移动平台的更新很可能导致插件的不 ...

  4. PostgreSQL - 模糊查询

    前言 like.not like在SQL中用于模糊查询,%表示任意个字符,_表示单个任意字符,如果需要在模糊查询中查询这两个通配符,需要用ESCAPE进行转义,如下: select * from ta ...

  5. mysql整理(个人)

    注意:以下命令都是在Linux系统下执行的: 1.验证mysql是否安装成功: mysqladmin --version 2.连接mysql服务器: mysql -u root -p 之后输入密码 3 ...

  6. [題解]luogu_P1854 花店櫥窗佈置

    來源:題解 一開始看不懂題目,一萬年了終於看懂 f [ i ] [ j ] 表示第i朵花放在第j個花瓶中最大美學值,(花是必須用完嗎?) 顯然放i-1朵花至少要放到前i-1個瓶子里,最多放到前j-1個 ...

  7. 捕获异常try-catch-finally

    异常分类 try-carch-finally出现规则 return关键字的使用 finally中慎用return,虽然语法上没错,但是由于finally的强制执行,影响逻辑上需要return的值 pa ...

  8. [arc063]F.すぬけ君の塗り絵2

    因为这题考虑可以观察一个性质,答案的下界为 \(2×(max(w,h)+1)\), 因为你至少可以空出一行或一列,因此这个矩形一定会经过 \(x=\frac{w}{2}\) 或 \(y=\frac{h ...

  9. 找回phpstorm删除文件/文件夹(phpstorm删除文件/文件夹的恢复)

    恢复phpstorm删除文件/文件夹 再开发的过程中,不小心删除了一个文件夹,后来百度了一下如何恢复,还好PHPStorm是个十分强大的编辑器,不小心删除了文件还可以恢复.一下是恢复的操作: 打开Vi ...

  10. B. Hierarchy

    http://codeforces.com/problemset/problem/17/B 用邻接矩阵建图后, 设cost[v]表示去到顶点v的最小值. 很多个人去顶点v的话,就选最小的那个就OK 然 ...