CUDA Thread Indexing
1D grid of 1D blocks __device__ int getGlobalIdx_1D_1D()
{
return blockIdx.x *blockDim.x + threadIdx.x;
} 1D grid of 2D blocks __device__ int getGlobalIdx_1D_2D()
{
return blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
} 1D grid of 3D blocks __device__ int getGlobalIdx_1D_3D()
{
return blockIdx.x * blockDim.x * blockDim.y * blockDim.z
+ threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
} {
return blockIdx.x * blockDim.x * blockDim.y * blockDim.z
+ threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
} 2D grid of 1D blocks __device__ int getGlobalIdx_2D_1D()
{
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * blockDim.x + threadIdx.x;
return threadId;
} {
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * blockDim.x + threadIdx.x;
return threadId;
} 2D grid of 2D blocks __device__ int getGlobalIdx_2D_2D()
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
return threadId;
} 2D grid of 3D blocks __device__ int getGlobalIdx_2D_3D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x;
int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
+ (threadIdx.z * (blockDim.x * blockDim.y))
+ (threadIdx.y * blockDim.x)
+ threadIdx.x;
return threadId;
} 3D grid of 1D blocks __device__ int getGlobalIdx_3D_1D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x
+ gridDim.x * gridDim.y * blockIdx.z;
int threadId = blockId * blockDim.x + threadIdx.x;
return threadId;
} 3D grid of 2D blocks __device__ int getGlobalIdx_3D_2D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x
+ gridDim.x * gridDim.y * blockIdx.z;
int threadId = blockId * (blockDim.x * blockDim.y)
+ (threadIdx.y * blockDim.x)
+ threadIdx.x;
return threadId;
} 3D grid of 3D blocks __device__ int getGlobalIdx_3D_3D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x
+ gridDim.x * gridDim.y * blockIdx.z;
int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
+ (threadIdx.z * (blockDim.x * blockDim.y))
+ (threadIdx.y * blockDim.x)
+ threadIdx.x;
return threadId;
}
CUDA Thread Indexing的更多相关文章
- 计算机系列:CUDA 深入研究
Copyright © 1900-2016, NORYES, All Rights Reserved. http://www.cnblogs.com/noryes/ 欢迎转载,请保留此版权声明. -- ...
- CUDA 并行编程简介
前言 并行就是让计算中相同或不同阶段的各个处理同时进行.目前有很多种实现并行的手段,如多核处理器,分布式系统等.本专题的文章将主要介绍使用 GPU 实现并行的方法.参考本专题文章前请务必搭建好 CUD ...
- ### CUDA
CUDA Learning. #@author: gr #@date: 2014-04-06 #@email: forgerui@gmail.com 1. Introduction CPU和GPU的区 ...
- CUDA 计算线程索引的一般公式
CUDA thread index: int blockId = blockIdx.z * (gridDim.x*gridDim.y) + blockIdx.y ...
- 第二篇:CUDA 并行编程简介
前言 并行就是让计算中相同或不同阶段的各个处理同时进行. 目前有很多种实现并行的手段,如多核处理器,分布式系统等,而本专题的文章将主要介绍使用 GPU 实现并行的方法. 参考本专题文章前请务必搭建好 ...
- CUDA 内存统一分析
CUDA 内存统一分析 关于CUDA 编程的基本知识,如何编写一个简单的程序,在内存中分配两个可供 GPU 访问的数字数组,然后将它们加在 GPU 上. 本文介绍内存统一,这使得分配和访问系统中任何处 ...
- Caffe 编译
Compilation Now that you have the prerequisites, edit your Makefile.config to change the paths for y ...
- 计算机组成原理 — GPU 图形处理器
目录 文章目录 目录 显卡 GPU GPU 与深度学习 GPU 与 CPU 体系结构的区别 GPU 显存与 CPU 主存的区别 GPU 与 CPU 之间的数据交互方式 GPU 的体系结构 GPU 的工 ...
- [源码解析] Pytorch 如何实现后向传播 (3)---- 引擎动态逻辑
[源码解析] Pytorch 如何实现后向传播 (3)---- 引擎动态逻辑 目录 [源码解析] Pytorch 如何实现后向传播 (3)---- 引擎动态逻辑 0x00 摘要 0x01 前文回顾 0 ...
随机推荐
- Cocoa Drawing
Graphics Contexts Graphics contexts are a fundamental part of the drawing infrastructure in Cocoa ap ...
- poj1141 区间dp+路径
//Accepted 176 KB 47 ms //感谢大神们为我们这群渣渣铺平前进的道路!! //用scanf("%s",s)!=EOF WA到死 #include <cs ...
- kali linux karmetasploit配置
原理分析:http://www.freebuf.com/articles/77055.html 转官方说明:https://www.offensive-security.com/metasploit- ...
- linux下的文件权限管理
权限管理有两个层面 第一层区分用户:文件属主(u), 组用户(g), 其它(o) 第二层区分权限:读(r),写(w),可执行(x) 这两个层次构成文件权限管理的二维结构 u g ...
- 使用Wireshark捕捉USB通信数据
USB,是英文Universal Serial Bus(通用串行总线)的缩写,而其中文简称为“通串线”,是一个外部总线标准,用于规范电脑与外部设备的连接和通讯.USB接口支持设备的即插即用和热插拔功能 ...
- Ubuntu 环境变量及 ADB 配置
Ubuntu Linux 环境变量 同Windows一样,Ubuntu Linux系统包含两类环境变量:系统环境变量和用户环境变量.系统环境变量对所有系统用户都有效,用户环境变量仅仅对当前的用户有效. ...
- Ubuntu 启动黑屏解决
要sudo apt-get install xserver...................balabala... then.... sudo gedit /boot/grub/grub.cf ...
- iOS提交AppStore被拒原因
1. Terms and conditions(法律与条款) 1.1 As a developer of applications for the App Store you are bound by ...
- JS对于Android和IOS平台的点击响应的适配
IOS点击事件 Click 300毫秒点击延迟 解决办法: 参考:http://cuiqingcai.com/1687.html 可判断设备 if (/(iPhone|iPad|iPod|iOS)/i ...
- Linux性能监控
转自:http://blog.csdn.net/chosen0ne/article/details/8200737 linux性能监控,就是要监控系统的各个子系统是否正常.linux主要的子系统包括: ...