1D grid of 1D blocks

__device__ int getGlobalIdx_1D_1D()
{
return blockIdx.x *blockDim.x + threadIdx.x;
} 1D grid of 2D blocks __device__ int getGlobalIdx_1D_2D()
{
return blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
} 1D grid of 3D blocks __device__ int getGlobalIdx_1D_3D()
{
return blockIdx.x * blockDim.x * blockDim.y * blockDim.z
+ threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
} {
return blockIdx.x * blockDim.x * blockDim.y * blockDim.z
+ threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
} 2D grid of 1D blocks __device__ int getGlobalIdx_2D_1D()
{
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * blockDim.x + threadIdx.x;
return threadId;
} {
int blockId = blockIdx.y * gridDim.x + blockIdx.x;
int threadId = blockId * blockDim.x + threadIdx.x;
return threadId;
} 2D grid of 2D blocks __device__ int getGlobalIdx_2D_2D()
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
return threadId;
} 2D grid of 3D blocks __device__ int getGlobalIdx_2D_3D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x;
int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
+ (threadIdx.z * (blockDim.x * blockDim.y))
+ (threadIdx.y * blockDim.x)
+ threadIdx.x;
return threadId;
} 3D grid of 1D blocks __device__ int getGlobalIdx_3D_1D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x
+ gridDim.x * gridDim.y * blockIdx.z;
int threadId = blockId * blockDim.x + threadIdx.x;
return threadId;
} 3D grid of 2D blocks __device__ int getGlobalIdx_3D_2D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x
+ gridDim.x * gridDim.y * blockIdx.z;
int threadId = blockId * (blockDim.x * blockDim.y)
+ (threadIdx.y * blockDim.x)
+ threadIdx.x;
return threadId;
} 3D grid of 3D blocks __device__ int getGlobalIdx_3D_3D()
{
int blockId = blockIdx.x
+ blockIdx.y * gridDim.x
+ gridDim.x * gridDim.y * blockIdx.z;
int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
+ (threadIdx.z * (blockDim.x * blockDim.y))
+ (threadIdx.y * blockDim.x)
+ threadIdx.x;
return threadId;
}

  

CUDA Thread Indexing的更多相关文章

  1. 计算机系列:CUDA 深入研究

    Copyright © 1900-2016, NORYES, All Rights Reserved. http://www.cnblogs.com/noryes/ 欢迎转载,请保留此版权声明. -- ...

  2. CUDA 并行编程简介

    前言 并行就是让计算中相同或不同阶段的各个处理同时进行.目前有很多种实现并行的手段,如多核处理器,分布式系统等.本专题的文章将主要介绍使用 GPU 实现并行的方法.参考本专题文章前请务必搭建好 CUD ...

  3. ### CUDA

    CUDA Learning. #@author: gr #@date: 2014-04-06 #@email: forgerui@gmail.com 1. Introduction CPU和GPU的区 ...

  4. CUDA 计算线程索引的一般公式

    CUDA thread index: int blockId = blockIdx.z * (gridDim.x*gridDim.y)                    + blockIdx.y ...

  5. 第二篇:CUDA 并行编程简介

    前言 并行就是让计算中相同或不同阶段的各个处理同时进行. 目前有很多种实现并行的手段,如多核处理器,分布式系统等,而本专题的文章将主要介绍使用 GPU 实现并行的方法. 参考本专题文章前请务必搭建好 ...

  6. CUDA 内存统一分析

    CUDA 内存统一分析 关于CUDA 编程的基本知识,如何编写一个简单的程序,在内存中分配两个可供 GPU 访问的数字数组,然后将它们加在 GPU 上. 本文介绍内存统一,这使得分配和访问系统中任何处 ...

  7. Caffe 编译

    Compilation Now that you have the prerequisites, edit your Makefile.config to change the paths for y ...

  8. 计算机组成原理 — GPU 图形处理器

    目录 文章目录 目录 显卡 GPU GPU 与深度学习 GPU 与 CPU 体系结构的区别 GPU 显存与 CPU 主存的区别 GPU 与 CPU 之间的数据交互方式 GPU 的体系结构 GPU 的工 ...

  9. [源码解析] Pytorch 如何实现后向传播 (3)---- 引擎动态逻辑

    [源码解析] Pytorch 如何实现后向传播 (3)---- 引擎动态逻辑 目录 [源码解析] Pytorch 如何实现后向传播 (3)---- 引擎动态逻辑 0x00 摘要 0x01 前文回顾 0 ...

随机推荐

  1. How To Create A Struts 2 Web Application

    以简单登录为例 1.创建一个Dynamic Web projec项目记得勾选Generate web.xml deployment dsecriptor 2.引入Struts 2工程所需运行库文件 解 ...

  2. kali linux karmetasploit配置【续】

    Karmetasploit In Action https://www.offensive-security.com/metasploit-unleashed/karmetasploit-action ...

  3. 2016-1-5第一个完整APP 私人通讯录的实现 1:登录界面及跳转的简单实现2

    ---恢复内容开始--- 实际效果如上 一:Segue的学习 1.什么是Segue: Storyboard上每一根用来界面跳转的线,都是一个UIStoryboardSegue对象(简称Segue)   ...

  4. Spring MVC配置文件解释

    <?xml version="1.0" encoding="UTF-8"?><beans xmlns="http://www.spr ...

  5. Get start with Android development

    Firstly we should install the right version of JDK and JRE, there are two version of ADK for differe ...

  6. HDOJ-三部曲一(搜索、数学)-1002-Children of the Candy Corn

    Children of the Candy Corn Time Limit : 2000/1000ms (Java/Other)   Memory Limit : 131072/65536K (Jav ...

  7. 判断字符串中是否有SQL攻击代码

    判断一个输入框中是否有SQL攻击代码 public const string SQLSTR2 = @"exec|cast|convert|set|insert|select|delete|u ...

  8. magento缓存系列详解:clean cache

    cache是一个很大的概念,涉及的内容方方面面,magento cache是基于zend的,如果你对zend cache理解很深的话,相信magento cache也不再话下,本篇文章着重介绍Flus ...

  9. 作业6 分析项目的NABCD和项目的产品Backlog

    项目scrum:邵家文 NABCD模型分析 N(Need 需求)根据采访用户下面可以得出用户的基本需求:1.小孩说:我想要做适合自己能力的四则运算2.小孩说:我想这个四则运算软件里面的题目越做越提高自 ...

  10. javascript中 IE事件处理程序中try catch用法

    本例是学习中笔记 望指正批评! <input id='b1' type='button' value='按钮'/> <script> window.onload=functio ...