CUDA vector addition

http://webgpu.hwu.crhc.illinois.edu/

// MP 1

#include    <wb.h>

/*
 * Element-wise vector addition kernel: out[k] = in1[k] + in2[k].
 *
 * Each thread handles at most one element. The element index is built from
 * the x components of the block index, block size and thread index, so the
 * kernel expects a 1-D launch along x.
 *
 *   in1, in2  input arrays, read at the thread's index
 *   out       output array, written at the thread's index
 *   len       number of elements; threads whose index is >= len do nothing
 */
__global__ void vecAdd(float * in1, float * in2, float * out, int len) {
    //@@ Insert code to implement vector addition here
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Surplus threads past the end of the data exit without touching memory.
    if (idx >= len) {
        return;
    }

    out[idx] = in1[idx] + in2[idx];
}

/* Threads per block for the 1-D launch of vecAdd. */
#define VECADD_BLOCK_SIZE 256

/*
 * Evaluates a CUDA runtime call and, on failure, logs the failing statement
 * together with the CUDA error string and returns -1 from the enclosing
 * function. Intended for use inside main() only; on the failure path
 * outstanding allocations are left for process teardown to reclaim.
 */
#define VECADD_CUDA_CHECK(stmt)                                              \
    do {                                                                     \
        cudaError_t vecAddErr_ = (stmt);                                     \
        if (vecAddErr_ != cudaSuccess) {                                     \
            wbLog(ERROR, "Failed to run stmt ", #stmt);                      \
            wbLog(ERROR, "Got CUDA error ... ",                              \
                  cudaGetErrorString(vecAddErr_));                           \
            return -1;                                                       \
        }                                                                    \
    } while (0)

/*
 * Host driver for the vector addition exercise.
 *
 * Reads two input vectors through the wb library, copies them to the GPU,
 * launches vecAdd over all elements, copies the result back and hands it to
 * wbSolution.
 *
 * Returns 0 on success and -1 if the inputs are inconsistent, a host
 * allocation fails, or any CUDA call reports an error.
 */
int main(int argc, char ** argv) {
    wbArg_t args;
    int inputLength;
    int inputLength2;
    float * hostInput1;
    float * hostInput2;
    float * hostOutput;
    float * deviceInput1;
    float * deviceInput2;
    float * deviceOutput;

    args = wbArg_read(argc, argv);

    wbTime_start(Generic, "Importing data and creating memory on host");
    // Input files are addressed by position: 0 is the first operand, 1 the second.
    hostInput1 = (float *) wbImport(wbArg_getInputFile(args, 0), &inputLength);
    hostInput2 = (float *) wbImport(wbArg_getInputFile(args, 1), &inputLength2);

    // The kernel indexes both inputs with the same range, so the lengths
    // must agree; otherwise the shorter vector would be read out of bounds.
    if (inputLength != inputLength2) {
        wbLog(ERROR, "Input vectors differ in length: ", inputLength,
              " vs ", inputLength2);
        return -1;
    }

    hostOutput = (float *) malloc(inputLength * sizeof(float));
    if (hostOutput == NULL && inputLength > 0) {
        wbLog(ERROR, "Failed to allocate host output buffer");
        return -1;
    }
    wbTime_stop(Generic, "Importing data and creating memory on host");

    wbLog(TRACE, "The input length is ", inputLength);

    const size_t numBytes = sizeof(float) * inputLength;

    wbTime_start(GPU, "Allocating GPU memory.");
    //@@ Allocate GPU memory here
    VECADD_CUDA_CHECK(cudaMalloc((void**)&deviceInput1, numBytes));
    VECADD_CUDA_CHECK(cudaMalloc((void**)&deviceInput2, numBytes));
    VECADD_CUDA_CHECK(cudaMalloc((void**)&deviceOutput, numBytes));
    wbTime_stop(GPU, "Allocating GPU memory.");

    wbTime_start(GPU, "Copying input memory to the GPU.");
    //@@ Copy memory to the GPU here
    VECADD_CUDA_CHECK(cudaMemcpy(deviceInput1, hostInput1, numBytes,
                                 cudaMemcpyHostToDevice));
    VECADD_CUDA_CHECK(cudaMemcpy(deviceInput2, hostInput2, numBytes,
                                 cudaMemcpyHostToDevice));
    wbTime_stop(GPU, "Copying input memory to the GPU.");

    //@@ Initialize the grid and block dimensions here
    // Ceiling division so a trailing partial block is still covered; this
    // form also yields one block (not zero) when inputLength is 0, which
    // keeps the launch configuration valid.
    dim3 DimGrid((inputLength - 1) / VECADD_BLOCK_SIZE + 1, 1, 1);
    dim3 DimBlock(VECADD_BLOCK_SIZE, 1, 1);

    wbTime_start(Compute, "Performing CUDA computation");
    //@@ Launch the GPU Kernel here
    vecAdd<<<DimGrid,DimBlock>>>(deviceInput1, deviceInput2, deviceOutput,
                                 inputLength);
    // A launch does not return a status; configuration errors are reported
    // by cudaGetLastError and execution errors by the following synchronize.
    VECADD_CUDA_CHECK(cudaGetLastError());
    VECADD_CUDA_CHECK(cudaDeviceSynchronize());
    wbTime_stop(Compute, "Performing CUDA computation");

    wbTime_start(Copy, "Copying output memory to the CPU");
    //@@ Copy the GPU memory back to the CPU here
    VECADD_CUDA_CHECK(cudaMemcpy(hostOutput, deviceOutput, numBytes,
                                 cudaMemcpyDeviceToHost));
    wbTime_stop(Copy, "Copying output memory to the CPU");

    wbTime_start(GPU, "Freeing GPU Memory");
    //@@ Free the GPU memory here
    VECADD_CUDA_CHECK(cudaFree(deviceInput1));
    VECADD_CUDA_CHECK(cudaFree(deviceInput2));
    VECADD_CUDA_CHECK(cudaFree(deviceOutput));
    wbTime_stop(GPU, "Freeing GPU Memory");

    wbSolution(args, hostOutput, inputLength);

    free(hostInput1);
    free(hostInput2);
    free(hostOutput);

    return 0;
}

cuda vector addition的更多相关文章

CUDA Samples：Vector Add
以下CUDA sample是分别用C++和CUDA实现的两向量相加操作,参考CUDA 8.0中的sample:C:\ProgramData\NVIDIA Corporation\CUDA Sample ...
6.2 CUDA streams
stream是什么 NVIDIA给出的解释是:A sequence of operations that execute in issue-order on the GPU. 可以理解成在GPU上执 ...
Vector Math for 3D Computer Graphics (Bradley Kjell 著)
https://chortle.ccsu.edu/VectorLessons/index.html Chapter0 Points and Lines (已看) Chapter1 Vectors, P ...
【读书笔记】：MIT线性代数(2):Vector Spaces and Subspaces
Vector Space: R1, R2, R3,R4 , .... Each space Rn consists of a whole collection of vectors. R5 conta ...
2.1CUDA-Thread
在HOST端我们会分配block的dimension, grid的dimension.但是对应到实际的硬件是如何执行这些硬件的呢? 如下图: launch kernel 执行一个grid. 一个Gri ...
8.3 MPI
MPI 模型如图MPI的各个运算节点是分布式的.每一个节点可以视为是一个“Thread”,但这里的不同之处在于这些节点没有所谓的共享内存,或者说Global Memory.所以,在后面也会看到,一般 ...
QuantStart量化交易文集
Over the last seven years more than 200 quantitative finance articles have been written by members o ...
Kubernetes 教程：在 Containerd 容器中使用 GPU
原文链接:https://fuckcloudnative.io/posts/add-nvidia-gpu-support-to-k8s-with-containerd/ 前两天闹得沸沸扬扬的事件不知道 ...
现代3D图形编程学习-基础简介(1) (译)
本书系列现代3D图形编程学习基础简介并不像本书的其他章节,这章内容没有相关的源代码或是项目.本章,我们将讨论向量,图形渲染理论,以及OpenGL. 向量在阅读这本书的时候,你需要熟悉代数和几何 ...

随机推荐

Java乔晓松-android中调用系统拍照功能并显示拍照的图片
android中调用系统拍照功能并显示拍照的图片如果你是拍照完,利用onActivityResult获取data数据,把data数据转换成Bitmap数据,这样获取到的图片,是拍照的照片的缩略图代 ...
Axuer 网页
http://www.webppd.com/axure/
银行家算法java实现
关于银行家算法的理论知识,课本或者百度上有好多资料,我就不再多说了,这里把我最近写的银行家算法的实现带码贴出来. 由于这是我们的一个实验,对系统资源数和进程数都指定了,所以这里也将其指定了,其中系统资 ...
在VC/MFC中嵌入Google地图——图文并茂
近期须要实验室须要将在无人机地面站中嵌入地图,在网上找了非常多资料,最终有些眉目了, 首先.做这个须要用到的知识有.MFC控件.MFC类库.JavaScript脚本语言.Google API.Goog ...
Windows下一个AndroidStudio 正在使用Git(AndroidStudio工程GitHub关联)
前提条件 : 1. 设备 Git client 下载链接 2. 有着 GitHub 账号 (假设你已经有了一些git基础, 假设还一点都不会, 请去找其它加成学习) AndroidStudio项目公布 ...
SQL server 表数据改变触发发送邮件
今天遇到一个问题,原有生产系统正在健康运行,现需要监控一张数据表,当增加数据的时候,给管理员发送邮件. 领到这个需求后,有同事提供方案:写触发器触发外部应用程序.这是个大胆的想法啊,从来没写过这样的触 ...
Hello World! 2010年山东省第一届ACM大学生程序设计竞赛
Hello World! Time Limit: 1000MS Memory limit: 65536K 题目描述 We know that Ivan gives Saya three problem ...
WPF技术触屏上的应用系列（二）: 嵌入百度地图、API调用及结合本地数据库在地图上进行自定义标点的实现
原文:WPF技术触屏上的应用系列(二): 嵌入百度地图.API调用及结合本地数据库在地图上进行自定义标点的实现去年某客户单位要做个大屏触屏应用,要对档案资源进行展示之用.客户端是Window7操作系 ...
【android】优秀的UI资源站点集合
1.http://ionicons.com/ 这个站点的图标能满足大部分人的需求.里面包括了经常使用的android风格的图标 ios7风格的图标以及一些社会化分享图标,总共500个左右.githu ...
TI C66x DSP 系统events及其应用 - 5.8（ISTP）
中断服务表指针ISTP(Interrupt Service Table Pointer)位置寄存器用于定位的中断服务例程,那ISTP去哪里找要运行的程序,ISTP(当中的ISTB字段)就是指向IST表 ...

cuda vector addition

cuda vector addition的更多相关文章

随机推荐

热门专题