0_Simple__template
简单的 CUDA 应用模板,白送的 Sample。
▶ 源代码
//template_cpu.cpp
// Declared with C linkage so the .cu translation unit can link against it.
extern "C" void computeGold(float *, const unsigned int);

/**
 * CPU reference implementation used to validate the GPU kernel.
 *
 * Scales every element of the array in place by the array length.
 *
 * @param idata  array to scale; overwritten with the result
 * @param len    number of elements in idata (also the scale factor)
 */
void computeGold(float *idata, const unsigned int len)
{
    const float f_len = static_cast<float>(len);
    for (unsigned int i = 0; i < len; ++i)
        idata[i] *= f_len;
}
// template.cu
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>

// Evaluate a CUDA runtime call and abort with file/line and the error string
// if it did not return cudaSuccess. Ignored return codes would otherwise let
// a failure surface much later as an unrelated-looking error.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// CPU reference implementation, defined in template_cpu.cpp.
extern "C" void computeGold(float *, const unsigned int);

/**
 * Multiplies each input element by the block size, staging the data through
 * dynamic shared memory.
 *
 * Launch requirements:
 *   - Indexing uses threadIdx.x only, so the kernel is meant for a single
 *     one-dimensional block; every block of a larger grid would touch the
 *     same elements.
 *   - Dynamic shared memory of at least blockDim.x * sizeof(float) bytes
 *     must be passed as the third launch parameter.
 *   - g_idata and g_odata must each hold at least blockDim.x floats.
 *
 * @param g_idata  input array in device memory
 * @param g_odata  output array in device memory
 */
__global__ void testKernel(float *g_idata, float *g_odata)
{
    extern __shared__ float sdata[];
    const unsigned int tid = threadIdx.x;

    // Stage the input in shared memory.
    sdata[tid] = g_idata[tid];
    __syncthreads();

    // Scale by the number of threads in the block.
    sdata[tid] = (float)blockDim.x * sdata[tid];
    __syncthreads();

    // Write the result back to global memory.
    g_odata[tid] = sdata[tid];
}

/**
 * Runs testKernel on a small array, times the run with the SDK stopwatch,
 * and compares the GPU result against the CPU reference (computeGold).
 *
 * @return 0 on normal completion; exits with EXIT_FAILURE on allocation or
 *         CUDA errors.
 */
int main()
{
    printf("\n\tStart.\n");

    CUDA_CHECK(cudaSetDevice(0));

    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    // One thread per element, all in a single block. 32 is the value used by
    // the NVIDIA template sample; it must not exceed the device's maximum
    // block size because the kernel indexes by threadIdx.x only.
    const unsigned int num_threads = 32;
    const unsigned int mem_size = sizeof(float) * num_threads;

    float *h_idata = (float *)malloc(mem_size);
    float *h_odata = (float *)malloc(mem_size);
    if (h_idata == NULL || h_odata == NULL)
    {
        fprintf(stderr, "Host allocation of %u bytes failed\n", mem_size);
        free(h_idata);
        free(h_odata);
        return EXIT_FAILURE;
    }

    float *d_idata = NULL;
    float *d_odata = NULL;
    CUDA_CHECK(cudaMalloc((void **)&d_idata, mem_size));
    CUDA_CHECK(cudaMalloc((void **)&d_odata, mem_size));

    for (unsigned int i = 0; i < num_threads; ++i)
        h_idata[i] = (float)i;

    CUDA_CHECK(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice));

    // Single block of num_threads threads; mem_size bytes of dynamic shared
    // memory back the kernel's sdata[] array.
    testKernel<<<dim3(1, 1, 1), dim3(num_threads, 1, 1), mem_size>>>(d_idata, d_odata);
    // A launch does not return an error code itself; bad launch
    // configurations are reported through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    CUDA_CHECK(cudaMemcpy(h_odata, d_odata, sizeof(float) * num_threads, cudaMemcpyDeviceToHost));
    // Make sure all device work has finished before the timer is stopped.
    CUDA_CHECK(cudaDeviceSynchronize());

    sdkStopTimer(&timer);
    printf("\n\tProcessing time: %f ms\n", sdkGetTimerValue(&timer));
    sdkDeleteTimer(&timer);

    // Turn the host input into the CPU reference result (scaled in place).
    computeGold(h_idata, num_threads);
    printf("\n\tFinish, return %s.\n", compareData(h_idata, h_odata, num_threads, 0.0f, 0.0f) ? "Passed" : "Failed");

    free(h_idata);
    free(h_odata);
    CUDA_CHECK(cudaFree(d_idata));
    CUDA_CHECK(cudaFree(d_odata));

    // Wait for a key press before exiting (presumably to keep the console
    // window open when launched from an IDE).
    getchar();
    return 0;
}
▶ 输出结果:

	Start.

	Processing time: 101.169357 ms

	Finish, return Passed.
▶ 涨姿势:没有
0_Simple__template的更多相关文章
随机推荐
- centos配置ruby开发环境(转 )
转自http://my.oschina.net/u/1449160/blog/260764 1. 安装ruby 1.1 yum安装,版本旧 #yum install ruby ruby-devel ...
- HDU 2544:最短路
最短路 Time Limit: 5000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)Total Submiss ...
- rabbitmq学习(五):springboot整合rabbitmq
一.Springboot对rabbitmq的支持 springboot提供了对rabbitmq的支持,并且大大简化了rabbitmq的相关配置.在springboot中,框架帮我们将不同的交换机划分出 ...
- 思维题(两点逼近)LeetCode11 Container with Most Water
Given n non-negative integers a1, a2, ..., an, where each represents a point at coordinate (i, ai). ...
- mac OS 安装 scikit-learn
最近用来做实验,使用python时发现scikit-learn提供的库非常好用.因此,在电脑上果断下载安装: step1: sudo easy_install pip step2: sudo pip ...
- 来自官方的一些dbt 最佳实践
限制对原始数据的依赖性 您的项目将取决于存储在数据库中的原始数据.我们建议制作所谓的“基本模型”,以最大限度地减少对原始数据表的依赖性.在此约定中,基本模型可以具有以下职责: 仅选择与当前分析相关的字 ...
- 解决python2安装MySQL-python模块报错
今天电脑重装系统,所有软件都重装一遍,MySQLdb模块一直装不好,纠结了好久,终于解决,方法分享给大家. MySQLdb模块安装: 1.下载MySQL-pyhon模块,网站为:https://pyp ...
- FineUI导出Excel
1.[经验分享]导出Excel的乱码问题http://www.fineui.com/bbs/forum.php?mod=viewthread&tid=6326&highlight=Ex ...
- sublime text3安装 mac os汉化/常用模块
sublime text介绍: Sublime Text 是一个代码编辑器(Sublime Text 2是收费软件,但可以无限期试用),也是HTML和散文先进的文本编辑器.Sublime Text是由 ...
- 计算机中丢失api-ms-win-crt-runtime-l1-1-0.dll解决办法
今天开始Python学习之旅,安装配置好Python环境之后,在命令行中输入Python命令,提示“计算机中丢失api-ms-win-crt-runtime-l1-1-0.dll...” 解决办法: ...