0_Simple__template
简单的 CUDA 应用模板,白送的 Sample。
▶ 源代码
//template_cpu.cpp
extern "C" void computeGold(float *, const unsigned int);

// CPU reference implementation: multiplies every element of `idata` in place
// by the element count.
//
//   idata  array of at least `len` floats; overwritten with the result
//   len    number of elements to process (0 leaves the array untouched)
void computeGold(float *idata, const unsigned int len)
{
    // Convert once outside the loop; the scale factor is the length itself.
    const float f_len = static_cast<float>(len);

    for (unsigned int i = 0; i < len; ++i)
    {
        idata[i] *= f_len;
    }
}
// template.cu
#include <stdio.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>

extern "C" void computeGold(float *, const unsigned int);

// Multiplies each input element by the block width and writes it to the output.
//
// Launch requirements (the kernel indexes by threadIdx.x only):
//   - a single 1-D block; element i is handled by thread i
//   - g_idata and g_odata each hold at least blockDim.x floats
//   - dynamic shared memory of at least blockDim.x * sizeof(float) bytes
//     passed as the third launch parameter
__global__ void testKernel(float *g_idata, float *g_odata)
{
    // Dynamically sized scratch buffer; its size comes from the launch config.
    extern __shared__ float sdata[];

    const unsigned int tid = threadIdx.x;

    // Stage the input in shared memory.
    sdata[tid] = g_idata[tid];
    __syncthreads();

    // Scale by the number of threads in the block.
    sdata[tid] = (float)blockDim.x * sdata[tid];
    __syncthreads();

    // Write the result back to global memory.
    g_odata[tid] = sdata[tid];
}

int main()
{
    // Host driver: fills an input array, runs testKernel on it in a single
    // block, times the round trip, and compares the GPU result against the
    // CPU reference produced by computeGold().
    // Returns 0 on completion, 1 if an allocation or CUDA call fails.

    // Report a failed CUDA runtime call and leave main() with a non-zero
    // status. Buffers are not released on this path; the process is exiting.
#define CHECK_CUDA(call)                                                      \
    do {                                                                      \
        const cudaError_t err_ = (call);                                      \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error at line %d: %s\n", __LINE__,          \
                    cudaGetErrorString(err_));                                \
            return 1;                                                         \
        }                                                                     \
    } while (0)

    printf("\n\tStart.\n");

    CHECK_CUDA(cudaSetDevice(0));

    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    // NOTE(review): the literal was lost from the published listing; 32 (one
    // warp) is assumed here -- confirm against the intended configuration.
    const unsigned int num_threads = 32;
    const unsigned int mem_size = sizeof(float) * num_threads;

    float *h_idata = (float *)malloc(mem_size);
    float *h_odata = (float *)malloc(mem_size);
    if (h_idata == NULL || h_odata == NULL)
    {
        fprintf(stderr, "Host allocation of %u bytes failed.\n", mem_size);
        free(h_idata);
        free(h_odata);
        sdkDeleteTimer(&timer);
        return 1;
    }

    float *d_idata = NULL;
    float *d_odata = NULL;
    CHECK_CUDA(cudaMalloc((void **)&d_idata, mem_size));
    CHECK_CUDA(cudaMalloc((void **)&d_odata, mem_size));

    // Input is simply 0, 1, 2, ...
    for (unsigned int i = 0; i < num_threads; ++i)
    {
        h_idata[i] = (float)i;
    }
    CHECK_CUDA(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice));

    // One block of num_threads threads; the third launch parameter sizes the
    // kernel's dynamic shared-memory buffer (one float per thread).
    testKernel<<<dim3(1, 1, 1), dim3(num_threads, 1, 1), mem_size>>>(d_idata, d_odata);
    // A kernel launch returns no status itself; launch errors are read back here.
    CHECK_CUDA(cudaGetLastError());

    CHECK_CUDA(cudaMemcpy(h_odata, d_odata, mem_size, cudaMemcpyDeviceToHost));
    CHECK_CUDA(cudaDeviceSynchronize());

    sdkStopTimer(&timer);
    printf("\n\tProcessing time: %f ms\n", sdkGetTimerValue(&timer));
    sdkDeleteTimer(&timer);

    // Turn the input buffer into the CPU reference (scaled in place), then
    // compare it with the GPU output using zero tolerance arguments.
    computeGold(h_idata, num_threads);
    printf("\n\tFinish, return %s.\n", compareData(h_idata, h_odata, num_threads, 0.0f, 0.0f) ? "Passed" : "Failed");

    free(h_idata);
    free(h_odata);
    CHECK_CUDA(cudaFree(d_idata));
    CHECK_CUDA(cudaFree(d_odata));
#undef CHECK_CUDA

    // Wait for a key press before exiting.
    getchar();
    return 0;
}
▶ 输出结果:
Start.
Processing time: 101.169357 ms
Finish, return Passed.
▶ 涨姿势:没有
0_Simple__template的更多相关文章
随机推荐
- (考研)散列表和hashcode和hashmap
package tt; import java.util.HashMap; import java.util.Map; public class a0 { public static void mai ...
- Patch打补丁学习笔记
1.基本命令语法: patch [-R] {-p(n)} [--dry-run] < patch_file_name p:为path的缩写. n:指将patch文件中的path第n条’/’及其左 ...
- alpha和color key
一.alpha 1.透明度,一般取值0-255 2.Alpha 通道: Alpha 通道是为保存选择区域而专门设计的通道.在生成一个图像文件时,并不必须产生 Alpha 通道.通常它是由人们在图 ...
- MySQL--常见ALTER TABLE 操作
##================================## ## 修改表的存储引擎 ## SHOW TABLE STATUS LIKE 'TB_001' \G; ALTER TABLE ...
- Linux 'XXXXXX' "is not in the sudoers file. This incident will be reported" 解决方法
添加方法如下: 1.进入root模式su - 注意:su和-之间有空格输入当前用户的密码 2.添加写权限chmod u+w /etc/sudoers 3.将自己加入到sudoers中 gedit / ...
- windows下,java环境变量的设置,设置点击startup.bat启动tomcat
1.首先.安装好java jdk以后环境变量设置: CLASSPATH:.;%JAVA_HOME%\lib\dt.jar;%JAVA_HOME%\lib\tools.jar JAVA_HOME:C:\ ...
- nyoj 吃土豆
吃土豆 时间限制:1000 ms | 内存限制:65535 KB 难度:4 描述 Bean-eating is an interesting game, everyone owns an M* ...
- tomcat catalina.out切割脚本
shell脚本catalina.out 切割脚本...每天23.30切割.删除七天之前的日志这里3个tomcat实例(1)拷贝日志文件(2)清空日志文件*只能清空如果删除tomcat不重启不会生成新的 ...
- 日志挖掘(logminer)
转. 如何使用logminer分析Oracle 联机日志 笔者在工作中经常遇到这样的情况:为了追踪数据的变化,需要知道某个表或者表中的某行数据是什么时候被修改的,以及修改前的内容. ...
- windows7 64位下git和tortoisegit的安装和使用
githttps://github.com/git-for-windows/git/releases tortoisegit安装下载https://tortoisegit.org/download/ ...