OpenCL 存储器次序的验证
▶ 以下是《OpenCL异构并行编程实战》第 224 页的代码,先放上来;目前尚未实际运行,等新设备到了再执行验证。
//kernel.cl
// Program-scope atomic in global memory. Work-item 0 sets it to 1; nothing in
// this file resets it afterwards.
// NOTE(review): every numeric literal was missing from the listing this code
// was recovered from. Writer id 0 and the stored value 1 follow the original
// comment ("work-item 0 tries to write 1"); the initial value 0, reader id 1,
// the dst[] slot numbers and SPIN_LIMIT are reconstructions -- confirm against
// the book listing.
__global volatile atomic_int globalAtom = ATOMIC_VAR_INIT(0);

// Upper bound on polling iterations so the reader cannot spin forever.
// NOTE(review): assumed value.
#define SPIN_LIMIT 1000

// Visibility test using the explicit atomic API (seq_cst ordering; work-group
// scope for the local atomic, device scope for the global atomic).
//
// Work-item 0 stores 1 into a local and a global atomic; work-item 1 polls
// both and reports what it observed:
//   dst[0] - 1 if the local store was observed, else 0
//   dst[1] - number of polling iterations spent on the local atomic
//   dst[2] - 1 if the global store was observed, else 0
// Work-items 0 and 1 must be in the same work-group, because they communicate
// through a __local variable.
__kernel void memoryOrderTest01(__global int *dst)
{
    __local volatile atomic_int localAtom;
    // atomic_init does not avoid data races, so exactly one work-item per
    // group initialises the object; the barrier publishes it to the others.
    if (get_local_id(0) == 0)
        atomic_init(&localAtom, 0);
    const int gid = get_global_id(0);
    work_group_barrier(CLK_LOCAL_MEM_FENCE);

    if (gid == 0)   // writer: work-item 0 tries to write 1
    {
        atomic_store_explicit(&localAtom, 1, memory_order_seq_cst, memory_scope_work_group);
        atomic_store_explicit(&globalAtom, 1, memory_order_seq_cst, memory_scope_device);
    }

    if (gid == 1)   // reader
    {
        int a, count;
        for (a = 0, count = 0; a == 0 && count < SPIN_LIMIT; count++)
            a = atomic_load_explicit(&localAtom, memory_order_seq_cst, memory_scope_work_group);
        dst[0] = !!a;
        dst[1] = count;

        // Reset the observed value: without this the loop below is skipped
        // whenever the local poll succeeded, and dst[2] would merely repeat
        // the local result instead of reflecting globalAtom.
        a = 0;
        for (count = 0; a == 0 && count < SPIN_LIMIT; count++)
            a = atomic_load_explicit(&globalAtom, memory_order_seq_cst, memory_scope_device);
        dst[2] = !!a;
    }

    // The original author notes this barrier is required to expose work-item
    // 0's side effects to the other work-items.
    // NOTE(review): the flags argument was missing in the source; both fences
    // are requested here.
    work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
}

// Same experiment as memoryOrderTest01, but using the non-explicit
// atomic_store / atomic_load calls instead of the *_explicit variants.
// Output layout in dst[] is identical to memoryOrderTest01.
__kernel void memoryOrderTest02(__global int *dst)
{
    __local volatile atomic_int localAtom;
    // One initialising work-item per group, then a barrier (see above).
    if (get_local_id(0) == 0)
        atomic_init(&localAtom, 0);
    const int gid = get_global_id(0);
    work_group_barrier(CLK_LOCAL_MEM_FENCE);

    if (gid == 0)   // writer
    {
        atomic_store(&localAtom, 1);
        atomic_store(&globalAtom, 1);
    }

    if (gid == 1)   // reader
    {
        int a, count;
        for (a = 0, count = 0; a == 0 && count < SPIN_LIMIT; count++)
            a = atomic_load(&localAtom);
        dst[0] = !!a;
        dst[1] = count;

        a = 0;      // poll the global atomic independently of the local result
        for (count = 0; a == 0 && count < SPIN_LIMIT; count++)
            a = atomic_load(&globalAtom);
        dst[2] = !!a;
    }

    work_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
}
//main.c
#include <stdio.h>
#include <stdlib.h>
#include <cl.h> /* NOTE(review): Khronos headers are normally included as <CL/cl.h>; kept as written, presumably the include path points into the CL directory */

/* Path of the OpenCL C file that is read and compiled at run time. */
const char *sourceCode = "D:/Code/kernel.cl";

/*
 * NOTE(review): every numeric literal was missing from the listing this file
 * was recovered from. The values below are reconstructions, not the author's
 * originals -- confirm against the book listing.
 */
enum
{
    N_ELEMENT = 1024,   /* global work size and number of ints in the buffer (assumed) */
    LOCAL_SIZE = 64,    /* work-group size; must be >= 2 so that work-items 0 and 1 share a group (assumed) */
    RESULT_LOCAL = 0,   /* buffer slot: local-memory store observed (assumed layout) */
    RESULT_COUNT = 1,   /* buffer slot: polling iterations (assumed layout) */
    RESULT_GLOBAL = 2   /* buffer slot: global-memory store observed (assumed layout) */
};

/*
 * Reads the whole text file `kernelPath` into a freshly allocated,
 * NUL-terminated buffer.
 *
 * kernelPath  path of the file to read
 * source      out-parameter; receives the malloc'ed buffer, which the caller
 *             must free(). It is now a char ** because the original char *
 *             was passed by value, so the allocation never reached the caller.
 *
 * Returns the number of characters read, excluding the terminating NUL.
 * Prints a message and terminates the process with exit(-1) on any failure.
 */
int readSource(const char *kernelPath, char **source)
{
    FILE *fp = NULL;
    long size;
    char *text;

    if (fopen_s(&fp, kernelPath, "rb") != 0 || fp == NULL)
    {
        printf("Open kernel file failed\n");
        exit(-1);
    }
    if (fseek(fp, 0, SEEK_END) != 0)
    {
        printf("Seek end of file failed\n");
        fclose(fp);
        exit(-1);
    }
    if ((size = ftell(fp)) < 0)
    {
        printf("Get file position failed\n");
        fclose(fp);
        exit(-1);
    }
    rewind(fp);

    /* One extra byte for the terminating NUL. */
    if ((text = (char *)malloc((size_t)size + 1)) == NULL)
    {
        printf("Allocate space failed\n");
        fclose(fp);
        exit(-1);
    }
    if (fread(text, 1, (size_t)size, fp) != (size_t)size)
    {
        printf("Read kernel file failed\n");
        free(text);
        fclose(fp);
        exit(-1);
    }
    fclose(fp);

    text[size] = '\0';
    *source = text;
    return (int)size;
}

/* Prints which OpenCL call failed and terminates, matching readSource's exit(-1) style. */
static void checkStatus(cl_int status, const char *what)
{
    if (status != CL_SUCCESS)
    {
        printf("%s failed, error code %d\n", what, (int)status);
        exit(-1);
    }
}

/*
 * Builds kernel.cl for the first device of the first platform, runs one of
 * the memory-order test kernels once and prints the three result values the
 * kernel wrote to the output buffer.
 */
int main()
{
    const size_t dataSize = N_ELEMENT * sizeof(cl_int);
    cl_int host[N_ELEMENT] = { 0 };
    cl_int status;

    /* Platform, device, context and queue. */
    cl_platform_id platform;
    checkStatus(clGetPlatformIDs(1, &platform, NULL), "clGetPlatformIDs");
    cl_device_id device[1];
    checkStatus(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, device, NULL), "clGetDeviceIDs");
    cl_context_properties contextProp[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
    /* No notification callback is registered, so user_data is NULL. */
    cl_context context = clCreateContext(contextProp, 1, device, NULL, NULL, &status);
    checkStatus(status, "clCreateContext");
    cl_command_queue queue = clCreateCommandQueueWithProperties(context, device[0], NULL, &status);
    checkStatus(status, "clCreateCommandQueueWithProperties");

    cl_mem buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status);
    checkStatus(status, "clCreateBuffer");

    /* Load and build the kernel source; atomics with scopes need OpenCL C 2.0. */
    char *source = NULL;
    size_t sourceLength = (size_t)readSource(sourceCode, &source);
    const char *sourceText = source;
    cl_program program = clCreateProgramWithSource(context, 1, &sourceText, &sourceLength, &status);
    checkStatus(status, "clCreateProgramWithSource");
    status = clBuildProgram(program, 1, device, "-cl-std=CL2.0", NULL, NULL);
    if (status != CL_SUCCESS)
    {
        /* Query the log size first so an arbitrarily long log fits. */
        size_t logSize = 0;
        clGetProgramBuildInfo(program, device[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        char *info = (char *)malloc(logSize + 1);
        if (info != NULL)
        {
            clGetProgramBuildInfo(program, device[0], CL_PROGRAM_BUILD_LOG, logSize, info, NULL);
            info[logSize] = '\0';
            printf("Build log:\n%s\n", info);
            free(info);
        }
        checkStatus(status, "clBuildProgram");
    }

    /* kernel.cl defines memoryOrderTest01 and memoryOrderTest02; the original
       name "memoryOrderTest" matches neither. Substitute "memoryOrderTest02"
       to run the non-explicit variant. */
    cl_kernel kernel = clCreateKernel(program, "memoryOrderTest01", &status);
    checkStatus(status, "clCreateKernel");
    /* The argument value is the cl_mem handle, so its address is passed. */
    checkStatus(clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer), "clSetKernelArg");

    size_t globalSize = N_ELEMENT, localSize = LOCAL_SIZE;
    checkStatus(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL),
                "clEnqueueNDRangeKernel");
    checkStatus(clFinish(queue), "clFinish");

    /* Blocking read of the results. */
    checkStatus(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, dataSize, host, 0, NULL, NULL),
                "clEnqueueReadBuffer");

    printf("Local memory result: %d, global memory result: %d, waiting count: %d\n",
           (int)host[RESULT_LOCAL], (int)host[RESULT_GLOBAL], (int)host[RESULT_COUNT]);

    /* Release in reverse order of creation. */
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseMemObject(buffer);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    free(source);

    getchar();
    return 0;
}
OpenCL 存储器次序的验证的更多相关文章
- CUDA与OpenCL架构
CUDA与OpenCL架构 目录 CUDA与OpenCL架构 目录 1 GPU的体系结构 1.1 GPU简介 1.2 GPU与CPU的差异 2 CUDA架构 2.1 硬件架构 2.1.1 GPU困境 ...
- [区块链] 共识算法之争(PBFT,Raft,PoW,PoS,DPoS,Ripple)
近几天对区块链中几种常见的共识机制(PBFT,Raft,PoW,PoS,DPoS,Ripple)进行了总结.尽量使用简单易懂语言,篇幅较大,想了解的可以只读每个算法介绍中前边的原理.本篇文章主要参考& ...
- 使用Powermock和mockito来进行单元测试
转载:http://blog.csdn.net/u013428664/article/details/44095889 简介 Mockito是一个流行的Mocking框架.它使用起来简单,学习成本很低 ...
- Nepxion Discovery【探索】微服务企业级解决方案
Nepxion Discovery[探索]微服务企业级解决方案] Nepxion Discovery[探索]使用指南,基于Spring Cloud Greenwich版.Finchley版和Hoxto ...
- ISO/IEC 9899:2011 条款6.5.16——赋值操作符
6.5.16 赋值操作符 语法 1.assignment-expression: conditional-expression unary-expression assignment-opera ...
- ISO/IEC 9899:2011 条款6.5.2——后缀操作符
6.5.2 后缀操作符 语法 1.postfix-expression: primary-expression postfix-expression [ expression ] p ...
- 区块链知识博文1: 共识算法之争(PBFT,Raft,PoW,PoS,DPoS,Ripple)
注:这是本人读到的关于共识算法最全和最好的分享博文,系统的介绍了拜占庭容错技术以及共识算法的原理和常用共识算法,原文链接请见后. 目录 一.拜占庭容错技术(Byzantine Fault Tolera ...
- OpenCL使用CL_MEM_USE_HOST_PTR存储器对象属性与存储器映射
随着OpenCL的普及,现在有越来越多的移动设备以及平板.超级本等都支持OpenCL异构计算.而这些设备与桌面计算机.服务器相比而言性能不是占主要因素的,反而能耗更受人关注.因此,这些移动设备上的GP ...
- 关于OpenCL中三重循环的执行次序
源自OpenGPU社区的一个帖子的讨论: 一个有意思的openCL问题
随机推荐
- 20155310 2016-2017-2 《Java程序设计》第八周学习总结
20155310 2016-2017-2 <Java程序设计>第八周学习总结 教材学习内容总结 第十五章 通用API 通用API •日志:日志对信息安全意义重大,审计.取证.入侵检验等都会 ...
- Codeforces Round #224 (Div. 2) A. Ksenia and Pan Scales
A. Ksenia and Pan Scales time limit per test 1 second memory limit per test 256 megabytes input stan ...
- 斐波那契数列的5种python实现写法
斐波那契数列的5种python写法 斐波那契数列(Fibonacci sequence),又称黄金分割数列.因数学家列昂纳多·斐波那契(Leonardoda Fibonacci)以兔子繁殖 ...
- 【BZOJ1703】【usaco2007margold】ranking the cows 奶牛的魅力排名
想的时间比较长所以看题解了= = 原题: Fj有N(N<=1000)头牛,每头牛都有独一无二的正整数 魅力值,Fj想让他们按 魅力值排序. Fj已经知道M(1<=M<=10000)对 ...
- 类名.fromObject(obj)静态方法
- MySQL Disk--NAND Flash原理
====================================================== NAND Flash最小存储单元: 写数据操作: 通过对控制闸(Control Gate) ...
- OASGraph 转换rest api graphql 试用
创建rest api lb4 appdemo 参考提示即可 安装 OASGraph git clone https://github.com/strongloop/oasgraph.git cd oa ...
- Linux下的Nginx、php、mysql、apache部署
待补充,先搞几个博客链接: https://www.cnblogs.com/Candies/p/8282934.html http://sujianjob.com/2017/12/18/yum%E5% ...
- Oracle 11gR2 RAC 常用维护操作 说明
一.启动和停止集群 在Oracle 11gR2 下的RAC,架构发生了变化.CRS的信息也是放在ASM 实例里的,所以要关asm,必须关闭crs, 如果还使用了acfs的话,一关crs那么acfs里的 ...
- mac系统下 Homebrew 使用
brew 又叫 Homebrew,是一款Mac OS平台下的软件包管理工具. brew 常用命令: 命令 作用 brew install [package] 安装包 brew uninstall [p ...