参考文献:

http://blog.csdn.net/neoxmu/article/details/8866928

我安装的是CUDA5.5,代码如下:

//#include "stdafx.h"
#include "CL\cl.h"
#include <stdlib.h>
#include <stdio.h> #pragma comment(lib,"OpenCL.lib") #define CL_VERBOSE
void openclRetTackle(cl_int retValue, char* processInfo){
if(retValue!=CL_SUCCESS){
#if (defined CL_DEBUG) || (defined CL_VERBOSE)
printf("%s Error!\n",processInfo);
#endif
exit(-1);
}else{
#ifdef CL_VERBOSE
printf("%s Success!\n",processInfo);
#endif
}
} cl_platform_id cpPlatform;
cl_device_id cdDevice;
cl_context cxGPUContext;
cl_command_queue cqCommandQueue; int openclInit()
{
cl_int ret;
//得到平台ID
openclRetTackle( clGetPlatformIDs(1, &cpPlatform, NULL), "clGetPlatFormIDs");
//得到GPU设备ID
openclRetTackle( clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU,1,&cdDevice,NULL), "clGetDeviceIDs");
//获取GPU设备上下文
cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ret);
openclRetTackle( ret , "clCreateContext" );
//开辟任务队列
cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ret);
openclRetTackle( ret , "clCreateCommandQueue");
return CL_SUCCESS;
} int run()
{
openclInit();
system("pause");
return 0;
}
<span style="font-family:Microsoft YaHei;font-size:18px;">//#include "stdafx.h"
#include <stdio.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <vector>
#include <CL/cl.h>
#include <string> #pragma comment(lib,"OpenCL.lib") int print_device()
{
cl_int err;
cl_uint num;
err = clGetPlatformIDs(0, 0, &num);
if(err != CL_SUCCESS)
{
std::cerr << "Unable to get platforms\n";
return 0;
}
std::vector<cl_platform_id> platforms(num);
err = clGetPlatformIDs(num, &platforms[0], &num);
if(err != CL_SUCCESS)
{
std::cerr << "Unable to get platform ID\n";
return 0;
} cl_context_properties prop[] = { CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platforms[0]), 0 };
cl_context context = clCreateContextFromType(prop, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, NULL);
if(context == 0)
{
std::cerr << "Can't create OpenCL context\n";
return 0;
} size_t cb;
clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
std::vector<cl_device_id> devices(cb / sizeof(cl_device_id));
clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, &devices[0], 0); clGetDeviceInfo(devices[0], CL_DEVICE_NAME, 0, NULL, &cb);
std::string devname;
devname.resize(cb);
clGetDeviceInfo(devices[0], CL_DEVICE_NAME, cb, &devname[0], 0);
std::cout << "Device: " << devname.c_str() << "\n"; clReleaseContext(context);
return 0; } cl_program load_program(cl_context context, const char* filename)
{
std::ifstream in(filename, std::ios_base::binary);
if(!in.good())
{
return 0; }// get file length
in.seekg(0, std::ios_base::end);
size_t length = in.tellg();
in.seekg(0, std::ios_base::beg); // read program source
std::vector<char> data(length + 1);
in.read(&data[0], length);
data[length] = 0; // create and build program
const char* source = &data[0];
cl_program program = clCreateProgramWithSource(context, 1, &source, 0, 0);
if(program == 0)
{
return 0;
}
if(clBuildProgram(program, 0, 0, 0, 0, 0) != CL_SUCCESS)
{
return 0;
}
return program;
}
int main()
{
print_device();
cl_int err;
cl_uint num;
err = clGetPlatformIDs(0, 0, &num);
if(err != CL_SUCCESS)
{
std::cerr << "Unable to get platforms\n";
return 0;
} std::vector<cl_platform_id> platforms(num);
err = clGetPlatformIDs(num, &platforms[0], &num);
if(err != CL_SUCCESS)
{
std::cerr << "Unable to get platform ID\n";
return 0;
}
cl_context_properties prop[] = { CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platforms[0]), 0 };
cl_context context = clCreateContextFromType(prop, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, NULL);
if(context == 0)
{
std::cerr << "Can't create OpenCL context\n";
return 0;
} size_t cb;
clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
std::vector<cl_device_id> devices(cb / sizeof(cl_device_id));
clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, &devices[0], 0); clGetDeviceInfo(devices[0], CL_DEVICE_NAME, 0, NULL, &cb);
std::string devname;
devname.resize(cb);
clGetDeviceInfo(devices[0], CL_DEVICE_NAME, cb, &devname[0], 0);
std::cout << "Device: " << devname.c_str() << "\n"; cl_command_queue queue = clCreateCommandQueue(context, devices[0], 0, 0);
if(queue == 0)
{
std::cerr << "Can't create command queue\n";
clReleaseContext(context);
return 0;
} const int DATA_SIZE = 1048576;
std::vector<float> a(DATA_SIZE), b(DATA_SIZE), res(DATA_SIZE);
for(int i = 0; i < DATA_SIZE; i++)
{
a[i] = std::rand();
b[i] = std::rand();
} cl_mem cl_a = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * DATA_SIZE, &a[0], NULL);
cl_mem cl_b = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * DATA_SIZE, &b[0], NULL);
cl_mem cl_res = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float) * DATA_SIZE, NULL, NULL);
if(cl_a == 0 || cl_b == 0 || cl_res == 0)
{
std::cerr << "Can't create OpenCL buffer\n";
clReleaseMemObject(cl_a);
clReleaseMemObject(cl_b);
clReleaseMemObject(cl_res);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
} cl_program program = load_program(context, "..\\shader.txt");
if(program == 0)
{
std::cerr << "Can't load or build program\n";
clReleaseMemObject(cl_a);
clReleaseMemObject(cl_b);
clReleaseMemObject(cl_res);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
cl_kernel adder = clCreateKernel(program, "adder", 0);
if(adder == 0)
{
std::cerr << "Can't load kernel\n";
clReleaseProgram(program);
clReleaseMemObject(cl_a);
clReleaseMemObject(cl_b);
clReleaseMemObject(cl_res);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
} clSetKernelArg(adder, 0, sizeof(cl_mem), &cl_a); clSetKernelArg(adder, 1, sizeof(cl_mem), &cl_b); clSetKernelArg(adder, 2, sizeof(cl_mem), &cl_res); size_t work_size = DATA_SIZE; err = clEnqueueNDRangeKernel(queue, adder, 1, 0, &work_size, 0, 0, 0, 0);
if(err == CL_SUCCESS)
{ err = clEnqueueReadBuffer(queue, cl_res, CL_TRUE, 0, sizeof(float) * DATA_SIZE, &res[0], 0, 0, 0);
}
if(err == CL_SUCCESS)
{
bool correct = true;
for(int i = 0; i < DATA_SIZE; i++)
{
if(a[i] + b[i] != res[i])
{
correct = false;
break;
}
}
if(correct)
{ std::cout << "Data is correct\n";
}
else
{ std::cout << "Data is incorrect\n"; }
} else
{
std::cerr << "Can't run kernel or read back data\n";
} clReleaseKernel(adder);
clReleaseProgram(program);
clReleaseMemObject(cl_a);
clReleaseMemObject(cl_b);
clReleaseMemObject(cl_res);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0; }</span>

需要使用的数据:

shader.txt

<span style="font-family:Microsoft YaHei;font-size:18px;">// Element-wise vector addition: result[i] = a[i] + b[i].
// Each work-item handles the element at its global ID in dimension 0. There is
// no bounds check, so the global work size must not exceed the buffer lengths.
__kernel void adder(__global const float* a, __global const float* b, __global float* result)
{
    // get_global_id() returns size_t; keep the index in that type instead of
    // narrowing it to int.
    const size_t idx = get_global_id(0);
    result[idx] = a[idx] + b[idx];
}</span>

CUDA版本的OpenCL在Windows 7下编程初步的更多相关文章

  1. windows下安装python、环境设置、多python版本的切换、pyserial与多版本python安装、windows命令行下切换目录

    1.windows下安装python 官网下载安装即可 2.安装后的环境设置 我的电脑--属性--高级--设置path的地方添加python安装目录,如C:\Python27;C:\Python33 ...

  2. YOLOv4:目标检测(windows和Linux下Darknet 版本)实施

    YOLOv4:目标检测(windows和Linux下Darknet 版本)实施 YOLOv4 - Neural Networks for Object Detection (Windows and L ...

  3. cmake编译opencv时指定cuda版本

    之前有网友提问说,基于cmake编译时如果切换cuda版本,比如我同时装了cuda8和cuda9,opencv总是找到cuda9,我想用cuda8怎么办?实际上,手头上要配置的工程是基于opencv3 ...

  4. 如何解决pytorch 编译时CUDA版本与运行时CUDA版本不对应

    转载请注明: 仰望高端玩家的小清新 http://www.cnblogs.com/luruiyuan/ 如何解决pytorch 编译时CUDA版本与运行时CUDA版本不对应 如果pytorch的编译时 ...

  5. cuda 版本查阅

    查看cuda版本 cat  /usr/local/cuda/version.txt nvcc -V

  6. [AI] 切换cuda版本的万金油

    1. 环境 ubuntu16.04 GTX1080Ti x 4 nvidia-418 cuda-10.1 pytorch1.0.0 目标:在最新的显卡驱动下,使用不同版本的cuda和深度学习框架来执行 ...

  7. pytorch Model Linear实现线性回归CUDA版本

    实验代码 import torch import torch.nn as nn #y = wx + b class MyModel(nn.Module): def __init__(self): su ...

  8. 矩池云里查看cuda版本

    可以用下面的命令查看 cat /usr/local/cuda/version.txt 如果想用nvcc来查看可以用下面的命令 nvcc -V 如果环境内没有nvcc可以安装一下,教程是矩池云上如何安装 ...

  9. 矩池云上如何修改cuda版本

    cuda版本可能对系统,驱动版本会有影响,修改之前需要先进行确认 1.检查系统版本 source /etc/os-release && echo $VERSION_ID 2.导入apt ...

随机推荐

  1. maven隐式依赖引起的包冲突

    包冲突 使用maven管理项目时可能会遇到包冲突的情况比如:log4j-over-slf4j.jar 和 slf4j-log4j12.jar这两个包同时一起运行时就会有问题. 这种冲突可能是显式依赖导 ...

  2. Java并发框架——AQS之阻塞与唤醒

    根据前面的线程阻塞与唤醒小节知道,目前在Java语言层面能实现阻塞唤醒的方式一共有三种:suspend与resume组合.wait与notify组合.park与unpark组合.其中suspend与r ...

  3. Retrofit 2.0 超能实践(三),轻松实现文件/多图片上传/Json字符串

    文:http://blog.csdn.net/sk719887916/article/details/51755427 Tamic 简书&csdn同步 通过前两篇姿势的入门 Retrofit ...

  4. ExpandableListView简单应用及listview模拟ExpandableListView

    首先我们还是来看一些案例,还是拿搜狐新闻客户端,因为我天天上下班没事爱看这个东东,上班又没时间看新闻,上下班路途之余浏览下新闻打发时间嘛.           看这个效果挺棒吧,其实实现起来也不难,我 ...

  5. 06 intent flag三种属性

    flag属性可以看做和写在清单文件中的启动模式一样 但效果有一定差别 1,FLAG_ACTIVITY_SINGLE_TOP:启动模式里的SingleTop一致  如果X启动模式设置为FLAG_ACTI ...

  6. Cocos2D iOS之旅:如何写一个敲地鼠游戏(七):弹出地鼠

    大熊猫猪·侯佩原创或翻译作品.欢迎转载,转载请注明出处. 如果觉得写的不好请告诉我,如果觉得不错请多多支持点赞.谢谢! hopy ;) 免责申明:本博客提供的所有翻译文章原稿均来自互联网,仅供学习交流 ...

  7. Win7 Eclipse Hadoop2.4插件配置

    准备工作: 1.下载hadoop2x-eclipse-plugin-master.zip Github地址:https://github.com/winghc/hadoop2x-eclipse-plu ...

  8. Java之泛型编程

    1.概念 泛型就是参数化类型.泛型的好处是在编译的时候检查类型安全,并且所有的强制转换都是自动和隐式的,提高代码的重用率. 2.案例 1)先看下面案例: //不适用泛型编程 Apple app0=ne ...

  9. Cocos2D:塔防游戏制作之旅(六)

    现在,创建一个新的类用来表示炮塔.添加新的类文件,名称为Tower,继承于CCNode. 替换Tower.h文件为如下内容: #import "cocos2d.h" #import ...

  10. bootmgr解压缩

    主要参考以下两个文章: 1.  http://bbs.wuyou.com/forum.php?mod=viewthread&tid=211314 2.  http://reboot.pro/f ...