使用表面写入函数,结合纹理引用实现图片的旋转
▶ 源代码

 #include <stdio.h>
#include <stdlib.h>
#define WINDOWS_LEAN_AND_MEAN   // must be defined before <windows.h> to have any effect
#define NOMINMAX                // keeps <windows.h> from defining min/max macros
#include <windows.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h>
#define MIN_EPSILON_ERROR 5e-3f
float angle = 0.5f; // rotation angle, in radians

// 2D texture reference used by transformKernel to sample the source image.
// NOTE: texture/surface *references* are a legacy API (removed in CUDA 12);
// this file needs an older toolkit or a port to texture/surface objects.
texture<float, cudaTextureType2D, cudaReadModeElementType> tex;

// 2D surface reference used by surfaceWriteKernel to fill the CUDA array.
surface<void, cudaSurfaceType2D> outputSurface;

// Surface write: copy the data held in global memory (d_data) into the CUDA
// array (cuArray) that is also bound to the texture reference.
/**
 * Copies a row-major float image from global memory into the CUDA array bound
 * to outputSurface, using a 2D surface write.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads whose
 * (x, y) lies outside width x height return without touching memory, so the
 * grid may be rounded up.
 *
 * @param gIData  device pointer to width * height floats (row-major)
 * @param width   image width in pixels
 * @param height  image height in pixels
 */
__global__ void surfaceWriteKernel(float *gIData, int width, int height)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // surf2Dwrite takes the x coordinate as a byte offset, not an element index.
    surf2Dwrite(gIData[y * width + x], outputSurface, (int)(x * sizeof(float)), y, cudaBoundaryModeTrap);
}

/**
 * Rotates the image bound to the texture reference `tex` by `theta` radians
 * about its centre and writes the result to global memory.
 *
 * Coordinates are normalised to [0, 1) and shifted so the rotation is about
 * the image centre; sampling behaviour (wrap / linear filtering) is whatever
 * the host configured on `tex` before launch.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output pixel; threads
 * outside width x height return immediately.
 *
 * @param gOData  device pointer receiving width * height floats (row-major)
 * @param width   image width in pixels
 * @param height  image height in pixels
 * @param theta   rotation angle in radians
 */
__global__ void transformKernel(float *gOData, int width, int height, float theta)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    float u = x / (float)width - 0.5f;
    float v = y / (float)height - 0.5f;
    float c = cosf(theta);
    float s = sinf(theta);

    gOData[y * width + x] = tex2D(tex, u * c - v * s + 0.5f, v * c + u * s + 0.5f);
}

/**
 * Loads a grayscale image, pushes it into a CUDA array through a surface
 * write, rotates it by sampling the same array through a texture reference,
 * then saves the result and compares it with a reference image.
 *
 * @return 0 on normal completion (the comparison verdict is printed),
 *         1 if an input image cannot be loaded or the sizes do not match.
 */
int main()
{
    printf("\n\tStart.\n");

    // Device selection logic was removed by the author; device 0 is used.
    const int device = 0;
    checkCudaErrors(cudaSetDevice(device));
    cudaDeviceProp deviceProps;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, device));
    printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);

    // Load the input and reference images (paths are hard-coded; the
    // sdkFindFilePath() lookup was removed by the author).
    float *h_data = NULL, *h_dataRef = NULL;
    unsigned int width = 0, height = 0;
    unsigned int refWidth = 0, refHeight = 0;

    if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height))
    {
        fprintf(stderr, "\n\tFailed to load input image.\n");
        return 1;
    }

    // Keep the reference dimensions separate so they cannot silently
    // overwrite the input dimensions used for every later size computation.
    if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &refWidth, &refHeight))
    {
        fprintf(stderr, "\n\tFailed to load reference image.\n");
        free(h_data);
        return 1;
    }

    if (refWidth != width || refHeight != height)
    {
        fprintf(stderr, "\n\tReference image size %u x %u does not match input %u x %u.\n", refWidth, refHeight, width, height);
        free(h_data);
        free(h_dataRef);
        return 1;
    }

    const size_t size = (size_t)width * height * sizeof(float);
    printf("\n\tLoad input files, %u x %u pixels\n", width, height);

    // Allocate device memory: a linear buffer plus a CUDA array that allows
    // surface load/store.
    float *d_data = NULL;
    checkCudaErrors(cudaMalloc((void **)&d_data, size));
    cudaArray *cuArray = NULL;
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height, cudaArraySurfaceLoadStore));
    checkCudaErrors(cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice));
    // When only texture memory is used, the host data could be copied
    // straight into cuArray instead:
    //cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice);

    // Bind the surface reference and fill the array through a surface write.
    checkCudaErrors(cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc));

    const dim3 dimBlock(8, 8, 1);
    // Round up so images whose sides are not multiples of the block size are
    // fully covered; the kernels guard against the overhang.
    const dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1);
    surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
    checkCudaErrors(cudaGetLastError());

    // Configure and bind the texture reference onto the same array.
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true;
    checkCudaErrors(cudaBindTextureToArray(tex, cuArray, channelDesc));

    // Warm-up run.
    transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    // Timed run.
    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);
    transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&timer);

    // Read the elapsed time BEFORE deleting the timer; reading it afterwards
    // is what produced the "0.000000 ms, inf Mpixels/sec" output.
    const float elapsedMs = sdkGetTimerValue(&timer);
    sdkDeleteTimer(&timer);
    printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", elapsedMs, (width * height / (elapsedMs / 1000.0f)) / 1e6);

    // Fetch, save and verify the result.
    checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
    sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height);
    printf("\n\tSave output file.\n");
    printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");

    checkCudaErrors(cudaFree(d_data));
    checkCudaErrors(cudaFreeArray(cuArray));
    free(h_data);
    free(h_dataRef);
    getchar();
    return 0;
}

▶ 输出结果

 Start.

 Device GeForce GTX , Multi-Processors: , SM 6.1

 Load input files, 512 x 512 pixels

 Cost time: 0.000000 ms, inf Mpixels/sec

 Save output file.

 Finish, return Passed

▶ 涨姿势

● 使用函数 sdkLoadPGM() 读取图片数据

 // helper_image.h
/**
 * Loads a binary PGM ("P5", 1 channel) or PPM ("P6", 3 channels) file into an
 * 8-bit buffer.
 *
 * @param file      path of the image file
 * @param data      in/out: if *data is NULL a buffer is malloc'ed and returned
 *                  here (caller frees); otherwise the existing buffer is
 *                  filled and *w / *h must already match the file
 * @param w         in/out: image width (written when the buffer is allocated
 *                  here, checked otherwise)
 * @param h         in/out: image height (same rule as w)
 * @param channels  out: 1 for P5, 3 for P6, 0 if the magic is not recognised
 * @return true on success; false on any open / parse / size / allocation /
 *         read failure. The file is closed on every path, and a buffer
 *         allocated here is released (and *data reset to NULL) on failure.
 */
inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
{
    FILE *fp = NULL;

    if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
    {
        std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
        return false;
    }

    // check header
    char header[PGMHeaderSize];

    if (fgets(header, PGMHeaderSize, fp) == NULL)
    {
        std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
        fclose(fp);
        return false;
    }

    if (strncmp(header, "P5", 2) == 0)
    {
        *channels = 1;
    }
    else if (strncmp(header, "P6", 2) == 0)
    {
        *channels = 3;
    }
    else
    {
        std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
        *channels = 0;
        fclose(fp);
        return false;
    }

    // parse header, read maxval, width and height; the three values may be
    // spread over one to three lines, with '#' comment lines in between
    unsigned int width = 0;
    unsigned int height = 0;
    unsigned int maxval = 0;
    unsigned int i = 0;

    while (i < 3)
    {
        if (fgets(header, PGMHeaderSize, fp) == NULL)
        {
            std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
            fclose(fp);
            return false;
        }

        if (header[0] == '#')
        {
            continue;
        }

        int parsed = 0;

        if (i == 0)
        {
            parsed = SSCANF(header, "%u %u %u", &width, &height, &maxval);
        }
        else if (i == 1)
        {
            parsed = SSCANF(header, "%u %u", &height, &maxval);
        }
        else if (i == 2)
        {
            parsed = SSCANF(header, "%u", &maxval);
        }

        // sscanf may return EOF (negative) on a blank line; adding that to
        // the unsigned counter would wrap it and end the loop prematurely.
        if (parsed > 0)
        {
            i += (unsigned int)parsed;
        }
    }

    const size_t byteCount = (size_t)width * height * (*channels);
    bool allocatedHere = false;

    // check if given handle for the data is initialized
    if (NULL != *data)
    {
        if (*w != width || *h != height)
        {
            // Reading into a buffer sized for different dimensions could
            // overflow it, so fail instead of carrying on.
            std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
            fclose(fp);
            return false;
        }
    }
    else
    {
        *data = (unsigned char *)malloc(sizeof(unsigned char) * byteCount);

        if (NULL == *data)
        {
            std::cerr << "__LoadPPM() : Failed to allocate image buffer." << std::endl;
            fclose(fp);
            return false;
        }

        allocatedHere = true;
        *w = width;
        *h = height;
    }

    // read and close file
    const size_t bytesRead = fread(*data, sizeof(unsigned char), byteCount, fp);
    fclose(fp);

    if (bytesRead != byteCount)
    {
        std::cerr << "__LoadPPM() read data returned error." << std::endl;

        if (allocatedHere)
        {
            free(*data);
            *data = NULL;
        }

        return false;
    }

    return true;
}

/**
 * Loads a PGM/PPM file and converts its 8-bit samples to element type T.
 *
 * @param file  path of the image file
 * @param data  in/out: if *data is NULL a buffer of w * h * channels elements
 *              is malloc'ed (caller frees); otherwise the existing buffer is
 *              filled
 * @param w     out: image width
 * @param h     out: image height
 * @return true on success, false if the file cannot be loaded or the output
 *         buffer cannot be allocated
 */
template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
{
    unsigned char *idata = NULL;
    unsigned int channels;

    if (!__loadPPM(file, &idata, w, h, &channels))
        return false;

    unsigned int size = *w * *h * channels;

    // If the caller did not supply a buffer, allocate one of the size read
    // from the file.
    if (*data == NULL)
    {
        *data = (T *)malloc(sizeof(T) * size);

        if (*data == NULL)
        {
            free(idata);
            return false;
        }
    }

    // Convert and copy the samples into the caller's buffer.
    std::transform(idata, idata + size, *data, ConverterFromUByte<T>());

    // Release the temporary 8-bit buffer (this call had been swallowed by the
    // trailing comment, leaking the buffer on every load).
    free(idata);
    return true;
}

● 使用到的表面写入函数原型

 // surface_functions.h
// Device-side 2D surface write (excerpt from the toolkit header).
// Forwards `val`, its size in bytes, the surface reference, the (x, y)
// coordinates and the boundary mode to the compiler's surface handler
// "__surf2Dwrite_v2". `mode` defaults to cudaBoundaryModeTrap.
// NOTE(review): x is presumably a byte offset rather than an element index
// (the caller in this article multiplies the column by the element size) —
// confirm against the CUDA documentation.
template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
// The body is only compiled when __CUDA_ARCH__ is defined; otherwise the
// function is an empty stub.
#ifdef __CUDA_ARCH__
__nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);
#endif
}

0_Simple__simpleSurfaceWrite的更多相关文章

  1. 0_Simple__simpleTexture + 0_Simple__simpleTextureDrv

    使用纹理引用来旋转图片,并在使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...

随机推荐

  1. ZZNU 2125:A + B 普拉斯(傻逼题+大数加法)

    2125: A + B 普拉斯 时间限制: 1 Sec  内存限制: 128 MB 提交: 94  解决: 28 [提交] [状态] [讨论版] [命题人:admin] 题目描述 "别人总说 ...

  2. yocto和bitbake

    一.yocto 1.yocto简介 Yocto 是一个开源社区通过它提供模版.工具和方法帮助开发者创建基于linux内核的定制系统,支持ARM, PPC, MIPS, x86 (32 & 64 ...

  3. apache airflow docker 运行简单试用

    airflow 是一个编排.调度和监控workflow的平台,由Airbnb开源,现在在Apache Software Foundation 孵化. airflow 将workflow编排为tasks ...

  4. 使用 telnet 发邮件

    我们都习惯了用邮件客户端软件或者登录到电子邮件站点进行收发邮件,现在尝试一下使用 Windows 自带的 Telnet 程序手工地发送一封简单的邮件,以此来稍微明白关于邮件发送的一些知识. 现在 E- ...

  5. Vquery PHP 简单爬虫类

    http://www.thinkphp.cn/topic/36693.html 在使用php进行网页抓取的时候你有没有感觉到用起来比较麻烦呢?目前我还没有发现php有这样针对网页抓取的类,每次用到这个 ...

  6. 【转】每天一个linux命令目录

    原文网址:http://www.cnblogs.com/peida/archive/2012/12/05/2803591.html 开始详细系统的学习linux常用命令,坚持每天一个命令,所以这个系列 ...

  7. JUC集合之 CopyOnWriteArraySet

    CopyOnWriteArraySet介绍 它是线程安全的无序的集合,可以将它理解成线程安全的HashSet.有意思的是,CopyOnWriteArraySet和HashSet虽然都继承于共同的父类A ...

  8. centos6/7安装gitlab

    CentOS/RHEL 6/7安装gitlab新建 /etc/yum.repos.d/gitlab-ce.repo,内容为你的CentOS/RHEL版本:centos6 [gitlab-ce] nam ...

  9. linux 命令:chmod权限设置命令

    Linux系统中的每个文件和目录都有访问许可权限,用它来确定谁可以通过何种方式对文件和目录进行访问和操作. 文件或目录的访问权限分为只读,只写和可执行三种.以文件为例,只读权限表示只允许读其内容,而禁 ...

  10. jmeter --上传文件

    jmeter-场景-上传文件-send-a-file 简要说就3点: POST请求 Request的参数都写在路径内,不写在表单里 上传的文件写在表单里 只要记住以上3点,也就避免了在设计脚本的时候走 ...