0_Simple__simpleSurfaceWrite

使用表面写入函数，结合纹理引用实现图片的旋转
▶ 源代码

 #include <stdio.h>
 #include <stdlib.h>

 #include <windows.h>

 #include <cuda_runtime.h>
 #include "device_launch_parameters.h"

 #include <helper_functions.h>
 #include <helper_cuda.h>

 #define WINDOWS_LEAN_AND_MEAN

 #define NOMINMAX

 #define MIN_EPSILON_ERROR 5e-3f

 // Rotation angle passed to transformKernel, in radians.
 float angle = 0.5f;

 // 2D texture reference of float texels, read without type conversion.
 // main() binds it to the CUDA array and transformKernel samples it with tex2D().
 // The dimensionality argument was missing in this copy of the code; it must be 2D
 // because the texture is sampled with tex2D().
 texture<float, cudaTextureType2D, cudaReadModeElementType> tex;

 // 2D surface reference used by surfaceWriteKernel to write into the same CUDA array.
 // surf2Dwrite() takes a surface<void, cudaSurfaceType2D>, so the dimensionality is 2D.
 surface<void, cudaSurfaceType2D> outputSurface;

 // Surface write: copies the image held in global memory (gIData) into the CUDA array
 // bound to outputSurface, one pixel per thread.
 //
 //   gIData  input pixels, indexed as gIData[y * width + x]
 //   width   image width in pixels
 //   height  image height in pixels
 //
 // Expects a 2D launch; thread (x, y) handles pixel (x, y). Threads that fall outside
 // the image return without touching memory, so the grid may overhang the image.
 __global__ void surfaceWriteKernel(float *gIData, int width, int height)
 {
     unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
     unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

     // Guard the tail of the grid: an out-of-range access would read past gIData and,
     // with cudaBoundaryModeTrap, make the surface write fault the kernel.
     if (x >= (unsigned int)width || y >= (unsigned int)height)
     {
         return;
     }

     // Surface x coordinates are byte offsets, not element indices, hence the
     // multiplication by the element size (the operand was missing in this copy).
     surf2Dwrite(gIData[y * width + x], outputSurface, (int)(x * sizeof(float)), (int)y, cudaBoundaryModeTrap);
 }

 // Texture sampling: rotates the image stored in the CUDA array bound to `tex` and
 // writes the result to global memory, one output pixel per thread.
 //
 //   gOData  output pixels, written as gOData[y * width + x]
 //   width   image width in pixels
 //   height  image height in pixels
 //   theta   rotation angle in radians
 //
 // Expects a 2D launch; thread (x, y) produces pixel (x, y). Threads outside the image
 // return without writing, so the grid may overhang the image.
 __global__ void transformKernel(float *gOData,int width,int height,float theta)
 {
     unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
     unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

     // Guard the tail of the grid so no thread writes past the end of gOData.
     if (x >= (unsigned int)width || y >= (unsigned int)height)
     {
         return;
     }

     // Pixel position relative to the image centre, in normalized coordinates.
     float u = x / (float)width - 0.5f;
     float v = y / (float)height - 0.5f;

     // Evaluate the trigonometric terms once instead of twice each.
     float cosTheta = cosf(theta);
     float sinTheta = sinf(theta);

     // Rotate about the centre, then shift back so the centre is at (0.5, 0.5).
     float tu = u * cosTheta - v * sinTheta + 0.5f;
     float tv = v * cosTheta + u * sinTheta + 0.5f;

     gOData[y * width + x] = tex2D(tex, tu, tv);
 }

 // Program entry point.
 //
 // Loads a grayscale PGM image, copies it into a CUDA array with a surface write,
 // rotates it by `angle` through texture sampling, times the rotation kernel, saves the
 // result and compares it against a reference image.
 //
 // Returns 0 after the run completes (whether or not the comparison passed) and 1 if an
 // input image cannot be loaded. Any failing CUDA call terminates the process through
 // checkCudaErrors().
 //
 // NOTE(review): the integer literals below (device 0, 32-bit channel, 8x8 blocks,
 // addressMode indices, 0 bytes of dynamic shared memory) were missing in this copy of
 // the code and have been restored to match NVIDIA's simpleSurfaceWrite sample.
 int main()
 {
     printf("\n\tStart.\n");

     // The device-selection logic of the original sample was removed; device 0 is used.
     const int deviceId = 0;
     checkCudaErrors(cudaSetDevice(deviceId));

     cudaDeviceProp deviceProps;
     checkCudaErrors(cudaGetDeviceProperties(&deviceProps, deviceId));
     printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);

     // Load the input image and the reference image (the sdkFindFilePath() lookup of the
     // original sample was removed in favour of fixed paths).
     float *h_data = NULL, *h_dataRef = NULL;
     unsigned int width = 0, height = 0, size;
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height))
     {
         fprintf(stderr, "\n\tFailed to load the input image.\n");
         return 1;
     }
     size = width * height * sizeof(float);

     // Load the reference into its own dimensions so it cannot silently overwrite the
     // input's width/height after `size` has already been computed from them.
     unsigned int refWidth = 0, refHeight = 0;
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &refWidth, &refHeight))
     {
         fprintf(stderr, "\n\tFailed to load the reference image.\n");
         free(h_data);
         return 1;
     }
     if (refWidth != width || refHeight != height)
     {
         fprintf(stderr, "\n\tReference image size does not match the input image.\n");
         free(h_data);
         free(h_dataRef);
         return 1;
     }
     printf("\n\tLoad input files, %d x %d pixels\n", width, height);

     // Allocate device memory: a linear buffer plus a CUDA array usable as a surface.
     float *d_data = NULL;
     checkCudaErrors(cudaMalloc((void **) &d_data, size));

     cudaArray *cuArray = NULL;
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);// one 32-bit float channel
     checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height, cudaArraySurfaceLoadStore));
     checkCudaErrors(cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice));
     // When only texture memory is used, the host data can be copied straight into cuArray:
     // cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice);

     // Bind the surface reference to the CUDA array.
     checkCudaErrors(cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc));

     // Surface write: copy d_data into cuArray.
     // The grid uses integer division, so only whole blocks are launched; this covers
     // the full image only when width and height are multiples of the block size.
     dim3 dimBlock(8, 8, 1);
     dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
     surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
     checkCudaErrors(cudaGetLastError());// kernel launches do not return errors directly

     // Configure and bind the texture reference to the same CUDA array.
     tex.addressMode[0] = cudaAddressModeWrap;
     tex.addressMode[1] = cudaAddressModeWrap;
     tex.filterMode = cudaFilterModeLinear;
     tex.normalized = true;
     checkCudaErrors(cudaBindTextureToArray(tex, cuArray, channelDesc));

     // Warm-up run, excluded from the timing.
     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     checkCudaErrors(cudaGetLastError());
     checkCudaErrors(cudaDeviceSynchronize());

     // Timed run.
     StopWatchInterface *timer = NULL;
     sdkCreateTimer(&timer);
     sdkStartTimer(&timer);
     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     checkCudaErrors(cudaGetLastError());
     checkCudaErrors(cudaDeviceSynchronize());
     sdkStopTimer(&timer);

     // Read the elapsed time BEFORE deleting the timer; querying a deleted timer is what
     // produced the "0.000000 ms, inf Mpixels/sec" output.
     const float elapsedMs = sdkGetTimerValue(&timer);
     sdkDeleteTimer(&timer);
     printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", elapsedMs, (width *height / (elapsedMs / 1000.0f)) / 1e6);

     // Copy the result back, save it and check it against the reference.
     checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
     sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height);
     printf("\n\tSave output file.\n");
     printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");

     // Release device and host resources (the host buffers were malloc'ed by sdkLoadPGM).
     checkCudaErrors(cudaFree(d_data));
     checkCudaErrors(cudaFreeArray(cuArray));
     free(h_data);
     free(h_dataRef);

     getchar();// keep the console window open until a key is pressed
     return 0;
 }

▶ 输出结果

 Start.

 Device GeForce GTX , Multi-Processors: , SM 6.1

 Load input files,  x  pixels

 Cost time: 0.000000 ms, inf Mpixels/sec

 Save output file.

 Finish, return Passed

▶ 涨姿势

● 使用函数 sdkLoadPGM() 读取图片数据

 // helper_image.h

 // Loads a binary PGM ("P5") or PPM ("P6") image file into a buffer of unsigned bytes.
 //
 //   file      path of the image file
 //   data      in/out: if *data is NULL a buffer of width * height * channels bytes is
 //             malloc'ed and returned through it (the caller releases it with free());
 //             otherwise the existing buffer is filled and *w / *h must already equal
 //             the image dimensions
 //   w, h      out (or in, when *data is non-NULL): image width and height in pixels
 //   channels  out: 1 for "P5", 3 for "P6", 0 if the magic number is not recognised
 //
 // Returns false if the file cannot be opened, the header cannot be read or is not
 // P5/P6, the caller-supplied buffer dimensions do not match, or allocation fails.
 // The file is closed on every path once it has been opened.
 //
 // NOTE(review): the integer literals were missing in this copy of the code and have
 // been restored to match helper_image.h from the CUDA samples.
 inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
 {
     FILE *fp = NULL;

     if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
     {
         std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
         return false;
     }

     // check header
     char header[PGMHeaderSize];

     if (fgets(header, PGMHeaderSize, fp) == NULL)
     {
         std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
         fclose(fp);
         return false;
     }

     if (strncmp(header, "P5", 2) == 0)
     {
         *channels = 1;
     }
     else if (strncmp(header, "P6", 2) == 0)
     {
         *channels = 3;
     }
     else
     {
         std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
         *channels = 0;
         fclose(fp);
         return false;
     }

     // parse header, read maxval, width and height
     // `i` counts how many of the three header fields have been read so far; the fields
     // may be spread over several lines and interleaved with '#' comment lines.
     unsigned int width = 0;
     unsigned int height = 0;
     unsigned int maxval = 0;
     unsigned int i = 0;

     while (i < 3)
     {
         if (fgets(header, PGMHeaderSize, fp) == NULL)
         {
             std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
             fclose(fp);
             return false;
         }

         if (header[0] == '#')
         {
             continue;
         }

         int parsed = 0;

         if (i == 0)
         {
             parsed = SSCANF(header, "%u %u %u", &width, &height, &maxval);
         }
         else if (i == 1)
         {
             parsed = SSCANF(header, "%u %u", &height, &maxval);
         }
         else if (i == 2)
         {
             parsed = SSCANF(header, "%u", &maxval);
         }

         // sscanf returns EOF (negative) on an empty line; adding that to the unsigned
         // counter would wrap it around and end the loop with nothing parsed.
         if (parsed > 0)
         {
             i += (unsigned int)parsed;
         }
     }

     // Compute the payload size in size_t so large images do not overflow 32 bits.
     const size_t byteCount = (size_t)width * height * *channels;

     // check if given handle for the data is initialized
     if (NULL != *data)
     {
         if (*w != width || *h != height)
         {
             // Reading on would write width * height * channels bytes into a buffer the
             // caller sized for *w x *h, so stop here instead of overflowing it.
             std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
             fclose(fp);
             return false;
         }
     }
     else
     {
         *data = (unsigned char *)malloc(sizeof(unsigned char) * byteCount);

         if (NULL == *data)
         {
             std::cerr << "__LoadPPM() : Failed to allocate image buffer." << std::endl;
             fclose(fp);
             return false;
         }

         *w = width;
         *h = height;
     }

     // read and close file
     // NOTE(review): as in the original, a failed read is only reported and the function
     // still returns true; confirm whether callers rely on that before changing it.
     if (fread(*data, sizeof(unsigned char), byteCount, fp) == 0)
     {
         std::cerr << "__LoadPPM() read data returned error." << std::endl;
     }

     fclose(fp);

     return true;
 }

 template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)

 {

     unsigned char *idata = NULL;

     unsigned int channels;

     if (!__loadPPM(file, &idata, w, h, &channels))

         return false;

     unsigned int size = *w **h * channels;

     if (*data == NULL)// 如果 T **data 没有初始化，则按照读取的 size 进行初始化

         *data = (T *)malloc(sizeof(T) * size);

     std::transform(idata, idata + size, *data, ConverterFromUByte<T>());// 拷贝数据到 data 中

     free(idata);

     return true;

 }

● 使用到的表面写入函数原型

 // surface_functions.h

 // Writes `val` to the 2D surface reference `surf` at coordinates (x, y).
 //
 //   val   value to store; its address and sizeof(T) are handed to the compiler handler
 //   surf  2D surface reference to write to
 //   x, y  target coordinates
 //         NOTE(review): the CUDA documentation describes x as a byte offset rather
 //         than an element index - confirm against the programming guide
 //   mode  out-of-range handling; defaults to cudaBoundaryModeTrap
 //
 // The body is only compiled when __CUDA_ARCH__ is defined; otherwise the function is
 // an empty stub.
 template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)

 {

 #ifdef __CUDA_ARCH__

     // Forward the write to the compiler's texture/surface handler.
     __nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);

 #endif

 }

0_Simple__simpleSurfaceWrite的更多相关文章

0_Simple__simpleTexture + 0_Simple__simpleTextureDrv
使用纹理引用来旋转图片,并分别使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...

随机推荐

Windows10 解决“装了 .NET Framework 4.5.2/4.6.1/4.7.1等等任何版本或版本更高的更新”问题
========================================================= 系统自带的.net framework版本为4.7,自己安装.NET Framewo ...
TP3.2整合kindeditor
HTML  <link rel="stylesheet" href="__PUBLIC__/kindeditor/th ...
matplotlib的颜色及线条控制
refer to: https://www.cnblogs.com/darkknightzh/p/6117528.html
stenciljs 学习一 web 组件开发
stenciljs 介绍参考官方网站,或者 https://www.cnblogs.com/rongfengliang/p/9706542.html 创建项目使用脚手架工具 npm init ste ...
bootstrap modal模态框的运用
http://www.ziqiangxuetang.com/bootstrap/bootstrap-modal-plugin.html 方法下面是一些可与 modal() 一起使用的有用的方法. 方 ...
logging 的配置和使用
logging 的配置和使用 reference : logging cookbook logging HOWTO 测试源码,example import logging nt = 'xwei' # ...
ORACLE数据导入导出后新数据库中某些表添加操作报错[ORA-12899]
由于项目需要,我在搭建了新的开发环境后,需要将之前环境中的ORACLE数据库导出,再导入到新的开发环境下.当导出导入完成后,使用数据库进行添加操作时发现针对很多表的添加操作报错,具体报错原因描述为: ...
POJ2411骨牌覆盖——状压dp
题目:http://poj.org/problem?id=2411 状压dp.注意一下代码中标记的地方. #include<iostream> #include<cstdio> ...
MySQL的结构图
MySQL的结构图为了更好的了解和配置MySQL,就必须先了解一下MySQL的体系结构.如下图所示: ▲MySQL体系架构图理解MySQL的体系架构对于成功的配置和调试至关重要.以下将对架构图进行 ...
linux 线程的同步一（互斥量和信号量）
互斥量(Mutex) 互斥量表现互斥现象的数据结构,也被当作二元信号灯.一个互斥基本上是一个多任务敏感的二元信号,它能用作同步多任务的行为,它常用作保护从中断来的临界段代码并且在共享同步使用的资源. ...

0_Simple__simpleSurfaceWrite

0_Simple__simpleSurfaceWrite的更多相关文章

随机推荐

热门专题