0_Simple__simpleSurfaceWrite

使用表面写入函数，结合纹理引用实现图片的旋转
▶ 源代码

 // These macros only take effect if they are defined BEFORE <windows.h> is
 // included. WIN32_LEAN_AND_MEAN trims rarely used Windows APIs and NOMINMAX
 // stops <windows.h> from defining min()/max() macros.
 #ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
 #endif
 #ifndef NOMINMAX
 #define NOMINMAX
 #endif

 #include <stdio.h>
 #include <windows.h>
 #include <cuda_runtime.h>
 #include "device_launch_parameters.h"
 #include <helper_functions.h>
 #include <helper_cuda.h>

 // Tolerance passed to compareData() when checking against the reference image.
 #define MIN_EPSILON_ERROR 5e-3f

 // Rotation angle in radians.
 float angle = 0.5f;

 // 2D texture reference used to sample the image (legacy texture-reference API).
 texture<float, cudaTextureType2D, cudaReadModeElementType> tex;

 // 2D surface reference used to write into the CUDA array (legacy surface-reference API).
 surface<void, cudaSurfaceType2D> outputSurface;

 // Copies the image held in global memory (gIData, row-major, width x height
 // floats) into the CUDA array bound to outputSurface, using surface writes.
 //
 // Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads that fall
 // outside the image return without touching memory, so the grid may overhang
 // the image.
 __global__ void surfaceWriteKernel(float *gIData, int width, int height)
 {
     const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
     const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

     // Guard against out-of-range threads (and non-positive dimensions).
     if (width <= 0 || height <= 0 || x >= (unsigned int)width || y >= (unsigned int)height)
     {
         return;
     }

     // surf2Dwrite() expects the x coordinate as a byte offset, not an element index.
     surf2Dwrite(gIData[y * width + x], outputSurface, (int)(x * sizeof(float)), (int)y, cudaBoundaryModeTrap);
 }

 // Rotates the image stored in the CUDA array bound to the texture reference
 // `tex` by `theta` radians about the image centre and writes the result to
 // global memory (gOData, row-major, width x height floats).
 //
 // Launch layout: 2D grid of 2D blocks, one thread per output pixel. Threads
 // outside the image return without writing.
 __global__ void transformKernel(float *gOData,int width,int height,float theta)
 {
     const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
     const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

     // Guard against out-of-range threads (and non-positive dimensions).
     if (width <= 0 || height <= 0 || x >= (unsigned int)width || y >= (unsigned int)height)
     {
         return;
     }

     // Pixel position in normalized coordinates, shifted so the centre is the origin.
     const float u = x / (float)width - 0.5f;
     const float v = y / (float)height - 0.5f;

     // Evaluate the trigonometric terms once per thread.
     const float cosTheta = cosf(theta);
     const float sinTheta = sinf(theta);

     // Rotate, shift back by 0.5 and sample the texture at the rotated position.
     gOData[y * width + x] = tex2D(tex, u * cosTheta - v * sinTheta + 0.5f, v * cosTheta + u * sinTheta + 0.5f);
 }

 // Demo driver: loads a grayscale PGM image, copies it into a CUDA array through
 // a surface write, rotates it by `angle` radians through a texture fetch, saves
 // the result and compares it against a reference image.
 //
 // Returns 0 when the pipeline ran to completion (the comparison verdict is
 // printed), 1 when an input could not be loaded or is unsupported. CUDA errors
 // are reported by checkCudaErrors()/getLastCudaError() from helper_cuda.h.
 int main()
 {
     printf("\n\tStart.\n");

     // Device 0 is used unconditionally (the device selection step was removed).
     const int device = 0;
     checkCudaErrors(cudaSetDevice(device));
     cudaDeviceProp deviceProps;
     checkCudaErrors(cudaGetDeviceProperties(&deviceProps, device));
     printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);

     // Load the input image and the reference result (paths are hard-coded; the
     // sdkFindFilePath() lookup was removed).
     float *h_data = NULL, *h_dataRef = NULL;
     unsigned int width = 0, height = 0;
     unsigned int refWidth = 0, refHeight = 0;
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height))
     {
         fprintf(stderr, "\n\tFailed to load the input image.\n");
         return 1;
     }
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &refWidth, &refHeight))
     {
         fprintf(stderr, "\n\tFailed to load the reference image.\n");
         free(h_data);
         return 1;
     }
     // The reference is compared element by element, so both images must have the same size.
     if (refWidth != width || refHeight != height)
     {
         fprintf(stderr, "\n\tReference image is %u x %u but input image is %u x %u.\n", refWidth, refHeight, width, height);
         free(h_data);
         free(h_dataRef);
         return 1;
     }
     printf("\n\tLoad input files, %u x %u pixels\n", width, height);

     // The grid is built with an exact division, so reject sizes that would
     // leave edge pixels unprocessed.
     const dim3 dimBlock(8, 8, 1);
     if (width == 0 || height == 0 || width % dimBlock.x != 0 || height % dimBlock.y != 0)
     {
         fprintf(stderr, "\n\tImage dimensions must be non-zero multiples of %u x %u.\n", dimBlock.x, dimBlock.y);
         free(h_data);
         free(h_dataRef);
         return 1;
     }
     const dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
     const size_t size = (size_t)width * height * sizeof(float);

     // Allocate device memory: a linear buffer and a CUDA array usable as a surface.
     float *d_data = NULL;
     checkCudaErrors(cudaMalloc((void **) &d_data, size));
     cudaArray *cuArray = NULL;
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
     checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height, cudaArraySurfaceLoadStore));
     checkCudaErrors(cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice));
     // When only texture memory is used, the image could be copied straight into cuArray:
     //cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice);

     // Bind the surface reference and fill the CUDA array through surface writes.
     checkCudaErrors(cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc));
     surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
     getLastCudaError("surfaceWriteKernel launch failed");

     // Bind the texture reference to the same CUDA array.
     tex.addressMode[0] = cudaAddressModeWrap;
     tex.addressMode[1] = cudaAddressModeWrap;
     tex.filterMode = cudaFilterModeLinear;
     tex.normalized = true;
     checkCudaErrors(cudaBindTextureToArray(tex, cuArray, channelDesc));

     // Warm-up run.
     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     getLastCudaError("transformKernel warm-up launch failed");
     checkCudaErrors(cudaDeviceSynchronize());

     // Timed run.
     StopWatchInterface *timer = NULL;
     sdkCreateTimer(&timer);
     sdkStartTimer(&timer);
     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     getLastCudaError("transformKernel launch failed");
     checkCudaErrors(cudaDeviceSynchronize());
     sdkStopTimer(&timer);
     // Read the elapsed time BEFORE deleting the timer; querying a deleted timer
     // yields 0 ms and an infinite throughput.
     const float elapsedMs = sdkGetTimerValue(&timer);
     sdkDeleteTimer(&timer);
     printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", elapsedMs, (width *height / (elapsedMs / 1000.0f)) / 1e6);

     // Copy the result back, save it and compare it with the reference.
     checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
     if (sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height))
     {
         printf("\n\tSave output file.\n");
     }
     else
     {
         fprintf(stderr, "\n\tFailed to save the output file.\n");
     }
     printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");

     // Release device and host resources.
     checkCudaErrors(cudaFree(d_data));
     checkCudaErrors(cudaFreeArray(cuArray));
     free(h_data);
     free(h_dataRef);

     getchar();
     return 0;
 }

▶ 输出结果

 Start.

 Device GeForce GTX , Multi-Processors: , SM 6.1

 Load input files, 512 x 512 pixels

 Cost time: 0.000000 ms, inf Mpixels/sec

 Save output file.

 Finish, return Passed

▶ 涨姿势

● 使用函数 sdkLoadPGM() 读取图片数据

 // helper_image.h

 // Reads a binary PGM ("P5", 1 channel) or PPM ("P6", 3 channels) file.
 //
 // file     : path of the image file.
 // data     : in/out pixel buffer. If *data is NULL a buffer of
 //            width * height * channels bytes is allocated with malloc() and
 //            *w / *h receive the image size. Otherwise the caller's buffer is
 //            filled and *w / *h must already match the image size.
 // w, h     : image width / height (see `data`).
 // channels : receives 1 for PGM, 3 for PPM, 0 if the file is neither.
 //
 // Returns true on success. On any failure the file is closed, a message is
 // written to std::cerr and false is returned; a buffer allocated by this
 // function is freed and *data reset to NULL.
 inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
 {
     FILE *fp = NULL;

     if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
     {
         std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
         return false;
     }

     // check header
     char header[PGMHeaderSize];

     if (fgets(header, PGMHeaderSize, fp) == NULL)
     {
         std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
         fclose(fp);
         return false;
     }

     if (strncmp(header, "P5", 2) == 0)
     {
         *channels = 1;
     }
     else if (strncmp(header, "P6", 2) == 0)
     {
         *channels = 3;
     }
     else
     {
         std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
         *channels = 0;
         fclose(fp);
         return false;
     }

     // parse header, read maxval, width and height
     // `i` counts how many of the three header values have been read so far; they
     // may be spread over one, two or three lines, with '#' comment lines between.
     unsigned int width = 0;
     unsigned int height = 0;
     unsigned int maxval = 0;
     unsigned int i = 0;

     while (i < 3)
     {
         if (fgets(header, PGMHeaderSize, fp) == NULL)
         {
             std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
             fclose(fp);
             return false;
         }

         if (header[0] == '#')
         {
             continue;
         }

         int parsed = 0;

         if (i == 0)
         {
             parsed = SSCANF(header, "%u %u %u", &width, &height, &maxval);
         }
         else if (i == 1)
         {
             parsed = SSCANF(header, "%u %u", &height, &maxval);
         }
         else if (i == 2)
         {
             parsed = SSCANF(header, "%u", &maxval);
         }

         // The scan can return EOF (-1) on a blank line; adding that to the
         // unsigned counter would corrupt it, so only count real conversions.
         if (parsed > 0)
         {
             i += (unsigned int)parsed;
         }
     }

     if (width == 0 || height == 0)
     {
         std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
         fclose(fp);
         return false;
     }

     const size_t numBytes = (size_t)width * height * (*channels);
     bool allocatedHere = false;

     // check if given handle for the data is initialized
     if (NULL != *data)
     {
         if (*w != width || *h != height)
         {
             // Reading into a caller buffer of a different size would overflow it.
             std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
             fclose(fp);
             return false;
         }
     }
     else
     {
         *data = (unsigned char *)malloc(sizeof(unsigned char) * numBytes);

         if (NULL == *data)
         {
             std::cerr << "__LoadPPM() : Failed to allocate image buffer." << std::endl;
             fclose(fp);
             return false;
         }

         allocatedHere = true;
         *w = width;
         *h = height;
     }

     // read and close file
     const size_t numRead = fread(*data, sizeof(unsigned char), numBytes, fp);
     fclose(fp);

     if (numRead != numBytes)
     {
         std::cerr << "__LoadPPM() read data returned error." << std::endl;

         if (allocatedHere)
         {
             free(*data);
             *data = NULL;
         }

         return false;
     }

     return true;
 }

 // Loads an image file through __loadPPM() and converts its unsigned-char
 // samples to type T with ConverterFromUByte<T>.
 //
 // file : path of the image file.
 // data : in/out buffer. If *data is NULL it is allocated with malloc() to hold
 //        (*w) * (*h) * channels elements of T; otherwise the caller's buffer is
 //        filled.
 // w, h : receive the image width and height from __loadPPM().
 //
 // Returns true on success, false if the file could not be loaded or the
 // output buffer could not be allocated.
 template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
 {
     unsigned char *idata = NULL;
     unsigned int channels;

     if (!__loadPPM(file, &idata, w, h, &channels))
     {
         free(idata);   // no-op when nothing was allocated
         return false;
     }

     // Element count computed in size_t so large images do not wrap around.
     const size_t size = (size_t)(*w) * (*h) * channels;

     // If the caller did not provide a buffer, allocate one sized for the image.
     if (*data == NULL)
     {
         *data = (T *)malloc(sizeof(T) * size);

         if (*data == NULL)
         {
             free(idata);
             return false;
         }
     }

     // Convert and copy the samples into the output buffer.
     std::transform(idata, idata + size, *data, ConverterFromUByte<T>());
     free(idata);
     return true;
 }

● 使用到的表面写入函数原型

 // surface_functions.h

 // Device-side write of one value of type T to a 2D surface reference.
 //   val  : value to store; sizeof(T) bytes are handed to the handler.
 //   surf : 2D surface reference (surface<void, cudaSurfaceType2D>).
 //   x, y : target coordinates. NOTE(review): per the CUDA programming guide the
 //          x coordinate is a byte offset rather than an element index - confirm.
 //   mode : boundary handling mode; defaults to cudaBoundaryModeTrap.
 template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)

 {

 // The body is only compiled when __CUDA_ARCH__ is defined; otherwise the function is empty.
 #ifdef __CUDA_ARCH__

     // Forward the address of val, its size in bytes and the coordinates to the "__surf2Dwrite_v2" handler.
     __nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);

 #endif

 }

0_Simple__simpleSurfaceWrite的更多相关文章

0_Simple__simpleTexture + 0_Simple__simpleTextureDrv
使用纹理引用来旋转图片,并在使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...

随机推荐

bootstrap中如何控制input的宽度
☆1☆ bootstrap中如何控制input的宽度: v2版本:定义了很多class,可用在input. "input-block-level"."input-mini ...
【java规则引擎】《Drools7.0.0.Final规则引擎教程》第4章 4.2 activation-group& dialect& date-effective
转载至:https://blog.csdn.net/wo541075754/article/details/75511887 activation-group 该属性将若干个规则划分成一个组,统一命名 ...
CentOS升级Python2.6到Python2.7并安装pip
原文:http://ruter.sundaystart.net/2015/12/03/Update-python/ 貌似CentOS 6.X系统默认安装的Python都是2.6版本的?平时使用以及很多 ...
hasura graphql-engine v1.0.0-alpha26 版本新功能试用
hasura graphql-engine v1.0.0-alpha26 已经发布了,有好多新的变动,测试使用docker 环境,同时pg 数据库使用了citus citus 是一个方便扩展的pg ...
openresty 使用cuid 类库生成短链接id
cuid 是一个不错的id 生成算发,类似的有shortid .hashid 演示使用lua 包集成openresty 做测试使用docker-compose 运行 dockerfile FROM ...
XDomainRequest object
The XDomainRequest object has these types of members: Events Methods Properties Events The XDomainRe ...
Windows环境下用jwplayer+Nginx搭建视频点播服务器
flv视频可以采用两种方式发布: 一.普通的HTTP下载方式二.基于Flash Media Server或Red5服务器的rtmp/rtmpt流媒体方式. 多数知名视频网站都采用的是前一种方式. 两 ...
升级CentOS 7.4内核版本的三种方案
https://blog.csdn.net/breeze915/article/details/79243673 在实验环境下,已安装了最新的CentOS 7.4操作系统,现在需要升级内核版本. 实验 ...
hadoop项目开发案例方案汇总
大数据Hadoop应用开发技术正可谓如火如荼推进中,以为大数据已经不仅仅是局限在互联网领域,而是已经被上升到了国家战略的高度层面.大数据正在深刻影响和改变我们的日常生活和工作方式. Hadoop应用开 ...
既做无线客户端又做无线ap、又可只存在一种模式
1. 1.1 打开 /barrier_breaker/package/base-files/files/etc/init.d 加入 disable_sta_mode_wifi_interfaces # ...

0_Simple__simpleSurfaceWrite

0_Simple__simpleSurfaceWrite的更多相关文章

随机推荐

热门专题