使用表面写入函数,结合纹理引用实现图片的旋转
▶ 源代码

 #include <stdio.h>
#include <windows.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h> #define WINDOWS_LEAN_AND_MEAN
#define NOMINMAX
#define MIN_EPSILON_ERROR 5e-3f
float angle = 0.5f; // 弧度制
texture<float, , cudaReadModeElementType> tex;
surface<void, > outputSurface; // 使用表面写入,将全局内存中的数据 d_data 写到绑定了纹理引用的 CUDA 数组 cuArray 中
__global__ void surfaceWriteKernel(float *gIData, int width, int height)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; surf2Dwrite(gIData[y * width + x], outputSurface, x * , y, cudaBoundaryModeTrap);
} // 利用纹理取样,将绑定了纹理引用的 CUDA 数组 cuArray 中的图片进行旋转,写入全局内存 d_data 中
__global__ void transformKernel(float *gOData,int width,int height,float theta)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
float u = x / (float)width - 0.5f;
float v = y / (float)height - 0.5f; gOData[y * width + x] = tex2D(tex, u * cosf(theta) - v * sinf(theta) + 0.5f, v * cosf(theta) + u * sinf(theta) + 0.5f);
} int main()
{
printf("\n\tStart.\n");
cudaSetDevice();// 删掉了筛选设备的过程
cudaDeviceProp deviceProps;
cudaGetDeviceProperties(&deviceProps, );
printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor); // 读取图片数据
float *h_data = NULL, *h_dataRef = NULL;
unsigned int width, height, size;
sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height);// 删掉了用函数 sdkFindFilePath() 查找输入文件的过程
size = width * height * sizeof(float);
sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &width, &height);
printf("\n\tLoad input files, %d x %d pixels\n", width, height); // 申请设备内存
float *d_data = NULL;
cudaMalloc((void **) &d_data, size);
cudaArray *cuArray;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(, , , , cudaChannelFormatKindFloat);
cudaMallocArray(&cuArray,&channelDesc,width,height,cudaArraySurfaceLoadStore);
cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice);
//cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice); 只使用纹理内存时,可以直接拷贝到cuArray中 // 绑定表面引用
cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc); // 使用表面写入
dim3 dimBlock(, , );
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, );
surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height); // 绑定纹理引用
tex.addressMode[] = cudaAddressModeWrap;
tex.addressMode[] = cudaAddressModeWrap;
tex.filterMode = cudaFilterModeLinear;
tex.normalized = true;
cudaBindTextureToArray(tex, cuArray, channelDesc); // 预跑
transformKernel<<<dimGrid, dimBlock, >>>(d_data, width, height, angle);
cudaDeviceSynchronize(); StopWatchInterface *timer = NULL;
sdkCreateTimer(&timer);
sdkStartTimer(&timer); transformKernel<<<dimGrid, dimBlock, >>>(d_data, width, height, angle); cudaDeviceSynchronize();
sdkStopTimer(&timer);
sdkDeleteTimer(&timer);
printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", sdkGetTimerValue(&timer), (width *height / (sdkGetTimerValue(&timer) / 1000.0f)) / 1e6); // 结果回收、输出和检验
cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height);
printf("\n\tSave output file.\n");
printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed"); cudaFree(d_data);
cudaFreeArray(cuArray);
getchar();
return ;
}

▶ 输出结果

 Start.

 Device GeForce GTX , Multi-Processors: , SM 6.1

 Load input files,  x  pixels

 Cost time: 0.000000 ms, inf Mpixels/sec

 Save output file.

 Finish, return Passed

▶ 涨姿势

● 使用函数 sdkLoadPGM() 读取图片数据

 // helper_image.h
inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
{
FILE *fp = NULL;
if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
{
std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
return false;
} // check header
char header[PGMHeaderSize];
if (fgets(header, PGMHeaderSize, fp) == NULL)
{
std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
return false;
}
if (strncmp(header, "P5", ) == )
{
*channels = ;
}
else if (strncmp(header, "P6", ) == )
{
*channels = ;
}
else
{
std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
*channels = ;
return false;
} // parse header, read maxval, width and height
unsigned int width = ;
unsigned int height = ;
unsigned int maxval = ;
unsigned int i = ;
while (i < )
{
if (fgets(header, PGMHeaderSize, fp) == NULL)
{
std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
return false;
} if (header[] == '#')
{
continue;
} if (i == )
{
i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
}
else if (i == )
{
i += SSCANF(header, "%u %u", &height, &maxval);
}
else if (i == )
{
i += SSCANF(header, "%u", &maxval);
}
} // check if given handle for the data is initialized
if (NULL != *data)
{
if (*w != width || *h != height)
{
std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
}
}
else
{
*data = (unsigned char *)malloc(sizeof(unsigned char) * width * height **channels);
*w = width;
*h = height;
} // read and close file
if (fread(*data, sizeof(unsigned char), width * height **channels, fp) == )
{
std::cerr << "__LoadPPM() read data returned error." << std::endl;
} fclose(fp);
return true;
} template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
{
unsigned char *idata = NULL;
unsigned int channels; if (!__loadPPM(file, &idata, w, h, &channels))
return false;
unsigned int size = *w **h * channels; if (*data == NULL)// 如果 T **data 没有初始化,则按照读取的 size 进行初始化
*data = (T *)malloc(sizeof(T) * size); std::transform(idata, idata + size, *data, ConverterFromUByte<T>());// 拷贝数据到 data 中 free(idata);
return true;
}

● 使用到的表面写入函数原型

 // surface_functions.h
template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
__nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);
#endif
}

0_Simple__simpleSurfaceWrite的更多相关文章

  1. 0_Simple__simpleTexture + 0_Simple__simpleTextureDrv

    使用纹理引用来旋转图片,并在使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...

随机推荐

  1. BZOJ1095: [ZJOI2007]Hide 捉迷藏【线段树维护括号序列】【思维好题】

    Description 捉迷藏 Jiajia和Wind是一对恩爱的夫妻,并且他们有很多孩子.某天,Jiajia.Wind和孩子们决定在家里玩 捉迷藏游戏.他们的家很大且构造很奇特,由N个屋子和N-1条 ...

  2. Codeforces 698A:Vacations(DP)

    题目链接:http://codeforces.com/problemset/problem/698/A 题意 Vasya在n天中,有三件事情可以做,健身.比赛或者休息,但是不能连续两天都是比赛或都是但 ...

  3. Python 操作Excel之通过xlutils实现在保留原格式的情况下追加写入数据

    在Python操作Excel 的模块有 xlrd.xlwt.xlutils等. xlrd:读取Excel文件数据 xlwt:写入Excel 数据,缺点是Excel格式无法复用,为了方便用户,写入的话, ...

  4. linux内核空间和用户空间详解

    linux驱动程序一般工作在内核空间,但也可以工作在用户空间.下面我们将详细解析,什么是内核空间,什么是用户空间,以及如何判断他们.Linux简化了分段机制,使得虚拟地址与线性地址总是一致,因此,Li ...

  5. shell学习笔记汇总

    1.shell脚本中函数使用 函数定义在前,调用在后,顺序反了就没有效果了.函数调用为:函数名 参数列表 函数内部通过以下变量访问函数的参数:shell脚本函数中: $0: 这个脚本的名字 $n: 这 ...

  6. curl 知识点

    curl :command line tool and library for transferring data with URLs curl 命令,常用缩写: curl 命令 缩写 说明 curl ...

  7. Anglarjs 工具方法

    Angularjs 常用方法 1. isArray使用 效果图如下,结果为true 2.uppercase使用 $scope.name = "zhangsan"; $scope.n ...

  8. 使用scrapy框架爬取自己的博文(3)

    既然如此,何不再抓一抓网页的文字内容呢? 谷歌浏览器有个审查元素的功能,就是按树的结构查看html的组织形式,如图: 这样已经比较明显了,博客的正文内容主要在div 的class = cnblogs_ ...

  9. C# 正则表达式入门

    转自:http://www.cnblogs.com/KissKnife/archive/2008/03/23/1118423.html 另外推荐一篇学正则表达式的文章:http://www.unibe ...

  10. css transition transform animation例子讲解

    1.transition属性: transition属性是一个速记属性有四个属性:transition-property , transition-duration, transition-timin ...