0_Simple__simpleSurfaceWrite
使用表面写入函数,结合纹理引用实现图片的旋转
▶ 源代码
#include <stdio.h>
#include <windows.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h> #define WINDOWS_LEAN_AND_MEAN
#define NOMINMAX
#define MIN_EPSILON_ERROR 5e-3f
float angle = 0.5f; // 弧度制
texture<float, , cudaReadModeElementType> tex;
surface<void, > outputSurface; // 使用表面写入,将全局内存中的数据 d_data 写到绑定了纹理引用的 CUDA 数组 cuArray 中
__global__ void surfaceWriteKernel(float *gIData, int width, int height)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; surf2Dwrite(gIData[y * width + x], outputSurface, x * , y, cudaBoundaryModeTrap);
} // 利用纹理取样,将绑定了纹理引用的 CUDA 数组 cuArray 中的图片进行旋转,写入全局内存 d_data 中
__global__ void transformKernel(float *gOData,int width,int height,float theta)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
float u = x / (float)width - 0.5f;
float v = y / (float)height - 0.5f; gOData[y * width + x] = tex2D(tex, u * cosf(theta) - v * sinf(theta) + 0.5f, v * cosf(theta) + u * sinf(theta) + 0.5f);
} int main()
{
printf("\n\tStart.\n");
cudaSetDevice();// 删掉了筛选设备的过程
cudaDeviceProp deviceProps;
cudaGetDeviceProperties(&deviceProps, );
printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor); // 读取图片数据
float *h_data = NULL, *h_dataRef = NULL;
unsigned int width, height, size;
sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height);// 删掉了用函数 sdkFindFilePath() 查找输入文件的过程
size = width * height * sizeof(float);
sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &width, &height);
printf("\n\tLoad input files, %d x %d pixels\n", width, height); // 申请设备内存
float *d_data = NULL;
cudaMalloc((void **) &d_data, size);
cudaArray *cuArray;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(, , , , cudaChannelFormatKindFloat);
cudaMallocArray(&cuArray,&channelDesc,width,height,cudaArraySurfaceLoadStore);
cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice);
//cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice); 只使用纹理内存时,可以直接拷贝到cuArray中 // 绑定表面引用
cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc); // 使用表面写入
dim3 dimBlock(, , );
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, );
surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height); // 绑定纹理引用
tex.addressMode[] = cudaAddressModeWrap;
tex.addressMode[] = cudaAddressModeWrap;
tex.filterMode = cudaFilterModeLinear;
tex.normalized = true;
cudaBindTextureToArray(tex, cuArray, channelDesc); // 预跑
transformKernel<<<dimGrid, dimBlock, >>>(d_data, width, height, angle);
cudaDeviceSynchronize(); StopWatchInterface *timer = NULL;
sdkCreateTimer(&timer);
sdkStartTimer(&timer); transformKernel<<<dimGrid, dimBlock, >>>(d_data, width, height, angle); cudaDeviceSynchronize();
sdkStopTimer(&timer);
sdkDeleteTimer(&timer);
printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", sdkGetTimerValue(&timer), (width *height / (sdkGetTimerValue(&timer) / 1000.0f)) / 1e6); // 结果回收、输出和检验
cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height);
printf("\n\tSave output file.\n");
printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed"); cudaFree(d_data);
cudaFreeArray(cuArray);
getchar();
return ;
}
▶ 输出结果
Start. Device GeForce GTX , Multi-Processors: , SM 6.1 Load input files, x pixels Cost time: 0.000000 ms, inf Mpixels/sec Save output file. Finish, return Passed
▶ 涨姿势
● 使用函数 sdkLoadPGM() 读取图片数据
// helper_image.h
inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
{
FILE *fp = NULL;
if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
{
std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
return false;
} // check header
char header[PGMHeaderSize];
if (fgets(header, PGMHeaderSize, fp) == NULL)
{
std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
return false;
}
if (strncmp(header, "P5", ) == )
{
*channels = ;
}
else if (strncmp(header, "P6", ) == )
{
*channels = ;
}
else
{
std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
*channels = ;
return false;
} // parse header, read maxval, width and height
unsigned int width = ;
unsigned int height = ;
unsigned int maxval = ;
unsigned int i = ;
while (i < )
{
if (fgets(header, PGMHeaderSize, fp) == NULL)
{
std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
return false;
} if (header[] == '#')
{
continue;
} if (i == )
{
i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
}
else if (i == )
{
i += SSCANF(header, "%u %u", &height, &maxval);
}
else if (i == )
{
i += SSCANF(header, "%u", &maxval);
}
} // check if given handle for the data is initialized
if (NULL != *data)
{
if (*w != width || *h != height)
{
std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
}
}
else
{
*data = (unsigned char *)malloc(sizeof(unsigned char) * width * height **channels);
*w = width;
*h = height;
} // read and close file
if (fread(*data, sizeof(unsigned char), width * height **channels, fp) == )
{
std::cerr << "__LoadPPM() read data returned error." << std::endl;
} fclose(fp);
return true;
} template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
{
unsigned char *idata = NULL;
unsigned int channels; if (!__loadPPM(file, &idata, w, h, &channels))
return false;
unsigned int size = *w **h * channels; if (*data == NULL)// 如果 T **data 没有初始化,则按照读取的 size 进行初始化
*data = (T *)malloc(sizeof(T) * size); std::transform(idata, idata + size, *data, ConverterFromUByte<T>());// 拷贝数据到 data 中 free(idata);
return true;
}
● 使用到的表面写入函数原型
// surface_functions.h
template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
__nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);
#endif
}
0_Simple__simpleSurfaceWrite的更多相关文章
- 0_Simple__simpleTexture + 0_Simple__simpleTextureDrv
使用纹理引用来旋转图片,并在使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...
随机推荐
- ElasticSearch(八):springboot集成ElasticSearch集群并使用
1. 集群的搭建 见:ElasticSearch(七) 2. springboot配置集群 2.1 创建springboot项目,使用idea创建,不过多介绍(创建项目时候建议不要勾选elastics ...
- hdu 1723 DP/递推
题意:有一队人(人数 ≥ 1),开头一个人要将消息传到末尾一个人那里,规定每次最多可以向后传n个人,问共有多少种传达方式. 这道题我刚拿到手没有想过 DP ,我觉得这样传消息其实很像 Fibonacc ...
- java泛型学习(2)
一:深入泛型使用.主要是父类和子类存在泛型的demo /** * 父类为泛型类 * @author 尚晓飞 * @date 2014-7-15 下午7:31:25 * * * 父类和子类的泛型. * ...
- LG1116 【车厢重组】
前言 看了大家的做法,什么冒泡排序,插入排序,树状数组,线段树,都好厉害呐,我都没想出来 但我发现竟然还没有人用主席树,于是我跟大家交流一下 主席树 做法 显然我们有 \(Ans=\sum_{i=1} ...
- MySQL表类型MyISAM/InnoDB的区别(解决事务不回滚的问题)(转)
本文参考: http://mysqlpub.com/thread-5383-1-1.html http://blog.csdn.net/c466254931/article/details/53463 ...
- grandstack graphql 工具基本试用
grandstack 是一个方便graphql 应用开发的工具 使用docker-compose 运行 环境准备 官方的starter 比较好,已经是使用docker-compose 创建好了所有 ...
- 解决python2安装MySQL-python模块报错
今天电脑重装系统,所有软件都重装一遍,MySQLdb模块一直装不好,纠结了好久,终于解决,方法分享给大家. MySQLdb模块安装: 1.下载MySQL-pyhon模块,网站为:https://pyp ...
- Erlang
Erlang The Erlang BEAM Virtual Machine Specificationhttp://www.cs-lab.org/historical_beam_instructio ...
- 【转】每天一个linux命令(2):cd命令
原文网址:http://www.cnblogs.com/peida/archive/2012/10/24/2736501.html Linux cd 命令可以说是Linux中最基本的命令语句,其他的命 ...
- WeakHashMap 理解
WeakHashMap 通过 expungeStaleEntries 这个函数的来实现:移除其内部不用的条目,从而达到自动释放内存的目的. 基本上只要对 WeakHashMap 的内容进行访问就会调用 ...