0_Simple__simpleSurfaceWrite
使用表面写入函数,结合纹理引用实现图片的旋转
▶ 源代码
// These macros only influence <windows.h> when they are defined before it is included.
// NOTE(review): the Windows SDK macro is spelled WIN32_LEAN_AND_MEAN; the spelling below is kept from the original.
#define WINDOWS_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>

#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime.h>
#include "device_launch_parameters.h"

#include <helper_functions.h>
#include <helper_cuda.h>

#define MIN_EPSILON_ERROR 5e-3f
// Rotation angle handed to transformKernel, in radians.
float angle = 0.5f;

// 2D texture reference with element-type reads; sampled with tex2D() in transformKernel.
// NOTE: texture/surface references are the legacy binding API (removed in CUDA 12).
texture<float, cudaTextureType2D, cudaReadModeElementType> tex;

// 2D surface reference; surf2Dwrite() stores go through it into the bound CUDA array.
surface<void, cudaSurfaceType2D> outputSurface;

// Surface write: copy the data in global memory (d_data) into the CUDA array (cuArray) that the texture reference is bound to.
// Surface write: copies an image from global memory into the CUDA array bound to outputSurface.
//   gIData        - device pointer to width * height floats, row-major
//   width, height - image dimensions in pixels
// Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads that fall outside
// the image return immediately, so the grid may be rounded up.
__global__ void surfaceWriteKernel(float *gIData, int width, int height)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= (unsigned int)width || y >= (unsigned int)height)
    {
        return;
    }

    // surf2Dwrite addresses the x coordinate in bytes, hence the scale by the element size.
    surf2Dwrite(gIData[y * width + x], outputSurface, (int)(x * sizeof(float)), (int)y, cudaBoundaryModeTrap);
}

// Texture sampling: rotates the image held in the CUDA array bound to tex by theta radians
// around the image centre and writes the result to global memory.
//   gOData        - device pointer receiving width * height floats, row-major
//   width, height - image dimensions in pixels
//   theta         - rotation angle in radians
// Launch layout: 2D grid of 2D blocks, one thread per output pixel; out-of-range threads exit early.
// The host configures tex with normalized coordinates, wrap addressing and linear filtering.
__global__ void transformKernel(float *gOData, int width, int height, float theta)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= (unsigned int)width || y >= (unsigned int)height)
    {
        return;
    }

    // Normalized coordinates, shifted so that the rotation centre is the image centre.
    float u = x / (float)width - 0.5f;
    float v = y / (float)height - 0.5f;

    const float cosTheta = cosf(theta);
    const float sinTheta = sinf(theta);

    gOData[y * width + x] = tex2D(tex, u * cosTheta - v * sinTheta + 0.5f, v * cosTheta + u * sinTheta + 0.5f);
}

// Loads a PGM image, pushes it through a surface write into a CUDA array, rotates it by
// texture sampling, then saves the result and compares it against a reference image.
int main()
{
    printf("\n\tStart.\n");

    // Device selection logic was removed; always use device 0.
    const int deviceId = 0;
    checkCudaErrors(cudaSetDevice(deviceId));
    cudaDeviceProp deviceProps;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, deviceId));
    printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);

    // Read the image data (the sdkFindFilePath() lookup of the input file was removed).
    float *h_data = NULL, *h_dataRef = NULL;
    unsigned int width = 0, height = 0;
    if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height))
    {
        printf("\n\tFailed to load the input image.\n");
        return EXIT_FAILURE;
    }
    const size_t size = (size_t)width * height * sizeof(float);

    // Load the reference into its own dimensions so a mismatching file cannot silently
    // change width/height after size has been computed.
    unsigned int refWidth = 0, refHeight = 0;
    if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &refWidth, &refHeight))
    {
        printf("\n\tFailed to load the reference image.\n");
        free(h_data);
        return EXIT_FAILURE;
    }
    if (refWidth != width || refHeight != height)
    {
        printf("\n\tReference image is %u x %u but input is %u x %u.\n", refWidth, refHeight, width, height);
        free(h_data);
        free(h_dataRef);
        return EXIT_FAILURE;
    }
    printf("\n\tLoad input files, %u x %u pixels\n", width, height);

    // Allocate device memory.
    float *d_data = NULL;
    checkCudaErrors(cudaMalloc((void **)&d_data, size));
    cudaArray *cuArray = NULL;
    // One 32-bit float channel per texel.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height, cudaArraySurfaceLoadStore));
    checkCudaErrors(cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice));
    // cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice);
    // When only texture memory is used, the data can be copied straight into cuArray instead.

    // Bind the surface reference.
    checkCudaErrors(cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc));

    // Surface write. The grid is rounded up; the kernels guard against out-of-range threads.
    const dim3 dimBlock(8, 8, 1);
    const dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1);
    surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
    getLastCudaError("surfaceWriteKernel launch failed");

    // Bind the texture reference.
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true;
    checkCudaErrors(cudaBindTextureToArray(tex, cuArray, channelDesc));

    // Warm-up run.
    transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
    getLastCudaError("transformKernel warm-up launch failed");
    checkCudaErrors(cudaDeviceSynchronize());

    // Timed run.
    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);
    transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
    getLastCudaError("transformKernel launch failed");
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&timer);
    // Read the elapsed time BEFORE deleting the timer; reading afterwards yields 0 ms / inf throughput.
    const float elapsedMs = sdkGetTimerValue(&timer);
    sdkDeleteTimer(&timer);
    printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", elapsedMs, (width * height / (elapsedMs / 1000.0f)) / 1e6);

    // Copy the result back, save it and verify it against the reference.
    checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
    sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height);
    printf("\n\tSave output file.\n");
    printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");

    // Release device and host resources.
    checkCudaErrors(cudaFree(d_data));
    checkCudaErrors(cudaFreeArray(cuArray));
    free(h_data);
    free(h_dataRef);

    getchar();
    return 0;
}
▶ 输出结果
Start. Device GeForce GTX , Multi-Processors: , SM 6.1 Load input files, x pixels Cost time: 0.000000 ms, inf Mpixels/sec Save output file. Finish, return Passed
▶ 涨姿势
● 使用函数 sdkLoadPGM() 读取图片数据
// helper_image.h
// Reads a binary PGM ("P5", 1 channel) or PPM ("P6", 3 channels) file into a byte buffer.
//   file     - path of the image file
//   data     - in/out: if *data is NULL a buffer of width * height * channels bytes is
//              malloc'ed and returned (the caller frees it); otherwise the caller's buffer
//              is filled and *w / *h must already match the file's dimensions
//   w, h     - out (or in, when *data is supplied): image width and height in pixels
//   channels - out: 1 for PGM, 3 for PPM, 0 if the magic number is not recognised
// Returns false if the file cannot be opened, the header is malformed, the supplied buffer
// has the wrong dimensions, or allocation fails. The file is closed on every path.
inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
{
    FILE *fp = NULL;
    if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
    {
        std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
        return false;
    }

    // check header
    char header[PGMHeaderSize];
    if (fgets(header, PGMHeaderSize, fp) == NULL)
    {
        std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
        fclose(fp);
        return false;
    }

    if (strncmp(header, "P5", 2) == 0)
    {
        *channels = 1;
    }
    else if (strncmp(header, "P6", 2) == 0)
    {
        *channels = 3;
    }
    else
    {
        std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
        *channels = 0;
        fclose(fp);
        return false;
    }

    // parse header, read maxval, width and height
    // The three values may be spread over one, two or three lines; i counts how many
    // have been read so far and selects which ones are still expected.
    unsigned int width = 0;
    unsigned int height = 0;
    unsigned int maxval = 0;
    unsigned int i = 0;
    while (i < 3)
    {
        if (fgets(header, PGMHeaderSize, fp) == NULL)
        {
            std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
            fclose(fp);
            return false;
        }

        // skip comment lines
        if (header[0] == '#')
        {
            continue;
        }

        int parsed = 0;
        if (i == 0)
        {
            parsed = SSCANF(header, "%u %u %u", &width, &height, &maxval);
        }
        else if (i == 1)
        {
            parsed = SSCANF(header, "%u %u", &height, &maxval);
        }
        else if (i == 2)
        {
            parsed = SSCANF(header, "%u", &maxval);
        }

        // SSCANF yields EOF (negative) on an empty line; never add that to the unsigned counter.
        if (parsed > 0)
        {
            i += (unsigned int)parsed;
        }
    }

    const size_t byteCount = (size_t)width * height * (*channels);

    // check if given handle for the data is initialized
    if (NULL != *data)
    {
        if (*w != width || *h != height)
        {
            // Reading into a buffer sized for different dimensions could overrun it.
            std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
            fclose(fp);
            return false;
        }
    }
    else
    {
        *data = (unsigned char *)malloc(sizeof(unsigned char) * byteCount);
        if (NULL == *data)
        {
            std::cerr << "__LoadPPM() : Failed to allocate image buffer." << std::endl;
            fclose(fp);
            return false;
        }
        *w = width;
        *h = height;
    }

    // read and close file
    if (fread(*data, sizeof(unsigned char), byteCount, fp) == 0)
    {
        std::cerr << "__LoadPPM() read data returned error." << std::endl;
    }

    fclose(fp);
    return true;
}

// Loads a PGM/PPM file and converts its bytes to element type T.
//   file - path of the image file
//   data - in/out: if *data is NULL a buffer of (*w) * (*h) * channels elements of T is
//          malloc'ed (the caller frees it); otherwise the caller's buffer must be large
//          enough to hold that many elements
//   w, h - out: image width and height in pixels
// Returns false if the file could not be loaded or the output buffer could not be allocated.
template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
{
    unsigned char *idata = NULL;
    unsigned int channels;

    if (!__loadPPM(file, &idata, w, h, &channels))
    {
        free(idata);   // free(NULL) is a no-op; guards against a partially completed load
        return false;
    }

    unsigned int size = *w * *h * channels;

    // If *data has not been initialised, allocate it according to the size just read.
    if (*data == NULL)
    {
        *data = (T *)malloc(sizeof(T) * size);
        if (*data == NULL)
        {
            free(idata);
            return false;
        }
    }

    // Convert each byte to T and copy it into *data.
    std::transform(idata, idata + size, *data, ConverterFromUByte<T>());

    // Release the temporary byte buffer.
    free(idata);
    return true;
}
● 使用到的表面写入函数原型
// surface_functions.h
// Writes one value of type T through a 2D surface reference.
//   val  - value to store; its address and sizeof(T) are what get passed to the handler below
//   surf - 2D surface reference (surface<void, cudaSurfaceType2D>) to write through
//   x, y - coordinates of the destination element
//          NOTE(review): x is presumably a byte offset rather than an element index — confirm against the CUDA surface-function documentation
//   mode - boundary mode for out-of-range coordinates; defaults to cudaBoundaryModeTrap
template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
// The body is only compiled when __CUDA_ARCH__ is defined; otherwise the function is empty.
#ifdef __CUDA_ARCH__
// Forward to the compiler's texture/surface handler, selecting the "__surf2Dwrite_v2" operation.
__nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);
#endif
}
0_Simple__simpleSurfaceWrite的更多相关文章
- 0_Simple__simpleTexture + 0_Simple__simpleTextureDrv
使用纹理引用来旋转图片,并在使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...
随机推荐
- bootstrap中如何控制input的宽度
☆1☆ bootstrap中如何控制input的宽度: v2版本:定义了很多class,可用在input. "input-block-level"."input-mini ...
- 【java规则引擎】《Drools7.0.0.Final规则引擎教程》第4章 4.2 activation-group& dialect& date-effective
转载至:https://blog.csdn.net/wo541075754/article/details/75511887 activation-group 该属性将若干个规则划分成一个组,统一命名 ...
- CentOS升级Python2.6到Python2.7并安装pip
原文:http://ruter.sundaystart.net/2015/12/03/Update-python/ 貌似CentOS 6.X系统默认安装的Python都是2.6版本的?平时使用以及很多 ...
- hasura graphql-engine v1.0.0-alpha26 版本新功能试用
hasura graphql-engine v1.0.0-alpha26 已经发布了,有好多新的变动,测试使用docker 环境,同时pg 数据库使用了citus citus 是一个方便扩展的pg ...
- openresty 使用cuid 类库生成短链接id
cuid 是一个不错的id 生成算发,类似的有shortid .hashid 演示使用lua 包集成openresty 做测试 使用docker-compose 运行 dockerfile FROM ...
- XDomainRequest object
The XDomainRequest object has these types of members: Events Methods Properties Events The XDomainRe ...
- Windows环境下用jwplayer+Nginx搭建视频点播服务器
flv视频可以采用两种方式发布: 一.普通的HTTP下载方式 二.基于Flash Media Server或Red5服务器的rtmp/rtmpt流媒体方式. 多数知名视频网站都采用的是前一种方式. 两 ...
- 升级CentOS 7.4内核版本的三种方案
https://blog.csdn.net/breeze915/article/details/79243673 在实验环境下,已安装了最新的CentOS 7.4操作系统,现在需要升级内核版本. 实验 ...
- hadoop项目开发案例方案汇总
大数据Hadoop应用开发技术正可谓如火如荼推进中,以为大数据已经不仅仅是局限在互联网领域,而是已经被上升到了国家战略的高度层面.大数据正在深刻影响和改变我们的日常生活和工作方式. Hadoop应用开 ...
- 既做无线客户端又做无线ap、又可只存在一种模式
1. 1.1 打开 /barrier_breaker/package/base-files/files/etc/init.d 加入 disable_sta_mode_wifi_interfaces # ...