使用表面写入函数,结合纹理引用实现图片的旋转
▶ 源代码

 #include <stdio.h>
#include <windows.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h>

// NOTE(review): these two macros only affect <windows.h> when they are defined
// before it is included; here they follow the include, so they are inert.
// Confirm whether they should be moved above `#include <windows.h>`.
#define WINDOWS_LEAN_AND_MEAN
#define NOMINMAX

// Tolerance handed to compareData() when checking the result against the reference image.
#define MIN_EPSILON_ERROR 5e-3f

float angle = 0.5f; // rotation angle, in radians

// 2D texture reference used by transformKernel to sample the source image.
// The dimensionality argument (2) is mandatory: the kernel reads it with tex2D().
texture<float, 2, cudaReadModeElementType> tex;

// 2D surface reference used by surfaceWriteKernel to fill the CUDA array.
// surf2Dwrite() only accepts surface<void, cudaSurfaceType2D>, i.e. dimension 2.
surface<void, 2> outputSurface;

// Surface write: copy the data in global memory (d_data) into the CUDA array cuArray that the texture reference is bound to
__global__ void surfaceWriteKernel(float *gIData, int width, int height)
{
    // Launch layout: 2D grid of 2D blocks, one thread per pixel.
    // gIData is read as a dense row-major width x height float image.
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so threads past the image edge must do nothing.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Surface functions address the x coordinate in bytes, hence the sizeof(float) scaling.
    surf2Dwrite(gIData[y * width + x], outputSurface, (int)(x * sizeof(float)), y, cudaBoundaryModeTrap);
}

// Use texture sampling to rotate the image stored in the CUDA array cuArray (bound to the
// texture reference) and write the rotated image to global memory (d_data).
// Launch layout: 2D grid of 2D blocks, one thread per output pixel; gOData is written as a
// dense row-major width x height float image. theta is the rotation angle in radians.
__global__ void transformKernel(float *gOData, int width, int height, float theta)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Skip the padding threads of partially filled edge blocks.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Normalized coordinates, shifted so that the rotation is about the image centre.
    float u = x / (float)width - 0.5f;
    float v = y / (float)height - 0.5f;

    const float cosTheta = cosf(theta);
    const float sinTheta = sinf(theta);

    // Rotate (u, v), shift back to [0, 1) and sample the texture.
    gOData[y * width + x] = tex2D(tex, u * cosTheta - v * sinTheta + 0.5f, v * cosTheta + u * sinTheta + 0.5f);
}

// Host driver: loads the input image, copies it into a CUDA array through a surface write,
// rotates it through texture sampling, times the rotation, then saves the result and
// compares it with a reference image.
// NOTE(review): texture/surface *references* were removed in CUDA 12; this file needs a
// CUDA 11.x or older toolkit.
int main()
{
    printf("\n\tStart.\n");

    // Device selection logic was removed; always use device 0.
    checkCudaErrors(cudaSetDevice(0));
    cudaDeviceProp deviceProps;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, 0));
    printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);

    // Load the image data (the sdkFindFilePath() lookup of the input file was removed).
    float *h_data = NULL, *h_dataRef = NULL;
    unsigned int width = 0, height = 0, size = 0;
    if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height))
    {
        fprintf(stderr, "\n\tFailed to load the input image.\n");
        return 1;
    }
    size = width * height * sizeof(float);

    // Load the reference into its own dimensions so that a mismatching file cannot silently
    // change the size used for the kernels and for the comparison.
    unsigned int refWidth = 0, refHeight = 0;
    if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &refWidth, &refHeight))
    {
        fprintf(stderr, "\n\tFailed to load the reference image.\n");
        free(h_data);
        return 1;
    }
    if (refWidth != width || refHeight != height)
    {
        fprintf(stderr, "\n\tReference image is %u x %u, expected %u x %u.\n", refWidth, refHeight, width, height);
        free(h_data);
        free(h_dataRef);
        return 1;
    }
    printf("\n\tLoad input files, %d x %d pixels\n", width, height);

    // Allocate device memory: a linear buffer plus a CUDA array usable as a surface.
    float *d_data = NULL;
    checkCudaErrors(cudaMalloc((void **)&d_data, size));
    cudaArray *cuArray;
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height, cudaArraySurfaceLoadStore));
    checkCudaErrors(cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice));
    //cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice); when only texture memory is used, the data can be copied straight into cuArray

    // Bind the surface reference.
    checkCudaErrors(cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc));

    // Surface write. The grid is rounded up so that images whose sides are not multiples of
    // the block size are fully covered; the kernels discard the surplus threads.
    dim3 dimBlock(8, 8, 1);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1);
    surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
    getLastCudaError("surfaceWriteKernel launch failed");

    // Bind the texture reference.
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true;
    checkCudaErrors(cudaBindTextureToArray(tex, cuArray, channelDesc));

    // Warm-up run.
    transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
    getLastCudaError("transformKernel warm-up launch failed");
    checkCudaErrors(cudaDeviceSynchronize());

    // Timed run.
    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);
    transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
    getLastCudaError("transformKernel launch failed");
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&timer);

    // Read the elapsed time BEFORE deleting the timer; querying a deleted timer is what
    // produced the "0.000000 ms, inf Mpixels/sec" output.
    const float elapsedMs = sdkGetTimerValue(&timer);
    sdkDeleteTimer(&timer);
    printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", elapsedMs, (width * height / (elapsedMs / 1000.0f)) / 1e6);

    // Copy the result back, save it and verify it.
    checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
    sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height);
    printf("\n\tSave output file.\n");
    printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");

    // Release device and host resources.
    checkCudaErrors(cudaFree(d_data));
    checkCudaErrors(cudaFreeArray(cuArray));
    free(h_data);
    free(h_dataRef);

    getchar();
    return 0;
}

▶ 输出结果

 Start.

 Device GeForce GTX , Multi-Processors: , SM 6.1

 Load input files, 512 x 512 pixels

 Cost time: 0.000000 ms, inf Mpixels/sec

 Save output file.

 Finish, return Passed

▶ 涨姿势

● 使用函数 sdkLoadPGM() 读取图片数据

 // helper_image.h
// Reads a binary PGM ("P5", 1 channel) or PPM ("P6", 3 channels) file.
//
//   file     - path of the image to read.
//   data     - in/out: if *data is NULL a buffer of width * height * channels bytes is
//              malloc'ed and stored here (the caller frees it); otherwise the pixels are
//              read into the caller's buffer.
//   w, h     - out: image width and height when the buffer is allocated here; in: the
//              dimensions the caller's buffer was sized for when *data is non-NULL.
//   channels - out: 1 for P5, 3 for P6, 0 when the magic number is not recognised.
//
// Returns false when the file cannot be opened, the header is truncated or not P5/P6,
// or the pixel buffer cannot be allocated; true otherwise.
inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
{
    FILE *fp = NULL;
    if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
    {
        std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
        return false;
    }

    // check header (magic number)
    char header[PGMHeaderSize];
    if (fgets(header, PGMHeaderSize, fp) == NULL)
    {
        std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
        fclose(fp); // every exit after a successful open must release the handle
        return false;
    }
    if (strncmp(header, "P5", 2) == 0)
    {
        *channels = 1;
    }
    else if (strncmp(header, "P6", 2) == 0)
    {
        *channels = 3;
    }
    else
    {
        std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
        *channels = 0;
        fclose(fp);
        return false;
    }

    // parse header, read maxval, width and height
    // `i` counts how many of the three header fields have been read so far; they may be
    // spread over one, two or three lines, with '#' comment lines in between.
    unsigned int width = 0;
    unsigned int height = 0;
    unsigned int maxval = 0;
    unsigned int i = 0;
    while (i < 3)
    {
        if (fgets(header, PGMHeaderSize, fp) == NULL)
        {
            std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
            fclose(fp);
            return false;
        }

        if (header[0] == '#')
        {
            continue;
        }

        int parsed = 0;
        if (i == 0)
        {
            parsed = SSCANF(header, "%u %u %u", &width, &height, &maxval);
        }
        else if (i == 1)
        {
            parsed = SSCANF(header, "%u %u", &height, &maxval);
        }
        else if (i == 2)
        {
            parsed = SSCANF(header, "%u", &maxval);
        }

        // sscanf returns EOF (negative) on an empty line; adding that to the unsigned
        // counter would wrap it around and end the loop with garbage dimensions.
        if (parsed > 0)
        {
            i += (unsigned int)parsed;
        }
    }

    const size_t byteCount = (size_t)width * height * *channels;

    // check if given handle for the data is initialized
    if (NULL != *data)
    {
        if (*w != width || *h != height)
        {
            // NOTE(review): the original only warns here and still reads into the
            // caller's buffer; kept as-is for compatibility, but a smaller buffer would
            // be overrun by the fread below — confirm whether this should fail instead.
            std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
        }
    }
    else
    {
        *data = (unsigned char *)malloc(sizeof(unsigned char) * byteCount);
        if (NULL == *data)
        {
            std::cerr << "__LoadPPM() : Failed to allocate image buffer." << std::endl;
            fclose(fp);
            return false;
        }
        *w = width;
        *h = height;
    }

    // read and close file
    if (fread(*data, sizeof(unsigned char), byteCount, fp) == 0)
    {
        std::cerr << "__LoadPPM() read data returned error." << std::endl;
    }

    fclose(fp);
    return true;
}

// Loads a PGM/PPM file and converts every byte to T through ConverterFromUByte<T>.
//
//   file - path of the image to read.
//   data - in/out: if *data is NULL a buffer of (*w) * (*h) * channels elements of T is
//          malloc'ed and stored here (the caller frees it); otherwise the converted
//          pixels are written into the caller's buffer.
//   w, h - out: image width and height as reported by __loadPPM.
//
// Returns false when __loadPPM fails or the output buffer cannot be allocated.
template <class T>
inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
{
    unsigned char *idata = NULL;
    unsigned int channels;

    if (!__loadPPM(file, &idata, w, h, &channels))
        return false;

    unsigned int size = *w * *h * channels;

    // If T **data has not been initialised, allocate it according to the size just read.
    if (*data == NULL)
        *data = (T *)malloc(sizeof(T) * size);
    if (*data == NULL)
    {
        free(idata);
        return false;
    }

    // Copy (and convert) the data into *data.
    std::transform(idata, idata + size, *data, ConverterFromUByte<T>());

    // The byte staging buffer is no longer needed once the converted copy exists.
    free(idata);
    return true;
}

● 使用到的表面写入函数原型

 // surface_functions.h
// Writes `val` to the 2D surface reference `surf` at coordinates (x, y).
//   val  - value to store; its address and sizeof(T) are what get forwarded below.
//   surf - 2D surface reference (surface<void, cudaSurfaceType2D>).
//   x, y - target coordinates.
//          NOTE(review): CUDA surface functions take x as a byte offset rather than an
//          element index — confirm against the CUDA Programming Guide.
//   mode - out-of-range handling, forwarded unchanged; defaults to cudaBoundaryModeTrap.
template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
// The body is only compiled when __CUDA_ARCH__ is defined; otherwise the function is empty.
#ifdef __CUDA_ARCH__
// Forwards to the compiler's texture/surface handler under the name "__surf2Dwrite_v2".
__nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);
#endif
}

0_Simple__simpleSurfaceWrite的更多相关文章

  1. 0_Simple__simpleTexture + 0_Simple__simpleTextureDrv

    使用纹理引用来旋转图片,并使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...

随机推荐

  1. php 加载字体 并保存成图片

    // Set the content-type header("Content-type: image/png"); // Create the image $im = image ...

  2. dbt 的知识文档管理

    dbt 支持docs的管理,可以方便进行分享,以及大家的可视化工作 有一篇文档讲的特别好分析了知识共享,知识管理的重要性(dbt 对应公司的ceo) https://blog.fishtownanal ...

  3. 如何彻底卸载Jenkins(Windows版本)

    起因: 最近在做持续集成测试过程中遇到一个问题,之前部署的Jenkins管理员密码忘了之后无法登陆,而且删除掉tomcat下webapps文件夹中的Jenkins目录后,再次安装Jenkins后相关的 ...

  4. 【转】每天一个linux命令(15):tail 命令

    原文网址:http://www.cnblogs.com/peida/archive/2012/11/07/2758084.html tail 命令从指定点开始将文件写到标准输出.使用tail命令的-f ...

  5. h5 的 audio 标签知识点

    因为音频格式有版权,各浏览器使用不同的音频格式. 音频格式兼容性 音频格式 Chrome Firefox IE9 Opera Safari MP3 支持 不支持 支持 不支持 支持 OGG 支持 支持 ...

  6. base64编码的原理及实现

    base64编码的原理及实现 我们的图片大部分都是可以转换成base64编码的data:image. 这个在将canvas保存为img的时候尤其有用.虽然除ie外,大部分现代浏览器都已经支持原生的基于 ...

  7. 转 JMeter之修改Sampler响应数据的编码格式

    问题:JMeter的sampler响应数据中有中文时,会解析出错. JMeter的Sampler中的响应数据默认的编码格式是:ISO-8859-1.来自文件: jmeter.properties中的语 ...

  8. centos7下svn的安装与配置

    1.环境 centos7 2.安装svnyum -y install subversion 3.配置 建立版本库目录mkdir /www/svndata svnserve -d -r /www/svn ...

  9. Java中对话框的弹出

    最近在做学校的课程设计,java编程需要用到对话框弹出,第一反应是js中的alert和confirm,java的话瞬间懵,查阅学习总结如下,用以以后的学习 1.显示一个错误对话框,该对话框显示的 me ...

  10. Excel数组排序+图片统一大小

    Sub 图片调整合适大小() ' Debug.Print ActiveWorkbook.Name 图片显示比例 = 0.9 '1为顶满单元格 Dim wb As Workbook, sh As Wor ...