2D Layered Textures

▶ Source code. Using texture fetches, the kernel reads the elements of a layered CUDA 3D array in their original order, negates each one, adds the layer index, and stores the result in global memory; the host then prints the result.

#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h>

#define MIN_EPSILON_ERROR 5e-3f
#define OUTPUT 5

texture<float, cudaTextureType2DLayered> tex;

// Fetch one texel from the given layer, negate it, and add the layer index.
__global__ void transformKernel(float *g_odata, int width, int height, int layer)
{
    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
    float u = (x + 0.5f) / (float)width;   // normalized coordinates, sampling at texel centers
    float v = (y + 0.5f) / (float)height;
    g_odata[layer*width*height + y*width + x] = -tex2DLayered(tex, u, v, layer) + layer;
}

int main(int argc, char **argv)
{
    unsigned int width = 512, height = 512, num_layers = 5;
    unsigned int size = width * height * num_layers * sizeof(float);
    float *h_data = (float *)malloc(size);
    float *h_data_ref = (float *)malloc(size);
    float *d_data = NULL;
    cudaMalloc((void **)&d_data, size);

    for (unsigned int layer = 0; layer < num_layers; layer++)
    {
        for (int i = 0; i < (int)(width * height); i++)
            h_data[layer*width*height + i] = (float)i;
    }
    for (unsigned int layer = 0; layer < num_layers; layer++)
    {
        for (int i = 0; i < (int)(width * height); i++)
            h_data_ref[layer*width*height + i] = -h_data[layer*width*height + i] + layer;
    }

    printf("\n\tInput data\n\t");
    for (unsigned int i = 0; i < num_layers; i++)
    {
        for (int j = 0; j < OUTPUT; j++)
        {
            for (int k = 0; k < OUTPUT; k++)
                printf("%2.1f ", h_data[i*width*height + j*width + k]);
            printf("\n\t");
        }
        printf("\n\t");
    }
    printf("\n\tIdeal output data\n\t");
    for (unsigned int i = 0; i < num_layers; i++)
    {
        for (int j = 0; j < OUTPUT; j++)
        {
            for (int k = 0; k < OUTPUT; k++)
                printf("%2.1f ", h_data_ref[i*width*height + j*width + k]);
            printf("\n\t");
        }
        printf("\n\t");
    }

    // Set up the CUDA 3D array and copy the host data into it
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray *cu_3darray;
    cudaMalloc3DArray(&cu_3darray, &channelDesc, make_cudaExtent(width, height, num_layers), cudaArrayLayered);
    cudaMemcpy3DParms myparms = { 0 };
    myparms.srcPos = make_cudaPos(0, 0, 0);
    myparms.dstPos = make_cudaPos(0, 0, 0);
    myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, height);
    myparms.dstArray = cu_3darray;
    myparms.extent = make_cudaExtent(width, height, num_layers);
    myparms.kind = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&myparms);

    // Set texture parameters and bind the texture to the array
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true;
    cudaBindTextureToArray(tex, cu_3darray, channelDesc);

    dim3 dimBlock(8, 8, 1);
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
    printf("Covering 2D data of %d * %d * %d: Grid size is %d x %d, each block has 8 x 8 threads\n",
           width, height, num_layers, dimGrid.x, dimGrid.y);

    transformKernel<<<dimGrid, dimBlock>>>(d_data, width, height, 0);  // warm-up run
    cudaDeviceSynchronize();

    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);
    for (unsigned int layer = 0; layer < num_layers; layer++)  // one kernel launch per layer
        transformKernel<<<dimGrid, dimBlock>>>(d_data, width, height, layer);
    cudaDeviceSynchronize();
    sdkStopTimer(&timer);
    printf("\nTime: %.3f msec, %.2f Mtexlookups/sec\n", sdkGetTimerValue(&timer),
           (width * height * num_layers / (sdkGetTimerValue(&timer) / 1000.0f) / 1e6));
    sdkDeleteTimer(&timer);

    // Copy the results back and verify them
    memset(h_data, 0, size);
    cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
    if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
        sdkWriteFile<float>("./data/regression.dat", h_data, width * height, 0.0f, false);
    else
        printf("Comparing kernel output to expected data return %d\n",
               compareData(h_data, h_data_ref, width * height * num_layers, MIN_EPSILON_ERROR, 0.0f));

    printf("\n\tActual output data\n\t");
    for (unsigned int i = 0; i < num_layers; i++)
    {
        for (int j = 0; j < OUTPUT; j++)
        {
            for (int k = 0; k < OUTPUT; k++)
                printf("%2.1f ", h_data[i*width*height + j*width + k]);
            printf("\n\t");
        }
        printf("\n\t");
    }

    free(h_data);
    free(h_data_ref);
    cudaFree(d_data);
    cudaFreeArray(cu_3darray);
    getchar();
    return 0;
}
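One caveat about the listing: none of the CUDA API calls are error-checked. Since helper_cuda.h is already included, its checkCudaErrors macro (and getLastCudaError after kernel launches) can wrap the calls; a minimal sketch of the pattern:

    // checkCudaErrors aborts with file/line information when a call does not
    // return cudaSuccess; getLastCudaError surfaces kernel-launch failures.
    checkCudaErrors(cudaMalloc((void **)&d_data, size));
    checkCudaErrors(cudaMemcpy3D(&myparms));

    transformKernel<<<dimGrid, dimBlock>>>(d_data, width, height, 0);
    getLastCudaError("transformKernel execution failed");
    checkCudaErrors(cudaDeviceSynchronize());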

▶ Output

    Input data
    0.0 1.0 2.0 3.0 4.0
    512.0 513.0 514.0 515.0 516.0
    1024.0 1025.0 1026.0 1027.0 1028.0
    1536.0 1537.0 1538.0 1539.0 1540.0
    2048.0 2049.0 2050.0 2051.0 2052.0

    0.0 1.0 2.0 3.0 4.0
    512.0 513.0 514.0 515.0 516.0
    1024.0 1025.0 1026.0 1027.0 1028.0
    1536.0 1537.0 1538.0 1539.0 1540.0
    2048.0 2049.0 2050.0 2051.0 2052.0

    0.0 1.0 2.0 3.0 4.0
    512.0 513.0 514.0 515.0 516.0
    1024.0 1025.0 1026.0 1027.0 1028.0
    1536.0 1537.0 1538.0 1539.0 1540.0
    2048.0 2049.0 2050.0 2051.0 2052.0

    0.0 1.0 2.0 3.0 4.0
    512.0 513.0 514.0 515.0 516.0
    1024.0 1025.0 1026.0 1027.0 1028.0
    1536.0 1537.0 1538.0 1539.0 1540.0
    2048.0 2049.0 2050.0 2051.0 2052.0

    0.0 1.0 2.0 3.0 4.0
    512.0 513.0 514.0 515.0 516.0
    1024.0 1025.0 1026.0 1027.0 1028.0
    1536.0 1537.0 1538.0 1539.0 1540.0
    2048.0 2049.0 2050.0 2051.0 2052.0

    Ideal output data
    0.0 -1.0 -2.0 -3.0 -4.0
    -512.0 -513.0 -514.0 -515.0 -516.0
    -1024.0 -1025.0 -1026.0 -1027.0 -1028.0
    -1536.0 -1537.0 -1538.0 -1539.0 -1540.0
    -2048.0 -2049.0 -2050.0 -2051.0 -2052.0

    1.0 0.0 -1.0 -2.0 -3.0
    -511.0 -512.0 -513.0 -514.0 -515.0
    -1023.0 -1024.0 -1025.0 -1026.0 -1027.0
    -1535.0 -1536.0 -1537.0 -1538.0 -1539.0
    -2047.0 -2048.0 -2049.0 -2050.0 -2051.0

    2.0 1.0 0.0 -1.0 -2.0
    -510.0 -511.0 -512.0 -513.0 -514.0
    -1022.0 -1023.0 -1024.0 -1025.0 -1026.0
    -1534.0 -1535.0 -1536.0 -1537.0 -1538.0
    -2046.0 -2047.0 -2048.0 -2049.0 -2050.0

    3.0 2.0 1.0 0.0 -1.0
    -509.0 -510.0 -511.0 -512.0 -513.0
    -1021.0 -1022.0 -1023.0 -1024.0 -1025.0
    -1533.0 -1534.0 -1535.0 -1536.0 -1537.0
    -2045.0 -2046.0 -2047.0 -2048.0 -2049.0

    4.0 3.0 2.0 1.0 0.0
    -508.0 -509.0 -510.0 -511.0 -512.0
    -1020.0 -1021.0 -1022.0 -1023.0 -1024.0
    -1532.0 -1533.0 -1534.0 -1535.0 -1536.0
    -2044.0 -2045.0 -2046.0 -2047.0 -2048.0

Covering 2D data of 512 * 512 * 5: Grid size is 64 x 64, each block has 8 x 8 threads

Time: 0.995 msec, 1317.00 Mtexlookups/sec
Comparing kernel output to expected data return 1

    Actual output data
    0.0 -1.0 -2.0 -3.0 -4.0
    -512.0 -513.0 -514.0 -515.0 -516.0
    -1024.0 -1025.0 -1026.0 -1027.0 -1028.0
    -1536.0 -1537.0 -1538.0 -1539.0 -1540.0
    -2048.0 -2049.0 -2050.0 -2051.0 -2052.0

    1.0 0.0 -1.0 -2.0 -3.0
    -511.0 -512.0 -513.0 -514.0 -515.0
    -1023.0 -1024.0 -1025.0 -1026.0 -1027.0
    -1535.0 -1536.0 -1537.0 -1538.0 -1539.0
    -2047.0 -2048.0 -2049.0 -2050.0 -2051.0

    2.0 1.0 0.0 -1.0 -2.0
    -510.0 -511.0 -512.0 -513.0 -514.0
    -1022.0 -1023.0 -1024.0 -1025.0 -1026.0
    -1534.0 -1535.0 -1536.0 -1537.0 -1538.0
    -2046.0 -2047.0 -2048.0 -2049.0 -2050.0

    3.0 2.0 1.0 0.0 -1.0
    -509.0 -510.0 -511.0 -512.0 -513.0
    -1021.0 -1022.0 -1023.0 -1024.0 -1025.0
    -1533.0 -1534.0 -1535.0 -1536.0 -1537.0
    -2045.0 -2046.0 -2047.0 -2048.0 -2049.0

    4.0 3.0 2.0 1.0 0.0
    -508.0 -509.0 -510.0 -511.0 -512.0
    -1020.0 -1021.0 -1022.0 -1023.0 -1024.0
    -1532.0 -1533.0 -1534.0 -1535.0 -1536.0
    -2044.0 -2045.0 -2046.0 -2047.0 -2048.0

▶ Takeaways

● Difference from the earlier cube-map texture sample: when allocating the CUDA 3D array, pass the flag cudaArrayLayered instead of cudaArrayCubemap, and adjust the extent dimensions accordingly.
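As an aside, the texture reference API used in this sample (texture<float, cudaTextureType2DLayered>, cudaBindTextureToArray) is deprecated and was removed in CUDA 12. The same binding can be expressed with the texture object API; the sketch below mirrors the settings used above, and the helper name makeLayeredTexObject is made up for illustration:

    #include <cuda_runtime.h>

    // Hypothetical helper: wraps an already-populated layered cudaArray in a
    // texture object, mirroring the settings used above (wrap addressing,
    // linear filtering, normalized coordinates).
    cudaTextureObject_t makeLayeredTexObject(cudaArray_t layeredArray)
    {
        cudaResourceDesc resDesc = {};
        resDesc.resType = cudaResourceTypeArray;
        resDesc.res.array.array = layeredArray;

        cudaTextureDesc texDesc = {};
        texDesc.addressMode[0] = cudaAddressModeWrap;
        texDesc.addressMode[1] = cudaAddressModeWrap;
        texDesc.filterMode = cudaFilterModeLinear;
        texDesc.readMode = cudaReadModeElementType;
        texDesc.normalizedCoords = 1;

        cudaTextureObject_t texObj = 0;
        cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
        return texObj;
    }

The kernel then takes the object as an extra parameter and fetches with tex2DLayered<float>(texObj, u, v, layer) instead of reading a global texture reference.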
