2D Layered Textures

▶ Source code. Using a layered texture, fetch the elements in their original order from a CUDA 3D array, negate each value, add its layer index, store the result in global memory, and print it.

#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h>

#define MIN_EPSILON_ERROR 5e-3f
#define OUTPUT 5

// 2D layered texture reference, bound below to the layered CUDA array
texture<float, cudaTextureType2DLayered> tex;

// Each thread reads one texel of the given layer, negates it, adds the layer
// index, and writes the result to global memory.
__global__ void transformKernel(float *g_odata, int width, int height, int layer)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Normalized coordinates; the +0.5f offset addresses the texel center, so
    // linear filtering returns the stored value unchanged.
    float u = (x + 0.5f) / (float)width;
    float v = (y + 0.5f) / (float)height;

    g_odata[layer * width * height + y * width + x] = -tex2DLayered(tex, u, v, layer) + layer;
}

int main(int argc, char **argv)
{
    unsigned int width = 512, height = 512, num_layers = 5;
    unsigned int size = width * height * num_layers * sizeof(float);
    float *h_data     = (float *)malloc(size);
    float *h_data_ref = (float *)malloc(size);
    float *d_data     = NULL;
    cudaMalloc((void **)&d_data, size);

    // Fill the input and compute the expected result on the host
    for (unsigned int layer = 0; layer < num_layers; layer++)
    {
        for (int i = 0; i < (int)(width * height); i++)
            h_data[layer * width * height + i] = (float)i;
    }
    for (unsigned int layer = 0; layer < num_layers; layer++)
    {
        for (int i = 0; i < (int)(width * height); i++)
            h_data_ref[layer * width * height + i] = -h_data[layer * width * height + i] + layer;
    }

    printf("\n\tInput data\n\t");
    for (int i = 0; i < num_layers; i++)
    {
        for (int j = 0; j < OUTPUT; j++)
        {
            for (int k = 0; k < OUTPUT; k++)
                printf("%2.1f ", h_data[i * width * height + j * width + k]);
            printf("\n\t");
        }
        printf("\n\t");
    }

    printf("\n\tIdeal output data\n\t");
    for (int i = 0; i < num_layers; i++)
    {
        for (int j = 0; j < OUTPUT; j++)
        {
            for (int k = 0; k < OUTPUT; k++)
                printf("%2.1f ", h_data_ref[i * width * height + j * width + k]);
            printf("\n\t");
        }
        printf("\n\t");
    }

    // Set up the layered CUDA array and copy the host data into it
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray *cu_3darray;
    cudaMalloc3DArray(&cu_3darray, &channelDesc, make_cudaExtent(width, height, num_layers), cudaArrayLayered);
    cudaMemcpy3DParms myparms = { 0 };
    myparms.srcPos = make_cudaPos(0, 0, 0);
    myparms.dstPos = make_cudaPos(0, 0, 0);
    myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, height);
    myparms.dstArray = cu_3darray;
    myparms.extent = make_cudaExtent(width, height, num_layers);
    myparms.kind = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&myparms);

    // Set texture parameters and bind the texture to the array
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true;
    cudaBindTextureToArray(tex, cu_3darray, channelDesc);

    dim3 dimBlock(8, 8, 1);
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
    printf("Covering 2D data of %d * %d * %d: Grid size is %d x %d, each block has 8 x 8 threads\n",
           width, height, num_layers, dimGrid.x, dimGrid.y);

    transformKernel<<<dimGrid, dimBlock>>>(d_data, width, height, 0);    // warm-up run
    cudaDeviceSynchronize();

    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);
    for (unsigned int layer = 0; layer < num_layers; layer++)            // one kernel launch per layer
        transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, layer);
    cudaDeviceSynchronize();
    sdkStopTimer(&timer);
    printf("\nTime: %.3f msec, %.2f Mtexlookups/sec\n", sdkGetTimerValue(&timer),
           (width * height * num_layers / (sdkGetTimerValue(&timer) / 1000.0f) / 1e6));
    sdkDeleteTimer(&timer);

    // Copy the result back and check it against the reference
    memset(h_data, 0, size);
    cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
    if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
        sdkWriteFile<float>("./data/regression.dat", h_data, width * width, 0.0f, false);
    else
        printf("Comparing kernel output to expected data return %d\n",
               compareData(h_data, h_data_ref, width * height * num_layers, MIN_EPSILON_ERROR, 0.0f));

    printf("\n\tActual output data\n\t");
    for (int i = 0; i < num_layers; i++)
    {
        for (int j = 0; j < OUTPUT; j++)
        {
            for (int k = 0; k < OUTPUT; k++)
                printf("%2.1f ", h_data[i * width * height + j * width + k]);
            printf("\n\t");
        }
        printf("\n\t");
    }

    free(h_data);
    free(h_data_ref);
    cudaFree(d_data);
    cudaFreeArray(cu_3darray);
    getchar();
    return 0;
}
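
For brevity the listing does not check the return codes of the CUDA runtime calls. The already-included helper_cuda.h provides checkCudaErrors() and getLastCudaError() for that purpose. Below is a minimal, self-contained sketch of the idiom; dummyKernel is a made-up placeholder, not part of the sample.

#include <stdio.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>   // checkCudaErrors(), getLastCudaError()

// Placeholder kernel used only to illustrate the error-checking pattern.
__global__ void dummyKernel(float *out)
{
    out[threadIdx.x] = (float)threadIdx.x;
}

int main()
{
    float *d_out = NULL;
    checkCudaErrors(cudaMalloc((void **)&d_out, 32 * sizeof(float)));  // aborts with a message on failure

    dummyKernel<<<1, 32>>>(d_out);
    getLastCudaError("dummyKernel launch failed");    // catches launch-time errors
    checkCudaErrors(cudaDeviceSynchronize());         // catches errors raised during execution

    checkCudaErrors(cudaFree(d_out));
    printf("done\n");
    return 0;
}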

▶ Output

    Input data
0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0

0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0

0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0

0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0

0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0

    Ideal output data
0.0 -1.0 -2.0 -3.0 -4.0
-512.0 -513.0 -514.0 -515.0 -516.0
-1024.0 -1025.0 -1026.0 -1027.0 -1028.0
-1536.0 -1537.0 -1538.0 -1539.0 -1540.0
-2048.0 -2049.0 -2050.0 -2051.0 -2052.0

1.0 0.0 -1.0 -2.0 -3.0
-511.0 -512.0 -513.0 -514.0 -515.0
-1023.0 -1024.0 -1025.0 -1026.0 -1027.0
-1535.0 -1536.0 -1537.0 -1538.0 -1539.0
-2047.0 -2048.0 -2049.0 -2050.0 -2051.0

2.0 1.0 0.0 -1.0 -2.0
-510.0 -511.0 -512.0 -513.0 -514.0
-1022.0 -1023.0 -1024.0 -1025.0 -1026.0
-1534.0 -1535.0 -1536.0 -1537.0 -1538.0
-2046.0 -2047.0 -2048.0 -2049.0 -2050.0

3.0 2.0 1.0 0.0 -1.0
-509.0 -510.0 -511.0 -512.0 -513.0
-1021.0 -1022.0 -1023.0 -1024.0 -1025.0
-1533.0 -1534.0 -1535.0 -1536.0 -1537.0
-2045.0 -2046.0 -2047.0 -2048.0 -2049.0

4.0 3.0 2.0 1.0 0.0
-508.0 -509.0 -510.0 -511.0 -512.0
-1020.0 -1021.0 -1022.0 -1023.0 -1024.0
-1532.0 -1533.0 -1534.0 -1535.0 -1536.0
-2044.0 -2045.0 -2046.0 -2047.0 -2048.0

Covering 2D data of 512 * 512 * 5: Grid size is 64 x 64, each block has 8 x 8 threads

Time: 0.995 msec, 1317.00 Mtexlookups/sec
Comparing kernel output to expected data return 1

    Actual output data
0.0 -1.0 -2.0 -3.0 -4.0
-512.0 -513.0 -514.0 -515.0 -516.0
-1024.0 -1025.0 -1026.0 -1027.0 -1028.0
-1536.0 -1537.0 -1538.0 -1539.0 -1540.0
-2048.0 -2049.0 -2050.0 -2051.0 -2052.0

1.0 0.0 -1.0 -2.0 -3.0
-511.0 -512.0 -513.0 -514.0 -515.0
-1023.0 -1024.0 -1025.0 -1026.0 -1027.0
-1535.0 -1536.0 -1537.0 -1538.0 -1539.0
-2047.0 -2048.0 -2049.0 -2050.0 -2051.0

2.0 1.0 0.0 -1.0 -2.0
-510.0 -511.0 -512.0 -513.0 -514.0
-1022.0 -1023.0 -1024.0 -1025.0 -1026.0
-1534.0 -1535.0 -1536.0 -1537.0 -1538.0
-2046.0 -2047.0 -2048.0 -2049.0 -2050.0

3.0 2.0 1.0 0.0 -1.0
-509.0 -510.0 -511.0 -512.0 -513.0
-1021.0 -1022.0 -1023.0 -1024.0 -1025.0
-1533.0 -1534.0 -1535.0 -1536.0 -1537.0
-2045.0 -2046.0 -2047.0 -2048.0 -2049.0

4.0 3.0 2.0 1.0 0.0
-508.0 -509.0 -510.0 -511.0 -512.0
-1020.0 -1021.0 -1022.0 -1023.0 -1024.0
-1532.0 -1533.0 -1534.0 -1535.0 -1536.0
-2044.0 -2045.0 -2046.0 -2047.0 -2048.0

▶ Takeaways

● The difference from the cubemap texture in the previous sample: the CUDA 3D array is allocated with the flag cudaArrayLayered instead of cudaArrayCubemap, and the extent (dimension) parameters must be adjusted accordingly; a short sketch of the two allocation calls follows below.
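
A minimal side-by-side sketch of that difference (the array sizes below are illustrative only, not taken from this sample): a layered 2D array uses the depth of the extent as the layer count, while a cubemap array requires a square extent whose depth is exactly 6, one slice per cube face.

#include <cuda_runtime.h>

int main()
{
    cudaChannelFormatDesc desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

    // 2D layered array: the depth of the extent is the number of layers;
    // width and height are independent of each other.
    cudaArray *layeredArray = NULL;
    cudaMalloc3DArray(&layeredArray, &desc, make_cudaExtent(512, 512, 5), cudaArrayLayered);

    // Cubemap array (previous sample): width must equal height and the
    // depth must be exactly 6, one slice per cube face.
    cudaArray *cubemapArray = NULL;
    cudaMalloc3DArray(&cubemapArray, &desc, make_cudaExtent(64, 64, 6), cudaArrayCubemap);

    cudaFreeArray(layeredArray);
    cudaFreeArray(cubemapArray);
    return 0;
}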
