二维分层纹理

▶ 源代码。用纹理方法把元素按原顺序从 CUDA3D 数组中取出来,求个相反数再加上层数放入全局内存,输出。

 #include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h> #define MIN_EPSILON_ERROR 5e-3f
#define OUTPUT 5 texture<float, cudaTextureType2DLayered> tex; __global__ void transformKernel(float *g_odata, int width, int height, int layer)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; float u = (x + 0.5f) / (float)width;
float v = (y + 0.5f) / (float)height; g_odata[layer*width*height + y*width + x] = - tex2DLayered(tex, u, v, layer) + layer;
} int main(int argc, char **argv)
{
unsigned int width = , height = , num_layers = ;
unsigned int size = width * height * num_layers * sizeof(float);
float *h_data = (float *)malloc(size);
float *h_data_ref = (float *)malloc(size);
float *d_data = NULL;
cudaMalloc((void **)&d_data, size); for (unsigned int layer = ; layer < num_layers; layer++)
{
for (int i = ; i < (int)(width * height); i++)
h_data[layer*width*height + i] = (float)i;
} for (unsigned int layer = ; layer < num_layers; layer++)
{
for (int i = ; i < (int)(width * height); i++)
h_data_ref[layer*width*height + i] = - h_data[layer*width*height + i] + layer;
} printf("\n\t\Input data\n\t");
for (int i = ; i < num_layers; i++)
{
for (int j = ; j < OUTPUT; j++)
{
for(int k=;k<OUTPUT;k++)
printf("%2.1f ", h_data[i*width*height+j*width+k]);
printf("\n\t");
}
printf("\n\t");
}
printf("\n\t\Ideal output data\n\t");
for (int i = ; i < num_layers; i++)
{
for (int j = ; j < OUTPUT; j++)
{
for (int k = ; k<OUTPUT; k++)
printf("%2.1f ", h_data_ref[i*width*height + j*width + k]);
printf("\n\t");
}
printf("\n\t");
} // 设置 CUDA 3D 数组参数和数据拷贝
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(, , , , cudaChannelFormatKindFloat);
cudaArray *cu_3darray;
cudaMalloc3DArray(&cu_3darray, &channelDesc, make_cudaExtent(width, height, num_layers), cudaArrayLayered);
cudaMemcpy3DParms myparms = { };
myparms.srcPos = make_cudaPos(, , );
myparms.dstPos = make_cudaPos(, , );
myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, height);
myparms.dstArray = cu_3darray;
myparms.extent = make_cudaExtent(width, height, num_layers);
myparms.kind = cudaMemcpyHostToDevice;
cudaMemcpy3D(&myparms); // 设置纹理参数并绑定
tex.addressMode[] = cudaAddressModeWrap;
tex.addressMode[] = cudaAddressModeWrap;
tex.filterMode = cudaFilterModeLinear;
tex.normalized = true;
cudaBindTextureToArray(tex, cu_3darray, channelDesc); dim3 dimBlock(, , );
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, );
printf("Covering 2D data of %d * %d * %d: Grid size is %d x %d, each block has 8 x 8 threads\n", width, height, num_layers, dimGrid.x, dimGrid.y);
transformKernel << < dimGrid, dimBlock >> >(d_data, width, height, );// 预跑
cudaDeviceSynchronize(); StopWatchInterface *timer = NULL;
sdkCreateTimer(&timer);
sdkStartTimer(&timer); for (unsigned int layer = ; layer < num_layers; layer++)// 启用多个核,每个核完成一层
transformKernel << < dimGrid, dimBlock, >> >(d_data, width, height, layer);
cudaDeviceSynchronize(); sdkStopTimer(&timer);
printf("\n\Time: %.3f msec, %.2f Mtexlookups/sec\n", sdkGetTimerValue(&timer), (width *height *num_layers / (sdkGetTimerValue(&timer) / 1000.0f) / 1e6));
sdkDeleteTimer(&timer); // 返回计算结果并检验
memset(h_data, , size);
cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
sdkWriteFile<float>("./data/regression.dat", h_data, width * width, 0.0f, false);
else
printf("Comparing kernel output to expected data return %d\n", compareData(h_data, h_data_ref, width * height * num_layers, MIN_EPSILON_ERROR, 0.0f)); printf("\n\tActual output data\n\t");
for (int i = ; i < num_layers; i++)
{
for (int j = ; j < OUTPUT; j++)
{
for (int k = ; k<OUTPUT; k++)
printf("%2.1f ", h_data[i*width*height + j*width + k]);
printf("\n\t");
}
printf("\n\t");
} free(h_data);
free(h_data_ref);
cudaFree(d_data);
cudaFreeArray(cu_3darray); getchar();
return ;
}

▶ 输出结果

    Input data
0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 Ideal output data
0.0 -1.0 -2.0 -3.0 -4.0
-512.0 -513.0 -514.0 -515.0 -516.0
-1024.0 -1025.0 -1026.0 -1027.0 -1028.0
-1536.0 -1537.0 -1538.0 -1539.0 -1540.0
-2048.0 -2049.0 -2050.0 -2051.0 -2052.0 1.0 0.0 -1.0 -2.0 -3.0
-511.0 -512.0 -513.0 -514.0 -515.0
-1023.0 -1024.0 -1025.0 -1026.0 -1027.0
-1535.0 -1536.0 -1537.0 -1538.0 -1539.0
-2047.0 -2048.0 -2049.0 -2050.0 -2051.0 2.0 1.0 0.0 -1.0 -2.0
-510.0 -511.0 -512.0 -513.0 -514.0
-1022.0 -1023.0 -1024.0 -1025.0 -1026.0
-1534.0 -1535.0 -1536.0 -1537.0 -1538.0
-2046.0 -2047.0 -2048.0 -2049.0 -2050.0 3.0 2.0 1.0 0.0 -1.0
-509.0 -510.0 -511.0 -512.0 -513.0
-1021.0 -1022.0 -1023.0 -1024.0 -1025.0
-1533.0 -1534.0 -1535.0 -1536.0 -1537.0
-2045.0 -2046.0 -2047.0 -2048.0 -2049.0 4.0 3.0 2.0 1.0 0.0
-508.0 -509.0 -510.0 -511.0 -512.0
-1020.0 -1021.0 -1022.0 -1023.0 -1024.0
-1532.0 -1533.0 -1534.0 -1535.0 -1536.0
-2044.0 -2045.0 -2046.0 -2047.0 -2048.0 Covering 2D data of * * : Grid size is x , each block has x threads Time: 0.995 msec, 1317.00 Mtexlookups/sec
Comparing kernel output to expected data return Actual output data
0.0 -1.0 -2.0 -3.0 -4.0
-512.0 -513.0 -514.0 -515.0 -516.0
-1024.0 -1025.0 -1026.0 -1027.0 -1028.0
-1536.0 -1537.0 -1538.0 -1539.0 -1540.0
-2048.0 -2049.0 -2050.0 -2051.0 -2052.0 1.0 0.0 -1.0 -2.0 -3.0
-511.0 -512.0 -513.0 -514.0 -515.0
-1023.0 -1024.0 -1025.0 -1026.0 -1027.0
-1535.0 -1536.0 -1537.0 -1538.0 -1539.0
-2047.0 -2048.0 -2049.0 -2050.0 -2051.0 2.0 1.0 0.0 -1.0 -2.0
-510.0 -511.0 -512.0 -513.0 -514.0
-1022.0 -1023.0 -1024.0 -1025.0 -1026.0
-1534.0 -1535.0 -1536.0 -1537.0 -1538.0
-2046.0 -2047.0 -2048.0 -2049.0 -2050.0 3.0 2.0 1.0 0.0 -1.0
-509.0 -510.0 -511.0 -512.0 -513.0
-1021.0 -1022.0 -1023.0 -1024.0 -1025.0
-1533.0 -1534.0 -1535.0 -1536.0 -1537.0
-2045.0 -2046.0 -2047.0 -2048.0 -2049.0 4.0 3.0 2.0 1.0 0.0
-508.0 -509.0 -510.0 -511.0 -512.0
-1020.0 -1021.0 -1022.0 -1023.0 -1024.0
-1532.0 -1533.0 -1534.0 -1535.0 -1536.0
-2044.0 -2045.0 -2046.0 -2047.0 -2048.0

▶ 涨姿势

● 与前面立方体贴图纹理不同的地方:申请 CUDA3D 数组的时候使用标志 cudaArrayLayered 而不是 cudaArrayCubemap,并注意调整相关的维度参数。

0_Simple__simpleLayeredTexture的更多相关文章

随机推荐

  1. Eclipse+Spring学习(一)环境搭建(转)

    最近由于投了一家公司实习,他要java工程师,而我大学3年的精力都花到了ASP.NET和前端上面,到找工作的时候才发现大公司不要.NET的,所以马上转型java...由于网上的高手都不屑于写这类文章, ...

  2. MySQL Transaction--RR事务隔离级别下加锁测试

    ============================================================================== 按照非索引列更新 在可重复读的事务隔离级别 ...

  3. log parser 微软iis 日志分析

    Log Parser 2.2 您可以从 Microsoft 下载中心下载 Log Parser. Log Parser 2.2 是一个功能强大的通用工具,它可对基于文本的数据(如日志文件.XML 文件 ...

  4. XDomainRequest object

    The XDomainRequest object has these types of members: Events Methods Properties Events The XDomainRe ...

  5. FastAdmin CMS 插件下载

    FastAdmin CMS 插件下载 CMS内容管理系统插件(含小程序) 自定义内容模型.自定义单页.自定义表单.自定义会员发布.付费阅读.小程序等 提供全部前后端源代码和小程序源代码 功能特性 基于 ...

  6. Oracle RAC Failover 详解

    Oracle  RAC 同时具备HA(High Availiablity) 和LB(LoadBalance). 而其高可用性的基础就是Failover(故障转移). 它指集群中任何一个节点的故障都不会 ...

  7. npm 构建时,次要版本变化引起的问题

    问题:下载项目后,运行 npm install 或 yarn install,vue-awesome-swiper 做的功能显示不正常. 解决:从 官方网站 文件 README.md 中找到: // ...

  8. package.json 文件中的版本号

    版本号,格式:"主要版本,次要版本,补丁版本" 指定版本:比如1.2.2,遵循"主版本,次要版本,补丁版本"的格式规定,安装时只安装指定版本. 波浪号(tild ...

  9. JS enter事件及数据不完整阻止下一步操作

    阻止下一步操作: 1.return false;  2.e.preventDefault(); 但IE8不支持 //键盘事件|enter $(function () { document.onkeyd ...

  10. hadoop框架结构介绍

    近年,随着互联网的发展特别是移动互联网的发展,数据的增长呈现出一种爆炸式的成长势头.单是谷歌的爬虫程序每天下载的网页超过1亿个(2000年数据,)数据的爆炸式增长直接推动了海量数据处理技术的发展.谷歌 ...