二维分层纹理

▶ 源代码。使用纹理拾取,把元素按原顺序从 CUDA 3D 数组(分层数组)中逐层取出,取相反数后加上所在层的层号,写入全局内存并输出。

 #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h>

// Layered 2D texture demo: every element of a layered CUDA array is fetched
// through a texture, negated, offset by its layer index and written to global
// memory; the result is compared against a host-side reference.

#define MIN_EPSILON_ERROR 5e-3f
#define OUTPUT 5  // edge length of the top-left corner printed for each layer

// Legacy texture reference bound to the layered array in main().
// NOTE(review): the texture-reference API is deprecated; newer toolkits expect
// texture objects (cudaTextureObject_t) instead.
texture<float, cudaTextureType2DLayered> tex;

// Fetches one texel per thread from layer `layer` of `tex`, negates it, adds the
// layer index and stores it at g_odata[layer*width*height + y*width + x].
//
// Launch layout: 2D grid of 2D blocks that covers width x height; threads that
// fall outside the image return without touching memory.
// g_odata must hold at least (layer + 1) * width * height floats.
__global__ void transformKernel(float *g_odata, int width, int height, int layer)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard the grid tail so the kernel stays correct when the image size is
    // not a multiple of the block size.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Normalized coordinates of the texel centre (the texture is configured
    // with normalized addressing in main()).
    float u = (x + 0.5f) / (float)width;
    float v = (y + 0.5f) / (float)height;

    g_odata[layer * width * height + y * width + x] = -tex2DLayered(tex, u, v, layer) + layer;
}

// Prints `title` followed by the top-left OUTPUT x OUTPUT corner of every layer
// of `data`, which is laid out layer-major, then row-major.
// Requires width >= OUTPUT and height >= OUTPUT.
static void printLayerCorners(const char *title, const float *data,
                              unsigned int width, unsigned int height, unsigned int num_layers)
{
    printf("\n\t%s\n\t", title);
    for (unsigned int i = 0; i < num_layers; i++)
    {
        for (unsigned int j = 0; j < OUTPUT; j++)
        {
            for (unsigned int k = 0; k < OUTPUT; k++)
                printf("%2.1f ", data[i * width * height + j * width + k]);
            printf("\n\t");
        }
        printf("\n\t");
    }
}

int main(int argc, char **argv)
{
    const unsigned int width = 512, height = 512, num_layers = 5;
    const unsigned int layer_elems = width * height;
    const size_t size = (size_t)layer_elems * num_layers * sizeof(float);

    float *h_data = (float *)malloc(size);
    float *h_data_ref = (float *)malloc(size);
    if (h_data == NULL || h_data_ref == NULL)
    {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", size);
        free(h_data);
        free(h_data_ref);
        return EXIT_FAILURE;
    }

    float *d_data = NULL;
    checkCudaErrors(cudaMalloc((void **)&d_data, size));

    // Input: every layer holds 0, 1, 2, ... in row-major order.
    // Reference: the value the kernel is expected to produce, -input + layer.
    for (unsigned int layer = 0; layer < num_layers; layer++)
    {
        for (unsigned int i = 0; i < layer_elems; i++)
        {
            h_data[layer * layer_elems + i] = (float)i;
            h_data_ref[layer * layer_elems + i] = -h_data[layer * layer_elems + i] + layer;
        }
    }

    printLayerCorners("Input data", h_data, width, height, num_layers);
    printLayerCorners("Ideal output data", h_data_ref, width, height, num_layers);

    // Allocate the layered CUDA 3D array and copy the host data into it.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray *cu_3darray = NULL;
    checkCudaErrors(cudaMalloc3DArray(&cu_3darray, &channelDesc,
                                      make_cudaExtent(width, height, num_layers), cudaArrayLayered));
    cudaMemcpy3DParms myparms = { 0 };
    myparms.srcPos = make_cudaPos(0, 0, 0);
    myparms.dstPos = make_cudaPos(0, 0, 0);
    myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, height);
    myparms.dstArray = cu_3darray;
    myparms.extent = make_cudaExtent(width, height, num_layers);
    myparms.kind = cudaMemcpyHostToDevice;
    checkCudaErrors(cudaMemcpy3D(&myparms));

    // Configure the texture reference and bind it to the array.
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true;
    checkCudaErrors(cudaBindTextureToArray(tex, cu_3darray, channelDesc));

    dim3 dimBlock(8, 8, 1);
    // Round up so partial tiles are still covered; the kernel guards the tail.
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1);
    printf("Covering 2D data of %u * %u * %u: Grid size is %u x %u, each block has 8 x 8 threads\n",
           width, height, num_layers, dimGrid.x, dimGrid.y);

    // Warm-up launch so one-time start-up cost is excluded from the timing.
    transformKernel<<<dimGrid, dimBlock>>>(d_data, width, height, 0);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    // One kernel launch per layer.
    for (unsigned int layer = 0; layer < num_layers; layer++)
    {
        transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, layer);
        checkCudaErrors(cudaGetLastError());
    }
    checkCudaErrors(cudaDeviceSynchronize());

    sdkStopTimer(&timer);
    const float elapsed_ms = sdkGetTimerValue(&timer);
    printf("\nTime: %.3f msec, %.2f Mtexlookups/sec\n", elapsed_ms,
           (width * height * num_layers / (elapsed_ms / 1000.0f) / 1e6));
    sdkDeleteTimer(&timer);

    // Copy the result back and verify it.
    memset(h_data, 0, size);
    checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
    if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
    {
        // NOTE(review): only width * width floats are written, i.e. a single
        // layer's worth for a square image -- confirm this is the intended
        // regression file size before changing it.
        sdkWriteFile<float>("./data/regression.dat", h_data, width * width, 0.0f, false);
    }
    else
    {
        printf("Comparing kernel output to expected data return %d\n",
               compareData(h_data, h_data_ref, width * height * num_layers, MIN_EPSILON_ERROR, 0.0f));
    }

    printLayerCorners("Actual output data", h_data, width, height, num_layers);

    // Release host and device resources; unbind the texture before freeing
    // the array it refers to.
    free(h_data);
    free(h_data_ref);
    checkCudaErrors(cudaUnbindTexture(tex));
    checkCudaErrors(cudaFree(d_data));
    checkCudaErrors(cudaFreeArray(cu_3darray));

    getchar();
    return 0;
}

▶ 输出结果

    Input data
0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 0.0 1.0 2.0 3.0 4.0
512.0 513.0 514.0 515.0 516.0
1024.0 1025.0 1026.0 1027.0 1028.0
1536.0 1537.0 1538.0 1539.0 1540.0
2048.0 2049.0 2050.0 2051.0 2052.0 Ideal output data
0.0 -1.0 -2.0 -3.0 -4.0
-512.0 -513.0 -514.0 -515.0 -516.0
-1024.0 -1025.0 -1026.0 -1027.0 -1028.0
-1536.0 -1537.0 -1538.0 -1539.0 -1540.0
-2048.0 -2049.0 -2050.0 -2051.0 -2052.0 1.0 0.0 -1.0 -2.0 -3.0
-511.0 -512.0 -513.0 -514.0 -515.0
-1023.0 -1024.0 -1025.0 -1026.0 -1027.0
-1535.0 -1536.0 -1537.0 -1538.0 -1539.0
-2047.0 -2048.0 -2049.0 -2050.0 -2051.0 2.0 1.0 0.0 -1.0 -2.0
-510.0 -511.0 -512.0 -513.0 -514.0
-1022.0 -1023.0 -1024.0 -1025.0 -1026.0
-1534.0 -1535.0 -1536.0 -1537.0 -1538.0
-2046.0 -2047.0 -2048.0 -2049.0 -2050.0 3.0 2.0 1.0 0.0 -1.0
-509.0 -510.0 -511.0 -512.0 -513.0
-1021.0 -1022.0 -1023.0 -1024.0 -1025.0
-1533.0 -1534.0 -1535.0 -1536.0 -1537.0
-2045.0 -2046.0 -2047.0 -2048.0 -2049.0 4.0 3.0 2.0 1.0 0.0
-508.0 -509.0 -510.0 -511.0 -512.0
-1020.0 -1021.0 -1022.0 -1023.0 -1024.0
-1532.0 -1533.0 -1534.0 -1535.0 -1536.0
-2044.0 -2045.0 -2046.0 -2047.0 -2048.0

Covering 2D data of 512 * 512 * 5: Grid size is 64 x 64, each block has 8 x 8 threads

Time: 0.995 msec, 1317.00 Mtexlookups/sec
Comparing kernel output to expected data return 1

    Actual output data
0.0 -1.0 -2.0 -3.0 -4.0
-512.0 -513.0 -514.0 -515.0 -516.0
-1024.0 -1025.0 -1026.0 -1027.0 -1028.0
-1536.0 -1537.0 -1538.0 -1539.0 -1540.0
-2048.0 -2049.0 -2050.0 -2051.0 -2052.0 1.0 0.0 -1.0 -2.0 -3.0
-511.0 -512.0 -513.0 -514.0 -515.0
-1023.0 -1024.0 -1025.0 -1026.0 -1027.0
-1535.0 -1536.0 -1537.0 -1538.0 -1539.0
-2047.0 -2048.0 -2049.0 -2050.0 -2051.0 2.0 1.0 0.0 -1.0 -2.0
-510.0 -511.0 -512.0 -513.0 -514.0
-1022.0 -1023.0 -1024.0 -1025.0 -1026.0
-1534.0 -1535.0 -1536.0 -1537.0 -1538.0
-2046.0 -2047.0 -2048.0 -2049.0 -2050.0 3.0 2.0 1.0 0.0 -1.0
-509.0 -510.0 -511.0 -512.0 -513.0
-1021.0 -1022.0 -1023.0 -1024.0 -1025.0
-1533.0 -1534.0 -1535.0 -1536.0 -1537.0
-2045.0 -2046.0 -2047.0 -2048.0 -2049.0 4.0 3.0 2.0 1.0 0.0
-508.0 -509.0 -510.0 -511.0 -512.0
-1020.0 -1021.0 -1022.0 -1023.0 -1024.0
-1532.0 -1533.0 -1534.0 -1535.0 -1536.0
-2044.0 -2045.0 -2046.0 -2047.0 -2048.0

▶ 涨姿势

● 与前面立方体贴图纹理的不同之处:申请 CUDA 3D 数组时使用标志 cudaArrayLayered 而不是 cudaArrayCubemap,并注意相应调整维度参数(extent 的 depth 表示层数)。

0_Simple__simpleLayeredTexture的更多相关文章

随机推荐

  1. BZOJ4481: [Jsoi2015]非诚勿扰【概率期望+树状数组】

    Description [故事背景] JYY赶上了互联网创业的大潮,为非常勿扰开发了最新的手机App实现单身 大龄青年之间的"速配".然而随着用户数量的增长,JYY发现现有速配的算 ...

  2. day2-Iptables笔记

    1.   iptables防火墙简介 Iptables也叫netfilter是Linux下自带的一款免费且优秀的基于包过滤的防火墙工具,它的功能十分强大,使用非常灵活,可以对流入.流出.流经服务器的数 ...

  3. oracle之 获取建表ddl语句

    第一种方法是使用工具,如:pl/sql developer,在[工具]--[导出用户对象]出现就可以得到建表脚本. 第二种方法是,sql语句. DBMS_METADATA.GET_DDL包可以得到数据 ...

  4. Django中更新多个对象数据与删除对象的方法

    更新多个对象 例如说我们现在想要将Apress Publisher的名称由原来的”Apress”更改为”Apress Publishing”.若使用save()方法,如: ? 1 2 3 >&g ...

  5. Redis的多线程

    Redis是单线程内部机制,那么怎么实现并发?在单机上部署多个Redis实例.

  6. Angular 4 http通讯 解决服务器参数无法接收问题

    1.导入 import { Component, OnInit } from '@angular/core'; import {Observable} from 'rxjs/Observable'; ...

  7. C# 爬取网页上的数据

    最近工作中需求定时爬取不同城市每天的温度.其实就是通过编程的方法去抓取不同网站网页进行分析筛选的过程..NET提供了很多类去访问并获得远程网页的数据,比如WebClient类和HttpWebReque ...

  8. ES(4): ES Cluster Security Settings

    目录: ES安全事件回顾 ES集群安全建议 安全访问配置 license更新 ES安全事件回顾 下面是白帽汇监测到针对全球使用广泛的全文索引引擎Elasticsearch的勒索事件: 2017年1月1 ...

  9. Elasticsearch 基础入门

    原文地址:Elasticsearch 基础入门 博客地址:http://www.extlight.com 一.什么是 ElasticSearch ElasticSearch是一个基于 Lucene 的 ...

  10. Mybatis常见面试题 三

    1.什么是mybatis? (1)mybatis是一个优秀的基于java的持久层框架,它内部封装了jdbc,使开发者只需要关注sql语句本身,而不需要花费精力去处理加载驱动.创建连接.创建statem ...