立方体纹理贴图

▶ 源代码。用纹理方法把元素按原顺序从 CUDA3D 数组中取出来,求个相反数放入全局内存,输出。

 #include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h> #define MIN_EPSILON_ERROR 5e-3f texture<float, cudaTextureTypeCubemap> tex; __global__ void transformKernel(float *g_odata, int width)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; float u = ((x + 0.5f) / (float)width) * .f - .f;// [0, width-1] 间隔 1 的坐标变换为 [-1+1/width,1-1/width] 间隔 1/width 的坐标
float v = ((y + 0.5f) / (float)width) * .f - .f; float cx, cy, cz; for (unsigned int face = ; face < ; face++)
{
if (face == )// x 正层
{
cx = ;
cy = -v;
cz = -u;
}
else if (face == )// x 负层
{
cx = -;
cy = -v;
cz = u;
}
else if (face == )// y 正层
{
cx = u;
cy = ;
cz = v;
}
else if (face == )// y 负层
{
cx = u;
cy = -;
cz = -v;
}
else if (face == )// z 正层
{
cx = u;
cy = -v;
cz = ;
}
else if (face == )// z 负层
{
cx = -u;
cy = -v;
cz = -;
}
g_odata[face*width*width + y*width + x] = - texCubemap(tex, cx, cy, cz);// 纹理数据读取到全局内存中输出
}
} int main(int argc, char** argv)
{
unsigned int width = , num_faces = , num_layers = ;
unsigned int cubemap_size = width * width * num_faces;
unsigned int size = cubemap_size * num_layers * sizeof(float);
float *h_data = (float *)malloc(size);
float *h_data_ref = (float *)malloc(size); // 理论输出
float *d_data = NULL;
cudaMalloc((void **)&d_data, size); for (int i = ; i < (int)(cubemap_size * num_layers); i++)
h_data[i] = (float)i;
for (unsigned int layer = ; layer < num_layers; layer++)
{
for (int i = ; i < (int)(cubemap_size); i++)
h_data_ref[layer*cubemap_size + i] = -h_data[layer*cubemap_size + i] + layer;
} printf("\n\t\Input data.n\t");
for (int i = ; i < width * num_faces * num_layers; i++)
{
printf("%2.1f ", h_data[i]);
if ((i + ) % width == )
printf("\n\t");
if ((i + ) % (width *width) == )
printf("\n\t");
}
printf("\n\tIdeal output data\n\t");
for (int i = ; i < width * num_faces * num_layers; i++)
{
printf("%2.1f ", h_data_ref[i]);
if ((i + ) % width == )
printf("\n\t");
if ((i + ) % (width *width) == )
printf("\n\t");
} // 设置 CUDA 3D 数组参数和数据拷贝
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(, , , , cudaChannelFormatKindFloat);
cudaArray *cu_3darray;
cudaMalloc3DArray(&cu_3darray, &channelDesc, make_cudaExtent(width, width, num_faces), cudaArrayCubemap);
cudaMemcpy3DParms myparms = { };
myparms.srcPos = make_cudaPos(, , );
myparms.dstPos = make_cudaPos(, , );
myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, width);
myparms.dstArray = cu_3darray;
myparms.extent = make_cudaExtent(width, width, num_faces);
myparms.kind = cudaMemcpyHostToDevice;
cudaMemcpy3D(&myparms); // 设置纹理参数并绑定
tex.addressMode[] = cudaAddressModeWrap;
tex.addressMode[] = cudaAddressModeWrap;
tex.filterMode = cudaFilterModeLinear;
tex.normalized = true;
cudaBindTextureToArray(tex, cu_3darray, channelDesc); dim3 dimBlock(, , );
dim3 dimGrid(width / dimBlock.x, width / dimBlock.y, );
printf("\n\tCubemap data of %d * %d * %d: Grid size is %d x %d, each block has 8 x 8 threads.\n", width, width, num_layers, dimGrid.x, dimGrid.y);
transformKernel << < dimGrid, dimBlock >> >(d_data, width);// 预跑
cudaDeviceSynchronize(); StopWatchInterface *timer = NULL;// 新的计时工具
sdkCreateTimer(&timer);
sdkStartTimer(&timer); transformKernel << < dimGrid, dimBlock, >> >(d_data, width);
cudaDeviceSynchronize(); sdkStopTimer(&timer);
printf("\n\Time: %.3f msec, %.2f Mtexlookups/sec\n", sdkGetTimerValue(&timer), (cubemap_size / (sdkGetTimerValue(&timer) / 1000.0f) / 1e6));
sdkDeleteTimer(&timer); // 返回计算结果并检验
memset(h_data, , size);
cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
sdkWriteFile<float>("./data/regression.dat", h_data, width * width, 0.0f, false);
else
printf("Comparing kernel output to expected data return %d\n", compareData(h_data, h_data_ref, cubemap_size, MIN_EPSILON_ERROR, 0.0f)); printf("\n\tActual output data\n\t");
for (int i = ; i < width * num_faces * num_layers; i++)
{
printf("%2.1f ", h_data[i]);
if ((i + ) % width == )
printf("\n\t");
if ((i + ) % (width * width) == )
printf("\n\t");
} free(h_data);
free(h_data_ref);
cudaFree(d_data);
cudaFreeArray(cu_3darray); getchar();
return ;
}

▶ 输出结果

    Input data.n    0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0
64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0
128.0 129.0 130.0 131.0 132.0 133.0 134.0 135.0 136.0 137.0 138.0 139.0 140.0 141.0 142.0 143.0 144.0 145.0 146.0 147.0 148.0 149.0 150.0 151.0 152.0 153.0 154.0 155.0 156.0 157.0 158.0 159.0 160.0 161.0 162.0 163.0 164.0 165.0 166.0 167.0 168.0 169.0 170.0 171.0 172.0 173.0 174.0 175.0 176.0 177.0 178.0 179.0 180.0 181.0 182.0 183.0 184.0 185.0 186.0 187.0 188.0 189.0 190.0 191.0
192.0 193.0 194.0 195.0 196.0 197.0 198.0 199.0 200.0 201.0 202.0 203.0 204.0 205.0 206.0 207.0 208.0 209.0 210.0 211.0 212.0 213.0 214.0 215.0 216.0 217.0 218.0 219.0 220.0 221.0 222.0 223.0 224.0 225.0 226.0 227.0 228.0 229.0 230.0 231.0 232.0 233.0 234.0 235.0 236.0 237.0 238.0 239.0 240.0 241.0 242.0 243.0 244.0 245.0 246.0 247.0 248.0 249.0 250.0 251.0 252.0 253.0 254.0 255.0
256.0 257.0 258.0 259.0 260.0 261.0 262.0 263.0 264.0 265.0 266.0 267.0 268.0 269.0 270.0 271.0 272.0 273.0 274.0 275.0 276.0 277.0 278.0 279.0 280.0 281.0 282.0 283.0 284.0 285.0 286.0 287.0 288.0 289.0 290.0 291.0 292.0 293.0 294.0 295.0 296.0 297.0 298.0 299.0 300.0 301.0 302.0 303.0 304.0 305.0 306.0 307.0 308.0 309.0 310.0 311.0 312.0 313.0 314.0 315.0 316.0 317.0 318.0 319.0
320.0 321.0 322.0 323.0 324.0 325.0 326.0 327.0 328.0 329.0 330.0 331.0 332.0 333.0 334.0 335.0 336.0 337.0 338.0 339.0 340.0 341.0 342.0 343.0 344.0 345.0 346.0 347.0 348.0 349.0 350.0 351.0 352.0 353.0 354.0 355.0 356.0 357.0 358.0 359.0 360.0 361.0 362.0 363.0 364.0 365.0 366.0 367.0 368.0 369.0 370.0 371.0 372.0 373.0 374.0 375.0 376.0 377.0 378.0 379.0 380.0 381.0 382.0 383.0 Ideal output data
0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
-64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
-128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
-192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
-256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
-320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0 Cubemap data of * * : Grid size is x , each block has x threads. Time: 0.098 msec, 249.50 Mtexlookups/sec
Comparing kernel output to expected data return Actual output data
-0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
-64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
-128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
-192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
-256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
-320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0

▶ 涨姿势

● helper_time.h 中新定义的计时函数

 // 关键步骤
StopWatchInterface *timer = NULL;
sdkCreateTimer(&timer);
sdkStartTimer(&timer); sdkStopTimer(&timer);
sdkGetTimerValue(&timer);
sdkDeleteTimer(&timer); // helper_time.h
class StopWatchInterface
{
public:
StopWatchInterface() {};
virtual ~StopWatchInterface() {}; public:
virtual void start() = ;
virtual void stop() = ;
virtual void reset() = ;
virtual float getTime() = ;// 获取计时(计时器不停)
virtual float getAverageTime() = ;
}; inline bool sdkCreateTimer(StopWatchInterface **timer_interface)
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
*timer_interface = (StopWatchInterface *)new StopWatchWin();
#else
*timer_interface = (StopWatchInterface *)new StopWatchLinux();
#endif
return (*timer_interface != NULL) ? true : false;
} inline bool sdkDeleteTimer(StopWatchInterface **timer_interface)
{
if (*timer_interface)
{
delete *timer_interface;
*timer_interface = NULL;
}
return true;
} inline bool sdkStartTimer(StopWatchInterface **timer_interface)
{
if (*timer_interface)
(*timer_interface)->start();
return true;
} inline bool sdkStopTimer(StopWatchInterface **timer_interface)
{
if (*timer_interface)
(*timer_interface)->stop();
return true;
} inline float sdkGetTimerValue(StopWatchInterface **timer_interface)
{
if (*timer_interface)
return (*timer_interface)->getTime();
else
return 0.0f;
}

● 立方体纹理贴图。六个面分别为 x = 1 正面、x = -1 轴负面、y = 1 正面、y = -1 负面、z = 1 正面、x = -1 负面,对应前、后、右、左、上、下。按照线性下标 [0, width * width * 6 - 1] 顺序访问时,各元素存储位置如下图所示(width == 2 为例)。

    

0_Simple__simpleCubemapTexture的更多相关文章

随机推荐

  1. Sql语句中两个比较迷糊的概念:“连接查询” 与 “外键约束”

    Sql语句中两个比较迷糊的概念:“连接查询” 与 “外键约束 Sql 中的连接查询:就是为了避免笛卡尔积,因为涉及到多表查询的化,不使用连接查询,会先将多个互相乘,求出笛卡尔积,然后在在里面查询符合的 ...

  2. 将string str中的str转换成字符数组

    #include <iostream> #include <map> #include <string.h> using namespace std; int ma ...

  3. day11 python学习 函数的建立,返回值,参数

    函数的定义主要有如下要点: def:表示函数的关键字 函数名:函数的名称,日后根据函数名调用函数 函数体:函数中进行一系列的逻辑计算,如:发送邮件.计算出 [11,22,38,888,2]中的最大数等 ...

  4. smarty学习——内建函数(部分接上)

    9.{foreach} {foreachelse} 格式如下: {foreach $arrayvar as $itemvar} {foreach $arrayvar as $keyvar=>$i ...

  5. hdu - 1823 - Luck and Love(线段树)

    版权声明:本文为博主原创文章.未经博主同意不得转载. https://blog.csdn.net/SCNU_Jiechao/article/details/24406391 题意:Wiskey招女友, ...

  6. Oracle RAC Failover 详解

    Oracle  RAC 同时具备HA(High Availiablity) 和LB(LoadBalance). 而其高可用性的基础就是Failover(故障转移). 它指集群中任何一个节点的故障都不会 ...

  7. 文件上传 accept 兼容性

    写法1 在chrome下有反应很慢的问题,不要使用 写法2 在firefox.Safari 中有兼容性问题,弹出选择框不会高亮显示jpg后缀的图片 写法3 在写法2上都添加了image/jpeg,解决 ...

  8. php 5.2.17 升级到5.3.29

    修改php.ini配置文件 register_globals =On include_path = ".;d:/testoa/webroot" error_reporting = ...

  9. FineUI 3升级4.1.1时,SingleClickExpand属性改什么了? (树控件单击展开)

    private Tree InitTreeMenu(List<Menu> menus) { Tree treeMenu = new Tree(); treeMenu.ID = " ...

  10. Server对象,HttpServerUtility类,获取服务器信息

    在Asp.net WebForm中,Server对象是HttpServerUtility类的实例.看下图: 而在Asp.net MVC中,Server对象是HttpServerUtilityBase对 ...