0_Simple__simpleSeparateCompilation

▶ 简单地将 __device__ 设备函数放到另一个 .cu 文件中，通过头文件中的声明在主体 .cu 中引用；各文件分别编译后再进行设备端链接（nvcc 需要开启可重定位设备代码，即 -rdc=true）。

▶ 源代码，把 C++ 的部分去掉了

 // simpleDeviceLibrary.cuh

 // Include guard for the device-library interface.
 #ifndef SIMPLE_DEVICE_LIBRARY_CUH

 #define SIMPLE_DEVICE_LIBRARY_CUH

 // Device function taking and returning a float; declared with C linkage.
 // Its definition lives in simpleDeviceLibrary.cu.
 extern "C" __device__ float multiplyByTwo(float number);

 // Device function taking and returning a float; declared with C linkage.
 // Its definition lives in simpleDeviceLibrary.cu.
 extern "C" __device__ float divideByTwo(float number);

 #endif

 // simpleDeviceLibrary.cu

 #include <cuda_runtime.h>

 /**
  * Device function: doubles its argument.
  *
  * @param number value to scale
  * @return number multiplied by 2.0f
  */
 extern "C" __device__ float multiplyByTwo(float number)
 {
     const float doubled = 2.0f * number;
     return doubled;
 }

 /**
  * Device function: halves its argument (implemented as a multiplication by 0.5f).
  *
  * @param number value to scale
  * @return number multiplied by 0.5f
  */
 extern "C" __device__ float divideByTwo(float number)
 {
     const float halved = 0.5f * number;
     return halved;
 }

 // simpleSeparateCompilation.cu

 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>

 #include <cuda_runtime.h>

 #include "device_launch_parameters.h"

 #include "simpleDeviceLibrary.cuh"

 // Tolerance used by test() when comparing the transformed values with the original inputs.
 #define EPS 1e-5

 // Pointer to a function that takes a float and returns a float.
 typedef float(*deviceFunc)(float);

 // Device-resident variables initialised with the addresses of the two library functions.
 // test() reads them back with cudaMemcpyFromSymbol() to get a pointer value it can pass to the kernel.
 __device__ deviceFunc dMultiplyByTwoPtr = multiplyByTwo;    // Local declaration; per the original author, calling multiplyByTwo / divideByTwo directly in the code causes a runtime error

 __device__ deviceFunc dDivideByTwoPtr = divideByTwo;

 /**
  * Applies the device function `f` in place to the first `size` elements of `v`.
  *
  * Launch layout: one-dimensional grid of one-dimensional blocks. The loop advances by the
  * total number of launched threads, so every element is processed for any grid/block size,
  * including launches that provide fewer than `size` threads. No shared memory is used.
  *
  * @param v    device pointer to at least `size` floats; overwritten with f(v[i])
  * @param f    device function pointer to call for each element (must be a device-side address)
  * @param size number of elements to transform
  */
 __global__ void transformVector(float *v, deviceFunc f, unsigned int size)
 {
     // Widen before multiplying so the index arithmetic cannot wrap in 32 bits.
     const size_t stride = (size_t)gridDim.x * blockDim.x;
     for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < size; i += stride)
         v[i] = (*f)(v[i]);
 }

 // Reports a failed CUDA runtime call on stderr; returns true only when `status` is cudaSuccess.
 static bool cudaSucceeded(cudaError_t status, const char *what)
 {
     if (status == cudaSuccess)
         return true;
     fprintf(stderr, "\nCUDA error in %s: %s\n", what, cudaGetErrorString(status));
     return false;
 }

 /**
  * Runs the round-trip check: fills a host vector with random values, applies multiplyByTwo and
  * then divideByTwo to it on the device through function pointers, and verifies that the result
  * matches the input within EPS.
  *
  * @return 1 when every element matches (main() prints "Pass"), 0 on any CUDA error or mismatch.
  */
 int test()
 {
     const unsigned int size = 1000;                                         // number of elements to transform
     const unsigned int threadsPerBlock = 256;
     const unsigned int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;   // ceil-div so the grid covers `size`
     const size_t bytes = sizeof(float) * size;

     float hVector[size], hResultVector[size];
     float *dVector = NULL;
     deviceFunc hFunctionPtr = NULL;                                         // function pointer passed to the kernel as an argument

     for (unsigned int i = 0; i < size; ++i)
     {
         hVector[i] = rand() / (float)RAND_MAX;
         hResultVector[i] = 0.0f;
     }

     // Each step runs only if all previous ones succeeded (short-circuit &&).
     bool ok = cudaSucceeded(cudaSetDevice(0), "cudaSetDevice") &&
               cudaSucceeded(cudaMalloc((void **)&dVector, bytes), "cudaMalloc") &&
               cudaSucceeded(cudaMemcpy(dVector, hVector, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (host to device)") &&
               // Fetch the device-side address stored in dMultiplyByTwoPtr so it can be handed to the kernel.
               cudaSucceeded(cudaMemcpyFromSymbol(&hFunctionPtr, dMultiplyByTwoPtr, sizeof(deviceFunc)), "cudaMemcpyFromSymbol (dMultiplyByTwoPtr)");

     if (ok)
     {
         transformVector<<<blocks, threadsPerBlock>>>(dVector, hFunctionPtr, size);
         // A kernel launch returns nothing; launch-configuration errors are reported by cudaGetLastError().
         ok = cudaSucceeded(cudaGetLastError(), "transformVector launch (multiplyByTwo)") &&
              cudaSucceeded(cudaMemcpyFromSymbol(&hFunctionPtr, dDivideByTwoPtr, sizeof(deviceFunc)), "cudaMemcpyFromSymbol (dDivideByTwoPtr)");
     }

     if (ok)
     {
         transformVector<<<blocks, threadsPerBlock>>>(dVector, hFunctionPtr, size);
         ok = cudaSucceeded(cudaGetLastError(), "transformVector launch (divideByTwo)") &&
              cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize") &&   // surfaces errors raised while the kernels ran
              cudaSucceeded(cudaMemcpy(hResultVector, dVector, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");
     }

     // Release the device buffer on every path, including after an earlier failure.
     if (dVector)
         ok = cudaSucceeded(cudaFree(dVector), "cudaFree") && ok;

     if (!ok)
         return 0;

     for (int i = 0; i < (int)size; ++i)
     {
         if (fabs(hVector[i] - hResultVector[i]) > EPS)
         {
             printf("\nError at i == %d, hVector[i] == %f, hResultVector[i] == %f", i, hVector[i], hResultVector[i]);
             return 0;
         }
     }
     return 1;
 }

 /**
  * Program entry point: prints a start banner, runs test(), and prints "Pass" when test()
  * returns non-zero or "Fail" otherwise.
  *
  * @return 0
  */
 int main()
 {
     printf("\n\tStart.\n");

     const int passed = test();
     printf("\n\tFinish: %s\n", passed ? "Pass" : "Fail");

     getchar();   // blocks until input is available (presumably to keep the console window open)
     return 0;
 }

● 输出结果：

        Start.

        Finish: Pass

▶ 涨姿势

// cuda_runtime_api.h

// Expands to a default-argument initialiser ("= v") for the declaration below.
#define __dv(v) \
        = v

// Copies `count` bytes, starting `offset` bytes past the device symbol `symbol`, into `dst`.
// `offset` defaults to 0 and the copy kind defaults to device-to-host.
extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol(void *dst, const void *symbol, size_t count, size_t offset __dv(0), enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost));

0_Simple__simpleSeparateCompilation的更多相关文章

随机推荐

BZOJ4481: [Jsoi2015]非诚勿扰【概率期望+树状数组】
Description [故事背景] JYY赶上了互联网创业的大潮,为非常勿扰开发了最新的手机App实现单身大龄青年之间的"速配".然而随着用户数量的增长,JYY发现现有速配的算 ...
nginx+keepalived实现负载均衡nginx的高可用
准备四台服务器两台做主备,另外两台做访问 192.168.1.120 master 192.168.1.121 backup 192.168.1.122 nginx 192.168.1.123 ng ...
动态绑定AJAX，获取下级分类并延迟执行
HTML: <div id='allType'> <div class='allTypeHead'><span>所有分类</span></div& ...
http协议详谈
scheme - 定义因特网服务的类型.最常见的类型是 httphost - 定义域主机(http 的默认主机是 www)domain - 定义因特网域名,比如 runoob.comport - 定义 ...
系统有专门画图的api
lseek成功但未生效？
如果open打开文件时,指定了O_APPEND,即“追加”模式,那么lseek的向前移动指针的操作无法凑效,包括lseek(fd, 负数, SEEK_CUR)和lseek(fd, 小于当前偏移的位置, ...
linux挂载SD卡
(1)通过#fdisk -l命令确认板子上的linux系统是否识别SD卡 MP805M板子插入SD卡后显示 SD30 slot is without WPmmc1: new high speed SD ...
前端可视化建模技术概览，包括：GoJS
我推荐使用的: 库网址备注 GoJS https://gojs.net/latest/samples/flowchart.html 推荐使用相关文章: 前端可视化建模技术概览:http://le ...
http报头 Accept 与 Content-Type 的区别
Accept属于请求头, Content-Type属于实体头. Http报头分为通用报头,请求报头,响应报头和实体报头. 请求方的http报头结构:通用报头|请求报头|实体报头响应方的http报头结 ...
设置新时间校正服务器NTP SERVER
时间校正服务器IP : 10.*.*.* 适用系统:windows server 2008/windows 7 net stop w32time net start w32time w32tm /qu ...

0_Simple__simpleSeparateCompilation

0_Simple__simpleSeparateCompilation的更多相关文章

随机推荐

热门专题