cuda - CUDA関数ポインター

Question

私はCUDAでこのようなsomtehingを作ろうとしていました（実際にはいくつかの統合関数を書く必要があります）

#include <iostream>
using namespace std;

float f1(float x) {
    return x * x;
}

float f2(float x) {
    return x;
}

void tabulate(float p_f(float)) {
    for (int i = 0; i != 10; ++i) {
        std::cout << p_f(i) << ' ';
    }
    std::cout << std::endl;
}

int main() {
    tabulate(f1);
    tabulate(f2);
    return 0;
}

出力：

0 1 4 9 16 25 36 49 64 81
0 1 2 3 4 5 6 7 8 9

次のことを試しましたが、エラーが発生しました

エラー：関数ポインターと関数テンプレートパラメーターはsm_1xではサポートされていません。

float f1(float x) {
    return x;
}

__global__ void tabulate(float lower, float upper, float p_function(float), float* result) {
    for (lower; lower < upper; lower++) {
        *result = *result + p_function(lower);
    }
}

int main() {
    float res;
    float* dev_res;

    cudaMalloc( (void**)&dev_res, sizeof(float) ) ;

    tabulate<<<1,1>>>(0.0, 5.0, f1, dev_res);
    cudaMemcpy(&res, dev_res, sizeof(float), cudaMemcpyDeviceToHost);

    printf("%f\n", res);
    /************************************************************************/
    scanf("%s");

    return 0;
}

score 10 · Accepted Answer

コンパイルエラーを取り除くには-gencode arch=compute_20,code=sm_20、コードをコンパイルするときにコンパイラ引数として使用する必要があります。ただし、実行時に問題が発生する可能性があります。

CUDAプログラミングガイドhttp://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#functionsから取得

関数への関数ポインター__global__はホストコードでサポートされていますが、デバイスコードではサポートされていません。関数への関数ポインター__device__は、計算機能2.x以降のデバイス用にコンパイルされたデバイスコードでのみサポートされます。

__device__ホストコードで関数のアドレスを取得することは許可されていません。

したがって、次のようなものを使用できます（「FunctionPointers」サンプルから採用）。

//your function pointer type - returns unsigned char, takes parameters of type unsigned char and float
typedef unsigned char(*pointFunction_t)(unsigned char, float);

//some device function to be pointed to
__device__ unsigned char
Threshold(unsigned char in, float thresh)
{
   ...
}

//pComputeThreshold is a device-side function pointer to your __device__ function
__device__ pointFunction_t pComputeThreshold = Threshold;
//the host-side function pointer to your __device__ function
pointFunction_t h_pointFunction;

//in host code: copy the function pointers to their host equivalent
cudaMemcpyFromSymbol(&h_pointFunction, pComputeThreshold, sizeof(pointFunction_t))

次に、をパラメータとしてカーネルに渡すことができh_pointFunctionます。カーネルは、それを使用して__device__関数を呼び出すことができます。

//your kernel taking your __device__ function pointer as a parameter
__global__ void kernel(pointFunction_t pPointOperation)
{
    unsigned char tmp;
    ...
    tmp = (*pPointOperation)(tmp, 150.0)
    ...
}

//invoke the kernel in host code, passing in your host-side __device__ function pointer
kernel<<<...>>>(h_pointFunction);

うまくいけば、それはある程度意味がありました。全体として、f1関数を関数に変更し__device__、同様の手順（typedefは必要ありませんが、コードをより適切にする）に従って、ホスト上の有効な関数ポインターとして取得する必要があるようです。 -カーネルに渡す側。また、FunctionPointersCUDAサンプルを確認することをお勧めします

score 1 · Accepted Answer

このコードをコンパイルできる場合でも（@Robert Crovellaの回答を参照）、このコードは機能しません。ホストコンパイラには関数アドレスを把握する方法がないため、ホストコードから関数ポインタを渡すことはできません。

score 1 · Accepted Answer

これは、この質問に基づいて作成したカーネル内から呼び出すことができる関数ポインターの単純なクラスです。

template <typename T>
struct cudaCallableFunctionPointer
{
public:
  cudaCallableFunctionPointer(T* f_)
  {
    T* host_ptr = (T*)malloc(sizeof(T));
    cudaMalloc((void**)&ptr, sizeof(T));

    cudaMemcpyFromSymbol(host_ptr, *f_, sizeof(T));
    cudaMemcpy(ptr, host_ptr, sizeof(T), cudaMemcpyHostToDevice);
    
    cudaFree(host_ptr)
  }

  ~cudaCallableFunctionPointer()
  {
    cudaFree(ptr);
  }

  T* ptr;
};

あなたはそれをこのように使うことができます：

__device__ double func1(double x)
{
    return x + 1.0f;
}

typedef double (*func)(double x);
__device__ func f_ = func1;



__global__ void test_kernel(func* f)
{
    double x = (*f)(2.0);
    printf("%g\n", x);
}



int main()
{
    cudaCallableFunctionPointer<func> f(&f_);

    test_kernel << < 1, 1 >> > (f.ptr);
}

出力：

cuda - CUDA関数ポインター

3 に答える 3

Related

Reference