1

cl_float2 値の配列を定数メモリにコピーすると、Nvidia プラットフォームでは期待どおりに動作せず、カーネル側で読み出した .y 成分がゼロになっているようです。AMD および Intel プラットフォームでは、この問題は発生しません。

// Host
/* Size in bytes of the FILTER_SIZE x FILTER_SIZE array of cl_float2 values;
 * used for the device buffer, the host staging array and the upload. */
const size_t filter_bytes = FILTER_SIZE * FILTER_SIZE * sizeof(cl_float2);

/* Read-only device buffer; the creation status is written to
 * createBufferErrorQuadratureFilter1. */
c_Quadrature_Filter_1 = clCreateBuffer(context, CL_MEM_READ_ONLY, filter_bytes, NULL, &createBufferErrorQuadratureFilter1);

/* Known test value: component 0 is 3.0f, component 1 is 13.0f. */
cl_float2 test;
test.s[0] = 3.0f;
test.s[1] = 13.0f;

/* Host staging array; every element is set to the test value. */
cl_float2* filter_temp = (cl_float2*)malloc(filter_bytes);

for (int row = 0; row < FILTER_SIZE; row++)
{
    cl_float2* row_values = filter_temp + row * FILTER_SIZE;

    for (int col = 0; col < FILTER_SIZE; col++)
    {
        row_values[col] = test;
    }
}

/* Blocking write (CL_TRUE), so the staging array can be released right after. */
clEnqueueWriteBuffer(commandQueue, c_Quadrature_Filter_1, CL_TRUE, 0, filter_bytes, filter_temp, 0, NULL, NULL);
free(filter_temp);

//Device
// Kernel as posted: each work-item copies the .y component of the first element
// of the __constant filter buffer into the .y component of its own output element.
//
// NOTE(review): the declaration has no return type or kernel name after
// `__kernel`; OpenCL C expects `__kernel void <name>(...)`. Presumably elided
// when the snippet was posted -- confirm against the real source.
// NOTE(review): Calculate3DIndex is defined outside this snippet; presumably it
// linearises (x, y, z) using DATA_W and DATA_H -- TODO confirm.
// DATA_D is accepted but not used in the body shown here.
__kernel(__global float2* Filter_Response, __constant float2* c_Quadrature_Filter_1, __private int DATA_W, __private int DATA_H, __private int DATA_D)
{
    // Global work-item IDs in dimensions 0, 1 and 2.
    int x = get_global_id(0);
    int y = get_global_id(1);
    int z = get_global_id(2);

    // Only element 0 of the filter is read and only .y of the output is
    // written; the .x component of Filter_Response is not touched here.
    Filter_Response[Calculate3DIndex(x,y,z,DATA_W,DATA_H)].y = c_Quadrature_Filter_1[0].y;
}
4

1 に答える 1