1

私は OpenCL を初めて使用します。clCreateKernel で問題が発生しており、エラー CL_INVALID_PROGRAM_EXECUTABLE が返されます。誰か助けてください。コードは http://www.cs.bris.ac.uk/home/simonm/workshops/OpenCL_lecture3.pdf の最後の最適化に基づいています。

コードは次のとおりです。

#define ORDER    10    // Order of the square matrices A, B, and C (the kernel's private Awrk[10] in main() caps this at 10)
#define AVAL     3.0     // A elements are constant and equal to AVAL
#define BVAL     5.0     // B elements are constant and equal to BVAL
#define TOL      (0.001) // tolerance used in floating point comparisons
#define DIM      2       // Max dim for NDRange
#define COUNT    1       // number of times to do each multiplication
#define SUCCESS  1
#define FAILURE  0


// Helper functions

/*
 * Fill the three row-major matrices used by the multiplication:
 *   A is Ndim x Pdim and is filled with AVAL,
 *   B is Pdim x Mdim and is filled with BVAL,
 *   C is Ndim x Mdim and is cleared to 0.
 *
 * Each matrix is indexed with its own column count as the row stride, so
 * the routine is also correct when the three dimensions differ.
 * The caller must supply buffers of at least Ndim*Pdim, Pdim*Mdim and
 * Ndim*Mdim floats respectively.
 */
void initmat(int Mdim, int Ndim, int Pdim, float *A, float *B, float *C)
{
    int i, j;

    /* A: Ndim rows of Pdim columns */
    for (i = 0; i < Ndim; i++) {
        for (j = 0; j < Pdim; j++) {
            A[i * Pdim + j] = AVAL;
        }
    }

    /* B: Pdim rows of Mdim columns */
    for (i = 0; i < Pdim; i++) {
        for (j = 0; j < Mdim; j++) {
            B[i * Mdim + j] = BVAL;
        }
    }

    /* C: Ndim rows of Mdim columns */
    for (i = 0; i < Ndim; i++) {
        for (j = 0; j < Mdim; j++) {
            C[i * Mdim + j] = 0.0f;
        }
    }
}

/*
 * Read the whole of "kernel.cl" (looked up relative to the current working
 * directory) into a newly allocated, NUL-terminated buffer.
 *
 * Progress and error messages are printed to stdout.
 *
 * Returns the buffer on success; the caller owns it and must free() it.
 * An empty file yields an empty string. Returns NULL when the file cannot
 * be opened, measured or read, or when memory allocation fails.
 */
char * readKernel(void)
{
    char *source = NULL;
    long file_size;
    size_t source_length;
    size_t bytes_read;
    int ok = 0;
    FILE *fp = fopen("kernel.cl", "r");

    if (fp == NULL)
    {
        printf("Cannot Open Kernel.cl\n");
        return NULL;
    }
    printf("Kernel.cl Opened\n");

    /* Measure the file by seeking to its end. */
    if (fseek(fp, 0, SEEK_END) != 0)
    {
        printf("Cannot determine Kernel.cl length\n");
        goto done;
    }
    file_size = ftell(fp);
    if (file_size < 0)
    {
        printf("Cannot determine Kernel.cl length\n");
        goto done;
    }
    source_length = (size_t) file_size;

    if (source_length == 0)
    {
        printf("Kernel.cl is empty\n");
    }
    else
    {
        printf("Kernel.cl length: %zu bytes\n", source_length);
    }

    /* One extra zeroed byte guarantees NUL termination. */
    source = calloc(source_length + 1, 1);
    if (source == NULL)
    {
        printf("Memory allocation failed\n");
        goto done;
    }

    if (fseek(fp, 0, SEEK_SET) != 0)
    {
        printf("Cannot rewind Kernel.cl\n");
        goto done;
    }

    /* In text mode fewer bytes than the measured length may be delivered,
       so only a stream error counts as failure. */
    bytes_read = fread(source, 1, source_length, fp);
    if (ferror(fp))
    {
        printf("Cannot read Kernel.cl\n");
        goto done;
    }
    source[bytes_read] = '\0';
    printf("Kernel.cl Read\n");
    ok = 1;

done:
    fclose(fp);
    if (!ok)
    {
        free(source);
        source = NULL;
    }
    return source;
}
int main(int argc, char **argv) 
{ 



    // Declare and iniciate data
    float *A, *B, *C;
    int Mdim, Ndim, Pdim;
    int err, szA, szB, szC;
    size_t global[DIM];
    size_t local[DIM];
    cl_device_id device_id;
    cl_context context;
    cl_command_queue commands;
    cl_program program;
    cl_kernel kernel;
    cl_uint nd;
    cl_mem a_in, b_in, c_out;
    Ndim = ORDER;
    Pdim = ORDER;
    Mdim = ORDER;
    szA = Ndim*Pdim;
    szB = Pdim*Mdim;
    szC = Ndim*Mdim;
    A = (float *)malloc(szA*sizeof(float));
    B = (float *)malloc(szB*sizeof(float));
    C = (float *)malloc(szC*sizeof(float));
    const char* C_elem_KernelSource = 
    "__kernel                               \n"
    "void mmul(                             \n"
    "    const int Mdim,                    \n"
    "    const int Ndim,                    \n"  
    "    const int Pdim,                    \n"
    "    __global float* A,                 \n"
    "    __global float* B,                 \n" 
    "    __global float* C,                 \n"
    "    __local float* Bwrk)               \n"
    "{                                      \n" 
    "    int k,j;                           \n"  
    "    int i = get_global_id(0);          \n"
    "    int iloc = get_local_id(0);        \n"
    "    int nloc = get_local_size(0);      \n"
    "    float Awrk[10];                    \n"
    "    float tmp;                         \n"
    "    for (k=0; k<Pdim; k++)             \n" 
    "        Awrk[k] = A[i*Ndim+k];         \n" 
    "    for (j=0; j<Mdim; j++){            \n"
    "        for (k=iloc; k<Pdim; k=k+nloc) \n"
    "            Bwrk[k] = B[k*Pdim+j];     \n"
    "        barrier(CLK_LOCAL_MEM_FENCE);  \n"
    "        tmp = 0.0f;                    \n" 
    "        for (k=0; k<Pdim; k++)         \n"
    "            tmp += Awrk[k] * Bwrk[k];  \n"
    "        C[i*Ndim+j] += tmp;            \n"
    "}                                      \n"  
    ;

    initmat(Mdim, Ndim, Pdim, A, B, C);
    // Setup the plataform 
    cl_uint num_platforms;
    if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS)
    {
        printf("Unable to get platform!\n");
    }else{
        printf("Plataformas Disponibles: %u \n", num_platforms);
    }
    //Identificador
    cl_platform_id platform_id;
    clGetPlatformIDs(1, &platform_id, &num_platforms);
    printf("Plataformas creada\n");

    err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
    if (err==CL_SUCCESS){
        printf("Device creado \n");
    }else {
        printf("Error %d \n", err);
    }
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); 
    if (err==CL_SUCCESS){
        printf("Contexto creado \n");
    }else {
        printf("Error creando contexto \n");
    }
    commands = clCreateCommandQueue(context, device_id, 0, &err);
    if (err==CL_SUCCESS){
        printf("cola de comandos creadas \n");
    }else {
        printf("Error creando cola de comandos \n");
    } 

    // Setup buffers and write A and B matrices to the device memory
    a_in  = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szA, NULL, NULL); 
    b_in  = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * szB, NULL, NULL); 
    c_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * szC, NULL, NULL); 
    err = clEnqueueWriteBuffer(commands, a_in, CL_TRUE, 0, sizeof(float) * szA, A, 0, NULL, NULL); 
    err = clEnqueueWriteBuffer(commands, b_in, CL_TRUE, 0, sizeof(float) * szB, B, 0, NULL, NULL); 
    // Build the program, define the kernel and setup arguments 
    program = clCreateProgramWithSource(context, 1, (const char **) &C_elem_KernelSource,  NULL, &err); 
    if (err==CL_SUCCESS){
        printf("programa creado \n");
    }else {
        printf("Error generado %d creando programa\n", err);
    }
    //Compila el programa en el dispositivo elegido
    clBuildProgram(program, 1, &device_id, NULL, NULL, NULL );
    if (err==CL_SUCCESS){
        printf("programa compilado 1\n");
    }else {
        printf("Error generado %d compilando programa 1\n", err);
    }
    kernel = clCreateKernel(program, "mmul", &err);
    if (err==CL_SUCCESS){
        printf("Kernel creado \n");
    }else {
        printf("Error generado %d creando kernel\n", err);
    } 
    err = clSetKernelArg(kernel, 0, sizeof(int), &Mdim); 
    err |= clSetKernelArg(kernel, 1, sizeof(int), &Ndim); 
    err |= clSetKernelArg(kernel, 2, sizeof(int), &Pdim); 
    err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &a_in); 
    err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &b_in); 
    err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &c_out); 
    err |= clSetKernelArg(kernel, 6, sizeof(float)*Pdim, NULL);
    if (err==CL_SUCCESS){
        printf("Argumentos del Kernel configurados \n");
    }else {
        printf("Error configurando argumentos del kernel \n");
    }
    //Run the kernel and collect results
    // 1D ND Range set to dimensions of C matrix
    //Local Dim set to 250 so number of work-groups match number of 
    //compute units (4 in this case) for our order 1000 matrices
    //Pass local memory to kernels.  This requires a change to the kernel 
    //argument list … a new call to clSetKernelArg is needed
    printf("Encolando Kernel:\n");
    global[0] = (size_t) Ndim; global[1] = (size_t) Mdim; local[0] = (size_t) 2;
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, global, local, 0, NULL, NULL);
    if (err==CL_SUCCESS){
        printf("Kernel enviado a device \n");
    }else {
        printf("Error enviando kernel a device \n");
    } 
    clFinish(commands); 
    err = clEnqueueReadBuffer(commands, c_out, CL_TRUE, 0, sizeof(float) * szC, C, 0, NULL, NULL );  
    //test_results(A, B, c_out); 


}

ありがとう

4

回答 1 件

3

主な問題は、112 行目の左中括弧に対応する右中括弧がないことです。

 "    for (j=0; j<Mdim; j++){            \n"

また、34 行目で宣言されたポインターは、初期化なしで使用されることに注意してください。

size_t *source_length;

170 行目の clBuildProgram() 呼び出しに err= を追加して、エラー チェックが意図したとおりに機能するようにする必要があります。そのうえで、ビルドが失敗した場合に詳細を取得するために clGetProgramBuildInfo() を使用するロジックを追加できます。

2013-10-20T02:20:16.540 に回答