opengl - OpenCLがOpenGL VBOを更新しない

Question

私は OpenCL コーディングへの第一歩を踏み出しているところです。少なくとも、CPU から配列を受け取り、OpenCL で演算を実行し、その配列を(正しい結果で)読み戻せるフレームワークはできています。現在は、この OpenCL の例(スライド 18～23。大きな変更点は VBO を float4 ではなく float3 にしたことだけです)にあるようなディスプレイスメントメッシュを追加して、これを発展させようとしています。

これらのスライドとこのリソースで説明されているとおりに、共有コンテキストを設定しました。VBO は CPU 側の入力データでテスト済みで、正しく描画されることを確認しています。また、(このスレッドを受けて) VBO より先にコンテキストを作成するようにしています。最後に、カーネルを次のように書き直してみました [編集済み]:

// Test kernel: one work-item per mesh vertex; every vertex is overwritten
// with the constant position (1, 1, 1). `height` and `time` are unused here.
// NOTE(review): in OpenCL C a float3 occupies the size and alignment of a
// float4, so pos[i] advances 16 bytes per vertex — confirm that the VBO is
// laid out with that stride rather than as tightly packed 12-byte triples.
__kernel void sine_wave(__global float3* pos, int width, int height, float time) {
    const uint col = get_global_id(0);
    const uint row = get_global_id(1);
    const uint vertex = row*width + col;
    pos[vertex] = (float3)(1.0f, 1.0f, 1.0f);
}

それでも、何をしても OpenCL プログラムに何も更新させることができません。エラーなどは一切出ないのに、VBO は入力データのままです。入力データを指定しない場合、すべての点が (0,0,0) にレンダリングされます。何が原因なのかわかりません。

何かアイデアはありますか？よろしくお願いします。
イアン

PS #1: 現在のシステムは Windows 7 x64 の NVIDIA GTX 580M ですが、記述されたコードは移植可能であることを意図しています。

PS #2: 手がかりがつかめないようであれば、コードを提供できます……

score 1 · Accepted Answer

さて、自己解決しました。さらに何時間も検索した末に、NVIDIA の GPU Computing Toolkit をダウンロードしました。リンク先のデモはこれを元にしているようです。そのうえで、コードを次の約 220 行のソースにまで大幅に削りました（今後同じ問題に当たる人の役に立つかもしれません）。

#pragma comment(lib,"Opengl32.lib")
#pragma comment(lib,"glu32.lib")
#pragma comment(lib,"OpenCL.lib")
#pragma comment(lib,"glew32.lib")
#pragma comment(lib,"glut32.lib")

// Standard library
#include <stdio.h>

// OpenGL Graphics Includes
#include <GL/glew.h>
#if defined (__APPLE__) || defined(MACOSX)
    #include <OpenGL/OpenGL.h>
    #include <GLUT/glut.h>
#else
    #include <GL/glut.h>
    #ifdef UNIX
        #include <GL/glx.h>
    #endif
#endif

#include <CL/opencl.h>

// Rendering window vars
// Window size in pixels (used for the GLUT window, viewport and aspect ratio)
// and the number of mesh vertices along each axis.
const unsigned int window_width = 512;
const unsigned int window_height = 512;
const unsigned int mesh_width = 256;
const unsigned int mesh_height = 256;

// OpenCL vars
// All of these are created in main() and used by DisplayGL() on every frame.
cl_context cxGPUContext;
cl_device_id* cdDevices;          // array allocated in main(); element 0 is the device used
cl_command_queue cqCommandQueue;
cl_kernel ckKernel;
cl_mem vbo_cl;                    // OpenCL view of the GL buffer `vbo`
cl_program cpProgram;
// Global work size handed to clEnqueueNDRangeKernel: one work-item per mesh vertex.
size_t szGlobalWorkSize[] = {mesh_width, mesh_height};

// vbo variables
// GL buffer object holding the vertex positions that are drawn as points.
GLuint vbo;

// Mouse state shared between the mouse() and motion() callbacks.
int mouse_old_x, mouse_old_y;     // cursor position at the previous mouse event
int mouse_buttons = 0;            // bitmask: bit (1<<button) is set while that button is held
float rotate_x = 0.0, rotate_y = 0.0;   // view rotation in degrees (passed to glRotatef)
float translate_z = -3.0;               // view translation along Z (passed to glTranslatef)
// GLUT mouse-button callback.
// A press sets the bit for `button` in mouse_buttons; a release clears the
// whole mask (all buttons, not only the one released). The cursor position is
// remembered so that motion() can compute drag deltas.
void mouse(int button, int state, int x, int y) {
    switch (state) {
    case GLUT_DOWN:
        mouse_buttons |= 1 << button;
        break;
    case GLUT_UP:
        mouse_buttons = 0;
        break;
    default:
        break;
    }

    mouse_old_x = x;
    mouse_old_y = y;
}
// GLUT drag callback.
// Dragging with button 0 held rotates the view; otherwise, dragging with
// button 2 held moves it along Z. The cursor position is then stored as the
// reference point for the next drag event.
void motion(int x, int y) {
    const float dx = (float)(x - mouse_old_x);
    const float dy = (float)(y - mouse_old_y);

    const int rotating = mouse_buttons & 1;   // bit set by mouse() for button index 0
    const int zooming  = mouse_buttons & 4;   // bit set by mouse() for button index 2

    if (rotating) {
        rotate_x += dy * 0.2f;
        rotate_y += dx * 0.2f;
    } else if (zooming) {
        translate_z += dy * 0.01f;
    }

    mouse_old_x = x;
    mouse_old_y = y;
}

void DisplayGL(void) {
    static float anim = 0.0f;

    // run OpenCL kernel to generate vertex positions
    glFinish();
    clEnqueueAcquireGLObjects(cqCommandQueue, 1, &vbo_cl, 0,0,0);

    clSetKernelArg(ckKernel, 3, sizeof(float), &anim);
    clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 2, NULL, szGlobalWorkSize, NULL, 0,0,0 );

    clEnqueueReleaseGLObjects(cqCommandQueue, 1, &vbo_cl, 0,0,0);
    clFinish(cqCommandQueue);

    // set view matrix
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();
    glTranslatef(0.0, 0.0, translate_z);
    glRotatef(rotate_x, 1.0, 0.0, 0.0);
    glRotatef(rotate_y, 0.0, 1.0, 0.0);

    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glVertexPointer(4, GL_FLOAT, 0, 0);
    glEnableClientState(GL_VERTEX_ARRAY);
    glColor3f(1.0, 0.0, 0.0);
    glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
    glDisableClientState(GL_VERTEX_ARRAY);

    // flip backbuffer to screen
    glutSwapBuffers();

    anim += 0.01f;
}

// GLUT timer callback: requests a redraw and re-arms itself to fire again
// in 10 ms, which keeps the animation running. `value` is not used.
void timerEvent(int value) {
    (void)value;

    glutPostRedisplay();
    glutTimerFunc(10, timerEvent, 0);
}

int main(int argc, char** argv) {
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowPosition (glutGet(GLUT_SCREEN_WIDTH)/2 - window_width/2,  glutGet(GLUT_SCREEN_HEIGHT)/2 - window_height/2);
    glutInitWindowSize(window_width, window_height);
    glutCreateWindow("OpenCL/GL Interop (VBO)");

    glutDisplayFunc(DisplayGL);
    glutMouseFunc(mouse);
    glutMotionFunc(motion);
    glutTimerFunc(10, timerEvent,0);

    glewInit();

    glClearColor(0.0, 0.0, 0.0, 1.0);
    glDisable(GL_DEPTH_TEST);

    glViewport(0, 0, window_width, window_height);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60.0, (GLfloat)window_width / (GLfloat) window_height, 0.1, 10.0);
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    //Get the NVIDIA platform
    cl_platform_id cpPlatform;
    clGetPlatformIDs(1,&cpPlatform,NULL);

    // Get the number of GPU devices available to the platform
    cl_uint uiDevCount;
    clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &uiDevCount);

    // Create the device list
    cdDevices = new cl_device_id [uiDevCount];
    clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, uiDevCount, cdDevices, NULL);
    // Define OS-specific context properties and create the OpenCL context
#if defined (__APPLE__)
    CGLContextObj kCGLContext = CGLGetCurrentContext();
    CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
    cl_context_properties props[] = 
    {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup, 
        0 
    };
    cxGPUContext = clCreateContext(props, 0,0, NULL, NULL, &ciErrNum);
#else
#ifdef UNIX
    cl_context_properties props[] = 
    {
        CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), 
        CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), 
        CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, 
        0
    };
    cxGPUContext = clCreateContext(props, 1, &cdDevices[uiDeviceUsed], NULL, NULL, &ciErrNum);
#else // Win32
    cl_context_properties props[] = 
    {
        CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), 
        CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), 
        CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, 
        0
    };
    cxGPUContext = clCreateContext(props, 1, &cdDevices[0], NULL, NULL, NULL);
#endif
#endif

    // create a command-queue
    cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, NULL);

    const char* cSourceCL = "__kernel void sine_wave(__global float4* pos, unsigned int width, unsigned int height, float time)\n"
    "{\n"
    "   unsigned int x = get_global_id(0);\n"
    "   unsigned int y = get_global_id(1);\n"
    "\n"
    "   // calculate uv coordinates\n"
    "   float u = x / (float) width;\n"
    "   float v = y / (float) height;\n"
    "   u = u*2.0f - 1.0f;\n"
    "   v = v*2.0f - 1.0f;\n"
    "\n"
    "   // calculate simple sine wave pattern\n"
    "   float freq = 4.0f;\n"
    "   float w = sin(u*freq + time) * cos(v*freq + time) * 0.5f;\n"
    "\n"
    "   // write output vertex\n"
    "   pos[y*width+x] = (float4)(u, w, v, 1.0f);\n"
    "}\n";
    cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **) &cSourceCL, NULL, NULL);

    clBuildProgram(cpProgram, 0, NULL, "-cl-fast-relaxed-math", NULL, NULL);

    // create the kernel
    ckKernel = clCreateKernel(cpProgram, "sine_wave", NULL);

    // create VBO (if using standard GL or CL-GL interop), otherwise create Cl buffer
    unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);

    glGenBuffers(1,&vbo);
    glBindBuffer(GL_ARRAY_BUFFER,vbo);
    // initialize buffer object
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);

    // create OpenCL buffer from GL VBO
    vbo_cl = clCreateFromGLBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, vbo, NULL);

    // set the args values 
    clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void *) &vbo_cl);
    clSetKernelArg(ckKernel, 1, sizeof(unsigned int), &mesh_width);
    clSetKernelArg(ckKernel, 2, sizeof(unsigned int), &mesh_height);

    glutMainLoop();
}

元のコードと比較した後、私は（最終的に）重要な違いを見つけました。

正しい：

clEnqueueNDRangeKernel(context->command_queue, kernel->kernel, 2, NULL, global,NULL, 0,0,0 );

間違い：

clEnqueueNDRangeKernel(context->command_queue, kernel->kernel, 2, NULL, global,local, 0,0,0 );

私が使っていた 10x10 というグリッドサイズは、他で見かけたサンプルよりも小さかったことがわかりました。それらのサンプルでは「ローカル」に 16x16 を使うよう指示されていました。「グローバル」はグリッドサイズなので、「グローバル」が「ローカル」より小さくなっていたのです。

何らかの理由でこれによってエラーが発生することはありませんでしたが、現時点では、これらの変数の目的を完全に理解しているとは正直に言えません。

イアン

opengl - OpenCLがOpenGL VBOを更新しない

回答 1 件

Related

Reference