さて、VS2010とAMD APP SDKを使い、4コアCPUとAMD 6950 GPUの両方のデバイスで、32ビットビルドと64ビットビルドの両方について以下のコードを試しました。私の環境では正常に動作しました...
構造体のパッキングを #pragma pack(1) からデフォルトのパッキングに変更するか、int2 ベクトルを使用してみてください。
/*
Copyright (C) Tim Child 2012
All rights reserved
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <memory.h>
#include <malloc.h>
#include "CL\cl.h"
#define LIST_SIZE 10
/*
 * pairt: pair of ints declared under 1-byte packing (#pragma pack(push, 1)).
 * main() uses an array of these as the host data copied to and from the
 * OpenCL buffer, and PrintPair() prints them.
 */
#pragma pack(push, 1)
typedef struct pairt
{
int a; /* first value of the pair */
int b; /* second value of the pair */
} pairt;
#pragma pack(pop) /* restore the packing that was in effect before this struct */
/*
 * pair32: the same two-int pair as pairt, but with explicit 32-bit fields and
 * the compiler's default packing. main() uses it to print the field offsets
 * for comparison with the packed layout.
 */
typedef struct pair32
{
int32_t a; /* first value of the pair */
int32_t b; /* second value of the pair */
} pair32;
/**
printAligment Print a heading line followed by two offsets on one tab-separated line
@param[in] msg Heading printed on its own line
@param[in] a First offset (for example offsetof of the first field)
@param[in] b Second offset (for example offsetof of the second field)
**/
void printAligment( char * msg, size_t a, size_t b)
{
    printf("%s\n", msg);
    /* size_t is not int, so passing it to %d is a format mismatch (and is
       wrong on 64-bit builds). Cast to unsigned long and use %lu, which every
       C runtime understands, including ones without %zu. */
    printf( "\t%lu\t%lu\n", (unsigned long)a, (unsigned long)b );
}
/**
OpenCLEnvironment An OpenCL Environment structure
Holds the platforms and devices enumerated by NewOpenCLEnvironment together
with the context and command queue created for the selected device.
**/
typedef struct OpenCLEnvironment
{
cl_platform_id * platformId; /* malloc'd array of platform ids, platformCount entries */
cl_uint platformCount; /* number of platforms reported by clGetPlatformIDs */
cl_device_id * deviceId; /* malloc'd flat array of the devices of all platforms, deviceTotal entries */
cl_uint * deviceCount; /* malloc'd array: number of devices found on each platform */
cl_uint deviceTotal; /* sum of the per-platform device counts */
cl_context context; /* context created for the selected device; stays 0 if creation was not attempted */
cl_command_queue queue; /* command queue on the selected device; stays 0 if no context was created */
cl_uint currentPlatform; /* index into platformId of the platform in use */
cl_uint currentDevice; /* index into deviceId (the flat list) of the device in use */
} OpenCLEnvironment;
/**
OpenCLKernel A kernel built from source by NewOpenCLKernel
The string pointers name, sourceCode and compilerOptions are stored as passed
in by the caller (not copied).
**/
typedef struct OpenCLKernel
{
char * name; /* kernel function name passed to clCreateKernel */
cl_program program; /* program object created from sourceCode */
cl_kernel kernel; /* kernel object; stays 0 when the build did not succeed */
char * sourceCode; /* kernel source text */
char * compilerOptions; /* options string passed to clBuildProgram */
char * compilerErrors; /* malloc'd build log, filled in only when the build fails */
} OpenCLKernel;
/**
NewOpenCLEnvironment Create a new OpenCL Environment
Enumerates every platform and every device of each platform, then creates a
context and a command queue for the selected device.
@param[in] currentPlatform Index of the platform Id to use
@param[in] currentDevice Index of the Device Id to use. This indexes the flat
           device list that spans all platforms, not the devices of
           currentPlatform only.
@returns OpenCL Environment, or NULL if the structure itself cannot be
         allocated. On any later failure (no platform, no device, index out of
         range, allocation or OpenCL error) the structure is still returned
         but its context and/or queue are left NULL, so callers must check
         them before use. The caller owns the structure and its arrays.
**/
OpenCLEnvironment * NewOpenCLEnvironment(cl_uint currentPlatform, cl_uint currentDevice)
{
    OpenCLEnvironment * environment = NULL;
    cl_int error = CL_SUCCESS;
    cl_uint i = 0;
    cl_uint sum = 0;
    cl_uint stored = 0;
    cl_uint offset = 0;
    /* Local (not static) so that concurrent or repeated calls never share
       the platform slot that is patched below. */
    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, 0, 0 };

    environment = (OpenCLEnvironment *)malloc( sizeof(OpenCLEnvironment) );
    if ( environment == NULL )
    {
        return NULL;
    }
    memset( environment, 0, sizeof(OpenCLEnvironment) );

    /* First call only asks how many platforms exist. */
    error = clGetPlatformIDs( 0, NULL, &environment->platformCount );
    if ( error != CL_SUCCESS || environment->platformCount == 0 )
    {
        environment->platformCount = 0;
        return environment;
    }

    environment->deviceCount = (cl_uint *)malloc( sizeof(cl_uint) * environment->platformCount );
    environment->platformId = (cl_platform_id *)malloc( sizeof(cl_platform_id) * environment->platformCount );
    if ( environment->deviceCount == NULL || environment->platformId == NULL )
    {
        /* Keep the structure self-consistent: no arrays, no platforms. */
        free( environment->deviceCount );
        free( environment->platformId );
        environment->deviceCount = NULL;
        environment->platformId = NULL;
        environment->platformCount = 0;
        return environment;
    }
    /* A platform whose device query fails must read back as "0 devices",
       not as whatever malloc left in the array. */
    memset( environment->deviceCount, 0, sizeof(cl_uint) * environment->platformCount );
    memset( environment->platformId, 0, sizeof(cl_platform_id) * environment->platformCount );

    error = clGetPlatformIDs( environment->platformCount, environment->platformId, NULL );
    if ( error != CL_SUCCESS )
    {
        return environment;
    }

    /* Pass 1: count the devices of each platform. */
    for ( i = 0; i < environment->platformCount; i++ )
    {
        sum = 0;
        error = clGetDeviceIDs( environment->platformId[i], CL_DEVICE_TYPE_ALL, 0, NULL, &sum );
        if ( error == CL_SUCCESS )
        {
            environment->deviceCount[i] = sum;
            environment->deviceTotal += sum;
        }
    }
    if ( environment->deviceTotal == 0 )
    {
        return environment;
    }

    environment->deviceId = (cl_device_id *)malloc( sizeof(cl_device_id) * environment->deviceTotal );
    if ( environment->deviceId == NULL )
    {
        /* Nothing could be stored, so do not advertise any devices. */
        memset( environment->deviceCount, 0, sizeof(cl_uint) * environment->platformCount );
        environment->deviceTotal = 0;
        return environment;
    }
    memset( environment->deviceId, 0, sizeof(cl_device_id) * environment->deviceTotal );

    /* Pass 2: fetch the device ids of each platform into one flat array. */
    offset = 0;
    for ( i = 0; i < environment->platformCount; i++ )
    {
        if ( environment->deviceCount[i] == 0 )
        {
            continue;
        }
        sum = 0;
        error = clGetDeviceIDs( environment->platformId[i], CL_DEVICE_TYPE_ALL, environment->deviceCount[i],
                                &environment->deviceId[offset], &sum );
        if ( error == CL_SUCCESS )
        {
            /* At most deviceCount[i] entries were written, even if the
               platform now reports more devices than in pass 1. */
            stored = ( sum < environment->deviceCount[i] ) ? sum : environment->deviceCount[i];
        }
        else
        {
            stored = 0;
        }
        environment->deviceCount[i] = stored;
        offset += stored;
    }
    /* Only the first 'offset' entries of deviceId are valid. */
    environment->deviceTotal = offset;

    if ( currentPlatform < environment->platformCount && currentDevice < environment->deviceTotal )
    {
        environment->currentPlatform = currentPlatform;
        environment->currentDevice = currentDevice;
        properties[1] = (cl_context_properties)environment->platformId[ environment->currentPlatform ];
        environment->context = clCreateContext( properties, 1, &environment->deviceId[ environment->currentDevice ],
                                                NULL, NULL, &error );
        if ( error == CL_SUCCESS )
        {
            environment->queue = clCreateCommandQueue( environment->context,
                                                       environment->deviceId[ environment->currentDevice ], 0, &error );
        }
    }
    return environment;
}
/**
NewOpenCLKernel Create a new OpenCL Kernel from Source code
@param[in] environment OpenCL Environment (its context and current device are used)
@param[in] name Kernel Name
@param[in] sourceCode Kernel source code
@param[in] compilerOptions command line option used to compile the kernel
@returns a New OpenCL Kernel structure, or NULL if environment is NULL or the
         structure cannot be allocated. The structure is returned even when
         the program cannot be created or built: in that case its kernel
         member is NULL and, after a failed build, compilerErrors holds the
         NUL-terminated build log (malloc'd, owned by the caller). The name,
         sourceCode and compilerOptions pointers are stored, not copied.
**/
OpenCLKernel * NewOpenCLKernel( OpenCLEnvironment * environment, char * name, char * sourceCode, char * compilerOptions )
{
    OpenCLKernel * kernel = NULL;
    size_t size = 0;
    cl_int error = CL_SUCCESS;
    cl_device_id device = NULL;

    if ( environment == NULL )
    {
        return NULL;
    }

    kernel = (OpenCLKernel *)malloc( sizeof(OpenCLKernel) );
    if ( kernel == NULL )
    {
        return NULL;
    }
    memset( kernel, 0, sizeof(OpenCLKernel) );
    kernel->sourceCode = sourceCode;
    kernel->name = name;
    kernel->compilerOptions = compilerOptions;

    /* Without a context or a device list nothing can be built; hand back the
       structure with a NULL kernel so the caller can detect the failure. */
    if ( environment->context == NULL || environment->deviceId == NULL )
    {
        return kernel;
    }
    device = environment->deviceId[ environment->currentDevice ];

    kernel->program = clCreateProgramWithSource( environment->context, 1,
                                                 (const char **)&kernel->sourceCode, NULL, &error );
    if ( error != CL_SUCCESS || kernel->program == NULL )
    {
        /* Do not go on to build an invalid program handle. */
        kernel->program = NULL;
        return kernel;
    }

    error = clBuildProgram( kernel->program, 1, &device, kernel->compilerOptions, NULL, NULL );
    if ( error == CL_SUCCESS )
    {
        kernel->kernel = clCreateKernel( kernel->program, name, &error );
        if ( error != CL_SUCCESS )
        {
            kernel->kernel = NULL;
        }
        return kernel;
    }

    /* Build failed: ask for the log size first, then fetch the log itself. */
    if ( clGetProgramBuildInfo( kernel->program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size ) != CL_SUCCESS )
    {
        size = 0;
    }
    kernel->compilerErrors = (char *)malloc( size + 1 );
    if ( kernel->compilerErrors != NULL )
    {
        kernel->compilerErrors[0] = '\0';
        if ( size > 0 &&
             clGetProgramBuildInfo( kernel->program, device, CL_PROGRAM_BUILD_LOG,
                                    size, kernel->compilerErrors, NULL ) != CL_SUCCESS )
        {
            kernel->compilerErrors[0] = '\0';
        }
        /* The extra byte guarantees termination whatever the driver wrote. */
        kernel->compilerErrors[size] = '\0';
    }
    return kernel;
}
/**
PrintPair Print a heading, then each pair on its own tab-separated line, then a line holding one space
@param[in] msg Heading printed first
@param[in] p Array of pairs to print
@param[in] n Number of entries of p to print; nothing is printed for the pairs when n <= 0
**/
void PrintPair(char * msg, pairt * p, int n)
{
    const pairt * cursor = p;
    int remaining = n;

    printf("%s\n", msg);
    while ( remaining > 0 )
    {
        printf ("\t%d\t%d\n", cursor->a, cursor->b );
        cursor++;
        remaining--;
    }
    printf( " \n" );
}
int main ( int argc, char ** argv)
{
cl_int error = CL_SUCCESS;
cl_mem p_mem_obj;
size_t global_item_size = LIST_SIZE;
size_t local_item_size = 2;
cl_event events[2];
static pairt unaligned[] = {
1, 2,
3, 4,
5, 6,
7, 8,
9, 10,
11, 12,
13, 14,
15, 16,
17, 18,
19, 20,};
pair32 aligned[] = {
1, 2,
3, 4,
5, 6,
7, 8,
9, 10,
11, 12,
13, 14,
15, 16,
17, 18,
19, 20,};
cl_int2 vector[] = {
1, 2,
3, 4,
5, 6,
7, 8,
9, 10,
11, 12,
13, 14,
15, 16,
17, 18,
19, 20,};
OpenCLEnvironment * environment = NewOpenCLEnvironment(0, 1);
static char * simple_diff = "struct __attribute__ ((packed)) pairt { \
int a; \
int b; \
}; \
\
__kernel void simple_diff( __global struct pairt* p) \
{ \
int i = get_global_id(0); \
__global struct pairt *tmp = &p[i]; \
\
tmp->a = tmp->a * -1; \
tmp->b = tmp->b * -1; \
\
}";
OpenCLKernel * simpleDiff;
printAligment( "Pack 1 aligned", offsetof(pairt, a), offsetof(pairt, b) );
printAligment( "Default aligned", offsetof(pair32, a), offsetof(pair32, b) );
printAligment( "Vector aligned", offsetof(cl_int2, s[0]), offsetof(cl_int2, s[1]) );
simpleDiff = NewOpenCLKernel(environment, "simple_diff", simple_diff, "" );
p_mem_obj = clCreateBuffer(environment->context, CL_MEM_READ_WRITE, LIST_SIZE*sizeof(struct pairt), NULL, &error);
error = clEnqueueWriteBuffer(environment->queue, p_mem_obj, CL_TRUE, 0, LIST_SIZE*sizeof(struct pairt), &unaligned, 0, NULL, NULL);
error = clSetKernelArg(simpleDiff->kernel, 0, sizeof(cl_mem), (void *)&p_mem_obj);
error = clEnqueueNDRangeKernel( environment->queue, simpleDiff->kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, &events[0]);
error = clWaitForEvents(1, &events[0]);
error = clReleaseEvent(events[0]);
error = clEnqueueReadBuffer(environment->queue, p_mem_obj, CL_TRUE, 0, LIST_SIZE*sizeof(struct pairt), unaligned, 0, NULL, &events[1]);
error = clWaitForEvents(1, &events[1]);
error = clReleaseEvent(events[1]);
PrintPair( "Pack 1", unaligned, sizeof(unaligned)/sizeof(unaligned[0]) )
}