cuda - cudaでの比較

Question

CUDAで2つの浮動小数点配列（a、b）を要素ごとに比較し、次のような処理を行う必要があります。
if a > b then a = a/a ; else a = 0

これを呼び出す正しい方法と構文を教えてください。

score 1 · Accepted Answer

このようなものがうまくいくはずです。簡潔にするために、通常のcudaエラーチェックを簡素化しています。

#include <stdio.h>
#include <stdlib.h>
#define DSIZE 10000
#define nTPB 512

// Element-wise comparison kernel.
// For every i in [0, size): a[i] becomes 1.0f when a[i] > b[i], else 0.0f.
// The result overwrites a in place; b is only read.
// Each thread handles the single element at its global 1D index, so the
// launch must supply at least `size` threads in total; surplus threads exit
// without touching memory.
__global__ void cmp(float *a, float *b, int size){
  const int gid = blockIdx.x*blockDim.x + threadIdx.x;
  if (gid >= size) return;  // tail threads past the end of the arrays do nothing

  // The question's a/a form gives the same 1.0f for any finite non-zero a;
  // writing the constant directly avoids the division.
  float flag = 0.0f;
  if (a[gid] > b[gid]) flag = 1.0f;
  a[gid] = flag;
}

// Host driver for the cmp kernel.
// Fills h_a with 10.0f and h_b with 0, 1, 2, ..., copies both arrays to the
// device, runs cmp over DSIZE elements using nTPB threads per block, copies
// the result back into h_a and prints the first (up to) 20 values.
// Returns 0 on success and 1 if a host allocation or any CUDA call fails.
// Host and device buffers are released on every exit path.
int main() {
  // Every local is declared before the first goto so that no jump to
  // `cleanup` skips over an initialization.
  const size_t bytes = DSIZE*sizeof(float);
  const int blocks = (DSIZE+nTPB-1)/nTPB;        // ceil-div: enough blocks to cover DSIZE
  const int nprint = (DSIZE < 20) ? DSIZE : 20;  // never print past the end of h_a
  cudaError_t err = cudaSuccess;
  float *h_a = NULL, *h_b = NULL, *d_a = NULL, *d_b = NULL;
  int rc = 1;  // pessimistic default; set to 0 only after everything succeeded

  h_a = (float *)malloc(bytes);
  h_b = (float *)malloc(bytes);
  if (h_a == NULL || h_b == NULL) {printf("malloc fail\n"); goto cleanup;}

  for (int i=0; i< DSIZE; i++){
    h_a[i] = 10.0f;
    h_b[i] = (float)i;}

  err = cudaMalloc((void **)&d_a, bytes);
  if (err != cudaSuccess) {printf("cuda fail: %s\n", cudaGetErrorString(err)); goto cleanup;}
  err = cudaMalloc((void **)&d_b, bytes);
  if (err != cudaSuccess) {printf("cuda fail: %s\n", cudaGetErrorString(err)); goto cleanup;}
  err = cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {printf("cuda fail: %s\n", cudaGetErrorString(err)); goto cleanup;}
  err = cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {printf("cuda fail: %s\n", cudaGetErrorString(err)); goto cleanup;}

  cmp<<<blocks, nTPB>>>(d_a, d_b, DSIZE);
  // A kernel launch returns no status itself; launch-configuration errors
  // have to be fetched explicitly.
  err = cudaGetLastError();
  if (err != cudaSuccess) {printf("cuda fail: %s\n", cudaGetErrorString(err)); goto cleanup;}

  // This blocking copy waits for the kernel to finish, so errors raised
  // while the kernel was executing are reported here.
  err = cudaMemcpy(h_a, d_a, bytes, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {printf("cuda fail: %s\n", cudaGetErrorString(err)); goto cleanup;}

  for (int i=0; i< nprint; i++)
    printf("h_a[%d] = %f\n", i, h_a[i]);
  rc = 0;

cleanup:
  // cudaFree and free both accept null pointers, so this is safe no matter
  // how far the setup above got.
  cudaFree(d_b);
  cudaFree(d_a);
  free(h_b);
  free(h_a);
  return rc;
}

cuda - cudaでの比較

1 に答える 1

Related

Reference