1

CUDA を独学で学び、画像処理用の単純なメディアン フィルターを実装しようとしました。これは私が思いついたものですが、そこから得られる画像では良い結果が得られないようです。たとえば、出力画像は比較的ノイズが少ないですが、画像の彩度が高いように見えます.Wikipediaのテディベアのこの画像を試してみると、何らかの理由で鼻が緑色になっています. 私はイライラしすぎて新しいアイデアを考えることができなくなったので、誰かがコードの問題を見ることができれば、私は非常に感謝しています. ありがとう!

これはカーネル関数です:

__global__ void median_filter(int *input, int *output, int IMAGE_W, int IMAGE_H){

    __shared__ float window[BLOCK_W*BLOCK_H][9];

    int x, y, tid;
    int i, j, iMin, temp;

    x = blockIdx.x*blockDim.x + threadIdx.x;
    y = blockIdx.y*blockDim.y + threadIdx.y;
    tid = threadIdx.y*blockDim.y + threadIdx.x;

    if(x>=IMAGE_W && y>=IMAGE_H)
        return;

    /* setting 3x3 window elements for median */
    if(y==0 && x==0)
        window[tid][0] = input[y*IMAGE_W+x];
    else if(y==0 && x!=0)
        window[tid][0] = input[y*IMAGE_W+x-1];
    else if(y!=0 && x==0)
        window[tid][0] = input[(y-1)*IMAGE_W+x];
    else
        window[tid][0] = input[(y-1)*IMAGE_W+x-1];

    window[tid][1] = (y==0)?input[y*IMAGE_W+x]:input[(y-1)*IMAGE_W+x];

    if(y==0 && x==IMAGE_W-1)
        window[tid][2] = input[y*IMAGE_W+x];
    else if(y!=0 && x==IMAGE_W-1)
        window[tid][2] = input[(y-1)*IMAGE_W+x];
    else if(y==0 && x!=IMAGE_W-1)
        window[tid][2] = input[(y-1)*IMAGE_W+x+1];
    else
        window[tid][2] = input[(y-1)*IMAGE_W+x+1];

    window[tid][3] = (x==0)?input[y*IMAGE_W+x]:input[y*IMAGE_W+x-1];
    window[tid][4] = input[y*IMAGE_W+x];
    window[tid][5] = (x==IMAGE_W-1)?input[y*IMAGE_W+x]:input[y*IMAGE_W+x+1];

    if(y==IMAGE_H-1 && x==0)
        window[tid][6] = input[y*IMAGE_W+x];
    else if(y!=IMAGE_H-1 && x==0)
        window[tid][6] = input[(y+1)*IMAGE_W+x];
    else if(y==IMAGE_H-1 && x!=0)
        window[tid][6] = input[y*IMAGE_W+x-1];
    else
        window[tid][6] = input[(y+1)*IMAGE_W+x-1];

    window[tid][7] = (y==IMAGE_H-1)?input[y*IMAGE_W+x]:input[(y+1)*IMAGE_W+x];

    if(y==IMAGE_H-1 && x==IMAGE_W-1)
        window[tid][8] = input[y*IMAGE_W+x];
    else if(y!=IMAGE_H-1 && x==IMAGE_W-1)
        window[tid][8] = input[(y+1)*IMAGE_W+x];
    else if(y==IMAGE_H-1 && x!=IMAGE_W-1)
        window[tid][8] = input[y*IMAGE_W+x+1];
    else
        window[tid][8] = input[(y+1)*IMAGE_W+x+1];

    __syncthreads();

    /* sorting window to find median */
    for(j=0; j<8; j++){
        iMin = j;
        for(i=j+1; i<9; i++){
            if(window[tid][i] < window[tid][iMin]){
                iMin = i;
            }
        }
        if(iMin != j){
            temp = window[tid][iMin];
            window[tid][iMin] = window[tid][j];
            window[tid][j] = temp;
        }
        __syncthreads();
    }

    output[y*IMAGE_W + x] = window[tid][4];
}

そして主な機能:

int main(){
    /*loading picture*/
    char picture[50] = "before.bmp";

    FILE *image = fopen(picture, "rb");

    if(image == NULL)
    {
        printf("Load picture error!\n");
        system("pause");
        exit(1);
    }

    BITMAPFILEHEADER bmpFHeader;
    BITMAPINFOHEADER bmpIHeader;
    fread(&bmpFHeader, sizeof(BITMAPFILEHEADER), 1, image);
    fread(&bmpIHeader, sizeof(BITMAPINFOHEADER), 1, image);

    int imgWidth = bmpIHeader.biWidth;
    int imgHeight = bmpIHeader.biHeight;

    int img_size = imgWidth * imgHeight * sizeof(int);

    int * imgeRedChannel_x = (int *)malloc(img_size);
    int * imgeGreenChannel_x = (int *)malloc(img_size);
    int * imgeBlueChannel_x = (int *)malloc(img_size);

    int * deviceInputRed;
    int * deviceInputGreen;
    int * deviceInputBlue;

    int * deviceOutputRd;
    int * deviceOutputGreen;
    int * deviceOutputBlue;

    for(int i = imgHeight-1; i>=0; i--)
    {
        for(int j = 0; j<imgWidth; j++)
        {

                fread(&(imgeGreenChannel_x[i * (imgWidth) + j]), sizeof(unsigned char), 1, image);
                fread(&(imgeBlueChannel_x[i * (imgWidth) + j]), sizeof(unsigned char), 1, image);
                fread(&(imgeRedChannel_x[i * (imgWidth) + j]), sizeof(unsigned char), 1, image);

        }
    }

    cudaMalloc((void **) &deviceInputRed, sizeof(int) * imgHeight * imgWidth);
    cudaMalloc((void **) &deviceInputBlue, sizeof(int) * imgHeight * imgWidth);
    cudaMalloc((void **) &deviceInputGreen, sizeof(int) * imgHeight * imgWidth);
    cudaMalloc((void **) &deviceOutputRd, sizeof(int) * imgHeight * imgWidth);
    cudaMalloc((void **) &deviceOutputBlue, sizeof(int) * imgHeight * imgWidth);
    cudaMalloc((void **) &deviceOutputGreen, sizeof(int) * imgHeight * imgWidth);

    int dimA = imgWidth*imgHeight;
    int numThreadsPerBlock = 256;
    int numBlocks = dimA / numThreadsPerBlock;
    int sharedMemSize = numThreadsPerBlock*sizeof(int);

    dim3 dimGrid(numBlocks);
    dim3 dimBlock(numThreadsPerBlock);

    cudaMemcpy(deviceInputRed,imgeRedChannel_x,sizeof(int) * imgHeight * imgWidth,cudaMemcpyHostToDevice);
    checkCUDAError("memcpy h-d r");
    cudaMemcpy(deviceInputGreen,imgeGreenChannel_x,sizeof(int) * imgHeight * imgWidth,cudaMemcpyHostToDevice);
    checkCUDAError("memcpy h-d g");
    cudaMemcpy(deviceInputBlue,imgeBlueChannel_x,sizeof(int) * imgHeight * imgWidth,cudaMemcpyHostToDevice);
    checkCUDAError("memcpy h-d b");

    median_filter<<< dimGrid , dimBlock, sharedMemSize>>> (deviceInputRed, deviceOutputRd, imgHeight, imgWidth);
    checkCUDAError("kernel invocation r");
    median_filter<<< dimGrid , dimBlock, sharedMemSize>>> (deviceInputGreen, deviceOutputGreen, imgHeight, imgWidth);
    checkCUDAError("kernel invocation g");
    median_filter<<< dimGrid , dimBlock, sharedMemSize>>> (deviceInputBlue, deviceOutputBlue, imgHeight, imgWidth);
    checkCUDAError("kernel invocation b");

    cudaMemcpy(imgeRedChannel_x, deviceOutputRd, imgHeight * imgWidth * sizeof(int), cudaMemcpyDeviceToHost);
    checkCUDAError("memcpy d-h r");
    cudaMemcpy(imgeGreenChannel_x, deviceOutputGreen, imgHeight * imgWidth * sizeof(int), cudaMemcpyDeviceToHost);
    checkCUDAError("memcpy d-h g");
    cudaMemcpy(imgeBlueChannel_x, deviceOutputBlue, imgHeight * imgWidth * sizeof(int), cudaMemcpyDeviceToHost);
    checkCUDAError("memcpy d-h b");

    cudaFree(deviceInputRed);
    cudaFree(deviceOutputRd);
    cudaFree(deviceInputGreen);
    cudaFree(deviceOutputGreen);
    cudaFree(deviceInputBlue);
    cudaFree(deviceOutputBlue);

    /*saving new picture*/
    fclose(image);

    char title[50]="after";
    strcat(title, ".bmp");

    remove(title);
    image = fopen(title,"wb");

    fwrite(&bmpFHeader, sizeof(BITMAPFILEHEADER), 1, image);
    fwrite(&bmpIHeader, sizeof(BITMAPINFOHEADER), 1, image);

    for(int i = imgHeight-1; i>=0; i--)
    {

        for(int j = 0; j<imgWidth; j++)
        {
            int b = imgeBlueChannel_x[i * (imgWidth) + j];
            int g = imgeGreenChannel_x[i * (imgWidth) + j];
            int r = imgeRedChannel_x[i * (imgWidth) + j]; 

            if(b>255)b=255;
            if(g>255)g=255;
            if(r>255)r=255;



            fwrite(&g, sizeof(unsigned char), 1, image);
            fwrite(&b, sizeof(unsigned char), 1, image);
            fwrite(&r, sizeof(unsigned char), 1, image);
        }
    }

    printf("Success!\n");
    fclose(image);
    system("pause");
    return 0;
}     
4

1 に答える 1