現在、IPP を使用していたアプリケーションを NPP に移植しています。nppiWarpPerspectiveBack_32f_C1R で問題が発生しており、警告 2 (NPP_WRONG_INTERSECTION_QUAD_WARNING) が返されます。しかし、同じ係数を使った IPP の呼び出しは正常に動作することがわかっています。
添付したプログラムには、ほとんど差のない 2 組の係数が含まれていますが、一方は動作し、もう一方は動作しません。この件について何か助言をいただければ幸いです。
// WarpIssue.cpp : Defines the entry point for the console application.
//
#include <stdafx.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "nppi.h"
// Forward declaration of warp(), which is defined after the entry point.
// It takes a source image (pointer, size, row step), a destination image
// (pointer, size, row step) and a 3x3 coefficient matrix, and reports its
// outcome as a cudaError_t (cudaSuccess when nothing went wrong).
cudaError_t warp(Npp32f* srcImg, NppiSize srcSize, int srcWidthStep, Npp32f* dstImg, NppiSize dstSize, int dstWidthStep, double coeff[][3]);
int _tmain(int argc, _TCHAR* argv[])
{
const int arraySize = 5;
const int a[arraySize] = { 1, 2, 3, 4, 5 };
const int b[arraySize] = { 10, 20, 30, 40, 50 };
int c[arraySize] = { 0 };
// Choose which GPU to run on, change this on a multi-GPU system.
cudaError_t cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
return 1;
}
//Allocate src image data
int srcWidth=1600;
int srcHeight=1200;
int srcWidthStep = 0;
Npp32f* srcImgData = nppiMalloc_32f_C1(srcWidth, srcHeight, &srcWidthStep);
NppiSize srcSize = {srcWidth, srcHeight};
nppiSet_32f_C1R(255.0f, srcImgData, srcWidthStep, srcSize);
//Allocate dst image data
int dstWidth=720;
int dstHeight=480;
int dstWidthStep = 0;
Npp32f* dstImgData = nppiMalloc_32f_C1(dstWidth, dstHeight, &dstWidthStep);
NppiSize dstSize = {dstWidth, dstHeight};
nppiSet_32f_C1R(0.0f, dstImgData, dstWidthStep, dstSize);
//Not Working
double coeff[3][3] = { 0.990986, -0.008086, 733.528174,
0.002669, 1.000126, 352.375707,
-0.000010, 0.000000, 1.001975, };
//Working
/*double coeff[3][3] = { 0.991379, -0.007775, 722.431470,
0.002568, 1.000126, 352.410450,
-0.000009, 0.000000, 1.001949 };*/
//Warp
cudaStatus = warp(srcImgData, srcSize, srcWidthStep, dstImgData, dstSize, dstWidthStep, coeff);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "Back warp failed!");
return 1;
}
//Copy it to cpu
int step = 0;
Npp8u* byteImg = nppiMalloc_8u_C1(dstWidth, dstHeight, &step);
NppiSize sz = {dstWidth, dstHeight };
nppiConvert_32f8u_C1R(dstImgData, dstWidthStep, byteImg, step, sz, NPP_RND_NEAR);
char* cpuImg = (char*)malloc(dstWidth*dstHeight);
cudaMemcpy2D(cpuImg, dstWidth, byteImg, step, dstWidth, dstHeight, cudaMemcpyDeviceToHost);
FILE* imgFile = fopen("output.raw", "w");
fwrite(cpuImg, dstWidth*dstHeight, 1, imgFile);
fclose(imgFile);
//Cleanup
nppiFree(srcImgData);
nppiFree(dstImgData);
nppiFree(byteImg);
cudaStatus = cudaThreadExit();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaThreadExit failed!");
return 1;
}
return 0;
}
/*
 * Runs nppiWarpPerspectiveBack_32f_C1R with linear interpolation over the
 * whole source and destination images, then waits for the device to finish.
 *
 * srcImg / srcSize / srcWidthStep : source image pointer, size and row step.
 * dstImg / dstSize / dstWidthStep : destination image pointer, size and row step.
 * coeff                           : 3x3 perspective coefficients passed straight to NPP.
 *
 * Returns cudaSuccess when NPP reported NPP_SUCCESS and the device
 * synchronized cleanly. Any other NPP status (negative codes are errors,
 * positive codes are warnings) is printed to stderr and reported as
 * cudaErrorUnknown: an NppStatus value is not a cudaError_t, so it must not
 * be cast into one. A synchronization failure is returned as-is.
 */
cudaError_t warp(Npp32f* srcImg, NppiSize srcSize, int srcWidthStep, Npp32f* dstImg, NppiSize dstSize, int dstWidthStep, double coeff[][3])
{
    // Both regions of interest span their entire image.
    NppiRect srcRoi = {0, 0, srcSize.width, srcSize.height};
    NppiRect dstRoi = {0, 0, dstSize.width, dstSize.height};

    NppStatus nppStatus = nppiWarpPerspectiveBack_32f_C1R(srcImg, srcSize, srcWidthStep, srcRoi,
                                                          dstImg, dstWidthStep, dstRoi, coeff, NPPI_INTER_LINEAR);
    if (nppStatus != NPP_SUCCESS) {
        fprintf(stderr, "nppiWarpPerspectiveBack_32f_C1R returned %s code %d !\n",
                (nppStatus < 0) ? "error" : "warning", (int)nppStatus);
        return cudaErrorUnknown;
    }

    cudaError_t cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after nppiWarpPerspectiveBack_32f_C1R!\n", (int)cudaStatus);
    }
    return cudaStatus;
}