1

私はこのような記事をかなりの数見てきましたが、かなりの量の読み取りを行ったにもかかわらず、シリアル バージョンは現在これよりもはるかに高速に実行されるため、OpenMP で適切に並列化する次のコードを取得できないようです。

static double red_black_parallel_for_step(simulation* simObj, double stepSize, double* red, double* black){
double tmp = 0.0;   
double avg = 0.0;
double old = 0.0;
double max = -HUGE_VAL;
#pragma omp parallel \
shared(black, red, max) \
firstprivate(old, avg, tmp) 
{
    double priv_max = -HUGE_VAL;
    #pragma omp for 
    for(unsigned int j = 0; j < (*simObj).NY+2; j++){
        for(unsigned int i = 0; i < (int)floor((double)((*simObj).NX+2.0)/2.0); i++){
            for(unsigned int k = 1; k < (*simObj).NZ; k++){
                if(red[IX3] == HUGE_VAL) continue;
                old = red[IX3];
                avg = 0.0;
                const int x1 = ( black[IX3+IX3_XR1STEP] != HUGE_VAL ); 
                const int x2 = ( black[IX3+IX3_XR2STEP] != HUGE_VAL ); 
                const int y1 = ( black[IX3+IX3_YSTEP]   != HUGE_VAL ); 
                const int y2 = ( black[IX3-IX3_YSTEP]   != HUGE_VAL ); 
                const int z1 = ( black[IX3+IX3_ZSTEP]   != HUGE_VAL );
                const int z2 = ( black[IX3-IX3_ZSTEP]   != HUGE_VAL );
                if (x1) avg += black[IX3+IX3_XR1STEP];
                if (x2) avg += black[IX3+IX3_XR2STEP];
                if (y1) avg += black[IX3+IX3_YSTEP];
                if (y2) avg += black[IX3-IX3_YSTEP];
                if (z1) avg += black[IX3+IX3_ZSTEP];
                if (z2) avg += black[IX3-IX3_ZSTEP];
                avg /= (double) (x1+x2+y1+y2+z1+z2);
                red[IX3] = old + stepSize * (avg - old);
                tmp = fabs(old - red[IX3]) / fabs(old);
                if( tmp > priv_max ) priv_max = tmp;
            }
        }
    }
    #pragma omp flush (max)
    if ( priv_max > max ) {
        #pragma omp critical
        {
            if ( priv_max > max ) max = priv_max;
        }
    }
}
#pragma omp parallel \
shared(black, red, max) \
firstprivate(old, avg, tmp) 
{
    double priv_max = -HUGE_VAL;
    #pragma omp for 
    for(unsigned int j = 0; j < (*simObj).NY+2; j++){
        for(unsigned int i = 0; i < (int)floor((double)((*simObj).NX+2)/2.0); i++){
            for (unsigned int k = 1; k < (*simObj).NZ; k++ ){   
                if ( black[IX3] == HUGE_VAL ) continue;
                old = black[IX3];
                avg = 0.0;
                const int x1 = ( red[IX3+IX3_XB1STEP] != HUGE_VAL ); 
                const int x2 = ( red[IX3+IX3_XB2STEP] != HUGE_VAL ); 
                const int y1 = ( red[IX3+IX3_YSTEP]   != HUGE_VAL ); 
                const int y2 = ( red[IX3-IX3_YSTEP]   != HUGE_VAL ); 
                const int z1 = ( red[IX3+IX3_ZSTEP]   != HUGE_VAL );
                const int z2 = ( red[IX3-IX3_ZSTEP]   != HUGE_VAL );
                if (x1) avg += red[IX3+IX3_XB1STEP];
                if (x2) avg += red[IX3+IX3_XB2STEP];
                if (y1) avg += red[IX3+IX3_YSTEP];
                if (y2) avg += red[IX3-IX3_YSTEP];
                if (z1) avg += red[IX3+IX3_ZSTEP];
                if (z2) avg += red[IX3-IX3_ZSTEP];
                avg /= (double) (x1+x2+y1+y2+z1+z2);
                black[IX3] = old + stepSize * (avg - old);
                tmp = fabs(old - black[IX3]) / fabs(old); 
                if( tmp > priv_max ) priv_max = tmp;
            }
        }
    }
    #pragma omp flush (max)
    if ( priv_max > max ) {
        #pragma omp critical
        {
            if ( priv_max > max ) max = priv_max;
        }
    }
}
return max;
}

複雑な要因は、赤と黒の反復間の最大の相対変化 (最大) を追跡する必要があることです。どんな助けでも大歓迎です。

4

1 に答える 1

0

比較の後でのみ、クリティカル ブロック内でのみフラッシュを試みます。

/* not here: #pragma omp flush (max) */
if ( priv_max > max ) { // this should filter out most of the flush operations
    #pragma omp critical
    {
        if ( priv_max > max ) max = priv_max; // now flush; this operation will be exclusive/"critical"
        #pragma omp flush (max)
    }
}
于 2013-08-19T16:40:20.110 に答える