#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>
#define h 1
#define XY0 0
#define MAX_XY 5
#define N 2 //particles per subdomain
#define BLOCKS 4
#define a 1
#define b 1
float velocityX(float x, float y);
float velocityY(float x, float y);
int malloc2dfloat(float ***array, int length);
int main (int argc, char **argv)
{
typedef struct {
float xcoord;
float ycoord;
float velx;
float vely;
} particle;
int points= (int) floor((MAX_XY - XY0)/h) + 1;
int procsize = 2;
int myid, nproc;
MPI_Datatype particletype, oldtypes[1];
MPI_Aint offset[1], extent;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
int startElementX, startElementY, endElementX, endElementY;
particle* sub_pars = (particle*)malloc(sizeof(particle)*N);
offset[0] = 0;
int blockcounts[1];
blockcounts[0] = 4;
oldtypes[0] = MPI_FLOAT;
MPI_Type_struct(1, blockcounts, offset, oldtypes, &particletype);
MPI_Type_commit(&particletype);
particle* particles = (particle*)malloc(sizeof(particle) * N * procsize*procsize);
if (nproc != procsize*procsize){
printf("Must use np=4 -- split into 4 blocks");
MPI_Abort(MPI_COMM_WORLD,1);
}
srand(time(NULL)+myid);
if (myid == 0)
{
float mins[4];
startElementX = 0;
startElementY = 0;
endElementX = (points/procsize)-1;
endElementY = (points/procsize) -1;
}
else if (myid == 1)
{
startElementX = 0;
startElementY = (points/procsize);
endElementX = (points/procsize) -1;
endElementY = points - 1;
}
else if (myid == 2)
{
startElementX = (points/procsize);
startElementY = 0;
endElementX = points - 1;
endElementY = (points/procsize) -1;
}
else
{
startElementX = (points/procsize);
startElementY = (points/procsize);
endElementX = points-1;
endElementY = points-1;
}
int i;
float localmin;
float mag;
for (i=0; i<N; i++)
{
sub_pars[i].xcoord = ((startElementX + rand()/(RAND_MAX / (endElementX-startElementX+1)+1)))*h + XY0;
printf("%f\n", sub_pars[i].xcoord);
sub_pars[i].ycoord = ((startElementY + rand()/(RAND_MAX / (endElementY-startElementY+1)+1)))*h + XY0;
sub_pars[i].velx = velocityX(sub_pars[i].xcoord, sub_pars[i].ycoord);
sub_pars[i].vely = velocityY(sub_pars[i].xcoord, sub_pars[i].ycoord);
mag = sqrt(sub_pars[i].velx*sub_pars[i].velx + sub_pars[i].vely*sub_pars[i].vely);
if (i==0 || localmin > mag) localmin = mag;
}
printf("localmin of %d is %.2f \n", myid, localmin);
MPI_Allgather(&sub_pars, 1, particletype, particles ,1, particletype, MPI_COMM_WORLD);
MPI_Finalize();
if(myid == 0)
{
int k;
for (k=0; k<N*4; k++)
{
printf("test %.2f \n", particles[i].xcoord);
}
}
return 0;
}
float velocityX(float x, float y)
{
float temp = (a+(b*(y*y-x*x))/((x*x+y*y)*(x*x+y*y)));
return temp;
}
float velocityY(float x, float y)
{
float temp = (-1*(2*b*x*y)/((x*x+y*y)*(x*x+y*y)));
return temp;
}
すべてのパーティクルに対して同じ値を返すだけですが、各スレッド内で正しく計算されていることはわかっているため、MPI_Allgather に問題があります。どのように見えるべきか説明してもらえますか?