2

2 台の異なるマシンでプログラムを実行しています。1つでは問題なく正常に動作します。他方では、セグメンテーション違反が発生します。デバッグを通じて、障害が発生した場所を把握しましたが、それが発生する論理的な理由を把握できません。

1 つの関数に次のコードがあります。

// Call site inside the enclosing function (not shown in this excerpt): runs the
// particle exchange and then prints a trace line. Because endl flushes cout, this
// message appears only after pass_particles has returned, i.e. after all of its
// local objects have been destroyed.
pass_particles(particle_grid, particle_properties, input_data, coll_eros_track, collision_number_part, world, grid_rank_lookup, grid_locations);
cout<<"done passing particles"<<endl;

関数 pass_particles は次のようになります。

// Exchanges particles between neighbouring grid boxes in the x direction
// (east/west) using Boost.MPI.
//
// Visible steps:
//   1. Walk every box in particle_grid and queue particles for the west or east
//      neighbour (particles that left the box, and particles inside the overlap
//      strip of width input_data.diam_large() next to a box face). Particles that
//      are neither flagged in particle_in_box() for this box nor present in
//      particles_received_current are erased from the box.
//   2. Post non-blocking sends of the queued particle vectors.
//   3. Exchange, with every rank, the list of (source rank, tag) pairs describing
//      the messages just posted, then perform blocking receives for each announced
//      message and insert the received particles into particle_grid.
//   4. Wait on the outstanding send requests and print counters.
//
// Parameters:
//   particle_grid       box index -> (particle key -> Particle); modified in place.
//   particle_properties indexed by Particle::global_part_num(); its particle_in_box()
//                       flags are updated for sent/received particles.
//   input_data          box counts, periodic-wall flag and diam_large().
//   coll_eros_track     not used in the visible body.
//   collision_number_part not used in the visible body.
//   world               communicator used for barrier/isend/irecv/recv.
//   grid_rank_lookup    box index -> rank that owns the box.
//   grid_locations      box index -> coordinates; elements [0] and [1] are compared
//                       against position().x() as the lower/upper x bounds.
//
// NOTE(review): this is a partial excerpt. grid_iter, grid_iter_vec, counter,
// counter_sent, counter_received, counter_removed, reqs, particles_received_current,
// reqs_y_send, reqs_z_send and the particles_to_be_sent_{bottom,top,south,north}
// containers are used below but not declared here - presumably they are declared in
// code that was omitted; confirm against the full source.
void pass_particles(map<int,map<int,Particle> > & particle_grid, std::vector<Particle_props> & particle_properties, User_input& input_data, data_tracking & coll_eros_track, vector<int> & collision_number_part, mpi::communicator & world, std::map<int,int> & grid_rank_lookup, map<int,std::vector<double> > & grid_locations)
{
     //cout<<"east-west"<<endl;
    //east-west exchange (x direction)
    // Outgoing particles, keyed by the index of the box they are leaving.
    map<int, vector<Particle> > particles_to_be_sent_east;
    map<int, vector<Particle> > particles_to_be_sent_west;
    // Receive buffers reused for every incoming message.
    vector<Particle> particles_received_east;
    vector<Particle> particles_received_west;
    int counter_x_sent=0;
    int counter_x_received=0;
    for(grid_iter=particle_grid.begin();grid_iter!=particle_grid.end();grid_iter++)
    {
        map<int,Particle>::iterator part_iter;
        // No increment in the loop header: each branch below advances part_iter itself,
        // because the last branch erases the current element.
        for (part_iter=grid_iter->second.begin();part_iter!=grid_iter->second.end();)
        {
            // Case 1: the particle is currently flagged as belonging to this box.
            if (particle_properties[part_iter->second.global_part_num()].particle_in_box()[grid_iter->first])
            {
                //decide if a particle has left the box...need to consider whether particle was already outside the box
                // Left through the west face (x below this box's lower bound but above the
                // lower bound of the box z_numboxes() indices earlier), or wrapped around a
                // periodic x wall.
                // NOTE(review): grid_locations is accessed with operator[], which inserts a
                // default (empty) vector when the key is absent; indexing [0] on that would
                // be out of bounds - confirm the neighbour keys always exist.
                if ((part_iter->second.position().x()<(grid_locations[grid_iter->first][0]) && part_iter->second.position().x()>(grid_locations[grid_iter->first-input_data.z_numboxes()][0]))
                    || (input_data.periodic_walls_x() && (grid_iter->first-floor(grid_iter->first/(input_data.xz_numboxes()))*input_data.xz_numboxes()<input_data.z_numboxes()) && (part_iter->second.position().x()>(grid_locations[input_data.total_boxes()-1][0]))))
                {
                    particles_to_be_sent_west[grid_iter->first].push_back(part_iter->second);
                    // The particle no longer belongs to this box.
                    particle_properties[particle_grid[grid_iter->first][part_iter->first].global_part_num()].particle_in_box()[grid_iter->first]=false;
                    counter_sent++;
                    counter_x_sent++;
                }

                // Mirror image of the branch above for the east face.
                else if ((part_iter->second.position().x()>(grid_locations[grid_iter->first][1]) && part_iter->second.position().x()<(grid_locations[grid_iter->first+input_data.z_numboxes()][1]))
                    || (input_data.periodic_walls_x() && (grid_iter->first-floor(grid_iter->first/(input_data.xz_numboxes()))*input_data.xz_numboxes())>input_data.xz_numboxes()-input_data.z_numboxes()-1) && (part_iter->second.position().x()<(grid_locations[0][1])))
                {
                    particles_to_be_sent_east[grid_iter->first].push_back(part_iter->second);
                    particle_properties[particle_grid[grid_iter->first][part_iter->first].global_part_num()].particle_in_box()[grid_iter->first]=false;
                    counter_sent++;
                    counter_x_sent++;
                }

                //select particles in overlap areas to send to neighboring cells
                // Still inside the box but within diam_large() of the west face: a copy is
                // sent west and the in-box flag is left unchanged.
                else if ((part_iter->second.position().x()>(grid_locations[grid_iter->first][0]) && part_iter->second.position().x()<(grid_locations[grid_iter->first][0]+input_data.diam_large())))
                {
                    particles_to_be_sent_west[grid_iter->first].push_back(part_iter->second);
                    counter_sent++;
                    counter_x_sent++;
                }

                // Same for the strip next to the east face.
                else if ((part_iter->second.position().x()<(grid_locations[grid_iter->first][1]) && part_iter->second.position().x()>(grid_locations[grid_iter->first][1]-input_data.diam_large())))
                {
                    particles_to_be_sent_east[grid_iter->first].push_back(part_iter->second);
                    counter_sent++;
                    counter_x_sent++;
                }
                ++part_iter;
            }
            // Case 2: not flagged as in this box, but recorded in particles_received_current
            // for this box; only the overlap-strip forwarding is applied.
            else if (particles_received_current[grid_iter->first].find(part_iter->first)!=particles_received_current[grid_iter->first].end())
            {
                if ((part_iter->second.position().x()>(grid_locations[grid_iter->first][0]) && part_iter->second.position().x()<(grid_locations[grid_iter->first][0]+input_data.diam_large())))
                {
                    particles_to_be_sent_west[grid_iter->first].push_back(part_iter->second);
                    counter_sent++;
                    counter_x_sent++;
                }

                else if ((part_iter->second.position().x()<(grid_locations[grid_iter->first][1]) && part_iter->second.position().x()>(grid_locations[grid_iter->first][1]-input_data.diam_large())))
                {
                    particles_to_be_sent_east[grid_iter->first].push_back(part_iter->second);
                    counter_sent++;
                    counter_x_sent++;
                }
                part_iter++;
            }
            // Case 3: neither of the above - drop the particle from this box.
            else
            {
                // Post-increment hands the old position to erase() while part_iter already
                // points at the next element, so the loop iterator stays valid.
                particle_grid[grid_iter->first].erase(part_iter++);
                counter_removed++;
            }
        }
    }

    world.barrier();

    // NOTE(review): an array whose size is a runtime value is a variable-length array,
    // which is a compiler extension rather than standard C++. Its size is the number of
    // boxes with queued particles, but the send loops below only fill counter_x_reqs
    // slots (a box on a non-periodic boundary matches neither branch), so trailing
    // slots may stay default-constructed and are still passed to wait_all at the end.
    mpi::request reqs_x_send[particles_to_be_sent_west.size()+particles_to_be_sent_east.size()];
    // Per destination rank: (this rank, tag) for every particle message sent to it.
    vector<multimap<int,int> > box_sent_x_info;
    box_sent_x_info.resize(world.size());
    // Per source rank: the (source rank, tag) pairs that rank announced to us.
    vector<multimap<int,int> > box_received_x_info;
    box_received_x_info.resize(world.size());
    int counter_x_reqs=0;
    //send particles
    for(grid_iter_vec=particles_to_be_sent_west.begin();grid_iter_vec!=particles_to_be_sent_west.end();grid_iter_vec++)
    {
        if (grid_iter_vec->second.size()!=0)
        {
            // Westward messages are tagged with the destination box index.
            // NOTE(review): the original comment said '50 will be "west" tag', but no
            // tag of 50 is used anywhere in the visible code.
            // Periodic wrap: the destination is the box z_numboxes()*(x_numboxes()-1)
            // indices further on.
            if (input_data.periodic_walls_x() && (grid_iter_vec->first-floor(grid_iter_vec->first/(input_data.xz_numboxes()))*input_data.xz_numboxes()<input_data.z_numboxes()))
            {
                reqs_x_send[counter_x_reqs++]=world.isend(grid_rank_lookup[grid_iter_vec->first + input_data.z_numboxes()*(input_data.x_numboxes()-1)], grid_iter_vec->first + input_data.z_numboxes()*(input_data.x_numboxes()-1), particles_to_be_sent_west[grid_iter_vec->first]);
                box_sent_x_info[grid_rank_lookup[grid_iter_vec->first + input_data.z_numboxes()*(input_data.x_numboxes()-1)]].insert(pair<int,int>(world.rank(), grid_iter_vec->first + input_data.z_numboxes()*(input_data.x_numboxes()-1)));
            }
            // Interior box: the destination is the box z_numboxes() indices earlier.
            else if (!(grid_iter_vec->first-floor(grid_iter_vec->first/(input_data.xz_numboxes()))*input_data.xz_numboxes()<input_data.z_numboxes()))
            {
                reqs_x_send[counter_x_reqs++]=world.isend(grid_rank_lookup[grid_iter_vec->first - input_data.z_numboxes()], grid_iter_vec->first - input_data.z_numboxes(), particles_to_be_sent_west[grid_iter_vec->first]);
                box_sent_x_info[grid_rank_lookup[grid_iter_vec->first - input_data.z_numboxes()]].insert(pair<int,int>(world.rank(),grid_iter_vec->first - input_data.z_numboxes()));
            }
        }
    }

    for(grid_iter_vec=particles_to_be_sent_east.begin();grid_iter_vec!=particles_to_be_sent_east.end();grid_iter_vec++)
    {
        if (grid_iter_vec->second.size()!=0)
        {
            // Eastward messages are tagged with 2000000000 minus the destination box
            // index, which lets the receiver tell them apart from westward messages.
            // NOTE(review): the original comment said '60 will be "east" tag', but no
            // tag of 60 is used anywhere in the visible code.
            // NOTE(review): the floor(...) expression here is parenthesised differently
            // from the westward loop (the multiplication is inside floor) - confirm
            // this is intended.
            if (input_data.periodic_walls_x() && (grid_iter_vec->first-floor(grid_iter_vec->first/(input_data.xz_numboxes())*input_data.xz_numboxes())>input_data.xz_numboxes()-input_data.z_numboxes()-1))
            {
                reqs_x_send[counter_x_reqs++]=world.isend(grid_rank_lookup[grid_iter_vec->first - input_data.z_numboxes()*(input_data.x_numboxes()-1)], 2000000000-(grid_iter_vec->first - input_data.z_numboxes()*(input_data.x_numboxes()-1)), particles_to_be_sent_east[grid_iter_vec->first]);
                box_sent_x_info[grid_rank_lookup[grid_iter_vec->first - input_data.z_numboxes()*(input_data.x_numboxes()-1)]].insert(pair<int,int>(world.rank(),2000000000-(grid_iter_vec->first - input_data.z_numboxes()*(input_data.x_numboxes()-1))));
            }
            else if (!(grid_iter_vec->first-floor(grid_iter_vec->first/(input_data.xz_numboxes())*input_data.xz_numboxes())>input_data.xz_numboxes()-input_data.z_numboxes()-1))
            {
                reqs_x_send[counter_x_reqs++]=world.isend(grid_rank_lookup[grid_iter_vec->first + input_data.z_numboxes()], 2000000000-(grid_iter_vec->first + input_data.z_numboxes()), particles_to_be_sent_east[grid_iter_vec->first]);
                box_sent_x_info[grid_rank_lookup[grid_iter_vec->first + input_data.z_numboxes()]].insert(pair<int,int>(world.rank(), 2000000000-(grid_iter_vec->first + input_data.z_numboxes())));
            }
        }
    }

    // Tell every rank (including this one) which particle messages to expect from us,
    // and learn what every rank has sent to us. Tag 1000000000 is used for this
    // bookkeeping exchange.
    // NOTE(review): reqs and counter are not declared in this excerpt; reqs must hold
    // at least world.size()*2 requests - confirm against the full source.
    counter=0;
    for (int i=0;i<world.size();i++)
    {
        //if (world.rank()!=i)
        //{
            reqs[counter++]=world.isend(i,1000000000,box_sent_x_info[i]);
            reqs[counter++]=world.irecv(i,1000000000,box_received_x_info[i]);
        //}
    }

    mpi::wait_all(reqs, reqs + world.size()*2);

    //receive particles
    //receive west particles
    for (int j=0;j<world.size();j++)
    {
        multimap<int,int>::iterator received_info_iter;
        // Each entry is (source rank, tag) for one particle message addressed to this rank.
        for (received_info_iter=box_received_x_info[j].begin();received_info_iter!=box_received_x_info[j].end();received_info_iter++)
        {
            // Tags below 1000000000 are westward messages: the tag is the local box index.
            if (received_info_iter->second<1000000000)
            {
                //receive the message
                world.recv(received_info_iter->first,received_info_iter->second,particles_received_west);
                //loop through all the received particles and add them to the particle_grid for this processor
                for (unsigned int i=0;i<particles_received_west.size();i++)
                {
                    // map::insert leaves an existing entry with the same key untouched.
                    particle_grid[received_info_iter->second].insert(pair<int,Particle>(particles_received_west[i].global_part_num(),particles_received_west[i]));

                    // Flag the particle as in-box only if it lies strictly inside the x bounds.
                    if(particles_received_west[i].position().x()>grid_locations[received_info_iter->second][0] && particles_received_west[i].position().x()<grid_locations[received_info_iter->second][1])
                    {
                        particle_properties[particles_received_west[i].global_part_num()].particle_in_box()[received_info_iter->second]=true;

                    }
                    counter_received++;
                    counter_x_received++;
                }
            }
            // Otherwise it is an eastward message: the local box index is 2000000000 - tag.
            else
            {
                //receive the message
                world.recv(received_info_iter->first,received_info_iter->second,particles_received_east);
                //loop through all the received particles and add them to the particle_grid for this processor
                for (unsigned int i=0;i<particles_received_east.size();i++)
                {
                    particle_grid[2000000000-received_info_iter->second].insert(pair<int,Particle>(particles_received_east[i].global_part_num(),particles_received_east[i]));
                    if(particles_received_east[i].position().x()>grid_locations[2000000000-received_info_iter->second][0] && particles_received_east[i].position().x()<grid_locations[2000000000-received_info_iter->second][1])
                    {
                        particle_properties[particles_received_east[i].global_part_num()].particle_in_box()[2000000000-received_info_iter->second]=true;

                    }
                    counter_received++;
                    counter_x_received++;
                }
            }
        }
    }

    // Wait for the outstanding non-blocking sends before the send buffers (locals of
    // this function) go out of scope.
    // NOTE(review): the y/z request arrays and containers are not declared in this
    // excerpt. Each range below is sized by the number of queued boxes, not by the
    // number of requests actually posted (counter_x_reqs for x) - confirm that waiting
    // on never-assigned requests is safe with the Boost.MPI version in use.
    mpi::wait_all(reqs_y_send, reqs_y_send + particles_to_be_sent_bottom.size()+particles_to_be_sent_top.size());
    mpi::wait_all(reqs_z_send, reqs_z_send + particles_to_be_sent_south.size()+particles_to_be_sent_north.size());
    mpi::wait_all(reqs_x_send, reqs_x_send + particles_to_be_sent_west.size()+particles_to_be_sent_east.size());

    cout<<"x sent "<<counter_x_sent<<" and received "<<counter_x_received<<" from rank "<<world.rank()<<endl;

    cout<<"rank "<<world.rank()<<" sent "<<counter_sent<<" and received "<<counter_received<<" and removed "<<counter_removed<<endl;
    cout<<"done passing"<<endl;
}

一部のコードのみを投稿しました (投稿していないコードの一部にあるため、一部の変数が未定義のように見える可能性があるという事実は無視してください)

(失敗する方のマシンで)コードを実行すると、done passing は出力されますが、done passing particles は出力されません。

呼び出された関数の末尾と、呼び出し側の次の行との間でセグメンテーション違反が発生しうる理由、そして一方のマシンでは発生するのに他方では発生しない理由が分からず、途方に暮れています。

4

3 に答える 3

5

関数の最後と呼び出し元の後続の行の間でクラッシュしている場合は、ローカル変数のデストラクタでクラッシュしている可能性があります。どのオブジェクトのデストラクタがクラッシュしているかを調べるには、デバッガでプログラムを実行する必要があります。

于 2012-09-28T17:39:01.103 に答える
2

いくつかの可能性があります:

  1. 実際には関数から戻っているものの、cout の出力はバッファリングされており、表示される前にアプリケーションがクラッシュするため、「done passing particles」が表示されない。
  2. デストラクタの中でセグメンテーション違反を起こしているローカルオブジェクト(クラス)がある。

デバッガーで実行して、実際にクラッシュしている場所を見つけてください。

編集:

gcc を使用しているとのことなので、-g フラグを付けてコンパイルし、gdb 上で実行してください。gdb は問題が発生している箇所を正確に示してくれます(おそらく null ポインタの逆参照です)。

于 2012-09-28T16:43:30.693 に答える
0

後からこの質問にたどり着いた人のために書いておきます。Boost.MPI を(当時の)最新バージョンである 1.50 に更新したところ、この問題は発生しなくなりました。根本的な解決策とは言えませんが、これで動くようになりました。

于 2012-12-05T23:01:50.377 に答える