c++ - コレスキー分解ScaLapackエラー

Question

次のエラーが発生しましたが、理由がわかりません。

{    1,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    1,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    1}:  On entry to PDPOTRF parameter number    2 had an illegal value
{    0,    0}:  On entry to PDPOTRF parameter number    2 had an illegal value


 info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i.

エラーメッセージの意味はわかっていますが、Webで入手できる古いドキュメントに可能な限り従い、Webで動作するサンプルコードから並列コレスキー分解を組み合わせようとしました。どこが間違っていたのかわかりません。

以下のコードで私がどこで間違っていたのかを誰かが説明できますか？コードの概要は次のとおりです。4つのプロセッサでテストし、8x8マトリックスを2 x 2プロセッサに分割します。ブロックグリッドはファイルからマトリックスをロードします。ここでは、8x8マトリックスファイルの例を示します。

182   147   140   125   132    76   126   157
147   213   185   150   209   114   166   188
140   185   232   129   194   142   199   205
125   150   129   143   148    81   104   150
132   209   194   148   214   122   172   189
76   114   142    81   122   102   129   117
126   166   199   104   172   129   187   181
157   188   205   150   189   117   181   259

例に従って、マトリックスを4つのノードのそれぞれに1つずつある4つの個別の4x4ローカルアレイに分散しました。次に、関連するルーチンを実行descinit_して呼び出します。これにより、上記のエラーが発生します。pdpotrf_どこが間違っていたのかわからず、できる限りドキュメントに従おうとしました。fortranでの並列コレスキー分解の実例も大いに役立ちます

関数呼び出しのリファレンス

pdpotrf_

descinit_

実行パラメータ

code name - Meaning = Value
N - Global Rows = 8
M - Global Cols = 8
Nb - Local Block Rows = 2
Mb - Local Block Cols = 2
nrows - Local Rows = 4
ncols Local Cols= 4
lda - Leading dimension of local array = 4 (i've tried 2,4,8)
ord - Order of Matrix = 4   (i've also tried many different things here as well)

上記のパラメータをすべてのノードに出力しましたが、同じです

#include <mpi.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <sstream>
using namespace std;


/*
  To compile:
  mpic++ test.cpp -o test -L/home/admin/libs -lscalapack -lrefblas -ltmg -lreflapack -lgfortran -Wall -O2
  To run:
  mpirun -np 4 ./test matrixfile 8 8 2 2

*/

extern "C" {
  /* Cblacs declarations */
  void Cblacs_pinfo(int*, int*);
  void Cblacs_get(int, int, int*);
  void Cblacs_gridinit(int*, const char*, int, int);
  void Cblacs_gridinfo(int, int*, int*, int*,int*);
  void Cblacs_pcoord(int, int, int*, int*);
  void Cblacs_gridexit(int);
  void Cblacs_barrier(int, const char*);
  void Cdgerv2d(int, int, int, double*, int, int, int);
  void Cdgesd2d(int, int, int, double*, int, int, int);

  int numroc_(int*, int*, int*, int*, int*);

  void pdpotrf_(char*, int*, double*,
        int*, int*, int*, int*);

  void descinit_( int *, int *, int *, int *, int *, int *, int *,
          int *, int *, int *);

}

int main(int argc, char **argv){
  /* MPI */
  int mpirank,nprocs;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  double MPIelapsed;
  double MPIt2;
  double MPIt1;

  /* Helping vars */
  int iZERO = 0;
  int verbose = 1;
  bool mpiroot = (mpirank == 0);

  if (argc < 6) {
    if (mpiroot)
      cerr << "Usage: matrixTest matrixfile N M Nb Mb"
       << endl
       << " N = Rows , M = Cols , Nb = Row Blocks , Mb = Col Blocks "
       << endl;

    MPI_Finalize();
    return 1;
  }
  /* Scalapack / Blacs Vars */
  int N, M, Nb, Mb;
  int descA[9];
  int info = 0;
  //  int mla = 4;
  int ord = 8;



  double *A_glob = NULL, *A_glob2 = NULL, *A_loc = NULL;

  /* Parse command line arguments */
  if (mpiroot) {
    /* Read command line arguments */
    stringstream stream;
    stream << argv[2] << " " << argv[3] << " " << argv[4] << " " << argv[5];
    stream >> N >> M >> Nb >> Mb;


    /* Reserve space and read matrix (with transposition!) */
    A_glob  = new double[N*M];
    A_glob2 = new double[N*M];
    string fname(argv[1]);
    ifstream file(fname.c_str());
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    file >> *(A_glob + N*c + r);
      }
    }

    /* Print matrix */

      if(verbose == 1) {
    cout << "Matrix A:\n";
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
        cout << setw(3) << *(A_glob + N*c + r) << " ";
      }
      cout << "\n";
    }
    cout << endl;
      }


  }

  /* Begin Cblas context */
  int ctxt, myid, myrow, mycol, numproc;
  //<TODO> make dynamic
  int procrows = 2, proccols = 2;
  Cblacs_pinfo(&myid, &numproc);
  Cblacs_get(0, 0, &ctxt);
  Cblacs_gridinit(&ctxt, "Row-major", procrows, proccols);
  Cblacs_gridinfo( ctxt, &procrows, &proccols, &myrow, &mycol );
  /* process coordinates for the process grid */
  // Cblacs_pcoord(ctxt, myid, &myrow, &mycol);


  /* Broadcast of the matrix dimensions */
  int dimensions[4];
  if (mpiroot) {
    dimensions[0] = N;//Global Rows
    dimensions[1] = M;//Global Cols
    dimensions[2] = Nb;//Local Rows
    dimensions[3] = Mb;//Local Cols
  }
  MPI_Bcast(dimensions, 4, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&ord, 1, MPI_INT, 0, MPI_COMM_WORLD);
  N = dimensions[0];
  M = dimensions[1];
  Nb = dimensions[2];
  Mb = dimensions[3];

  int nrows = numroc_(&N, &Nb, &myrow, &iZERO, &procrows);
  int ncols = numroc_(&M, &Mb, &mycol, &iZERO, &proccols);

  int lda = max(1,nrows);

  MPI_Bcast(&lda, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* Print grid pattern */
  if (myid == 0)
    cout << "Processes grid pattern:" << endl;
  for (int r = 0; r < procrows; ++r) {
    for (int c = 0; c < proccols; ++c) {
      Cblacs_barrier(ctxt, "All");
      if (myrow == r && mycol == c) {
    cout << myid << " " << flush;
      }
    }
    Cblacs_barrier(ctxt, "All");
    if (myid == 0)
      cout << endl;
  }

  if(myid == 0){
    cout <<"Run Parameters"<<endl;
    cout <<"Global Rows = " << M <<endl;
    cout <<"Global Cols = " << N <<endl;
    cout <<"Local Block Rows = " << Mb <<endl;
    cout <<"Local Block Cols = " << Nb <<endl;
    cout << "nrows = "<<nrows<<endl;
    cout << "ncols = "<<ncols<<endl;
    cout << "lda = "<<lda<<endl;
    cout <<"Order = "<<ord<<endl;
  }


  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }
  A_loc = new double[nrows*ncols];
  for (int i = 0; i < nrows*ncols; ++i) *(A_loc+i)=0.;

  /* Scatter matrix */
  int sendr = 0, sendc = 0, recvr = 0, recvc = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (mpiroot) {
    Cdgesd2d(ctxt, nr, nc, A_glob+N*c+r, N, sendr, sendc);
      }

      if (myrow == sendr && mycol == sendc) {
    Cdgerv2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

    }

    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }

  /* Print local matrices */
  if(verbose == 1) {
  for (int id = 0; id < numproc; ++id) {
    if (id == myid) {
    cout << "A_loc on node " << myid << endl;
    for (int r = 0; r < nrows; ++r) {
      for (int c = 0; c < ncols; ++c)
        cout << setw(3) << *(A_loc+nrows*c+r) << " ";
      cout << endl;
    }
    cout << endl;
      }
      Cblacs_barrier(ctxt, "All");
    }
  }

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  /* DescInit */
  info=0;
  descinit_(descA, &N, &M, &Nb, &Mb,&iZERO,&iZERO,&ctxt, &lda, &info);

  if(mpiroot){
    if(verbose == 1){
      if (info == 0){
    cout<<"Description init sucesss!"<<endl;
      }
      if(info < 0){
    cout <<"Error Info < 0: if INFO = -i, the i-th argument had an illegal value"<< endl
         <<"Info = " << info<<endl;
      }
    }
    // Cblacs_barrier(ctxt, "All");
  }

  //psgesv_(n, 1, al, 1,1,idescal, ipiv, b, 1,1,idescb,  info) */
  //    psgesv_(&n, &one, al, &one,&one,idescal, ipiv, b, &one,&one,idescb,  &info);
  //pXelset http://www.netlib.org/scalapack/tools/pdelset.f


  /* CHOLESKY HERE */
  info = 0;
  MPIt1=MPI_Wtime();

  pdpotrf_("L",&ord,A_loc,&Nb,&Mb,descA,&info);

  for (int id = 0; id < numproc; ++id) {
    Cblacs_barrier(ctxt, "All");
  }

  MPIt2 = MPI_Wtime();
  MPIelapsed=MPIt2-MPIt1;
  if(mpiroot){
    std::cout<<"Cholesky MPI Run Time" << MPIelapsed<<std::endl;

    if(info == 0){
      std::cout<<"SUCCESS"<<std::endl;
    }
    if(info < 0){

      cout << "info < 0:  If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. " << endl;
      cout<<"info = " << info << endl;
    }
    if(info > 0){
      std::cout<<"matrix is not positve definte"<<std::endl;
    }
  }

  //sanity check set global matrix to zero before it's recieved by nodes
  if(mpiroot){
    for (int r = 0; r < N; ++r) {
      for (int c = 0; c < M; ++c) {
    A_glob2[c *N + r]  = 0; 
      }
    }
  }

  /* Gather matrix */
  sendr = 0;
  for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
    sendc = 0;
    // Number of rows to be sent
    // Is this the last row block?
    int nr = Nb;
    if (N-r < Nb)
      nr = N-r;

    for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
      // Number of cols to be sent
      // Is this the last col block?
      int nc = Mb;
      if (M-c < Mb)
    nc = M-c;

      if (myrow == sendr && mycol == sendc) {
    // Send a nr-by-nc submatrix to process (sendr, sendc)
    Cdgesd2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
    recvc = (recvc+nc)%ncols;
      }

      if (mpiroot) {
    // Receive the same data
    // The leading dimension of the local matrix is nrows!
    Cdgerv2d(ctxt, nr, nc, A_glob2+N*c+r, N, sendr, sendc);
      }
    }
    if (myrow == sendr)
      recvr = (recvr+nr)%nrows;
  }
  /* Print test matrix */
  if (mpiroot) {
    if(verbose == 1){
      cout << "Matrix A test:\n";
      for (int r = 0; r < N; ++r) {
    for (int c = 0; c < M; ++c) {
      cout << setw(3) << *(A_glob2+N*c+r) << " ";
    }
    cout << endl;
      }
    }
  }

  /* Release resources */
  delete[] A_glob;
  delete[] A_glob2;
  delete[] A_loc;
  Cblacs_gridexit(ctxt);
  MPI_Finalize();
}

score 7 · Accepted Answer

そうです-私はこれを解決しました。これがあなたがしなければならないことです（私は変更されたMPIプログラムの結果をOctaveのあなたのマトリックスのコレスキー分解に対してチェックしました-それは機能します）。

IBMによる次のLAPACKリファレンスは、リンクにあるものよりも役立つことがわかりました：http: //publib.boulder.ibm.com/infocenter/clresctr/vxrx/index.jsp? topic=%2Fcom.ibm.cluster。 pessl.v4r2.pssl100.doc％2Fam6gr_lpotrf.htm

PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO )

あなたは通り過ぎMbて、そしてとNbしIAてJA。ただし、これらのパラメーターは、より大きなマトリックス内にグローバルマトリックスの開始行と列を提供することを目的としています。これらは、大きな行列があり、部分行列のコレスキー分解のみが必要な場合にのみ関係します。あなたの場合、IAそしてJA両方がなければなりません1！

だからあなたがする必要があるのは：

int IA = 1;
int JA = 1;
pdpotrf_("L",&ord,A_loc,&IA,&JA,descA,&info);

int ord = 8;コマンドラインから読み取った値に依存するようにハードコードを変更することもできます。そうしないと、後で問題が発生します。

上記のように変更されたプログラムの出力：

Cholesky MPI Run Time0.000659943
SUCCESS
Matrix A test:
13.4907 147 140 125 132  76 126 157 
10.8964 9.70923 185 150 209 114 166 188 
10.3775 7.4077 8.33269 129 194 142 199 205 
9.26562 5.0507 -0.548194 5.59806 148  81 104 150 
9.78449 10.5451 1.72175 0.897537 1.81524 122 172 189 
5.63349 5.41911 5.20784 0.765767 0.0442447 3.63139 129 117 
9.33974 6.61543 6.36911 -2.22569 1.03941 2.48498 1.79738 181 
11.6376 6.30249 4.50561 2.28799 -0.627688 -2.17633 7.27182 0.547228

比較のためのオクターブ出力：

octave:1> A=dlmread("matrixfile4")
A =

   182   147   140   125   132    76   126   157
   147   213   185   150   209   114   166   188
   140   185   232   129   194   142   199   205
   125   150   129   143   148    81   104   150
   132   209   194   148   214   122   172   189
    76   114   142    81   122   102   129   117
   126   166   199   104   172   129   187   181
   157   188   205   150   189   117   181   259

octave:2> C=chol(A)
C =

   13.49074   10.89636   10.37749    9.26562    9.78449    5.63349    9.33974   11.63761
    0.00000    9.70923    7.40770    5.05070   10.54508    5.41911    6.61543    6.30249
    0.00000    0.00000    8.33269   -0.54819    1.72175    5.20784    6.36911    4.50561
    0.00000    0.00000    0.00000    5.59806    0.89754    0.76577   -2.22569    2.28799
    0.00000    0.00000    0.00000    0.00000    1.81524    0.04424    1.03941   -0.62769
    0.00000    0.00000    0.00000    0.00000    0.00000    3.63139    2.48498   -2.17633
    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    1.79738    7.27182
    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    0.00000    0.54723

（行列がノードに誤ってコピーされていることについての私の以前のコメント-現在は削除されています-は適用されません。プログラムの残りの部分は私には問題ないようです。）

c++ - コレスキー分解ScaLapackエラー

1 に答える 1

Related

Reference