#include "matrice.h"
#include "mpi.h"

#include <vector>

// NOTE(review): SIRINA ("width") is not referenced anywhere in the visible
// part of this file - confirm whether it is still needed.
#define SIRINA 10

// Names from std and from jwork are used unqualified throughout the file
// (jwork is presumably declared by matrice.h - TODO confirm).
using namespace std;
using namespace jwork;

// Short alias for gustaMatrica.
// NOTE(review): MTX is not used in the visible code.
typedef gustaMatrica MTX;

/////////////////////////////////////////// #include "jacobi-MPI-f.cc"

#include "distrGustaMatrica.hh"


// Block of the system owned by this process: rank 0 assigns it in
// generisiMatricu()/ucitajMatricu(), the other ranks fill it with
// AB.MPI_Recv(0) in slave().
distrGustaMatrica AB;
// Iterate buffer of AB.nm() elements; allocated with new[] and released with
// delete[] inside master() and slave().
T *X;

// Overwrites one entry per row of A with a value derived from that row's
// absSumaReda(): for local row `red`, the entry addressed by
// (red + offset, red) becomes 1.2 * absSumaReda(red) + 1.
//   A      - matrix block modified in place
//   offset - shift added to the row index to obtain the first index passed
//            to set() (callers pass the global index of the block's first row)
void diagDominant_Part(matrica &A, unsigned offset) {
  unsigned red = 0;
  while (red < A.visina()) {
    const T zbirReda(A.absSumaReda(red));
    A.set(red+offset, red, 1.2*zbirReda+1);
    ++red;
  }
}

// Generates a random system block by block and hands one block to every
// process.
//
// The n equations are split as evenly as possible: every process gets n / np
// equations and the first n % np processes get one more.  Each block is
// created as distrGustaMatrica(firstEquation, m + n, equationCount), filled
// with randomN(10) and then adjusted by diagDominant_Part().  The block for
// process 0 is stored in the global AB; every other block is sent with
// MPI_Send(i).
//
//   np - number of processes; must be non-zero (it is used as a divisor)
//   n  - number of equations / unknowns
//   m  - number of right-hand-side columns
//
// The earlier version also built full gustaMatrica A(n,n) and B(m,n) and ran
// diagDominant(A) on the master, although nothing read them any more; that
// dead work has been dropped.
void generisiMatricu(unsigned np, unsigned n, unsigned m) {
  const unsigned jDIV = n / np;       // equations per process
  const unsigned jMOD = n % np;       // remainder, spread over the first jMOD processes

  unsigned procitanoJednacina(0);     // equations handed out so far

  for (unsigned i=0; i<np; i++) {     // one block per process
    const unsigned brojJednacina = (i<jMOD) ? jDIV+1 : jDIV;

    distrGustaMatrica ABload(procitanoJednacina,m+n,brojJednacina);
    ABload.set_nm(n,m);
    ABload.randomN(10);
    diagDominant_Part(ABload,procitanoJednacina);

    if (i==0)
      AB=ABload;                      // the master keeps the first block
    else
      ABload.MPI_Send(i);
    procitanoJednacina+=brojJednacina;
  }
}

// Reads a system from standard input and hands one block of equations to
// every process.
//
// The dimensions (n, m) are read first with ucitajDimenzijuSistema(); the
// rows of each block are then read with ucitajRedoveMatrice().  The n
// equations are split as evenly as possible: every process gets n / np
// equations and the first n % np processes get one more.  The block for
// process 0 is stored in the global AB; every other block is sent with
// MPI_Send(i).
//
//   np - number of processes
//
// Throws NEISPRAVAN_ULAZ when the dimensions cannot be read and
// BROJ_NODEOVA_MORA_BITI_MANJI_OD_VELICINE_SISTEMA when np > n.
// NOTE(review): ucitajRedoveMatrice() presumably reports read errors by
// throwing as well - confirm against its definition.
void ucitajMatricu(unsigned np) {

  unsigned n,m;

  if (!ucitajDimenzijuSistema(&n,&m,cin))
    throw NEISPRAVAN_ULAZ;

  if ( np>n )
    throw BROJ_NODEOVA_MORA_BITI_MANJI_OD_VELICINE_SISTEMA;

  const unsigned jDIV = n / np;       // equations per process
  const unsigned jMOD = n % np;       // remainder, spread over the first jMOD processes

  unsigned procitanoJednacina(0);     // equations read so far

  for (unsigned i=0; i<np; i++) {     // one block per process
    const unsigned brojJednacina = (i<jMOD) ? jDIV+1 : jDIV;

    distrGustaMatrica ABload(procitanoJednacina);
    ABload.set_nm(n,m);

    ucitajRedoveMatrice( n, m, brojJednacina, ABload );

    if (i==0)
      AB=ABload;                      // the master keeps the first block
    else
      ABload.MPI_Send(i);
    procitanoJednacina+=brojJednacina;
  }
}

void jacobiPripremi() {
  for (unsigned j=0; j<AB.visina(); j++) {
    T a=AB.get(j+AB.prva(),j);
    AB.set(j+AB.prva(),j,0);
  
    for (unsigned i=0; i<AB.get_n(); i++)
      AB.set(i,j,-AB.get(i,j)/a);
    for (unsigned i=AB.get_n(); i<AB.sirina(); i++)
      AB.set(i,j, AB.get(i,j)/a);
      
//    AB.pomnoziRed(j,-1/a);  
    
  }
}

void iteracija(unsigned rank) {

  T rezultati[ AB.get_m() * AB.visina() ];

  for (unsigned k=0; k<AB.get_m(); k++) {
  
    unsigned offset  = k*AB.get_n();
    unsigned roffset = k*AB.visina();
    
    for (unsigned j=0; j<AB.visina(); j++) {
      T sum(0); 
      for (unsigned i=0; i<AB.get_n(); i++) {
        sum+=AB.get(i,j)*X[offset+i];
      }
      sum+=AB.get(AB.get_n()+k,j);
      rezultati[ roffset+j ] = sum;
    }
  }
  
  unsigned info[2];
  info[0]=AB.prva();
  info[1]=AB.visina();
  
  if (rank==0) {
    for (unsigned k=0; k<AB.get_m(); k++) {
      unsigned offset = k*AB.get_n();
      unsigned roffset= k*AB.visina();
      for (unsigned j=0; j<info[1]; j++) 
        X[offset+info[0]+j] = rezultati[ roffset+j ];
    }
  } else {
    MPI::COMM_WORLD.Send(info, 2, MPI::UNSIGNED, 0, 0);
    MPI::COMM_WORLD.Send(rezultati, AB.get_m() * info[1], MPI_T, 0, 0);
  }
  
}

// Command codes sent by the master through sendCommand(); slave() dispatches
// on them in its receive loop.
#define CMD_TERMINATE_PROCESS 0   // leave the command loop
#define CMD_ITERACIJA         1   // take part in the broadcast of X, then run iteracija()
#define CMD_NORMA_REDOVA      2   // compute normaRedova_A_zaJacobi() and send it to rank 0
#define CMD_PRINT             3   // print the rank and the local AB block to cout

// Sends the command code COMMAND (one unsigned, tag 0) to every rank from
// 1 to np-1.
//   COMMAND - one of the CMD_* codes
//   np      - total number of processes
void sendCommand(unsigned COMMAND, unsigned np) {
  unsigned odrediste = 1;
  while (odrediste < np) {
    MPI::COMM_WORLD.Send(&COMMAND, 1, MPI::UNSIGNED, odrediste, 0);
    ++odrediste;
  }
}

// Collects the results that ranks 1..np-1 sent from iteracija() and stores
// them in X.
//
// From every rank two messages with tag 0 are received: info = {first global
// row, number of rows} and then AB.get_m() blocks of info[1] values each.
// Value j of block k is written to X[k * AB.get_n() + info[0] + j].
//
//   X  - destination buffer of AB.get_m() * AB.get_n() elements
//   np - total number of processes
//
// The stride between blocks in the received buffer is the sender's row count.
// It is taken from info[1] rather than recomputed from n / np, so unpacking
// cannot drift from what the sender actually packed.  The receive buffer is a
// reused std::vector instead of a runtime-sized stack array.
void pokupi_rezultate_u_X(T *X, unsigned np) {
  const unsigned n = AB.get_n();
  const unsigned m = AB.get_m();

  unsigned info[2];
  std::vector<T> rezultati;

  for (unsigned i=1; i<np; i++) {
    MPI::COMM_WORLD.Recv( info, 2, MPI::UNSIGNED, i, 0);
    const unsigned prva       = info[0];
    const unsigned brojRedova = info[1];

    rezultati.resize(m*brojRedova);
    // &v[0] is only valid for a non-empty vector; an empty message needs no buffer.
    T *buf = rezultati.empty() ? 0 : &rezultati[0];
    MPI::COMM_WORLD.Recv( buf, m*brojRedova, MPI_T, i, 0);

    for (unsigned k=0; k<m; k++) {
      const unsigned offset  = k*n;            // start of block k in X
      const unsigned roffset = k*brojRedova;   // start of block k in the message

      for (unsigned j=0; j<brojRedova; j++)
        X[offset+prva+j] = rezultati[ roffset+j ];
    }
  }
}

// Prints the AB.nm() elements of X to cout, one bracketed line per group of
// AB.get_n() elements; each value is formatted with redukuj_sirinu(value, 10)
// and followed by "  ; ".
void printX(T *X) {
  cout << "[ ";
  unsigned i = 0;
  while (i < AB.nm()) {
    // close the current line and open a new one at every group boundary
    const bool pocetakGrupe = (i!=0) && (i%AB.get_n()==0);
    if (pocetakGrupe)
      cout << " ]" << endl << "[ ";
    cout << redukuj_sirinu(X[i],10) << "  ; ";
    ++i;
  }
  cout << "]" << endl;
}

// Prints the sum of the absolute values of the AB.nm() elements of X to cout
// as "[ <sum> ]".
void printSumX(T *X) {
  T zbir(0);
  const T *kraj = X + AB.nm();
  for (const T *p = X; p != kraj; ++p)
    zbir+=fabs(*p);
  cout << "[ " << zbir << " ]" << endl;
}

// Returns the largest absolute component-wise difference between two
// successive iterates, max |X[i] - oldX[i]|, over all AB.get_m() blocks of
// AB.get_n() elements.
//
//   X    - current iterate
//   oldX - previous iterate (same layout and length as X)
//
// Two defects of the earlier version are fixed here:
//  * it walked the vectors with AB.visina() (the number of rows owned by the
//    calling process) as both block stride and block length, while X is laid
//    out in blocks of AB.get_n(); on the master this compared only part of
//    the solution;
//  * it measured | |x| - |y| |, which is never larger than |x - y| and is
//    zero for a pure sign flip, so the step size was underestimated.
T uporediUzastopneIteracije(T *X, T *oldX) {

  const unsigned n = AB.get_n();
  const unsigned m = AB.get_m();

  T globalMax(0);
  for (unsigned k=0; k<m; k++) {
    const unsigned offset = k*n;      // start of block k
    for (unsigned i=0; i<n; i++) {
      const T razlika( fabs( X[offset+i] - oldX[offset+i] ) );
      if (globalMax<razlika)
        globalMax=razlika;
    }
  }
  return globalMax;
}


// Rank-0 side of the solver: generates and distributes the system, drives the
// iteration on all processes and prints the result.
//
// Steps:
//  1. generisiMatricu() creates the blocks and sends them to the slaves.
//  2. jacobiPripremi() transforms the local block; the slaves do the same.
//  3. q is the maximum of normaRedova_A_zaJacobi() over all processes and
//     qq = q / (1 - q) scales the stopping test.
//  4. Up to MAXITER times: broadcast X, compute the local part, collect the
//     remote parts, and stop as soon as qq * max|X - oldX| < EPSILON.
//  5. Print the counters and the sum of |X[i]|, then tell the slaves to
//     terminate.
//
//   np     - total number of processes
//   size_n - number of equations / unknowns
//   size_m - number of right-hand-side columns
//
// Returns true when the stopping test was met, false when MAXITER iterations
// were performed without meeting it.  (The function used to run off its end
// without returning a value, which is undefined behaviour for a bool
// function.)
//
// NOTE(review): if q >= 1 the factor qq is not positive and finite and the
// stopping test is meaningless; the generated matrices are presumably built
// so that q < 1 - confirm before enabling the ucitajMatricu() input path.
bool master(unsigned np, unsigned size_n, unsigned size_m) {

  // Alternative input path (disabled): ucitajMatricu(np);
  resetujBrojace();
  cout << "Generisanje matrice i slanje node-ovima ( " << size_n << " x " << size_n << " | " << size_m << " )  broj node-ova: " << np << endl;
  generisiMatricu(np,size_n,size_m);
  ispisiStanjeBrojaca(cout);

  resetujBrojace();

  jacobiPripremi();
  const unsigned nm(AB.nm());
  X = new T[nm];
  T *oldX = new T[nm];

  // Largest row norm over all processes.
  sendCommand(CMD_NORMA_REDOVA, np);
  T q(AB.normaRedova_A_zaJacobi());

  for (unsigned i=1;i<np;i++) {
    T qtmp;
    MPI::COMM_WORLD.Recv(&qtmp,1,MPI_T,i,0);
    if (q<qtmp)
      q=qtmp;
  }

  const T qq=q/(1-q);

  // Start from the zero vector.
  for (unsigned i=0;i<nm;i++)
    X[i]=0;

  unsigned koraka = 0;          // iterations actually performed
  bool konvergiralo = false;
  while (koraka<MAXITER && !konvergiralo) {

    sendCommand(CMD_ITERACIJA, np);
    MPI::COMM_WORLD.Bcast(X, nm, MPI_T, 0);

    for (unsigned i=0;i<nm;i++)
      oldX[i]=X[i];

    iteracija(0);

    pokupi_rezultate_u_X(X,np);

    const T maxr(uporediUzastopneIteracije(X,oldX));

    ++koraka;
    konvergiralo = (qq*maxr<EPSILON);
  }

  ispisiStanjeBrojaca(cout);
  // Report the number of iterations really done (the old code printed
  // MAXITER+1 when the loop ran to exhaustion).
  cout << "Resenje sistema u (" << koraka << ") koraka :";
  printSumX(X);

  sendCommand(CMD_TERMINATE_PROCESS,np);

  delete [] oldX;
  delete [] X;
  X = 0;                        // do not leave the global pointer dangling

  return konvergiralo;
}

// Worker side of the solver for every rank other than 0.
//
// Receives its block into AB from rank 0, transforms it with
// jacobiPripremi(), allocates X with AB.nm() elements and then serves
// commands received from rank 0 (one unsigned, tag 0) until
// CMD_TERMINATE_PROCESS arrives.  Unknown command codes are ignored.
//
//   rank - rank of the calling process
void slave(unsigned rank) {
  AB.MPI_Recv(0);
  jacobiPripremi();

  const unsigned duzinaX(AB.nm());
  X = new T[duzinaX];

  unsigned naredba;
  do {
    MPI::COMM_WORLD.Recv(&naredba,1,MPI::UNSIGNED,0,0);

    if (naredba == CMD_ITERACIJA) {
      // take the current iterate from the master and compute the local part
      MPI::COMM_WORLD.Bcast(X, duzinaX, MPI_T, 0);
      iteracija(rank);
    } else if (naredba == CMD_NORMA_REDOVA) {
      // original author's note: the column norm is needed as well
      T q;
      q = AB.normaRedova_A_zaJacobi();
      MPI::COMM_WORLD.Send(&q,1,MPI_T,0,0);
    } else if (naredba == CMD_PRINT) {
      cout << "rank=" << rank << endl;
      cout << AB << endl;
    }
  } while (naredba != CMD_TERMINATE_PROCESS);

  delete [] X;
}

// Program entry point.
//
// Usage: <program> [n [m]]
//   n - number of equations / unknowns (default 10)
//   m - number of right-hand-side columns (default 2)
// An argument that is missing, non-numeric, zero or negative leaves the
// default in place.  (The values used to be stored in an unsigned before the
// "<= 0" check, so a negative argument wrapped around to a huge size.)
//
// Rank 0 runs master(), every other rank runs slave().  An int thrown by
// either is reported on cerr; the MPI job is then aborted so the remaining
// processes do not keep waiting for messages, and the exit status is 1.
int main(int argc, char **argv) {
  try {

    unsigned size_n=10;
    unsigned size_m=2 ;
    if (argc>1) {
      const int v = atoi(argv[1]);
      if (v>0)
        size_n=static_cast<unsigned>(v);
    }
    if (argc>2) {
      const int v = atoi(argv[2]);
      if (v>0)
        size_m=static_cast<unsigned>(v);
    }

    MPI::Init(argc,argv);
    const int rank=MPI::COMM_WORLD.Get_rank();
    const int size=MPI::COMM_WORLD.Get_size();
    if (rank==0)
      master(size,size_n,size_m);
    else
      slave(rank);
    MPI::Finalize();

  } catch( int err ) {
    cerr << "ERROR!! " << err << endl;
    // Take the whole job down; the other ranks would otherwise block forever.
    if (MPI::Is_initialized() && !MPI::Is_finalized())
      MPI::COMM_WORLD.Abort(1);
    return 1;
  }
  return 0;
}
