IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends Pages
Ifpack_SORa.cpp
1 /*
2 //@HEADER
3 // ***********************************************************************
4 //
5 // Ifpack: Object-Oriented Algebraic Preconditioner Package
6 // Copyright (2002) Sandia Corporation
7 //
8 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9 // license for use of this work by or on behalf of the U.S. Government.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //@HEADER
42 */
43 
44 #include "Ifpack_SORa.h"
45 #include "Ifpack.h"
46 #include "Ifpack_Utils.h"
47 #include "Teuchos_ParameterList.hpp"
48 #include "Teuchos_RefCountPtr.hpp"
49 #include "Epetra_Import.h"
50 #include "Epetra_Export.h"
51 #include "Epetra_IntVector.h"
52 
53 using Teuchos::RefCountPtr;
54 using Teuchos::rcp;
55 
56 
57 
58 #ifdef HAVE_IFPACK_EPETRAEXT
59 #include "EpetraExt_MatrixMatrix.h"
60 #include "EpetraExt_RowMatrixOut.h"
61 #include "EpetraExt_MultiVectorOut.h"
62 #include "EpetraExt_Transpose_RowMatrix.h"
63 
64 
65 #define ABS(x) ((x)>=0?(x):-(x))
66 #define MIN(x,y) ((x)<(y)?(x):(y))
67 #define MAX(x,y) ((x)>(y)?(x):(y))
68 
69 // Useful functions horked from ML
70 int* FindLocalDiricheltRowsFromOnesAndZeros(const Epetra_CrsMatrix & Matrix, int &numBCRows);
71 void Apply_BCsToMatrixRowsAndColumns(const int *dirichletRows, int numBCRows,const Epetra_IntVector &dirichletColumns,const Epetra_CrsMatrix & Matrix);
72 Epetra_IntVector * FindLocalDirichletColumnsFromRows(const int *dirichletRows, int numBCRows,const Epetra_CrsMatrix & Matrix);
73 
74 Ifpack_SORa::Ifpack_SORa(Epetra_RowMatrix* A):
75  IsInitialized_(false),
76  IsComputed_(false),
77  Label_(),
78  Alpha_(1.5),
79  Gamma_(1.0),
80  NumSweeps_(1),
81  IsParallel_(false),
82  HaveOAZBoundaries_(false),
83  UseInterprocDamping_(false),
84  UseGlobalDamping_(false),
85  LambdaMax_(-1.0),
86  Time_(A->Comm())
87 {
88  Epetra_CrsMatrix *Acrs=dynamic_cast<Epetra_CrsMatrix*>(A);
89  if(Acrs) Acrs_=rcp(Acrs,false);
90  A_=rcp(A,false);
91 }
92 
93 void Ifpack_SORa::Destroy(){
94 }
95 
96 
97 
98 int Ifpack_SORa::Initialize(){
99  Alpha_ = List_.get("sora: alpha", Alpha_);
100  Gamma_ = List_.get("sora: gamma",Gamma_);
101  NumSweeps_ = List_.get("sora: sweeps",NumSweeps_);
102  HaveOAZBoundaries_= List_.get("sora: oaz boundaries", HaveOAZBoundaries_);
103  UseInterprocDamping_ = List_.get("sora: use interproc damping",UseInterprocDamping_);
104  UseGlobalDamping_ = List_.get("sora: use global damping",UseGlobalDamping_);
105 
106  if (A_->Comm().NumProc() != 1) IsParallel_ = true;
107  else {
108  IsParallel_ = false;
109  // Don't use interproc damping, for obvious reasons
110  UseInterprocDamping_=false;
111  }
112 
113  // Counters
114  IsInitialized_=true;
115  NumInitialize_++;
116  return 0;
117 }
118 
119 int Ifpack_SORa::SetParameters(Teuchos::ParameterList& parameterlist){
120  List_=parameterlist;
121  return 0;
122 }
123 
124 
125 template<typename int_type>
126 int Ifpack_SORa::TCompute(){
127  Epetra_Map *RowMap=const_cast<Epetra_Map*>(&A_->RowMatrixRowMap());
128  Epetra_Vector Adiag(*RowMap);
129  Epetra_CrsMatrix *Askew2=0, *Aherm2=0,*W=0;
130  int *rowptr_s,*colind_s,*rowptr_h,*colind_h;
131  double *vals_s,*vals_h;
132  bool RowMatrixMode=(Acrs_==Teuchos::null);
133 
134  // Label
135  sprintf(Label_, "IFPACK SORa (alpha=%5.2f gamma=%5.2f)",Alpha_,Gamma_);
136 
137  if(RowMatrixMode){
138  if(!A_->Comm().MyPID()) cout<<"SORa: RowMatrix mode enabled"<<endl;
139  // RowMatrix mode, build Acrs_ the hard way.
140  Epetra_RowMatrix *Arow=&*A_;
141  Epetra_Map *ColMap=const_cast<Epetra_Map*>(&A_->RowMatrixColMap());
142 
143  int Nmax=A_->MaxNumEntries();
144  int length;
145  std::vector<double> values(Nmax);
146  Epetra_CrsMatrix *Acrs=new Epetra_CrsMatrix(Copy,*RowMap,Nmax);
147 
148  std::vector<int> indices_local(Nmax);
149 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
150  if(RowMap->GlobalIndicesInt()) {
151  for(int i=0;i<Arow->NumMyRows();i++) {
152  Arow->ExtractMyRowCopy(i,Nmax,length,&values[0],&indices_local[0]);
153  for(int j=0;j<length;j++)
154  indices_local[j]= ColMap->GID(indices_local[j]);
155  Acrs->InsertGlobalValues(RowMap->GID(i),length,&values[0],&indices_local[0]);
156  }
157  }
158  else
159 #endif
160 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
161  if(RowMap->GlobalIndicesLongLong()) {
162  std::vector<int_type> indices_global(Nmax);
163  for(int i=0;i<Arow->NumMyRows();i++) {
164  Arow->ExtractMyRowCopy(i,Nmax,length,&values[0],&indices_local[0]);
165  for(int j=0;j<length;j++)
166  indices_global[j]= (int_type) ColMap->GID64(indices_local[j]);
167  Acrs->InsertGlobalValues((int_type) RowMap->GID64(i),length,&values[0],&indices_global[0]);
168  }
169  }
170  else
171 #endif
172  throw "Ifpack_SORa::TCompute: GlobalIndices type unknown";
173 
174  Acrs->FillComplete(A_->OperatorDomainMap(),A_->OperatorRangeMap());
175  Acrs_=rcp(Acrs,true);
176  }
177 
178  // Create Askew2
179  // Note: Here I let EpetraExt do the thinking for me. Since it gets all the maps correct for the E + F^T stencil.
180  // There are probably more efficient ways to do this but this method has the bonus of allowing code reuse.
181  IFPACK_CHK_ERR(EpetraExt::MatrixMatrix::Add(*Acrs_,false,1,*Acrs_,true,-1,Askew2));
182  Askew2->FillComplete();
183 
184  // Create Aherm2
185  IFPACK_CHK_ERR(EpetraExt::MatrixMatrix::Add(*Acrs_,false,1,*Acrs_,true,1,Aherm2));
186  Aherm2->FillComplete();
187 
188  int nnz2=Askew2->NumMyNonzeros();
189  int N=Askew2->NumMyRows();
190 
191 
192  // Grab pointers
193  IFPACK_CHK_ERR(Askew2->ExtractCrsDataPointers(rowptr_s,colind_s,vals_s));
194  IFPACK_CHK_ERR(Aherm2->ExtractCrsDataPointers(rowptr_h,colind_h,vals_h));
195 
196  // Sanity checking: Make sure the sparsity patterns are the same
197 #define SANITY_CHECK
198 #ifdef SANITY_CHECK
199  for(int i=0;i<N;i++)
200  if(rowptr_s[i]!=rowptr_h[i]) IFPACK_CHK_ERR(-2);
201  for(int i=0;i<nnz2;i++)
202  if(colind_s[i]!=colind_h[i]) IFPACK_CHK_ERR(-3);
203 #endif
204 
205  // Dirichlet Detection & Nuking of Aherm2 and Askew2
206  // Note: Relies on Aherm2/Askew2 having identical sparsity patterns (see sanity check above)
207  if(HaveOAZBoundaries_){
208  int numBCRows;
209  int* dirRows=FindLocalDiricheltRowsFromOnesAndZeros(*Acrs_,numBCRows);
210  Epetra_IntVector* dirCols=FindLocalDirichletColumnsFromRows(dirRows,numBCRows,*Aherm2);
211  Apply_BCsToMatrixRowsAndColumns(dirRows,numBCRows,*dirCols,*Aherm2);
212  Apply_BCsToMatrixRowsAndColumns(dirRows,numBCRows,*dirCols,*Askew2);
213  delete [] dirRows;
214  delete dirCols;
215  }
216 
217  // Grab diagonal of A
218  A_->ExtractDiagonalCopy(Adiag);
219 
220  // Allocate the diagonal for W
221  Epetra_Vector *Wdiag = new Epetra_Vector(*RowMap);
222 
223  // Build the W matrix (lower triangle only)
224  // Note: Relies on EpetraExt giving me identical sparsity patterns for both Askew2 and Aherm2 (see sanity check above)
225  int maxentries=Askew2->MaxNumEntries();
226  int_type* gids=new int_type [maxentries];
227  double* newvals=new double[maxentries];
228  W=new Epetra_CrsMatrix(Copy,*RowMap,0);
229  for(int i=0;i<N;i++){
230  // Build the - (1+alpha)/2 E - (1-alpha)/2 F part of the W matrix
231  int_type rowgid = (int_type) Acrs_->GRID64(i);
232  double c_data=0.0;
233  double ipdamp=0.0;
234  int idx=0;
235 
236  for(int j=rowptr_s[i];j<rowptr_s[i+1];j++){
237  int_type colgid = (int_type) Askew2->GCID64(colind_s[j]);
238  c_data+=fabs(vals_s[j]);
239  if(rowgid>colgid){
240  // Rely on the fact that off-diagonal entries are always numbered last, dropping the entry entirely.
241  if(colind_s[j] < N) {
242  gids[idx]=colgid;
243  newvals[idx]=vals_h[j]/2 + Alpha_ * vals_s[j]/2;
244  idx++;
245  }
246  else{
247  ipdamp+=fabs(vals_h[j]/2 + Alpha_ * vals_s[j]/2);
248  }
249  }
250  }
251  if(idx>0)
252  IFPACK_CHK_ERR(W->InsertGlobalValues(rowgid,idx,newvals,gids));
253 
254  // Do the diagonal
255  double w_val= c_data*Alpha_*Gamma_/4 + Adiag[Acrs_->LRID(rowgid)];
256  if(UseInterprocDamping_) w_val+=ipdamp;
257 
258  W->InsertGlobalValues(rowgid,1,&w_val,&rowgid);
259  IFPACK_CHK_ERR(Wdiag->ReplaceGlobalValues(1,&w_val,&rowgid));
260  }
261  W->FillComplete(A_->OperatorDomainMap(),A_->OperatorRangeMap());
262  W_=rcp(W);
263  Wdiag_=rcp(Wdiag);
264 
265  // Mark as computed
266  IsComputed_=true;
267 
268  // Global damping, if wanted
269  if(UseGlobalDamping_) {
270  PowerMethod(10,LambdaMax_);
271  if(!A_->Comm().MyPID()) printf("SORa: Global damping parameter = %6.4e (lmax=%6.4e)\n",GetOmega(),LambdaMax_);
272  }
273 
274  // Cleanup
275  delete [] gids;
276  delete [] newvals;
277  delete Aherm2;
278  delete Askew2;
279  if(RowMatrixMode) {
280  Acrs_=Teuchos::null;
281  }
282 
283  // Counters
284  NumCompute_++;
285  ComputeTime_ += Time_.ElapsedTime();
286  return 0;
287 }
288 
289 int Ifpack_SORa::Compute(){
290  if(!IsInitialized_) Initialize();
291  int ret_val = 0;
292 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
293  if(A_->RowMatrixRowMap().GlobalIndicesInt()) {
294  ret_val = TCompute<int>();
295  }
296  else
297 #endif
298 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
299  if(A_->RowMatrixRowMap().GlobalIndicesLongLong()) {
300  ret_val = TCompute<long long>();
301  }
302  else
303 #endif
304  throw "Ifpack_SORa::Compute: GlobalIndices type unknown";
305 
306  return ret_val;
307 }
308 
309 
310 int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{
311  if(!IsComputed_) return -1;
312  Time_.ResetStartTime();
313  bool initial_guess_is_zero=false;
314  int NumMyRows=W_->NumMyRows();
315  int NumVectors = X.NumVectors();
316  Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors);
317 
318  double omega=GetOmega();
319 
320  // need to create an auxiliary vector, Xcopy
321  Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
322  if (X.Pointers()[0] == Y.Pointers()[0]){
323  Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
324  // Since the user didn't give us anything better, our initial guess is zero.
325  Y.Scale(0.0);
326  initial_guess_is_zero=true;
327  }
328  else
329  Xcopy = Teuchos::rcp( &X, false );
330 
331  Teuchos::RefCountPtr< Epetra_MultiVector > T2;
332  // Note: Assuming that the matrix has an importer. Ifpack_PointRelaxation doesn't do this, but given that
333  // I have a CrsMatrix, I'm probably OK.
334  // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine
335  // if things are on or off processor.
336  // Note: T2 must be zero'd out
337  if (IsParallel_ && W_->Importer()) T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true));
338  else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true));
339 
340  // Pointer grabs
341  int* rowptr,*colind;
342  double *values;
343  double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr;
344  T2->ExtractView(&t2_ptr);
345  Y.ExtractView(&y_ptr);
346  Temp.ExtractView(&t_ptr);
347  Xcopy->ExtractView(&x_ptr);
348  Wdiag_->ExtractView(&d_ptr);
349  IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values));
350 
351 
352  for(int i=0; i<NumSweeps_; i++){
353  // Calculate b-Ax
354  if(!initial_guess_is_zero || i > 0) {
355  A_->Apply(Y,Temp);
356  Temp.Update(1.0,*Xcopy,-1.0);
357  }
358  else
359  Temp.Update(1.0,*Xcopy,0.0);
360 
361  // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated
362  // in this sweep before they are used, so we don't need to reset T2 to zero here.
363 
364  // Do backsolve & update
365  // x = x + W^{-1} (b - A x)
366  for(int j=0; j<NumMyRows; j++){
367  double diag=d_ptr[j];
368  for (int m=0 ; m<NumVectors; m++) {
369  double dtmp=0.0;
370  // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute.
371  t2_ptr[m][j]=0.0;
372  for(int k=rowptr[j];k<rowptr[j+1];k++){
373  dtmp+= values[k]*t2_ptr[m][colind[k]];
374  }
375  // Yes, we need to update both of these.
376  t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag;
377  y_ptr[m][j] += omega*t2_ptr[m][j];
378  }
379  }
380  }
381 
382  // Counter update
383  NumApplyInverse_++;
384  ApplyInverseTime_ += Time_.ElapsedTime();
385  return 0;
386 }
387 
388 
389 ostream& Ifpack_SORa::Print(ostream& os) const{
390  os<<"Ifpack_SORa"<<endl;
391  os<<" alpha = "<<Alpha_<<endl;
392  os<<" gamma = "<<Gamma_<<endl;
393  os<<endl;
394  return os;
395 }
396 
397 
398 double Ifpack_SORa::Condest(const Ifpack_CondestType CT,
399  const int MaxIters,
400  const double Tol,
401  Epetra_RowMatrix* Matrix_in){
402  return -1.0;
403 }
404 
405 
406 
407 
408 
409 // ============================================================================
410 inline int* FindLocalDiricheltRowsFromOnesAndZeros(const Epetra_CrsMatrix & Matrix, int &numBCRows){
411  int *dirichletRows = new int[Matrix.NumMyRows()];
412  numBCRows = 0;
413  for (int i=0; i<Matrix.NumMyRows(); i++) {
414  int numEntries, *cols;
415  double *vals;
416  int ierr = Matrix.ExtractMyRowView(i,numEntries,vals,cols);
417  if (ierr == 0) {
418  int nz=0;
419  for (int j=0; j<numEntries; j++) if (vals[j]!=0) nz++;
420  if (nz==1) dirichletRows[numBCRows++] = i;
421  // EXPERIMENTAL: Treat Special Inflow Boundaries as Dirichlet Boundaries
422  if(nz==2) dirichletRows[numBCRows++] = i;
423  }/*end if*/
424  }/*end for*/
425  return dirichletRows;
426 }/*end FindLocalDiricheltRowsFromOnesAndZeros*/
427 
428 
429 // ======================================================================
431 inline Epetra_IntVector * FindLocalDirichletColumnsFromRows(const int *dirichletRows, int numBCRows,const Epetra_CrsMatrix & Matrix){
432  // Zero the columns corresponding to the Dirichlet rows. Completely ignore the matrix maps.
433 
434  // Build rows
435  Epetra_IntVector ZeroRows(Matrix.RowMap());
436  Epetra_IntVector *ZeroCols=new Epetra_IntVector(Matrix.ColMap());
437  ZeroRows.PutValue(0);
438  ZeroCols->PutValue(0);
439 
440  // Easy Case: We're all on one processor
441  if(Matrix.RowMap().SameAs(Matrix.ColMap())){
442  for (int i=0; i < numBCRows; i++)
443  (*ZeroCols)[dirichletRows[i]]=1;
444  return ZeroCols;
445  }
446 
447  // Flag the rows which are zero locally
448  for (int i=0; i < numBCRows; i++)
449  ZeroRows[dirichletRows[i]]=1;
450 
451  // Boundary exchange to move the data
452  if(Matrix.RowMap().SameAs(Matrix.DomainMap())){
453  // Use A's Importer if we have one
454  ZeroCols->Import(ZeroRows,*Matrix.Importer(),Insert);
455  }
456  else{
457  // Use custom importer if we don't
458  Epetra_Import Importer(Matrix.ColMap(),Matrix.RowMap());
459  ZeroCols->Import(ZeroRows,Importer,Insert);
460  }
461  return ZeroCols;
462 }
463 
464 
465 // ======================================================================
466 inline void Apply_BCsToMatrixRowsAndColumns(const int *dirichletRows, int numBCRows,const Epetra_IntVector &dirichletColumns,const Epetra_CrsMatrix & Matrix){
467  /* This function zeros out rows & columns of Matrix.
468  Comments: The graph of Matrix is unchanged.
469  */
470  // Nuke the rows
471  for(int i=0;i<numBCRows;i++){
472  int numEntries, *cols;
473  double *vals;
474  Matrix.ExtractMyRowView(dirichletRows[i],numEntries,vals,cols);
475  for (int j=0; j<numEntries; j++) vals[j]=0.0;
476  }/*end for*/
477 
478  // Nuke the columns
479  for (int i=0; i < Matrix.NumMyRows(); i++) {
480  int numEntries;
481  double *vals;
482  int *cols;
483  Matrix.ExtractMyRowView(i,numEntries,vals,cols);
484  for (int j=0; j < numEntries; j++) {
485  if (dirichletColumns[ cols[j] ] > 0) vals[j] = 0.0;
486  }/*end for*/
487  }/*end for*/
488 }/* end Apply_BCsToMatrixColumns */
489 
490 
491 
492 
493 
494 int Ifpack_SORa::
495 PowerMethod(const int MaximumIterations, double& lambda_max)
496 {
497  // this is a simple power method
498  lambda_max = 0.0;
499  double RQ_top, RQ_bottom, norm;
500  Epetra_Vector x(A_->OperatorDomainMap());
501  Epetra_Vector y(A_->OperatorRangeMap());
502  Epetra_Vector z(A_->OperatorRangeMap());
503  x.Random();
504  x.Norm2(&norm);
505  if (norm == 0.0) IFPACK_CHK_ERR(-1);
506 
507  x.Scale(1.0 / norm);
508 
509  // Only do 1 sweep per PM step, turn off global damping
510  int NumSweepsBackup=NumSweeps_;
511  bool UseGlobalDampingBackup=UseGlobalDamping_;
512  NumSweeps_=1;UseGlobalDamping_=false;
513 
514  for (int iter = 0; iter < MaximumIterations; ++iter)
515  {
516  y.PutScalar(0.0);
517  A_->Apply(x, z);
518  ApplyInverse(z,y);
519  y.Update(1.0,x,-1.0);
520 
521  // Compute the Rayleigh Quotient
522  y.Dot(x, &RQ_top);
523  x.Dot(x, &RQ_bottom);
524  lambda_max = RQ_top / RQ_bottom;
525  y.Norm2(&norm);
526  if (norm == 0.0) IFPACK_CHK_ERR(-1);
527  x.Update(1.0 / norm, y, 0.0);
528 
529  }
530 
531  // Enable if we want to prevent over-relaxation
532  // lambda_max=MAX(lambda_max,1.0);
533 
534  // Reset parameters
535  NumSweeps_=NumSweepsBackup;
536  UseGlobalDamping_=UseGlobalDampingBackup;
537 
538  return(0);
539 }
540 
541 #endif