IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends Pages
Ifpack_PointRelaxation.cpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Ifpack_ConfigDefs.h"
44 #include <iomanip>
45 #include <cmath>
46 #include "Epetra_Operator.h"
47 #include "Epetra_CrsMatrix.h"
48 #include "Epetra_Comm.h"
49 #include "Epetra_Map.h"
50 #include "Epetra_MultiVector.h"
51 #include "Ifpack_Preconditioner.h"
52 #include "Ifpack_PointRelaxation.h"
53 #include "Ifpack_Utils.h"
54 #include "Ifpack_Condest.h"
55 
56 static const int IFPACK_JACOBI = 0;
57 static const int IFPACK_GS = 1;
58 static const int IFPACK_SGS = 2;
59 
60 //==============================================================================
62 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix_in) :
63  IsInitialized_(false),
64  IsComputed_(false),
65  NumInitialize_(0),
66  NumCompute_(0),
67  NumApplyInverse_(0),
68  InitializeTime_(0.0),
69  ComputeTime_(0.0),
70  ApplyInverseTime_(0.0),
71  ComputeFlops_(0.0),
72  ApplyInverseFlops_(0.0),
73  NumSweeps_(1),
74  DampingFactor_(1.0),
75  UseTranspose_(false),
76  Condest_(-1.0),
77  ComputeCondest_(false),
78  PrecType_(IFPACK_JACOBI),
79  MinDiagonalValue_(0.0),
80  NumMyRows_(0),
81  NumMyNonzeros_(0),
82  NumGlobalRows_(0),
83  NumGlobalNonzeros_(0),
84  Matrix_(Teuchos::rcp(Matrix_in,false)),
85  IsParallel_(false),
86  ZeroStartingSolution_(true),
87  DoBackwardGS_(false),
88  DoL1Method_(false),
89  L1Eta_(1.5),
90  NumLocalSmoothingIndices_(Matrix_in->NumMyRows()),
91  LocalSmoothingIndices_(0)
92 {
93 }
94 
95 //==============================================================================
96 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List)
97 {
98 
99  string PT;
100  if (PrecType_ == IFPACK_JACOBI)
101  PT = "Jacobi";
102  else if (PrecType_ == IFPACK_GS)
103  PT = "Gauss-Seidel";
104  else if (PrecType_ == IFPACK_SGS)
105  PT = "symmetric Gauss-Seidel";
106 
107  PT = List.get("relaxation: type", PT);
108 
109  if (PT == "Jacobi")
110  PrecType_ = IFPACK_JACOBI;
111  else if (PT == "Gauss-Seidel")
112  PrecType_ = IFPACK_GS;
113  else if (PT == "symmetric Gauss-Seidel")
114  PrecType_ = IFPACK_SGS;
115  else {
116  IFPACK_CHK_ERR(-2);
117  }
118 
119  NumSweeps_ = List.get("relaxation: sweeps",NumSweeps_);
120  DampingFactor_ = List.get("relaxation: damping factor",
121  DampingFactor_);
122  MinDiagonalValue_ = List.get("relaxation: min diagonal value",
123  MinDiagonalValue_);
124  ZeroStartingSolution_ = List.get("relaxation: zero starting solution",
125  ZeroStartingSolution_);
126 
127  DoBackwardGS_ = List.get("relaxation: backward mode",DoBackwardGS_);
128 
129  DoL1Method_ = List.get("relaxation: use l1",DoL1Method_);
130 
131  L1Eta_ = List.get("relaxation: l1 eta",L1Eta_);
132 
133 
134  // This (partial) reordering would require a very different implementation of Jacobi than we have no, so we're not
135  // going to do it.
136  NumLocalSmoothingIndices_= List.get("relaxation: number of local smoothing indices",NumLocalSmoothingIndices_);
137  LocalSmoothingIndices_ = List.get("relaxation: local smoothing indices",LocalSmoothingIndices_);
138  if(PrecType_ == IFPACK_JACOBI && LocalSmoothingIndices_) {
139  NumLocalSmoothingIndices_=NumMyRows_;
140  LocalSmoothingIndices_=0;
141  if(!Comm().MyPID()) cout<<"Ifpack_PointRelaxation: WARNING: Reordered/Local Smoothing not implemented for Jacobi. Defaulting to regular Jacobi"<<endl;
142  }
143 
144  SetLabel();
145 
146  return(0);
147 }
148 
149 //==============================================================================
150 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const
151 {
152  return(Matrix_->Comm());
153 }
154 
155 //==============================================================================
157 {
158  return(Matrix_->OperatorDomainMap());
159 }
160 
161 //==============================================================================
163 {
164  return(Matrix_->OperatorRangeMap());
165 }
166 
167 //==============================================================================
169 {
170  IsInitialized_ = false;
171 
172  if (Matrix_ == Teuchos::null)
173  IFPACK_CHK_ERR(-2);
174 
175  if (Time_ == Teuchos::null)
176  Time_ = Teuchos::rcp( new Epetra_Time(Comm()) );
177 
178  if (Matrix().NumGlobalRows64() != Matrix().NumGlobalCols64())
179  IFPACK_CHK_ERR(-2); // only square matrices
180 
181  NumMyRows_ = Matrix_->NumMyRows();
182  NumMyNonzeros_ = Matrix_->NumMyNonzeros();
183  NumGlobalRows_ = Matrix_->NumGlobalRows64();
184  NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros64();
185 
186  if (Comm().NumProc() != 1)
187  IsParallel_ = true;
188  else
189  IsParallel_ = false;
190 
191  ++NumInitialize_;
192  InitializeTime_ += Time_->ElapsedTime();
193  IsInitialized_ = true;
194  return(0);
195 }
196 
197 //==============================================================================
199 {
200  int ierr = 0;
201  if (!IsInitialized())
202  IFPACK_CHK_ERR(Initialize());
203 
204  Time_->ResetStartTime();
205 
206  // reset values
207  IsComputed_ = false;
208  Condest_ = -1.0;
209 
210  if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed.
211  if (NumSweeps_ < 0)
212  IFPACK_CHK_ERR(-2); // at least one application
213 
214  Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) );
215 
216  if (Diagonal_ == Teuchos::null)
217  IFPACK_CHK_ERR(-5);
218 
219  IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_));
220 
221  // Setup for L1 Methods.
222  // Here we add half the value of the off-processor entries in the row,
223  // but only if diagonal isn't sufficiently large.
224  //
225  // Note: This is only done in the slower-but-more-general "RowMatrix" mode.
226  //
227  // This follows from Equation (6.5) in:
228  // Baker, Falgout, Kolev and Yang. Multigrid Smoothers for Ultraparallel Computing.
229  // SIAM J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864--2887.
230  if(DoL1Method_ && IsParallel_) {
231  int maxLength = Matrix().MaxNumEntries();
232  std::vector<int> Indices(maxLength);
233  std::vector<double> Values(maxLength);
234  int NumEntries;
235 
236  for (int i = 0 ; i < NumMyRows_ ; ++i) {
237  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, maxLength,NumEntries,
238  &Values[0], &Indices[0]));
239  double diagonal_boost=0.0;
240  for (int k = 0 ; k < NumEntries ; ++k)
241  if(Indices[k] > NumMyRows_)
242  diagonal_boost+=std::abs(Values[k]/2.0);
243  if ((*Diagonal_)[i] < L1Eta_*diagonal_boost)
244  (*Diagonal_)[i]+=diagonal_boost;
245  }
246  }
247 
248  // check diagonal elements, store the inverses, and verify that
249  // no zeros are around. If an element is zero, then by default
250  // its inverse is zero as well (that is, the row is ignored).
251  for (int i = 0 ; i < NumMyRows_ ; ++i) {
252  double& diag = (*Diagonal_)[i];
253  if (IFPACK_ABS(diag) < MinDiagonalValue_)
254  diag = MinDiagonalValue_;
255  if (diag != 0.0)
256  diag = 1.0 / diag;
257  }
258 #ifdef IFPACK_FLOPCOUNTERS
259  ComputeFlops_ += NumMyRows_;
260 #endif
261 
262 #if 0
263  // some methods require the inverse of the diagonal, compute it
264  // now and store it in `Diagonal_'
265  if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) {
266  Diagonal_->Reciprocal(*Diagonal_);
267  // update flops
268  ComputeFlops_ += NumMyRows_;
269  }
270 #endif
271 
272  // We need to import data from external processors. Here I create an
273  // Epetra_Import object because I cannot assume that Matrix_ has one.
274  // This is a bit of waste of resources (but the code is more robust).
275  // Note that I am doing some strange stuff to set the components of Y
276  // from Y2 (to save some time).
277  //
278  if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) {
279  Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(),
280  Matrix().RowMatrixRowMap()) );
281  if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5);
282  }
283 
284  ++NumCompute_;
285  ComputeTime_ += Time_->ElapsedTime();
286  IsComputed_ = true;
287 
288  IFPACK_CHK_ERR(ierr);
289  return(0);
290 }
291 
292 //==============================================================================
293 ostream& Ifpack_PointRelaxation::Print(ostream & os) const
294 {
295 
296  double MyMinVal, MyMaxVal;
297  double MinVal, MaxVal;
298 
299  if (IsComputed_) {
300  Diagonal_->MinValue(&MyMinVal);
301  Diagonal_->MaxValue(&MyMaxVal);
302  Comm().MinAll(&MyMinVal,&MinVal,1);
303  Comm().MinAll(&MyMaxVal,&MaxVal,1);
304  }
305 
306  if (!Comm().MyPID()) {
307  os << endl;
308  os << "================================================================================" << endl;
309  os << "Ifpack_PointRelaxation" << endl;
310  os << "Sweeps = " << NumSweeps_ << endl;
311  os << "damping factor = " << DampingFactor_ << endl;
312  if (PrecType_ == IFPACK_JACOBI)
313  os << "Type = Jacobi" << endl;
314  else if (PrecType_ == IFPACK_GS)
315  os << "Type = Gauss-Seidel" << endl;
316  else if (PrecType_ == IFPACK_SGS)
317  os << "Type = symmetric Gauss-Seidel" << endl;
318  if (DoBackwardGS_)
319  os << "Using backward mode (GS only)" << endl;
320  if (ZeroStartingSolution_)
321  os << "Using zero starting solution" << endl;
322  else
323  os << "Using input starting solution" << endl;
324  os << "Condition number estimate = " << Condest() << endl;
325  os << "Global number of rows = " << Matrix_->NumGlobalRows64() << endl;
326  if (IsComputed_) {
327  os << "Minimum value on stored diagonal = " << MinVal << endl;
328  os << "Maximum value on stored diagonal = " << MaxVal << endl;
329  }
330  os << endl;
331  os << "Phase # calls Total Time (s) Total MFlops MFlops/s" << endl;
332  os << "----- ------- -------------- ------------ --------" << endl;
333  os << "Initialize() " << std::setw(5) << NumInitialize_
334  << " " << std::setw(15) << InitializeTime_
335  << " 0.0 0.0" << endl;
336  os << "Compute() " << std::setw(5) << NumCompute_
337  << " " << std::setw(15) << ComputeTime_
338  << " " << std::setw(15) << 1.0e-6 * ComputeFlops_;
339  if (ComputeTime_ != 0.0)
340  os << " " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl;
341  else
342  os << " " << std::setw(15) << 0.0 << endl;
343  os << "ApplyInverse() " << std::setw(5) << NumApplyInverse_
344  << " " << std::setw(15) << ApplyInverseTime_
345  << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_;
346  if (ApplyInverseTime_ != 0.0)
347  os << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl;
348  else
349  os << " " << std::setw(15) << 0.0 << endl;
350  os << "================================================================================" << endl;
351  os << endl;
352  }
353 
354  return(os);
355 }
356 
357 //==============================================================================
359 Condest(const Ifpack_CondestType CT,
360  const int MaxIters, const double Tol,
361  Epetra_RowMatrix* Matrix_in)
362 {
363  if (!IsComputed()) // cannot compute right now
364  return(-1.0);
365 
366  // always computes it. Call Condest() with no parameters to get
367  // the previous estimate.
368  Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
369 
370  return(Condest_);
371 }
372 
373 //==============================================================================
374 void Ifpack_PointRelaxation::SetLabel()
375 {
376  string PT;
377  if (PrecType_ == IFPACK_JACOBI)
378  PT = "Jacobi";
379  else if (PrecType_ == IFPACK_GS){
380  PT = "GS";
381  if(DoBackwardGS_)
382  PT = "Backward " + PT;
383  }
384  else if (PrecType_ == IFPACK_SGS)
385  PT = "SGS";
386 
387  if(NumLocalSmoothingIndices_!=NumMyRows_) PT="Local " + PT;
388  else if(LocalSmoothingIndices_) PT="Reordered " + PT;
389 
390  Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_)
391  + ", damping=" + Ifpack_toString(DampingFactor_) + ")";
392 }
393 
394 //==============================================================================
395 // Note that Ifpack_PointRelaxation and Jacobi is much faster than
396 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
397 // way the matrix-vector produce is performed).
398 //
399 // Another ML-related observation is that the starting solution (in Y)
400 // is NOT supposed to be zero. This may slow down the application of just
401 // one sweep of Jacobi.
402 //
404 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
405 {
406  if (!IsComputed())
407  IFPACK_CHK_ERR(-3);
408 
409  if (X.NumVectors() != Y.NumVectors())
410  IFPACK_CHK_ERR(-2);
411 
412  Time_->ResetStartTime();
413 
414  // AztecOO gives X and Y pointing to the same memory location,
415  // need to create an auxiliary vector, Xcopy
416  Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
417  if (X.Pointers()[0] == Y.Pointers()[0])
418  Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
419  else
420  Xcopy = Teuchos::rcp( &X, false );
421 
422  // Flops are updated in each of the following.
423  switch (PrecType_) {
424  case IFPACK_JACOBI:
425  IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
426  break;
427  case IFPACK_GS:
428  IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
429  break;
430  case IFPACK_SGS:
431  IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
432  break;
433  default:
434  IFPACK_CHK_ERR(-1); // something wrong
435  }
436 
437  ++NumApplyInverse_;
438  ApplyInverseTime_ += Time_->ElapsedTime();
439  return(0);
440 }
441 
442 //==============================================================================
443 // This preconditioner can be much slower than AztecOO and ML versions
444 // if the matrix-vector product requires all ExtractMyRowCopy()
445 // (as done through Ifpack_AdditiveSchwarz).
446 int Ifpack_PointRelaxation::
447 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const
448 {
449  int NumVectors = LHS.NumVectors();
450 
451  int startIter = 0;
452  if (NumSweeps_ > 0 && ZeroStartingSolution_) {
453  // When we have a zero initial guess, we can skip the first
454  // matrix apply call and zero initialization
455  for (int v = 0; v < NumVectors; v++)
456  IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(RHS(v)), *Diagonal_, 0.0));
457 
458  startIter = 1;
459  }
460 
461  bool zeroOut = false;
462  Epetra_MultiVector A_times_LHS(LHS.Map(), NumVectors, zeroOut);
463  for (int j = startIter; j < NumSweeps_ ; j++) {
464  IFPACK_CHK_ERR(Apply(LHS, A_times_LHS));
465  IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0));
466  for (int v = 0 ; v < NumVectors ; ++v)
467  IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)),
468  *Diagonal_, 1.0));
469 
470  }
471 
472  // Flops:
473  // - matrix vector (2 * NumGlobalNonzeros_)
474  // - update (2 * NumGlobalRows_)
475  // - Multiply:
476  // - DampingFactor (NumGlobalRows_)
477  // - Diagonal (NumGlobalRows_)
478  // - A + B (NumGlobalRows_)
479  // - 1.0 (NumGlobalRows_)
480 #ifdef IFPACK_FLOPCOUNTERS
481  ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
482 #endif
483 
484  return(0);
485 }
486 
487 //==============================================================================
488 int Ifpack_PointRelaxation::
489 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
490 {
491  if (ZeroStartingSolution_)
492  Y.PutScalar(0.0);
493 
494  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
495  // try to pick the best option; performances may be improved
496  // if several sweeps are used.
497  if (CrsMatrix != 0)
498  {
499  if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
500  return(ApplyInverseGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
501  else if (CrsMatrix->StorageOptimized())
502  return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y));
503  else
504  return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y));
505  }
506  else
507  return(ApplyInverseGS_RowMatrix(X, Y));
508 } //ApplyInverseGS()
509 
510 
511 
512 //==============================================================================
513 int Ifpack_PointRelaxation::
514 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
515 {
516  int NumVectors = X.NumVectors();
517 
518  int Length = Matrix().MaxNumEntries();
519  std::vector<int> Indices(Length);
520  std::vector<double> Values(Length);
521 
522  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
523  if (IsParallel_)
524  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
525  else
526  Y2 = Teuchos::rcp( &Y, false );
527 
528  // extract views (for nicer and faster code)
529  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
530  X.ExtractView(&x_ptr);
531  Y.ExtractView(&y_ptr);
532  Y2->ExtractView(&y2_ptr);
533  Diagonal_->ExtractView(&d_ptr);
534 
535  for (int j = 0; j < NumSweeps_ ; j++) {
536 
537  // data exchange is here, once per sweep
538  if (IsParallel_)
539  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
540 
541  // FIXME: do I really need this code below?
542  if (NumVectors == 1) {
543 
544  double* y0_ptr = y_ptr[0];
545  double* y20_ptr = y2_ptr[0];
546  double* x0_ptr = x_ptr[0];
547 
548  if(!DoBackwardGS_){
549  /* Forward Mode */
550  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
551  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
552 
553  int NumEntries;
554  int col;
555  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
556  &Values[0], &Indices[0]));
557 
558  double dtemp = 0.0;
559  for (int k = 0 ; k < NumEntries ; ++k) {
560 
561  col = Indices[k];
562  dtemp += Values[k] * y20_ptr[col];
563  }
564 
565  y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
566  }
567  }
568  else {
569  /* Backward Mode */
570  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
571  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
572 
573  int NumEntries;
574  int col;
575  (void) col; // Forestall compiler warning for unused variable.
576  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
577  &Values[0], &Indices[0]));
578  double dtemp = 0.0;
579  for (int k = 0 ; k < NumEntries ; ++k) {
580 
581  col = Indices[k];
582  dtemp += Values[k] * y20_ptr[i];
583  }
584 
585  y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
586  }
587  }
588 
589  // using Export() sounded quite expensive
590  if (IsParallel_)
591  for (int i = 0 ; i < NumMyRows_ ; ++i)
592  y0_ptr[i] = y20_ptr[i];
593 
594  }
595  else {
596  if(!DoBackwardGS_){
597  /* Forward Mode */
598  for (int i = 0 ; i < NumMyRows_ ; ++i) {
599 
600  int NumEntries;
601  int col;
602  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
603  &Values[0], &Indices[0]));
604 
605  for (int m = 0 ; m < NumVectors ; ++m) {
606 
607  double dtemp = 0.0;
608  for (int k = 0 ; k < NumEntries ; ++k) {
609 
610  col = Indices[k];
611  dtemp += Values[k] * y2_ptr[m][col];
612  }
613 
614  y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
615  }
616  }
617  }
618  else {
619  /* Backward Mode */
620  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
621  int NumEntries;
622  int col;
623  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
624  &Values[0], &Indices[0]));
625 
626  for (int m = 0 ; m < NumVectors ; ++m) {
627 
628  double dtemp = 0.0;
629  for (int k = 0 ; k < NumEntries ; ++k) {
630 
631  col = Indices[k];
632  dtemp += Values[k] * y2_ptr[m][col];
633  }
634 
635  y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
636 
637  }
638  }
639  }
640 
641  // using Export() sounded quite expensive
642  if (IsParallel_)
643  for (int m = 0 ; m < NumVectors ; ++m)
644  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
645  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
646  y_ptr[m][i] = y2_ptr[m][i];
647  }
648  }
649  }
650 
651 #ifdef IFPACK_FLOPCOUNTERS
652  ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
653 #endif
654 
655  return(0);
656 } //ApplyInverseGS_RowMatrix()
657 
658 //==============================================================================
659 int Ifpack_PointRelaxation::
660 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
661 {
662  int NumVectors = X.NumVectors();
663 
664  int* Indices;
665  double* Values;
666 
667  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
668  if (IsParallel_) {
669  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
670  }
671  else
672  Y2 = Teuchos::rcp( &Y, false );
673 
674  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
675  X.ExtractView(&x_ptr);
676  Y.ExtractView(&y_ptr);
677  Y2->ExtractView(&y2_ptr);
678  Diagonal_->ExtractView(&d_ptr);
679 
680  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
681 
682  // only one data exchange per sweep
683  if (IsParallel_)
684  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
685 
686  if(!DoBackwardGS_){
687  /* Forward Mode */
688  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
689  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
690 
691  int NumEntries;
692  int col;
693  double diag = d_ptr[i];
694 
695  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
696 
697  for (int m = 0 ; m < NumVectors ; ++m) {
698 
699  double dtemp = 0.0;
700 
701  for (int k = 0; k < NumEntries; ++k) {
702 
703  col = Indices[k];
704  dtemp += Values[k] * y2_ptr[m][col];
705  }
706 
707  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
708  }
709  }
710  }
711  else {
712  /* Backward Mode */
713  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
714  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
715 
716  int NumEntries;
717  int col;
718  double diag = d_ptr[i];
719 
720  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
721 
722  for (int m = 0 ; m < NumVectors ; ++m) {
723 
724  double dtemp = 0.0;
725  for (int k = 0; k < NumEntries; ++k) {
726 
727  col = Indices[k];
728  dtemp += Values[k] * y2_ptr[m][col];
729  }
730 
731  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
732 
733  }
734  }
735  }
736 
737  if (IsParallel_)
738  for (int m = 0 ; m < NumVectors ; ++m)
739  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
740  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
741  y_ptr[m][i] = y2_ptr[m][i];
742  }
743  }
744 
745 #ifdef IFPACK_FLOPCOUNTERS
746  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
747 #endif
748  return(0);
749 } //ApplyInverseGS_CrsMatrix()
750 
751 //
752 //==============================================================================
753 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage()
754 
755 int Ifpack_PointRelaxation::
756 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
757 {
758  int* IndexOffset;
759  int* Indices;
760  double* Values;
761  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
762 
763  int NumVectors = X.NumVectors();
764 
765  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
766  if (IsParallel_) {
767  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
768  }
769  else
770  Y2 = Teuchos::rcp( &Y, false );
771 
772  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
773  X.ExtractView(&x_ptr);
774  Y.ExtractView(&y_ptr);
775  Y2->ExtractView(&y2_ptr);
776  Diagonal_->ExtractView(&d_ptr);
777 
778  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
779 
780  // only one data exchange per sweep
781  if (IsParallel_)
782  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
783 
784  if(!DoBackwardGS_){
785  /* Forward Mode */
786  for (int i = 0 ; i < NumMyRows_ ; ++i) {
787 
788  int col;
789  double diag = d_ptr[i];
790 
791  for (int m = 0 ; m < NumVectors ; ++m) {
792 
793  double dtemp = 0.0;
794 
795  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
796 
797  col = Indices[k];
798  dtemp += Values[k] * y2_ptr[m][col];
799  }
800 
801  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
802  }
803  }
804  }
805  else {
806  /* Backward Mode */
807  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
808 
809  int col;
810  double diag = d_ptr[i];
811 
812  for (int m = 0 ; m < NumVectors ; ++m) {
813 
814  double dtemp = 0.0;
815  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
816 
817  col = Indices[k];
818  dtemp += Values[k] * y2_ptr[m][col];
819  }
820 
821  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
822 
823  }
824  }
825  }
826 
827 
828  if (IsParallel_)
829  for (int m = 0 ; m < NumVectors ; ++m)
830  for (int i = 0 ; i < NumMyRows_ ; ++i)
831  y_ptr[m][i] = y2_ptr[m][i];
832  }
833 
834 #ifdef IFPACK_FLOPCOUNTERS
835  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
836 #endif
837  return(0);
838 } //ApplyInverseGS_FastCrsMatrix()
839 
840 
841 //
842 //==============================================================================
843 // ApplyInverseGS_LocalFastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices
844 
845 int Ifpack_PointRelaxation::
846 ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
847 {
848  int* IndexOffset;
849  int* Indices;
850  double* Values;
851  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
852 
853  int NumVectors = X.NumVectors();
854 
855  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
856  if (IsParallel_) {
857  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
858  }
859  else
860  Y2 = Teuchos::rcp( &Y, false );
861 
862  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
863  X.ExtractView(&x_ptr);
864  Y.ExtractView(&y_ptr);
865  Y2->ExtractView(&y2_ptr);
866  Diagonal_->ExtractView(&d_ptr);
867 
868  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
869 
870  // only one data exchange per sweep
871  if (IsParallel_)
872  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
873 
874  if(!DoBackwardGS_){
875  /* Forward Mode */
876  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
877  int i=LocalSmoothingIndices_[ii];
878 
879  int col;
880  double diag = d_ptr[i];
881 
882  for (int m = 0 ; m < NumVectors ; ++m) {
883 
884  double dtemp = 0.0;
885 
886  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
887 
888  col = Indices[k];
889  dtemp += Values[k] * y2_ptr[m][col];
890  }
891 
892  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
893  }
894  }
895  }
896  else {
897  /* Backward Mode */
898  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
899  int i=LocalSmoothingIndices_[ii];
900 
901  int col;
902  double diag = d_ptr[i];
903 
904  for (int m = 0 ; m < NumVectors ; ++m) {
905 
906  double dtemp = 0.0;
907  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
908 
909  col = Indices[k];
910  dtemp += Values[k] * y2_ptr[m][col];
911  }
912 
913  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
914 
915  }
916  }
917  }
918 
919 
920  if (IsParallel_)
921  for (int m = 0 ; m < NumVectors ; ++m)
922  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
923  int i=LocalSmoothingIndices_[ii];
924  y_ptr[m][i] = y2_ptr[m][i];
925  }
926  }
927 
928 #ifdef IFPACK_FLOPCOUNTERS
929  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
930 #endif
931  return(0);
932 } //ApplyInverseGS_LocalFastCrsMatrix()
933 
934 
935 //==============================================================================
936 int Ifpack_PointRelaxation::
937 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
938 {
939  if (ZeroStartingSolution_)
940  Y.PutScalar(0.0);
941 
942  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
943  // try to pick the best option; performances may be improved
944  // if several sweeps are used.
945  if (CrsMatrix != 0)
946  {
947  if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
948  return(ApplyInverseSGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
949  else if (CrsMatrix->StorageOptimized())
950  return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
951  else
952  return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
953  }
954  else
955  return(ApplyInverseSGS_RowMatrix(X, Y));
956 }
957 
958 //==============================================================================
959 int Ifpack_PointRelaxation::
960 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
961 {
962  int NumVectors = X.NumVectors();
963  int Length = Matrix().MaxNumEntries();
964  std::vector<int> Indices(Length);
965  std::vector<double> Values(Length);
966 
967  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
968  if (IsParallel_) {
969  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
970  }
971  else
972  Y2 = Teuchos::rcp( &Y, false );
973 
974  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
975  X.ExtractView(&x_ptr);
976  Y.ExtractView(&y_ptr);
977  Y2->ExtractView(&y2_ptr);
978  Diagonal_->ExtractView(&d_ptr);
979 
980  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
981 
982  // only one data exchange per sweep
983  if (IsParallel_)
984  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
985 
986  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
987  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
988 
989  int NumEntries;
990  int col;
991  double diag = d_ptr[i];
992 
993  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
994  &Values[0], &Indices[0]));
995 
996  for (int m = 0 ; m < NumVectors ; ++m) {
997 
998  double dtemp = 0.0;
999 
1000  for (int k = 0 ; k < NumEntries ; ++k) {
1001 
1002  col = Indices[k];
1003  dtemp += Values[k] * y2_ptr[m][col];
1004  }
1005 
1006  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1007  }
1008  }
1009  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1010  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1011 
1012  int NumEntries;
1013  int col;
1014  double diag = d_ptr[i];
1015 
1016  IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
1017  &Values[0], &Indices[0]));
1018 
1019  for (int m = 0 ; m < NumVectors ; ++m) {
1020 
1021  double dtemp = 0.0;
1022  for (int k = 0 ; k < NumEntries ; ++k) {
1023 
1024  col = Indices[k];
1025  dtemp += Values[k] * y2_ptr[m][col];
1026  }
1027 
1028  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1029 
1030  }
1031  }
1032 
1033  if (IsParallel_)
1034  for (int m = 0 ; m < NumVectors ; ++m)
1035  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1036  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1037  y_ptr[m][i] = y2_ptr[m][i];
1038  }
1039  }
1040 
1041 #ifdef IFPACK_FLOPCOUNTERS
1042  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1043 #endif
1044  return(0);
1045 }
1046 
1047 //==============================================================================
1048 int Ifpack_PointRelaxation::
1049 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1050 {
1051  int NumVectors = X.NumVectors();
1052 
1053  int* Indices;
1054  double* Values;
1055 
1056  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1057  if (IsParallel_) {
1058  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1059  }
1060  else
1061  Y2 = Teuchos::rcp( &Y, false );
1062 
1063  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1064  X.ExtractView(&x_ptr);
1065  Y.ExtractView(&y_ptr);
1066  Y2->ExtractView(&y2_ptr);
1067  Diagonal_->ExtractView(&d_ptr);
1068 
1069  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1070 
1071  // only one data exchange per sweep
1072  if (IsParallel_)
1073  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1074 
1075  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1076  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1077 
1078  int NumEntries;
1079  int col;
1080  double diag = d_ptr[i];
1081 
1082  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1083 
1084  for (int m = 0 ; m < NumVectors ; ++m) {
1085 
1086  double dtemp = 0.0;
1087 
1088  for (int k = 0; k < NumEntries; ++k) {
1089 
1090  col = Indices[k];
1091  dtemp += Values[k] * y2_ptr[m][col];
1092  }
1093 
1094  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1095  }
1096  }
1097  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1098  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1099 
1100  int NumEntries;
1101  int col;
1102  double diag = d_ptr[i];
1103 
1104  IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1105 
1106  for (int m = 0 ; m < NumVectors ; ++m) {
1107 
1108  double dtemp = 0.0;
1109  for (int k = 0; k < NumEntries; ++k) {
1110 
1111  col = Indices[k];
1112  dtemp += Values[k] * y2_ptr[m][col];
1113  }
1114 
1115  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1116 
1117  }
1118  }
1119 
1120  if (IsParallel_)
1121  for (int m = 0 ; m < NumVectors ; ++m)
1122  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1123  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1124  y_ptr[m][i] = y2_ptr[m][i];
1125  }
1126  }
1127 
1128 #ifdef IFPACK_FLOPCOUNTERS
1129  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1130 #endif
1131  return(0);
1132 }
1133 
1134 //==============================================================================
1135 // this requires Epetra_CrsMatrix + OptimizeStorage()
1136 int Ifpack_PointRelaxation::
1137 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1138 {
1139  int* IndexOffset;
1140  int* Indices;
1141  double* Values;
1142  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
1143 
1144  int NumVectors = X.NumVectors();
1145 
1146  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1147  if (IsParallel_) {
1148  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1149  }
1150  else
1151  Y2 = Teuchos::rcp( &Y, false );
1152 
1153  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1154  X.ExtractView(&x_ptr);
1155  Y.ExtractView(&y_ptr);
1156  Y2->ExtractView(&y2_ptr);
1157  Diagonal_->ExtractView(&d_ptr);
1158 
1159  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1160 
1161  // only one data exchange per sweep
1162  if (IsParallel_)
1163  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1164 
1165  for (int i = 0 ; i < NumMyRows_ ; ++i) {
1166 
1167  int col;
1168  double diag = d_ptr[i];
1169 
1170  // no need to extract row i
1171  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1172 
1173  for (int m = 0 ; m < NumVectors ; ++m) {
1174 
1175  double dtemp = 0.0;
1176 
1177  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1178 
1179  col = Indices[k];
1180  dtemp += Values[k] * y2_ptr[m][col];
1181  }
1182 
1183  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1184  }
1185  }
1186 
1187  for (int i = NumMyRows_ - 1 ; i > -1 ; --i) {
1188 
1189  int col;
1190  double diag = d_ptr[i];
1191 
1192  // no need to extract row i
1193  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1194 
1195  for (int m = 0 ; m < NumVectors ; ++m) {
1196 
1197  double dtemp = 0.0;
1198  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1199 
1200  col = Indices[k];
1201  dtemp += Values[k] * y2_ptr[m][col];
1202  }
1203 
1204  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1205 
1206  }
1207  }
1208 
1209  if (IsParallel_)
1210  for (int m = 0 ; m < NumVectors ; ++m)
1211  for (int i = 0 ; i < NumMyRows_ ; ++i)
1212  y_ptr[m][i] = y2_ptr[m][i];
1213  }
1214 
1215 #ifdef IFPACK_FLOPCOUNTERS
1216  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1217 #endif
1218  return(0);
1219 }
1220 
1221 
1222 //==============================================================================
1223 // this requires Epetra_CrsMatrix + OptimizeStorage() + LocalSmoothingIndices_
1224 int Ifpack_PointRelaxation::
1225 ApplyInverseSGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1226 {
1227  int* IndexOffset;
1228  int* Indices;
1229  double* Values;
1230  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));
1231 
1232  int NumVectors = X.NumVectors();
1233 
1234  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
1235  if (IsParallel_) {
1236  Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
1237  }
1238  else
1239  Y2 = Teuchos::rcp( &Y, false );
1240 
1241  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
1242  X.ExtractView(&x_ptr);
1243  Y.ExtractView(&y_ptr);
1244  Y2->ExtractView(&y2_ptr);
1245  Diagonal_->ExtractView(&d_ptr);
1246 
1247  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
1248 
1249  // only one data exchange per sweep
1250  if (IsParallel_)
1251  IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
1252 
1253  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1254  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1255 
1256  int col;
1257  double diag = d_ptr[i];
1258 
1259  // no need to extract row i
1260  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1261 
1262  for (int m = 0 ; m < NumVectors ; ++m) {
1263 
1264  double dtemp = 0.0;
1265 
1266  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1267 
1268  col = Indices[k];
1269  dtemp += Values[k] * y2_ptr[m][col];
1270  }
1271 
1272  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1273  }
1274  }
1275 
1276  for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) {
1277  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1278 
1279  int col;
1280  double diag = d_ptr[i];
1281 
1282  // no need to extract row i
1283  //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));
1284 
1285  for (int m = 0 ; m < NumVectors ; ++m) {
1286 
1287  double dtemp = 0.0;
1288  for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {
1289 
1290  col = Indices[k];
1291  dtemp += Values[k] * y2_ptr[m][col];
1292  }
1293 
1294  y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
1295 
1296  }
1297  }
1298 
1299  if (IsParallel_)
1300  for (int m = 0 ; m < NumVectors ; ++m)
1301  for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
1302  int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
1303  y_ptr[m][i] = y2_ptr[m][i];
1304  }
1305  }
1306 
1307 #ifdef IFPACK_FLOPCOUNTERS
1308  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
1309 #endif
1310  return(0);
1311 }
virtual int Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Applies the matrix to an Epetra_MultiVector.
virtual const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this operator.
virtual const Epetra_Map & OperatorDomainMap() const
Returns the Epetra_Map object associated with the domain of this operator.
string Ifpack_toString(const int &x)
Convertes an integer to string.
Ifpack_PointRelaxation(const Epetra_RowMatrix *Matrix)
Ifpack_PointRelaxation constructor with given Epetra_RowMatrix.
virtual double Condest() const
Returns the condition number estimate, or -1.0 if not computed.
virtual int SetParameters(Teuchos::ParameterList &List)
Sets all the parameters for the preconditioner.
virtual bool IsInitialized() const
Returns true if the preconditioner has been successfully initialized, false otherwise.
virtual int Compute()
Computes the preconditioners.
virtual int Initialize()
Computes all it is necessary to initialize the preconditioner.
virtual const Epetra_RowMatrix & Matrix() const
Returns a pointer to the matrix to be preconditioned.
virtual bool IsComputed() const
Returns true if the preconditioner has been successfully computed.
virtual ostream & Print(ostream &os) const
Prints object to an output stream.
virtual const Epetra_Map & OperatorRangeMap() const
Returns the Epetra_Map object associated with the range of this operator.
virtual int ApplyInverse(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Applies the preconditioner to X, returns the result in Y.