IFPACK  Development
 All Classes Files Functions Variables Enumerations Friends Pages
Ifpack_IKLU.cpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Ifpack_ConfigDefs.h"
44 #include "Ifpack_Preconditioner.h"
45 #include "Ifpack_IKLU.h"
46 #include "Ifpack_Condest.h"
47 #include "Ifpack_Utils.h"
48 #include "Ifpack_HashTable.h"
49 #include "Epetra_SerialComm.h"
50 #include "Epetra_Comm.h"
51 #include "Epetra_Map.h"
52 #include "Epetra_RowMatrix.h"
53 #include "Epetra_CrsMatrix.h"
54 #include "Epetra_Vector.h"
55 #include "Epetra_MultiVector.h"
56 #include "Epetra_Util.h"
57 #include "Teuchos_ParameterList.hpp"
58 #include "Teuchos_RefCountPtr.hpp"
59 #include <functional>
60 #include <algorithm>
61 
62 using namespace Teuchos;
63 
64 //==============================================================================
65 // FIXME: allocate Comm_ and Time_ the first Initialize() call
66 Ifpack_IKLU::Ifpack_IKLU(const Epetra_RowMatrix* A) :
67  A_(*A),
68  Comm_(A->Comm()),
69  Condest_(-1.0),
70  Relax_(0.),
71  Athresh_(0.0),
72  Rthresh_(1.0),
73  LevelOfFill_(1.0),
74  DropTolerance_(1e-12),
75  IsInitialized_(false),
76  IsComputed_(false),
77  UseTranspose_(false),
78  NumMyRows_(-1),
79  NumInitialize_(0),
80  NumCompute_(0),
81  NumApplyInverse_(0),
82  InitializeTime_(0.0),
83  ComputeTime_(0.0),
84  ApplyInverseTime_(0.0),
85  ComputeFlops_(0.0),
86  ApplyInverseFlops_(0.0),
87  Time_(Comm()),
88  GlobalNonzeros_(0),
89  csrA_(0),
90  cssS_(0),
91  csrnN_(0)
92 {
93  // do nothing here..
94 }
95 
96 //==============================================================================
98 {
99  Destroy();
100 }
101 
102 //==============================================================================
103 void Ifpack_IKLU::Destroy()
104 {
105  IsInitialized_ = false;
106  IsComputed_ = false;
107  if (csrA_)
108  csr_spfree( csrA_ );
109  if (cssS_)
110  csr_sfree( cssS_ );
111  if (csrnN_)
112  csr_nfree( csrnN_ );
113 }
114 
115 //==========================================================================
116 int Ifpack_IKLU::SetParameters(Teuchos::ParameterList& List)
117 {
118  try
119  {
120  LevelOfFill_ = List.get<double>("fact: ilut level-of-fill", LevelOfFill());
121  if (LevelOfFill_ <= 0.0)
122  IFPACK_CHK_ERR(-2); // must be greater than 0.0
123 
124  Athresh_ = List.get<double>("fact: absolute threshold", Athresh_);
125  Rthresh_ = List.get<double>("fact: relative threshold", Rthresh_);
126  Relax_ = List.get<double>("fact: relax value", Relax_);
127  DropTolerance_ = List.get<double>("fact: drop tolerance", DropTolerance_);
128 
129  Label_ = "IFPACK IKLU (fill=" + Ifpack_toString(LevelOfFill())
130  + ", relax=" + Ifpack_toString(RelaxValue())
131  + ", athr=" + Ifpack_toString(AbsoluteThreshold())
132  + ", rthr=" + Ifpack_toString(RelativeThreshold())
133  + ", droptol=" + Ifpack_toString(DropTolerance())
134  + ")";
135  return(0);
136  }
137  catch (...)
138  {
139  cerr << "Caught an exception while parsing the parameter list" << endl;
140  cerr << "This typically means that a parameter was set with the" << endl;
141  cerr << "wrong type (for example, int instead of double). " << endl;
142  cerr << "please check the documentation for the type required by each parameer." << endl;
143  IFPACK_CHK_ERR(-1);
144  }
145 
146  return(0);
147 }
148 
149 //==========================================================================
151 {
152  // delete previously allocated factorization
153  Destroy();
154 
155  Time_.ResetStartTime();
156 
157  if (A_.Comm().NumProc() != 1) {
158  cout << " There are too many processors !!! " << endl;
159  cerr << "Ifpack_IKLU can be used with Comm().NumProc() == 1" << endl;
160  cerr << "only. This class is a subdomain solver for Ifpack_AdditiveSchwarz," << endl;
161  cerr << "and it is currently not meant to be used otherwise." << endl;
162  exit(EXIT_FAILURE);
163  }
164 
165  // check dimensions of input matrix only in serial
166  if (Comm().NumProc() == 1 && Matrix().NumMyRows() != Matrix().NumMyCols())
167  IFPACK_CHK_ERR(-2);
168 
169  NumMyRows_ = Matrix().NumMyRows();
170  NumMyNonzeros_ = Matrix().NumMyNonzeros();
171 
172  int RowNnz, Length = Matrix().MaxNumEntries();
173  std::vector<int> RowIndices(Length);
174  std::vector<double> RowValues(Length);
175 
176  //cout << "Processor " << Comm().MyPID() << " owns " << NumMyRows_ << " rows and has " << NumMyNonzeros_ << " nonzeros " << endl;
177  // get general symbolic structure of the matrix
178  csrA_ = csr_spalloc( NumMyRows_, NumMyRows_, NumMyNonzeros_, 1, 0 );
179 
180  // copy the symbolic structure into csrA_
181  int count = 0;
182  csrA_->p[0] = 0;
183  for (int i = 0; i < NumMyRows_; ++i ) {
184 
185  IFPACK_CHK_ERR(A_.ExtractMyRowCopy(i,Length,RowNnz,
186  &RowValues[0],&RowIndices[0]));
187  for (int j = 0 ; j < RowNnz ; ++j) {
188  csrA_->j[count++] = RowIndices[j];
189  //cout << "Row = " << i << ", Column = " << RowIndices[j] << ", Value = " << RowValues[j] << endl;
190  }
191  csrA_->p[i+1] = csrA_->p[i] + RowNnz;
192  }
193 
194  // Perform symbolic analysis on the current matrix structure
195  int order = 1;
196  cssS_ = csr_sqr( order, csrA_ );
197  for (int i = 0; i < NumMyRows_; ++i ) {
198  cout << "AMD Perm (from inside KLU) [" << i << "] = " << cssS_->q[i] << endl;
199  }
200 
201  // nothing else to do here
202  IsInitialized_ = true;
203  ++NumInitialize_;
204  InitializeTime_ += Time_.ElapsedTime();
205 
206  return(0);
207 }
208 
209 //==========================================================================
211 {
212  public:
213  inline bool operator()(const double& x, const double& y)
214  {
215  return(IFPACK_ABS(x) > IFPACK_ABS(y));
216  }
217 };
218 
219 //==========================================================================
220 
222 {
223  if (!IsInitialized())
224  IFPACK_CHK_ERR(Initialize());
225 
226  Time_.ResetStartTime();
227  IsComputed_ = false;
228 
229  NumMyRows_ = A_.NumMyRows();
230  int Length = A_.MaxNumEntries();
231 
232  bool distributed = (Comm().NumProc() > 1)?true:false;
233 #if !defined(EPETRA_NO_32BIT_GLOBAL_INDICES) || !defined(EPETRA_NO_64BIT_GLOBAL_INDICES)
234  if (distributed)
235  {
236  SerialComm_ = rcp(new Epetra_SerialComm);
237  SerialMap_ = rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_));
238  assert (SerialComm_.get() != 0);
239  assert (SerialMap_.get() != 0);
240  }
241  else
242  SerialMap_ = rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false);
243 #endif
244 
245  int RowNnz;
246  std::vector<int> RowIndices(Length);
247  std::vector<double> RowValues(Length);
248 
249  // copy the values from A_ into csrA_
250  int count = 0;
251  for (int i = 0; i < NumMyRows_; ++i ) {
252 
253  IFPACK_CHK_ERR(A_.ExtractMyRowCopy(i,Length,RowNnz,
254  &RowValues[0],&RowIndices[0]));
255  // make sure each row has the same number of nonzeros
256  if (RowNnz != (csrA_->p[i+1]-csrA_->p[i])) {
257  cout << "The number of nonzeros for this row does not math the expected number of nonzeros!!!" << endl;
258  }
259  for (int j = 0 ; j < RowNnz ; ++j) {
260 
261  csrA_->x[count++] = RowValues[j];
262  //cout << "Row = " << i << ", Column = " << RowIndices[j] << ", Value = " << RowValues[j] << endl;
263  }
264  }
265 
266  // compute the lu factors
267  double tol = 0.1;
268  csrnN_ = csr_lu( &*csrA_, &*cssS_, tol );
269 
270  // Create L and U as a view of the information stored in csrnN_->L and csrnN_->U
271  csr* L_tmp = csrnN_->L;
272  csr* U_tmp = csrnN_->U;
273  std::vector<int> numEntriesL( NumMyRows_ ), numEntriesU( NumMyRows_ );
274  for (int i=0; i < NumMyRows_; ++i) {
275  numEntriesL[i] = ( L_tmp->p[i+1] - L_tmp->p[i] );
276  numEntriesU[i] = ( U_tmp->p[i+1] - U_tmp->p[i] );
277  }
278  L_ = rcp(new Epetra_CrsMatrix(View, *SerialMap_, &numEntriesL[0]));
279  U_ = rcp(new Epetra_CrsMatrix(View, *SerialMap_, &numEntriesU[0]));
280 
281  // Insert the values into L and U
282 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
283  if(SerialMap_->GlobalIndicesInt()) {
284  for (int i=0; i < NumMyRows_; ++i) {
285  L_->InsertGlobalValues( i, numEntriesL[i], &(L_tmp->x[L_tmp->p[i]]), &(L_tmp->j[L_tmp->p[i]]) );
286  U_->InsertGlobalValues( i, numEntriesU[i], &(U_tmp->x[U_tmp->p[i]]), &(U_tmp->j[U_tmp->p[i]]) );
287  }
288  }
289  else
290 #endif
291 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
292  if(SerialMap_->GlobalIndicesLongLong()) {
293 
294  const int MaxNumEntries_L_U = std::max(L_->MaxNumEntries(), U_->MaxNumEntries());
295  std::vector<long long> entries(MaxNumEntries_L_U);
296 
297  for (int i=0; i < NumMyRows_; ++i) {
298  std::copy(&(L_tmp->j[L_tmp->p[i]]), &(L_tmp->j[L_tmp->p[i]]) + numEntriesL[i], entries.begin());
299  L_->InsertGlobalValues( i, numEntriesL[i], &(L_tmp->x[L_tmp->p[i]]), &(entries[0]) );
300 
301  std::copy(&(U_tmp->j[U_tmp->p[i]]), &(U_tmp->j[U_tmp->p[i]]) + numEntriesU[i], entries.begin());
302  U_->InsertGlobalValues( i, numEntriesU[i], &(U_tmp->x[U_tmp->p[i]]), &(entries[0]) );
303  }
304  }
305  else
306 #endif
307  throw "Ifpack_IKLU::Compute: GlobalIndices type unknown for SerialMap_";
308 
309  IFPACK_CHK_ERR(L_->FillComplete());
310  IFPACK_CHK_ERR(U_->FillComplete());
311 
312  long long MyNonzeros = L_->NumGlobalNonzeros64() + U_->NumGlobalNonzeros64();
313  Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1);
314 
315  IsComputed_ = true;
316 
317  ++NumCompute_;
318  ComputeTime_ += Time_.ElapsedTime();
319 
320  return(0);
321 
322 }
323 
324 //=============================================================================
325 int Ifpack_IKLU::ApplyInverse(const Epetra_MultiVector& X,
326  Epetra_MultiVector& Y) const
327 {
328  if (!IsComputed())
329  IFPACK_CHK_ERR(-2); // compute preconditioner first
330 
331  if (X.NumVectors() != Y.NumVectors())
332  IFPACK_CHK_ERR(-3); // Return error: X and Y not the same size
333 
334  Time_.ResetStartTime();
335 
336  // NOTE: L_ and U_ are based on SerialMap_, while Xcopy is based
337  // on A.Map()... which are in general different. However, Solve()
338  // does not seem to care... which is fine with me.
339  //
340  // AztecOO gives X and Y pointing to the same memory location,
341  // need to create an auxiliary vector, Xcopy and apply permutation.
342  std::vector<int> invq( NumMyRows_ );
343 
344  for (int i=0; i<NumMyRows_; ++i ) {
345  csrnN_->perm[ csrnN_->pinv[i] ] = i;
346  invq[ cssS_->q[i] ] = i;
347  }
348 
349  Teuchos::RefCountPtr<Epetra_MultiVector> Xcopy = Teuchos::rcp( new Epetra_MultiVector(X.Map(),X.NumVectors()), false );
350  Teuchos::RefCountPtr<Epetra_MultiVector> Ytemp = Teuchos::rcp( new Epetra_MultiVector(Y.Map(),Y.NumVectors()) );
351 
352  for (int i=0; i<NumMyRows_; ++i) {
353  for (int j=0; j<X.NumVectors(); ++j) {
354  Xcopy->ReplaceMyValue( invq[i], j, (*X(j))[i] );
355  }
356  }
357 
358  if (!UseTranspose_)
359  {
360  // solves LU Y = X
361  IFPACK_CHK_ERR(L_->Solve(false,false,false,*Xcopy,*Ytemp));
362  IFPACK_CHK_ERR(U_->Solve(true,false,false,*Ytemp,*Ytemp));
363  }
364  else
365  {
366  // solves U(trans) L(trans) Y = X
367  IFPACK_CHK_ERR(U_->Solve(true,true,false,*Xcopy,*Ytemp));
368  IFPACK_CHK_ERR(L_->Solve(false,true,false,*Ytemp,*Ytemp));
369  }
370 
371  // Reverse the permutation.
372  for (int i=0; i<NumMyRows_; ++i) {
373  for (int j=0; j<Y.NumVectors(); ++j) {
374  Y.ReplaceMyValue( csrnN_->perm[i], j, (*(*Ytemp)(j))[i] );
375  }
376  }
377 
378  ++NumApplyInverse_;
379 #ifdef IFPACK_FLOPCOUNTERS
380  ApplyInverseFlops_ += X.NumVectors() * 2 * GlobalNonzeros_;
381 #endif
382  ApplyInverseTime_ += Time_.ElapsedTime();
383 
384  return(0);
385 
386 }
387 //=============================================================================
388 // This function finds X such that LDU Y = X or U(trans) D L(trans) Y = X for multiple RHS
389 int Ifpack_IKLU::Apply(const Epetra_MultiVector& X,
390  Epetra_MultiVector& Y) const
391 {
392 
393  return(-98);
394 }
395 
396 //=============================================================================
397 double Ifpack_IKLU::Condest(const Ifpack_CondestType CT,
398  const int MaxIters, const double Tol,
399  Epetra_RowMatrix* Matrix_in)
400 {
401  if (!IsComputed()) // cannot compute right now
402  return(-1.0);
403 
404  // NOTE: this is computing the *local* condest
405  if (Condest_ == -1.0)
406  Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in);
407 
408  return(Condest_);
409 }
410 
411 //=============================================================================
412 std::ostream&
413 Ifpack_IKLU::Print(std::ostream& os) const
414 {
415  if (!Comm().MyPID()) {
416  os << endl;
417  os << "================================================================================" << endl;
418  os << "Ifpack_IKLU: " << Label() << endl << endl;
419  os << "Level-of-fill = " << LevelOfFill() << endl;
420  os << "Absolute threshold = " << AbsoluteThreshold() << endl;
421  os << "Relative threshold = " << RelativeThreshold() << endl;
422  os << "Relax value = " << RelaxValue() << endl;
423  os << "Condition number estimate = " << Condest() << endl;
424  os << "Global number of rows = " << A_.NumGlobalRows64() << endl;
425  if (IsComputed_) {
426  os << "Number of nonzeros in A = " << A_.NumGlobalNonzeros64() << endl;
427  os << "Number of nonzeros in L + U = " << NumGlobalNonzeros64()
428  << " ( = " << 100.0 * NumGlobalNonzeros64() / A_.NumGlobalNonzeros64()
429  << " % of A)" << endl;
430  os << "nonzeros / rows = "
431  << 1.0 * NumGlobalNonzeros64() / U_->NumGlobalRows64() << endl;
432  }
433  os << endl;
434  os << "Phase # calls Total Time (s) Total MFlops MFlops/s" << endl;
435  os << "----- ------- -------------- ------------ --------" << endl;
436  os << "Initialize() " << std::setw(5) << NumInitialize()
437  << " " << std::setw(15) << InitializeTime()
438  << " 0.0 0.0" << endl;
439  os << "Compute() " << std::setw(5) << NumCompute()
440  << " " << std::setw(15) << ComputeTime()
441  << " " << std::setw(15) << 1.0e-6 * ComputeFlops();
442  if (ComputeTime() != 0.0)
443  os << " " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl;
444  else
445  os << " " << std::setw(15) << 0.0 << endl;
446  os << "ApplyInverse() " << std::setw(5) << NumApplyInverse()
447  << " " << std::setw(15) << ApplyInverseTime()
448  << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops();
449  if (ApplyInverseTime() != 0.0)
450  os << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl;
451  else
452  os << " " << std::setw(15) << 0.0 << endl;
453  os << "================================================================================" << endl;
454  os << endl;
455  }
456 
457  return(os);
458 }
Ifpack_IKLU(const Epetra_RowMatrix *A)
Ifpack_IKLU constuctor with variable number of indices per row.
Definition: Ifpack_IKLU.cpp:66
const Epetra_RowMatrix & Matrix() const
Returns a reference to the matrix to be preconditioned.
Definition: Ifpack_IKLU.h:188
virtual ostream & Print(std::ostream &os) const
Prints basic information on iostream. This function is used by operator<<.
int SetParameters(Teuchos::ParameterList &parameterlis)
Set parameters using a Teuchos::ParameterList object.
virtual int NumApplyInverse() const
Returns the number of calls to ApplyInverse().
Definition: Ifpack_IKLU.h:228
string Ifpack_toString(const int &x)
Convertes an integer to string.
virtual double InitializeTime() const
Returns the time spent in Initialize().
Definition: Ifpack_IKLU.h:234
virtual double ComputeTime() const
Returns the time spent in Compute().
Definition: Ifpack_IKLU.h:240
virtual double ApplyInverseFlops() const
Returns the number of flops in the application of the preconditioner.
Definition: Ifpack_IKLU.h:262
const char * Label() const
Returns the label of this object.
Definition: Ifpack_IKLU.h:200
bool IsInitialized() const
Returns true if the preconditioner has been successfully initialized.
Definition: Ifpack_IKLU.h:111
virtual double ApplyInverseTime() const
Returns the time spent in ApplyInverse().
Definition: Ifpack_IKLU.h:246
bool IsComputed() const
If factor is completed, this query returns true, otherwise it returns false.
Definition: Ifpack_IKLU.h:128
virtual int NumCompute() const
Returns the number of calls to Compute().
Definition: Ifpack_IKLU.h:222
virtual ~Ifpack_IKLU()
Ifpack_IKLU Destructor.
Definition: Ifpack_IKLU.cpp:97
double DropTolerance() const
Gets the dropping tolerance.
Definition: Ifpack_IKLU.h:289
int ApplyInverse(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Returns the result of a Ifpack_IKLU forward/back solve on a Epetra_MultiVector X in Y...
double RelativeThreshold() const
Get relative threshold value.
Definition: Ifpack_IKLU.h:283
double RelaxValue() const
Set relative threshold value.
Definition: Ifpack_IKLU.h:272
const Epetra_Comm & Comm() const
Returns the Epetra_BlockMap object associated with the range of this matrix operator.
Definition: Ifpack_IKLU.h:185
virtual int NumInitialize() const
Returns the number of calls to Initialize().
Definition: Ifpack_IKLU.h:216
double AbsoluteThreshold() const
Get absolute threshold value.
Definition: Ifpack_IKLU.h:277
int Initialize()
Initialize L and U with values from user matrix A.
double Condest() const
Returns the computed estimated condition number, or -1.0 if no computed.
Definition: Ifpack_IKLU.h:152
int Compute()
Compute IC factor U using the specified graph, diagonal perturbation thresholds and relaxation parame...
virtual double ComputeFlops() const
Returns the number of flops in the computation phase.
Definition: Ifpack_IKLU.h:257