Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_TriDiContainer_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
44 #define IFPACK2_TRIDICONTAINER_DEF_HPP
45 
47 #include "Teuchos_LAPACK.hpp"
48 
49 #ifdef HAVE_MPI
50 # include <mpi.h>
52 #else
53 # include "Teuchos_DefaultSerialComm.hpp"
54 #endif // HAVE_MPI
55 
56 
57 namespace Ifpack2 {
58 
59 template<class MatrixType, class LocalScalarType>
60 TriDiContainer<MatrixType, LocalScalarType, true>::
61 TriDiContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
63  const Teuchos::RCP<const import_type>& importer,
64  int OverlapLevel,
65  scalar_type DampingFactor) :
66  Container<MatrixType> (matrix, partitions, importer, OverlapLevel,
67  DampingFactor),
68  ipiv_ (this->partitions_.size(), 0),
69  IsInitialized_ (false),
70  IsComputed_ (false),
71  scalars_ (nullptr),
72  scalarOffsets_ (this->numBlocks_)
73 {
74  using Teuchos::Array;
75  using Teuchos::ArrayView;
76  using Teuchos::RCP;
77  using Teuchos::rcp;
78  using Teuchos::toString;
80  ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::TriDiContainer: "
81  "The constructor's input matrix must have a column Map.");
82 
83  // Check whether the input set of local row indices is correct.
84  const map_type& rowMap = * (matrix->getRowMap ());
85  {
86  for(int i = 0; i < this->numBlocks_; i++)
87  {
88  Teuchos::ArrayView<const local_ordinal_type> localRows = this->getLocalRows(i);
89  for(local_ordinal_type j = 0; j < this->blockRows_[i]; j++)
90  {
92  !rowMap.isNodeLocalElement(this->partitions_[this->partitionIndices_[i] + j]),
93  std::invalid_argument, "Ifpack2::TriDiContainer: "
94  "On process " << rowMap.getComm()->getRank() << " of "
95  << rowMap.getComm()->getSize() << ", in the given set of local row "
96  "indices localRows = " << Teuchos::toString(localRows) << ", the following "
97  "entries is not valid local row index on the calling process: "
98  << localRows[j] << ".");
99  }
100  }
101  }
102 
103  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
104  // different index base than zero?
105  //compute scalar array offsets (probably different from partitionIndices_)
106  local_ordinal_type scalarTotal = 0;
107  for(local_ordinal_type i = 0; i < this->numBlocks_; i++)
108  {
109  scalarOffsets_[i] = scalarTotal;
110  if(this->blockRows_[i] == 1)
111  scalarTotal++;
112  else
113  scalarTotal += 4 * (this->blockRows_[i] - 1);
114  }
115  //Allocate scalar arrays
116  scalars_ = new local_scalar_type[scalarTotal];
117  diagBlocks_.reserve(this->numBlocks_);
118  for(int i = 0; i < this->numBlocks_; i++)
119  diagBlocks_.emplace_back(Teuchos::View, scalars_ + scalarOffsets_[i], this->blockRows_[i]);
120 }
121 
122 template<class MatrixType, class LocalScalarType>
123 TriDiContainer<MatrixType, LocalScalarType, true>::
124 TriDiContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
125  const Teuchos::Array<local_ordinal_type>& localRows) :
126  Container<MatrixType> (matrix, localRows),
127  ipiv_ (this->partitions_.size(), 0),
128  IsInitialized_ (false),
129  IsComputed_ (false),
130  scalars_ (nullptr)
131 {
132  using Teuchos::Array;
133  using Teuchos::ArrayView;
134  using Teuchos::RCP;
135  using Teuchos::rcp;
136  using Teuchos::toString;
138  !matrix->hasColMap(), std::invalid_argument, "Ifpack2::TriDiContainer: "
139  "The constructor's input matrix must have a column Map.");
140 
141  // Check whether the input set of local row indices is correct.
142  const map_type& rowMap = *(matrix->getRowMap());
143  {
144  for(local_ordinal_type j = 0; j < this->blockRows_[0]; j++)
145  {
146  //Check that all rows in only block are valid and locally owned
148  !rowMap.isNodeLocalElement(this->partitions_[this->partitionIndices_[0] + j]),
149  std::invalid_argument, "Ifpack2::TriDiContainer: "
150  "On process " << rowMap.getComm ()->getRank () << " of "
151  << rowMap.getComm ()->getSize () << ", in the given set of local row "
152  "indices localRows = " << Teuchos::toString (localRows) << ", the following "
153  "entries is not valid local row index on the calling process: "
154  << localRows[j] << ".");
155  }
156  }
157  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
158  // different index base than zero?
159  //for single block, let the SerialTriDiMat own the scalar memory, as there would be no speed gain
160  diagBlocks_.emplace_back(this->blockRows_[0], this->blockRows_[0], true);
161 }
162 
163 template<class MatrixType, class LocalScalarType>
164 TriDiContainer<MatrixType, LocalScalarType, true>::~TriDiContainer ()
165 {
166  if(scalars_)
167  delete[] scalars_;
168 }
169 
170 template<class MatrixType, class LocalScalarType>
171 bool TriDiContainer<MatrixType, LocalScalarType, true>::isInitialized () const
172 {
173  return IsInitialized_;
174 }
175 
176 template<class MatrixType, class LocalScalarType>
177 bool TriDiContainer<MatrixType, LocalScalarType, true>::isComputed () const
178 {
179  return IsComputed_;
180 }
181 
182 template<class MatrixType, class LocalScalarType>
183 void TriDiContainer<MatrixType, LocalScalarType, true>::
184 setParameters (const Teuchos::ParameterList& /* List */)
185 {
186  // the solver doesn't currently take any parameters
187 }
188 
189 template<class MatrixType, class LocalScalarType>
190 void TriDiContainer<MatrixType, LocalScalarType, true>::initialize ()
191 {
192  for(int i = 0; i < this->numBlocks_; i++)
193  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<local_scalar_type>::zero());
194  std::fill(ipiv_.begin(), ipiv_.end(), 0);
195  IsInitialized_ = true;
196  // We assume that if you called this method, you intend to recompute
197  // everything.
198  IsComputed_ = false;
199 }
200 
201 template<class MatrixType, class LocalScalarType>
202 void TriDiContainer<MatrixType, LocalScalarType, true>::compute ()
203 {
205  ipiv_.size () != this->partitions_.size(), std::logic_error,
206  "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
207  "Please report this bug to the Ifpack2 developers.");
208 
209  IsComputed_ = false;
210  if (! this->isInitialized ()) {
211  this->initialize ();
212  }
213 
214  // Extract the submatrix.
215  extract ();
216  factor (); // factor the submatrix
217 
218  IsComputed_ = true;
219 }
220 
221 template<class MatrixType, class LocalScalarType>
222 void TriDiContainer<MatrixType, LocalScalarType, true>::clearBlocks ()
223 {
224  std::vector<HostViewLocal> empty1;
225  std::swap(empty1, X_local);
226  std::vector<HostViewLocal> empty2;
227  std::swap(empty2, Y_local);
228  Container<MatrixType>::clearBlocks();
229 }
230 
231 template<class MatrixType, class LocalScalarType>
232 void TriDiContainer<MatrixType, LocalScalarType, true>::factor ()
233 {
234  for(int i = 0; i < this->numBlocks_; i++)
235  {
237  int INFO = 0;
238  int* blockIpiv = (int*) ipiv_.getRawPtr() + this->partitionIndices_[i];
239  lapack.GTTRF (diagBlocks_[i].numRowsCols (),
240  diagBlocks_[i].DL(),
241  diagBlocks_[i].D(),
242  diagBlocks_[i].DU(),
243  diagBlocks_[i].DU2(),
244  blockIpiv, &INFO);
245  // INFO < 0 is a bug.
247  INFO < 0, std::logic_error, "Ifpack2::TriDiContainer::factor: "
248  "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
249  "incorrectly. INFO = " << INFO << " < 0. "
250  "Please report this bug to the Ifpack2 developers.");
251  // INFO > 0 means the matrix is singular. This is probably an issue
252  // either with the choice of rows the rows we extracted, or with the
253  // input matrix itself.
255  INFO > 0, std::runtime_error, "Ifpack2::TriDiContainer::factor: "
256  "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
257  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
258  "(one-based index i) is exactly zero. This probably means that the input "
259  "matrix has a singular diagonal block.");
260  }
261 }
262 
263 template<class MatrixType, class LocalScalarType>
264 void TriDiContainer<MatrixType, LocalScalarType, true>::
265 applyImpl (HostViewLocal& X,
266  HostViewLocal& Y,
267  int blockIndex,
268  int stride,
269  Teuchos::ETransp mode,
270  local_scalar_type alpha,
271  local_scalar_type beta) const
272 {
274  auto zero = STS::zero();
275  size_t numVecs = X.extent(1);
276  size_t numRows = X.extent(0);
277 
279  X.extent (0) != Y.extent (0),
280  std::logic_error, "Ifpack2::TriDiContainer::applyImpl: X and Y have "
281  "incompatible dimensions (" << X.extent (0) << " resp. "
282  << Y.extent (0) << "). Please report this bug to "
283  "the Ifpack2 developers.");
285  X.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
286  std::logic_error, "Ifpack2::TriDiContainer::applyImpl: The input "
287  "multivector X has incompatible dimensions from those of the "
288  "inverse operator (" << X.extent (0) << " vs. "
289  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols () : diagBlocks_[blockIndex].numRowsCols())
290  << "). Please report this bug to the Ifpack2 developers.");
292  Y.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
293  std::logic_error, "Ifpack2::TriDiContainer::applyImpl: The output "
294  "multivector Y has incompatible dimensions from those of the "
295  "inverse operator (" << Y.extent (0) << " vs. "
296  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols ())
297  << "). Please report this bug to the Ifpack2 developers.");
298 
299  if(alpha == zero) { // don't need to solve the linear system
300  if(beta == zero) {
301  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
302  // any Inf or NaN values in Y (rather than multiplying them by
303  // zero, resulting in NaN values).
304  for(size_t j = 0; j < Y.extent(1); j++)
305  for(size_t i = 0; i < Y.extent(0); i++)
306  Y(i, j) = zero;
307  }
308  else { // beta != 0
309  for(size_t j = 0; j < Y.extent(1); j++)
310  for(size_t i = 0; i < Y.extent(0); i++)
311  Y(i, j) *= beta;
312  }
313  }
314  else { // alpha != 0; must solve the linear system
316  // If beta is nonzero or Y is not constant stride, we have to use
317  // a temporary output multivector. It gets a copy of X, since
318  // GETRS overwrites its (multi)vector input with its output.
319  HostViewLocal Y_tmp("", numRows, numVecs);
320  Kokkos::deep_copy(Y_tmp, X);
321  scalar_type* Y_ptr = Y_tmp.data();
322  int INFO = 0;
323  const char trans =
324  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
325  int* blockIpiv = (int*) ipiv_.getRawPtr() + this->partitionIndices_[blockIndex];
326  lapack.GTTRS (trans,
327  diagBlocks_[blockIndex].numRowsCols(),
328  numVecs,
329  diagBlocks_[blockIndex].DL(),
330  diagBlocks_[blockIndex].D(),
331  diagBlocks_[blockIndex].DU(),
332  diagBlocks_[blockIndex].DU2(),
333  blockIpiv,
334  Y_ptr,
335  stride,
336  &INFO);
338  INFO != 0, std::runtime_error, "Ifpack2::TriDiContainer::applyImpl: "
339  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
340  "failed with INFO = " << INFO << " != 0.");
341 
342  if (beta != STS::zero ()) {
343  for(size_t j = 0; j < Y.extent(1); j++)
344  {
345  for(size_t i = 0; i < Y.extent(0); i++)
346  {
347  Y(i, j) *= beta;
348  Y(i, j) += alpha * Y_tmp(i, j);
349  }
350  }
351  }
352  else {
353  for(size_t j = 0; j < Y.extent(1); j++)
354  {
355  for(size_t i = 0; i < Y.extent(0); i++)
356  Y(i, j) = Y_tmp(i, j);
357  }
358  }
359  }
360 }
361 
362 template<class MatrixType, class LocalScalarType>
363 void TriDiContainer<MatrixType, LocalScalarType, true>::
364 apply (HostView& X,
365  HostView& Y,
366  int blockIndex,
367  int stride,
368  Teuchos::ETransp mode,
369  scalar_type alpha,
370  scalar_type beta) const
371 {
372  using Teuchos::ArrayView;
373  using Teuchos::as;
374  using Teuchos::RCP;
375  using Teuchos::rcp;
376 
377  // The local operator might have a different Scalar type than
378  // MatrixType. This means that we might have to convert X and Y to
379  // the Tpetra::MultiVector specialization that the local operator
380  // wants. This class' X_ and Y_ internal fields are of the right
381  // type for the local operator, so we can use those as targets.
382 
383  Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
384 
386  ! IsComputed_, std::runtime_error, "Ifpack2::TriDiContainer::apply: "
387  "You must have called the compute() method before you may call apply(). "
388  "You may call the apply() method as many times as you want after calling "
389  "compute() once, but you must have called compute() at least once.");
390 
391  const size_t numVecs = X.extent(1);
392 
393  if(numVecs == 0) {
394  return; // done! nothing to do
395  }
396 
397  // The local operator works on a permuted subset of the local parts
398  // of X and Y. The subset and permutation are defined by the index
399  // array returned by getLocalRows(). If the permutation is trivial
400  // and the subset is exactly equal to the local indices, then we
401  // could use the local parts of X and Y exactly, without needing to
402  // permute. Otherwise, we have to use temporary storage to permute
403  // X and Y. For now, we always use temporary storage.
404  //
405  // Create temporary permuted versions of the input and output.
406  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
407  // store the permuted versions of X resp. Y. Note that X_local has
408  // the domain Map of the operator, which may be a permuted subset of
409  // the local Map corresponding to X.getMap(). Similarly, Y_local
410  // has the range Map of the operator, which may be a permuted subset
411  // of the local Map corresponding to Y.getMap(). numRows_ here
412  // gives the number of rows in the row Map of the local Inverse_
413  // operator.
414  //
415  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
416  // here that the row Map and the range Map of that operator are
417  // the same.
418  //
419  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
420  // really belongs in Tpetra.
421 
422  if(X_local.size() == 0)
423  {
424  //create all X_local and Y_local managed Views at once, are
425  //reused in subsequent apply() calls
426  for(int i = 0; i < this->numBlocks_; i++)
427  {
428  X_local.emplace_back("", this->blockRows_[i], numVecs);
429  }
430  for(int i = 0; i < this->numBlocks_; i++)
431  {
432  Y_local.emplace_back("", this->blockRows_[i], numVecs);
433  }
434  }
435 
436  const ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
437 
438  mvgs.gatherViewToView (X_local[blockIndex], X, localRows);
439 
440  // We must gather the contents of the output multivector Y even on
441  // input to applyImpl(), since the inverse operator might use it as
442  // an initial guess for a linear solve. We have no way of knowing
443  // whether it does or does not.
444 
445  mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
446 
447  // Apply the local operator:
448  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
449  this->applyImpl (X_local[blockIndex], Y_local[blockIndex], blockIndex, stride, mode,
450  as<local_scalar_type>(alpha), as<local_scalar_type>(beta));
451 
452  // Scatter the permuted subset output vector Y_local back into the
453  // original output multivector Y.
454  mvgs.scatterViewToView (Y, Y_local[blockIndex], localRows);
455 }
456 
457 template<class MatrixType, class LocalScalarType>
458 void TriDiContainer<MatrixType, LocalScalarType, true>::
459 weightedApply (HostView& X,
460  HostView& Y,
461  HostView& D,
462  int blockIndex,
463  int stride,
464  Teuchos::ETransp mode,
465  scalar_type alpha,
466  scalar_type beta) const
467 {
468  using Teuchos::ArrayRCP;
469  using Teuchos::ArrayView;
470  using Teuchos::Range1D;
471  using Teuchos::Ptr;
472  using Teuchos::ptr;
473  using Teuchos::RCP;
474  using Teuchos::rcp;
475  using Teuchos::rcp_const_cast;
476  using std::cerr;
477  using std::endl;
479 
480  // The local operator template parameter might have a different
481  // Scalar type than MatrixType. This means that we might have to
482  // convert X and Y to the Tpetra::MultiVector specialization that
483  // the local operator wants. This class' X_ and Y_ internal fields
484  // are of the right type for the local operator, so we can use those
485  // as targets.
486 
487  Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
488 
489  size_t numRows = this->blockRows_[blockIndex];
490  size_t numVecs = X.extent(1);
491 
492  if(numVecs == 0) {
493  return; // done! nothing to do
494  }
495 
497  ! IsComputed_, std::runtime_error, "Ifpack2::TriDiContainer::"
498  "weightedApply: You must have called the compute() method before you may "
499  "call apply(). You may call the apply() method as many times as you want "
500  "after calling compute() once, but you must have called compute() at least "
501  "once.");
502 
503  // The local operator works on a permuted subset of the local parts
504  // of X and Y. The subset and permutation are defined by the index
505  // array returned by getLocalRows(). If the permutation is trivial
506  // and the subset is exactly equal to the local indices, then we
507  // could use the local parts of X and Y exactly, without needing to
508  // permute. Otherwise, we have to use temporary storage to permute
509  // X and Y. For now, we always use temporary storage.
510  //
511  // Ensure we have temporary permuted versions of the input and output.
512  // Initialize X_ and/or Y_ if necessary. We'll use them to
513  // store the permuted versions of X resp. Y. Note that X_local has
514  // the domain Map of the operator, which may be a permuted subset of
515  // the local Map corresponding to X.getMap(). Similarly, Y_local
516  // has the range Map of the operator, which may be a permuted subset
517  // of the local Map corresponding to Y.getMap(). numRows_ here
518  // gives the number of rows in the row Map of the local operator.
519  //
520  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
521  // here that the row Map and the range Map of that operator are
522  // the same.
523  //
524  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
525  // really belongs in Tpetra.
526  if(X_local.size() == 0)
527  {
528  //create all X_local and Y_local managed Views at once, are
529  //reused in subsequent apply() calls
530  for(int i = 0; i < this->numBlocks_; i++)
531  {
532  X_local.emplace_back("", this->blockRows_[i], numVecs);
533  }
534  for(int i = 0; i < this->numBlocks_; i++)
535  {
536  Y_local.emplace_back("", this->blockRows_[i], numVecs);
537  }
538  }
539 
540  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
541 
542  mvgs.gatherViewToView (X_local[blockIndex], X, localRows);
543 
544  // We must gather the output multivector Y even on input to
545  // applyImpl(), since the local operator might use it as an initial
546  // guess for a linear solve. We have no way of knowing whether it
547  // does or does not.
548 
549  mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
550 
551  // Apply the diagonal scaling D to the input X. It's our choice
552  // whether the result has the original input Map of X, or the
553  // permuted subset Map of X_local. If the latter, we also need to
554  // gather D into the permuted subset Map. We choose the latter, to
555  // save memory and computation. Thus, we do the following:
556  //
557  // 1. Gather D into a temporary vector D_local.
558  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
559  // 3. Compute X_scaled := diag(D_loca) * X_local.
560 
561  HostViewLocal D_local("", numVecs, numRows);
562 
563  mvgs.gatherViewToView (D_local, D, localRows);
564 
565  HostViewLocal X_scaled("", numVecs, numRows);
566 
567  for(size_t i = 0; i < X_scaled.extent(0); i++) {
568  for(size_t j = 0; j < X_scaled.extent(1); j++) {
569  X_scaled(i, j) = X_local[blockIndex](i, j) * D_local(0, j);
570  }
571  }
572 
573  // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
574  // can write the result of Inverse_->apply() directly to Y_local, so
575  // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
576  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
577  // different than Y_local.
578  HostViewLocal Y_temp("", Y.extent(0), Y.extent(1));
579 
580  // Apply the local operator: Y_tmp := M^{-1} * X_scaled
581  applyImpl(X_scaled, Y_temp, blockIndex, stride, mode, STS::one(), STS::zero());
582  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
583  //
584  // Note that we still use the permuted subset scaling D_local here,
585  // because Y_temp has the same permuted subset Map. That's good, in
586  // fact, because it's a subset: less data to read and multiply.
587  for(size_t i = 0; i < Y.extent(0); i++) {
588  for(size_t j = 0; j < Y.extent(1); j++) {
589  Y_local[blockIndex](i, j) *= beta;
590  Y_local[blockIndex](i, j) += alpha * D_local(i, 0) * Y_temp(i, j);
591  }
592  }
593 
594  // Copy the permuted subset output vector Y_local into the original
595  // output multivector Y.
596  mvgs.scatterViewToView (Y, Y_local[blockIndex], localRows);
597 }
598 
599 template<class MatrixType, class LocalScalarType>
600 std::ostream& TriDiContainer<MatrixType, LocalScalarType, true>::print(std::ostream& os) const
601 {
602  Teuchos::FancyOStream fos(Teuchos::rcp(&os,false));
603  fos.setOutputToRootOnly(0);
604  describe(fos);
605  return(os);
606 }
607 
608 template<class MatrixType, class LocalScalarType>
609 std::string TriDiContainer<MatrixType, LocalScalarType, true>::description() const
610 {
611  std::ostringstream oss;
613  if (isInitialized()) {
614  if (isComputed()) {
615  oss << "{status = initialized, computed";
616  }
617  else {
618  oss << "{status = initialized, not computed";
619  }
620  }
621  else {
622  oss << "{status = not initialized, not computed";
623  }
624 
625  oss << "}";
626  return oss.str();
627 }
628 
629 template<class MatrixType, class LocalScalarType>
630 void
631 TriDiContainer<MatrixType, LocalScalarType, true>::
632 describe (Teuchos::FancyOStream& os,
633  const Teuchos::EVerbosityLevel verbLevel) const
634 {
635  using std::endl;
636  if(verbLevel==Teuchos::VERB_NONE) return;
637  os << "================================================================================" << endl;
638  os << "Ifpack2::TriDiContainer" << endl;
639  os << "Number of blocks = " << this->numBlocks_ << endl;
640  os << "isInitialized() = " << IsInitialized_ << endl;
641  os << "isComputed() = " << IsComputed_ << endl;
642  os << "================================================================================" << endl;
643  os << endl;
644 }
645 
646 template<class MatrixType, class LocalScalarType>
647 void
648 TriDiContainer<MatrixType, LocalScalarType, true>::
649 extract ()
650 {
651  using Teuchos::Array;
652  using Teuchos::ArrayView;
653  using Teuchos::toString;
654  auto& A = *this->inputMatrix_;
655  const size_t inputMatrixNumRows = A.getNodeNumRows();
656  // We only use the rank of the calling process and the number of MPI
657  // processes for generating error messages. Extraction itself is
658  // entirely local to each participating MPI process.
659  const int myRank = A.getRowMap()->getComm()->getRank();
660  const int numProcs = A.getRowMap()->getComm()->getSize();
661 
662  // Sanity check that the local row indices to extract fall within
663  // the valid range of local row indices for the input matrix.
664  for(int i = 0; i < this->numBlocks_; i++)
665  {
666  const local_ordinal_type numRows_ = this->blockRows_[i];
667  Teuchos::ArrayView<const local_ordinal_type> localRows = this->getLocalRows(i);
668  for(local_ordinal_type j = 0; j < numRows_; j++)
669  {
671  localRows[j] < 0 ||
672  static_cast<size_t> (localRows[j]) >= inputMatrixNumRows,
673  std::runtime_error, "Ifpack2::TriDiContainer::extract: On process " <<
674  myRank << " of " << numProcs << ", localRows[j=" << j << "] = " <<
675  localRows[j] << ", which is out of the valid range of local row indices "
676  "indices [0, " << (inputMatrixNumRows - 1) << "] for the input matrix.");
677  }
678 
679  // Convert the local row indices we want into local column indices.
680  // For every local row ii_local = localRows[i] we take, we also want
681  // to take the corresponding column. To find the corresponding
682  // column, we use the row Map to convert the local row index
683  // ii_local into a global index ii_global, and then use the column
684  // Map to convert ii_global into a local column index jj_local. If
685  // the input matrix doesn't have a column Map, we need to be using
686  // global indices anyway...
687 
688  // We use the domain Map to exclude off-process global entries.
689  const map_type& globalRowMap = *(A.getRowMap());
690  const map_type& globalColMap = *(A.getColMap());
691  const map_type& globalDomMap = *(A.getDomainMap());
692 
693  bool rowIndsValid = true;
694  bool colIndsValid = true;
695  Array<local_ordinal_type> localCols (numRows_);
696  // For error messages, collect the sets of invalid row indices and
697  // invalid column indices. They are otherwise not useful.
698  Array<local_ordinal_type> invalidLocalRowInds;
699  Array<global_ordinal_type> invalidGlobalColInds;
700  for (local_ordinal_type j = 0; j < numRows_; j++)
701  {
702  // ii_local is the (local) row index we want to look up.
703  const local_ordinal_type ii_local = localRows[j];
704  // Find the global index jj_global corresponding to ii_local.
705  // Global indices are the same (rather, are required to be the
706  // same) in all three Maps, which is why we use jj (suggesting a
707  // column index, which is how we will use it below).
708  const global_ordinal_type jj_global = globalRowMap.getGlobalElement(ii_local);
710  {
711  // If ii_local is not a local index in the row Map on the
712  // calling process, that means localRows is incorrect. We've
713  // already checked for this in the constructor, but we might as
714  // well check again here, since it's cheap to do so (just an
715  // integer comparison, since we need jj_global anyway).
716  rowIndsValid = false;
717  invalidLocalRowInds.push_back(ii_local);
718  break;
719  }
720  // Exclude "off-process" entries: that is, those in the column Map
721  // on this process that are not in the domain Map on this process.
722  if(globalDomMap.isNodeGlobalElement (jj_global))
723  {
724  // jj_global is not an off-process entry. Look up its local
725  // index in the column Map; we want to extract this column index
726  // from the input matrix. If jj_global is _not_ in the column
727  // Map on the calling process, that could mean that the column
728  // in question is empty on this process. That would be bad for
729  // solving linear systems with the extract submatrix. We could
730  // solve the resulting singular linear systems in a minimum-norm
731  // least-squares sense, but for now we simply raise an exception.
732  const local_ordinal_type jj_local = globalColMap.getLocalElement(jj_global);
734  {
735  colIndsValid = false;
736  invalidGlobalColInds.push_back(jj_global);
737  break;
738  }
739  localCols[j] = jj_local;
740  }
741  }
743  !rowIndsValid, std::logic_error, "Ifpack2::TriDiContainer::extract: "
744  "On process " << myRank << ", at least one row index in the set of local "
745  "row indices given to the constructor is not a valid local row index in "
746  "the input matrix's row Map on this process. This should be impossible "
747  "because the constructor checks for this case. Here is the complete set "
748  "of invalid local row indices: " << toString (invalidLocalRowInds) << ". "
749  "Please report this bug to the Ifpack2 developers.");
751  !colIndsValid, std::runtime_error, "Ifpack2::TriDiContainer::extract: "
752  "On process " << myRank << ", "
753  "At least one row index in the set of row indices given to the constructor "
754  "does not have a corresponding column index in the input matrix's column "
755  "Map. This probably means that the column(s) in question is/are empty on "
756  "this process, which would make the submatrix to extract structurally "
757  "singular. Here is the compete set of invalid global column indices: "
758  << toString (invalidGlobalColInds) << ".");
759 
760  diagBlocks_[i].putScalar(Teuchos::ScalarTraits<local_scalar_type>::zero());
761  const size_t maxNumEntriesInRow = A.getNodeMaxNumRowEntries();
762  Array<scalar_type> val(maxNumEntriesInRow);
763  Array<local_ordinal_type> ind(maxNumEntriesInRow);
764 
765  const local_ordinal_type INVALID = Teuchos::OrdinalTraits<local_ordinal_type>::invalid();
766  for(local_ordinal_type j = 0; j < numRows_; j++)
767  {
768  const local_ordinal_type localRow = localRows[j];
769  size_t numEntries;
770  A.getLocalRowCopy(localRow, ind(), val(), numEntries);
771 
772  for(size_t k = 0; k < numEntries; k++)
773  {
774  const local_ordinal_type localCol = ind[k];
775 
776  // Skip off-process elements
777  //
778  // FIXME (mfh 24 Aug 2013) This assumes the following:
779  //
780  // 1. The column and row Maps begin with the same set of
781  // on-process entries, in the same order. That is,
782  // on-process row and column indices are the same.
783  // 2. All off-process indices in the column Map of the input
784  // matrix occur after that initial set.
785  if(localCol >= 0 && static_cast<size_t> (localCol) < inputMatrixNumRows)
786  {
787  // for local column IDs, look for each ID in the list
788  // of columns hosted by this object
789  local_ordinal_type jj = INVALID;
790  for (local_ordinal_type kk = 0; kk < numRows_; kk++)
791  {
792  if(localRows[kk] == localCol)
793  jj = kk;
794  }
795  if (jj != INVALID)
796  diagBlocks_[i](j, jj) += val[k];
797  }
798  }
799  }
800  }
801 }
802 
803 template<class MatrixType, class LocalScalarType>
804 std::string TriDiContainer<MatrixType, LocalScalarType, true>::getName()
805 {
806  return "TriDi";
807 }
808 
809 template<class MatrixType, class LocalScalarType>
810 TriDiContainer<MatrixType, LocalScalarType, false>::
811 TriDiContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
813  const Teuchos::RCP<const import_type>& importer,
814  int OverlapLevel,
815  scalar_type DampingFactor) :
816  Container<MatrixType> (matrix, partitions, importer, OverlapLevel,
817  DampingFactor)
818 {
820  (true, std::logic_error, "Ifpack2::TriDiContainer: Not implemented for "
821  "LocalScalarType = " << Teuchos::TypeNameTraits<LocalScalarType>::name ()
822  << ".");
823 }
824 
825 template<class MatrixType, class LocalScalarType>
826 TriDiContainer<MatrixType, LocalScalarType, false>::
827 TriDiContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
828  const Teuchos::Array<local_ordinal_type>& localRows) :
829  Container<MatrixType> (matrix, localRows)
830 {
832  (true, std::logic_error, "Ifpack2::TriDiContainer: Not implemented for "
833  "LocalScalarType = " << Teuchos::TypeNameTraits<LocalScalarType>::name ()
834  << ".");
835 }
836 
837 template<class MatrixType, class LocalScalarType>
838 TriDiContainer<MatrixType, LocalScalarType, false>::~TriDiContainer () {}
839 
840 template<class MatrixType, class LocalScalarType>
841 bool TriDiContainer<MatrixType, LocalScalarType, false>::isInitialized () const
842 {
843  return false;
844 }
845 
846 template<class MatrixType, class LocalScalarType>
847 bool TriDiContainer<MatrixType, LocalScalarType, false>::isComputed () const
848 {
849  return false;
850 }
851 
852 template<class MatrixType, class LocalScalarType>
853 void TriDiContainer<MatrixType, LocalScalarType, false>::
854 setParameters (const Teuchos::ParameterList& /* List */) {}
855 
856 template<class MatrixType, class LocalScalarType>
857 void TriDiContainer<MatrixType, LocalScalarType, false>::initialize () {}
858 
859 template<class MatrixType, class LocalScalarType>
860 void TriDiContainer<MatrixType, LocalScalarType, false>::compute () {}
861 
862 template<class MatrixType, class LocalScalarType>
863 void TriDiContainer<MatrixType, LocalScalarType, false>::clearBlocks () {}
864 
865 template<class MatrixType, class LocalScalarType>
866 void TriDiContainer<MatrixType, LocalScalarType, false>::factor () {}
867 
868 template<class MatrixType, class LocalScalarType>
869 void TriDiContainer<MatrixType, LocalScalarType, false>::
870 applyImpl (HostViewLocal& X,
871  HostViewLocal& Y,
872  int blockIndex,
873  int stride,
874  Teuchos::ETransp mode,
875  local_scalar_type alpha,
876  local_scalar_type beta) const {}
877 
878 template<class MatrixType, class LocalScalarType>
879 void TriDiContainer<MatrixType, LocalScalarType, false>::
880 apply (HostView& X,
881  HostView& Y,
882  int blockIndex,
883  int stride,
884  Teuchos::ETransp mode,
885  scalar_type alpha,
886  scalar_type beta) const {}
887 
888 template<class MatrixType, class LocalScalarType>
889 void TriDiContainer<MatrixType, LocalScalarType, false>::
890 weightedApply (HostView& X,
891  HostView& Y,
892  HostView& D,
893  int blockIndex,
894  int stride,
895  Teuchos::ETransp mode,
896  scalar_type alpha,
897  scalar_type beta) const {}
898 
899 template<class MatrixType, class LocalScalarType>
900 std::ostream& TriDiContainer<MatrixType, LocalScalarType, false>::print(std::ostream& os) const
901 {
902  return os;
903 }
904 
905 template<class MatrixType, class LocalScalarType>
906 std::string TriDiContainer<MatrixType, LocalScalarType, false>::description() const
907 {
908  return "";
909 }
910 
911 template<class MatrixType, class LocalScalarType>
912 void
913 TriDiContainer<MatrixType, LocalScalarType, false>::
914 describe (Teuchos::FancyOStream& os,
915  const Teuchos::EVerbosityLevel verbLevel) const {}
916 
917 template<class MatrixType, class LocalScalarType>
918 void
919 TriDiContainer<MatrixType, LocalScalarType, false>::
920 extract () {}
921 
922 template<class MatrixType, class LocalScalarType>
923 std::string TriDiContainer<MatrixType, LocalScalarType, false>::getName()
924 {
925  return "";
926 }
927 
928 #define IFPACK2_TRIDICONTAINER_INSTANT(S,LO,GO,N) \
929  template class Ifpack2::TriDiContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
930 
931 } // namespace Ifpack2
932 
933 #endif
Ifpack2::TriDiContainer class declaration.
void GTTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &nrhs, const ScalarType *dl, const ScalarType *d, const ScalarType *du, const ScalarType *du2, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
void GTTRF(const OrdinalType &n, ScalarType *dl, ScalarType *d, ScalarType *du, ScalarType *du2, OrdinalType *IPIV, OrdinalType *info) const
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
TEUCHOSCORE_LIB_DLL_EXPORT std::string toString(const EVerbosityLevel verbLevel)
TypeTo as(const TypeFrom &t)
std::string toString(const T &t)