Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_BandedContainer_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_BANDEDCONTAINER_DEF_HPP
44 #define IFPACK2_BANDEDCONTAINER_DEF_HPP
45 
46 #include "Teuchos_LAPACK.hpp"
47 #include "Tpetra_CrsMatrix.hpp"
48 #include <iostream>
49 #include <sstream>
50 
51 #ifdef HAVE_MPI
52 # include <mpi.h>
54 #else
55 # include "Teuchos_DefaultSerialComm.hpp"
56 #endif // HAVE_MPI
57 
58 namespace Ifpack2 {
59 
60 template<class MatrixType, class LocalScalarType>
61 BandedContainer<MatrixType, LocalScalarType, true>::
62 BandedContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
64  const Teuchos::RCP<const import_type>& importer,
65  int OverlapLevel,
66  scalar_type DampingFactor) :
67  Container<MatrixType>(matrix, partitions, importer, OverlapLevel, DampingFactor),
68  ipiv_(this->partitions_.size()),
69  kl_(this->numBlocks_, -1),
70  ku_(this->numBlocks_, -1),
71  scalars_(nullptr),
72  scalarOffsets_(this->numBlocks_)
73 {
75  ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::BandedContainer: "
76  "The constructor's input matrix must have a column Map.");
77 
78  // Check whether the input set of local row indices is correct.
79  const map_type& rowMap = * (matrix->getRowMap ());
80  for(int i = 0; i < this->numBlocks_; i++)
81  {
82  Teuchos::ArrayView<const local_ordinal_type> localRows = this->getLocalRows(i);
83  for(local_ordinal_type j = 0; j < this->blockRows_[i]; j++)
84  {
86  !rowMap.isNodeLocalElement(localRows[j]),
87  std::invalid_argument, "Ifpack2::BandedContainer: "
88  "On process " << rowMap.getComm ()->getRank () << " of "
89  << rowMap.getComm ()->getSize () << ", in the given set of local row "
90  "indices localRows = " << Teuchos::toString (localRows) << ", the following "
91  "entry is not valid local row indices on the calling process: "
92  << localRows[j] << ".");
93  }
94  }
95  IsInitialized_ = false;
96  IsComputed_ = false;
97 }
98 
99 template<class MatrixType, class LocalScalarType>
100 BandedContainer<MatrixType, LocalScalarType, true>::
101 BandedContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
102  const Teuchos::Array<local_ordinal_type>& localRows) :
103  Container<MatrixType>(matrix, localRows),
104  ipiv_(this->blockRows_[0]),
105  kl_(1, -1),
106  ku_(1, -1),
107  scalars_(nullptr),
108  scalarOffsets_(1, 0)
109 {
110  TEUCHOS_TEST_FOR_EXCEPTION(!matrix->hasColMap(), std::invalid_argument, "Ifpack2::BandedContainer: "
111  "The constructor's input matrix must have a column Map.");
112 
113  // Check whether the input set of local row indices is correct.
114  const map_type& rowMap = *(matrix->getRowMap());
115  for(local_ordinal_type j = 0; j < this->blockRows_[0]; j++)
116  {
118  !rowMap.isNodeLocalElement(localRows[j]),
119  std::invalid_argument, "Ifpack2::BandedContainer: "
120  "On process " << rowMap.getComm()->getRank() << " of "
121  << rowMap.getComm()->getSize() << ", in the given set of local row "
122  "indices localRows = " << Teuchos::toString(localRows) << ", the following "
123  "entry is not valid local row indices on the calling process: "
124  << localRows[j] << ".");
125  }
126  IsInitialized_ = false;
127  IsComputed_ = false;
128 }
129 
130 template<class MatrixType, class LocalScalarType>
131 BandedContainer<MatrixType, LocalScalarType, true>::
132 ~BandedContainer ()
133 {
134  if(scalars_)
135  delete[] scalars_;
136 }
137 
138 template<class MatrixType, class LocalScalarType>
139 void BandedContainer<MatrixType, LocalScalarType, true>::
140 setParameters (const Teuchos::ParameterList& List)
141 {
143  if(List.isParameter("relaxation: banded container superdiagonals"))
144  kl_[0] = List.get<int>("relaxation: banded container superdiagonals");
145  if(List.isParameter("relaxation: banded container subdiagonals"))
146  ku_[0] = List.get<int>("relaxation: banded container subdiagonals");
147 
148  for(local_ordinal_type b = 1; b < this->numBlocks_; b++)
149  {
150  kl_[b] = kl_[0];
151  ku_[b] = ku_[0];
152  }
153 
154  // The user provided insufficient information. If this is the case we check for the optimal values.
155  // User information may be overwritten only if necessary.
156  for(local_ordinal_type b = 0; b < this->numBlocks_; b++)
157  {
158  if (ku_[b] == -1 || kl_[b] == -1)
159  {
160  const Teuchos::ArrayView<const local_ordinal_type> localRows = this->getLocalRows(b);
161  const size_type numRows = localRows.size();
162 
163  // loop over local rows in current block
164  for(size_type i = 0; i < numRows; ++i)
165  {
168  this->inputMatrix_->getLocalRowView(localRows[i], indices, values);
169 
170  size_type min_col_it = numRows > 0 ? numRows - 1 : 0; // just a guess
171  size_type max_col_it = 0;
172 
173  size_type cntCols = 0;
174 
175  // loop over all column entries
176  for(size_type c = 0; c < indices.size(); c++)
177  {
178  const local_ordinal_type lColIdx = indices[c]; // current column idx
179  // check whether lColIdx is contained in localRows[]
180  for(size_type j = 0; j < numRows; j++)
181  {
182  if (localRows[j] == lColIdx)
183  {
184  if(localRows[min_col_it] > lColIdx)
185  min_col_it = j;
186  if(localRows[max_col_it] < lColIdx)
187  max_col_it = j;
188  cntCols++;
189  }
190  }
191  if(cntCols == numRows)
192  break; // skip remaining entries in column
193  }
194  ku_[b] = std::max(ku_[b], Teuchos::as<local_ordinal_type>(max_col_it - i));
195  kl_[b] = std::max(kl_[b], Teuchos::as<local_ordinal_type>(i - min_col_it));
196  }
197  }
199  (kl_[b] == -1 || ku_[b] == -1, std::invalid_argument,
200  "Ifpack2::BandedContainer::setParameters: the user must provide the number"
201  " of sub- and superdiagonals in the 'kl' and 'ku' parameters.");
202  }
203 }
204 
205 template<class MatrixType, class LocalScalarType>
206 void
207 BandedContainer<MatrixType, LocalScalarType, true>::
208 initialize ()
209 {
210  using Teuchos::null;
211  using Teuchos::rcp;
212  for(local_ordinal_type b = 0; b < this->numBlocks_; b++)
213  {
215  (kl_[b] == -1 || ku_[b] == -1, std::invalid_argument,
216  "Ifpack2::BandedContainer::initialize: the user must provide the number of"
217  " sub- and superdiagonals in the 'kl' and 'ku' parameters. Make sure that "
218  "you call BandedContainer<T>::setParameters!");
219  }
220  global_ordinal_type totalScalars = 0;
221  for(local_ordinal_type b = 0; b < this->numBlocks_; b++)
222  {
223  local_ordinal_type stride = 2 * kl_[b] + ku_[b] + 1;
224  scalarOffsets_[b] = totalScalars;
225  totalScalars += stride * this->blockRows_[b];
226  }
227  scalars_ = new local_scalar_type[totalScalars];
228  for(int b = 0; b < this->numBlocks_; b++)
229  {
230  local_ordinal_type nrows = this->blockRows_[b];
231  diagBlocks_.emplace_back(Teuchos::View, scalars_ + scalarOffsets_[b], 2 * kl_[b] + ku_[b] + 1, nrows, nrows, kl_[b], kl_[b] + ku_[b]);
232  diagBlocks_[b].putScalar(Teuchos::ScalarTraits<local_scalar_type>::zero());
233  }
234  // We assume that if you called this method, you intend to recompute
235  // everything.
236  IsInitialized_ = false;
237  IsComputed_ = false;
238  std::fill (ipiv_.begin (), ipiv_.end (), 0);
239  IsInitialized_ = true;
240 }
241 
242 template<class MatrixType, class LocalScalarType>
243 void
244 BandedContainer<MatrixType, LocalScalarType, true>::
245 compute ()
246 {
248  ipiv_.size () != this->partitions_.size(), std::logic_error,
249  "Ifpack2::BandedContainer::compute: ipiv_ array has the wrong size. "
250  "Please report this bug to the Ifpack2 developers.");
251 
252  IsComputed_ = false;
253  if (! this->isInitialized ()) {
254  this->initialize ();
255  }
256 
257  // Extract the submatrices from input matrix.
258  extract ();
259  factor (); // factor the submatrix
260 
261  IsComputed_ = true;
262 }
263 
264 template<class MatrixType, class LocalScalarType>
265 void
266 BandedContainer<MatrixType, LocalScalarType, true>::
267 clearBlocks ()
268 {
269  std::vector<HostViewLocal> empty1;
270  std::swap(empty1, X_local);
271  std::vector<HostViewLocal> empty2;
272  std::swap(empty2, Y_local);
273  Container<MatrixType>::clearBlocks ();
274 }
275 
276 template<class MatrixType, class LocalScalarType>
277 void
278 BandedContainer<MatrixType, LocalScalarType, true>::
279 factor ()
280 {
282  int INFO = 0;
283 
284  for(int i = 0; i < this->numBlocks_; i++)
285  {
286  // Plausibility checks for matrix
287  TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].values()==0, std::invalid_argument,
288  "BandedContainer<T>::factor: Diagonal block is an empty SerialBandDenseMatrix<T>!");
289  TEUCHOS_TEST_FOR_EXCEPTION(diagBlocks_[i].upperBandwidth() < diagBlocks_[i].lowerBandwidth(), std::invalid_argument,
290  "BandedContainer<T>::factor: Diagonal block needs kl additional superdiagonals for factorization! However, the number of superdiagonals is smaller than the number of subdiagonals!");
291  int* blockIpiv = &ipiv_[this->partitionIndices_[i]];
292  lapack.GBTRF (diagBlocks_[i].numRows(),
293  diagBlocks_[i].numCols(),
294  diagBlocks_[i].lowerBandwidth(),
295  diagBlocks_[i].upperBandwidth() - diagBlocks_[i].lowerBandwidth(), /* enter the real number of superdiagonals (see Teuchos_SerialBandDenseSolver)*/
296  diagBlocks_[i].values(),
297  diagBlocks_[i].stride(),
298  blockIpiv,
299  &INFO);
300 
301  // INFO < 0 is a bug.
303  INFO < 0, std::logic_error, "Ifpack2::BandedContainer::factor: "
304  "LAPACK's _GBTRF (LU factorization with partial pivoting) was called "
305  "incorrectly. INFO = " << INFO << " < 0. "
306  "Please report this bug to the Ifpack2 developers.");
307  // INFO > 0 means the matrix is singular. This is probably an issue
308  // either with the choice of rows the rows we extracted, or with the
309  // input matrix itself.
311  INFO > 0, std::runtime_error, "Ifpack2::BandedContainer::factor: "
312  "LAPACK's _GBTRF (LU factorization with partial pivoting) reports that the "
313  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
314  "(one-based index i) is exactly zero. This probably means that the input "
315  "matrix has a singular diagonal block.");
316  }
317 }
318 
319 template<class MatrixType, class LocalScalarType>
320 void
321 BandedContainer<MatrixType, LocalScalarType, true>::
322 applyImpl (HostViewLocal& X,
323  HostViewLocal& Y,
324  int blockIndex,
325  int stride,
326  Teuchos::ETransp mode,
327  const local_scalar_type alpha,
328  const local_scalar_type beta) const
329 {
330  using Teuchos::ArrayRCP;
331  using Teuchos::Ptr;
332  using Teuchos::ptr;
333  using Teuchos::RCP;
334  using Teuchos::rcp;
335  using Teuchos::rcpFromRef;
336 
338  X.extent (0) != Y.extent (0),
339  std::logic_error, "Ifpack2::BandedContainer::applyImpl: X and Y have "
340  "incompatible dimensions (" << X.extent (0) << " resp. "
341  << Y.extent (0) << "). Please report this bug to "
342  "the Ifpack2 developers.");
344  X.extent (0) != static_cast<size_t> (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows()),
345  std::logic_error, "Ifpack2::BandedContainer::applyImpl: The input "
346  "multivector X has incompatible dimensions from those of the "
347  "inverse operator (" << X.extent (0) << " vs. "
348  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numCols() : diagBlocks_[blockIndex].numRows())
349  << "). Please report this bug to the Ifpack2 developers.");
351  Y.extent (0) != static_cast<size_t> (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols()),
352  std::logic_error, "Ifpack2::BandedContainer::applyImpl: The output "
353  "multivector Y has incompatible dimensions from those of the "
354  "inverse operator (" << Y.extent (0) << " vs. "
355  << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRows() : diagBlocks_[blockIndex].numCols())
356  << "). Please report this bug to the Ifpack2 developers.");
357 
358  size_t numVecs = (int) X.extent (1);
359 
361  if (alpha == zero) { // don't need to solve the linear system
362  if (beta == zero) {
363  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
364  // any Inf or NaN values in Y (rather than multiplying them by
365  // zero, resulting in NaN values).
366  for(size_t j = 0; j < Y.extent(0); j++)
367  for(size_t i = 0; i < Y.extent(1); i++)
368  Y(i, j) = zero;
369  }
370  else { // beta != 0
371  for(size_t j = 0; j < Y.extent(0); j++)
372  for(size_t i = 0; i < Y.extent(1); i++)
373  Y(i, j) = beta * (local_impl_scalar_type) Y(i, j);
374  }
375  }
376  else { // alpha != 0; must solve the linear system
378  // If beta is nonzero or Y is not constant stride, we have to use
379  // a temporary output multivector. It gets a copy of X, since
380  // GBTRS overwrites its (multi)vector input with its output.
381  Ptr<HostViewLocal> Y_tmp;
382  bool deleteYT = false;
383  if(beta == zero) {
384  Y = X;
385  Y_tmp = ptr(&Y);
386  }
387  else {
388  Y_tmp = ptr (new HostViewLocal ("", X.extent (0), X.extent (1))); // constructor copies X
389  deleteYT = true;
390  Kokkos::deep_copy(*Y_tmp, X);
391  }
392 
393  local_scalar_type* const Y_ptr = (local_scalar_type*) Y_tmp->data();
394 
395  int INFO = 0;
396  const char trans =(mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
397 
398  const int* blockIpiv = &ipiv_[this->partitionIndices_[blockIndex]];
399  lapack.GBTRS(trans,
400  diagBlocks_[blockIndex].numCols(),
401  diagBlocks_[blockIndex].lowerBandwidth(),
402  /* enter the real number of superdiagonals (see Teuchos_SerialBandDenseSolver)*/
403  diagBlocks_[blockIndex].upperBandwidth() - diagBlocks_[blockIndex].lowerBandwidth(),
404  numVecs,
405  diagBlocks_[blockIndex].values(),
406  diagBlocks_[blockIndex].stride(),
407  blockIpiv,
408  Y_ptr, stride, &INFO);
409 
411  INFO != 0, std::runtime_error, "Ifpack2::BandedContainer::applyImpl: "
412  "LAPACK's _GBTRS (solve using LU factorization with partial pivoting) "
413  "failed with INFO = " << INFO << " != 0.");
414 
415  if (beta != zero) {
416  for(size_t j = 0; j < Y.extent(1); j++)
417  for(size_t i = 0; i < Y.extent(0); i++)
418  Y(i, j) = beta * Y(i, j) + alpha * (*Y_tmp)(i, j);
419  }
420  if(deleteYT)
421  delete Y_tmp.get();
422  }
423 }
424 
425 template<class MatrixType, class LocalScalarType>
426 void
427 BandedContainer<MatrixType, LocalScalarType, true>::
428 apply (HostView& X,
429  HostView& Y,
430  int blockIndex,
431  int stride,
432  Teuchos::ETransp mode,
433  scalar_type alpha,
434  scalar_type beta) const
435 {
436  using Teuchos::ArrayView;
437  using Teuchos::as;
438  using Teuchos::RCP;
439  using Teuchos::rcp;
440 
441  // The local operator might have a different Scalar type than
442  // MatrixType. This means that we might have to convert X and Y to
443  // the Tpetra::MultiVector specialization that the local operator
444  // wants. This class' X_ and Y_ internal fields are of the right
445  // type for the local operator, so we can use those as targets.
446 
447  // Tpetra::MultiVector specialization corresponding to MatrixType.
448  Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
449  const size_t numVecs = X.extent(1);
450 
452  ! IsComputed_, std::runtime_error, "Ifpack2::BandedContainer::apply: "
453  "You must have called the compute() method before you may call apply(). "
454  "You may call the apply() method as many times as you want after calling "
455  "compute() once, but you must have called compute() at least once.");
457  X.extent(1) != Y.extent(1), std::runtime_error,
458  "Ifpack2::BandedContainer::apply: X and Y have different numbers of "
459  "vectors. X has " << X.extent(1)
460  << ", but Y has " << Y.extent(1) << ".");
461 
462  if (numVecs == 0) {
463  return; // done! nothing to do
464  }
465 
466  // The local operator works on a permuted subset of the local parts
467  // of X and Y. The subset and permutation are defined by the index
468  // array returned by getLocalRows(). If the permutation is trivial
469  // and the subset is exactly equal to the local indices, then we
470  // could use the local parts of X and Y exactly, without needing to
471  // permute. Otherwise, we have to use temporary storage to permute
472  // X and Y. For now, we always use temporary storage.
473  //
474  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
475  // here that the row Map and the range Map of that operator are
476  // the same.
477  //
478  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
479  // really belongs in Tpetra.
480 
481  if(X_local.size() == 0)
482  {
483  //create all X_local and Y_local managed Views at once, are
484  //reused in subsequent apply() calls
485  for(int i = 0; i < this->numBlocks_; i++)
486  {
487  X_local.emplace_back("", this->blockRows_[i], numVecs);
488  }
489  for(int i = 0; i < this->numBlocks_; i++)
490  {
491  Y_local.emplace_back("", this->blockRows_[i], numVecs);
492  }
493  }
494 
495  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
496 
497  mvgs.gatherViewToView(X_local[blockIndex], X, localRows);
498 
499  // We must gather the contents of the output multivector Y even on
500  // input to applyImpl(), since the inverse operator might use it as
501  // an initial guess for a linear solve. We have no way of knowing
502  // whether it does or does not.
503 
504  mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
505 
506  // Apply the local operator:
507  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
508  this->applyImpl (X_local[blockIndex], Y_local[blockIndex], blockIndex, stride, mode, as<local_scalar_type>(alpha),
509  as<local_scalar_type>(beta));
510 
511  // Scatter the permuted subset output vector Y_local back into the
512  // original output multivector Y.
513  mvgs.scatterViewToView(Y, Y_local[blockIndex], localRows);
514 }
515 
516 template<class MatrixType, class LocalScalarType>
517 void
518 BandedContainer<MatrixType, LocalScalarType, true>::
519 weightedApply (HostView& /* X */,
520  HostView& /* Y */,
521  HostView& /* D */,
522  int /* blockIndex */,
523  int /* stride */,
524  Teuchos::ETransp /* mode */,
525  scalar_type /* alpha */,
526  scalar_type /* beta */) const
527 {
528  using Teuchos::ArrayRCP;
529  using Teuchos::ArrayView;
530  using Teuchos::Range1D;
531  using Teuchos::Ptr;
532  using Teuchos::ptr;
533  using Teuchos::RCP;
534  using Teuchos::rcp;
535  using Teuchos::rcp_const_cast;
536  using std::cerr;
537  using std::endl;
538 
540  true, std::runtime_error, "Ifpack2::BandedContainer::"
541  "weightedApply: This code is not tested and not used. Expect bugs.");
542 
543  // mfh 09 Aug 2017: Comment out unreachable code to prevent compiler warning.
544 #if 0
545 
546  // The local operator template parameter might have a different
547  // Scalar type than MatrixType. This means that we might have to
548  // convert X and Y to the Tpetra::MultiVector specialization that
549  // the local operator wants. This class' X_ and Y_ internal fields
550  // are of the right type for the local operator, so we can use those
551  // as targets.
552 
555  // typedef Tpetra::Vector<local_scalar_type, local_ordinal_type, global_ordinal_type, node_type> LV; // unused
556 
557  Details::MultiVectorLocalGatherScatter<mv_type, local_mv_type> mvgs;
558  const size_t numVecs = X.extent(1);
559 
561  ! IsComputed_, std::runtime_error, "Ifpack2::BandedContainer::"
562  "weightedApply: You must have called the compute() method before you may "
563  "call apply(). You may call the apply() method as many times as you want "
564  "after calling compute() once, but you must have called compute() at least "
565  "once.");
567  numVecs != Y.extent(1), std::runtime_error,
568  "Ifpack2::BandedContainer::weightedApply: X and Y have different numbers "
569  "of vectors. X has " << X.extent(1) << ", but Y has "
570  << Y.extent(1) << ".");
571 
572  if (numVecs == 0) {
573  return; // done! nothing to do
574  }
575 
576  // The local operator works on a permuted subset of the local parts
577  // of X and Y. The subset and permutation are defined by the index
578  // array returned by getLocalRows(). If the permutation is trivial
579  // and the subset is exactly equal to the local indices, then we
580  // could use the local parts of X and Y exactly, without needing to
581  // permute. Otherwise, we have to use temporary storage to permute
582  // X and Y. For now, we always use temporary storage.
583  //
584  // Create temporary permuted versions of the input and output.
585  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
586  // store the permuted versions of X resp. Y. Note that X_local has
587  // the domain Map of the operator, which may be a permuted subset of
588  // the local Map corresponding to X.getMap(). Similarly, Y_local
589  // has the range Map of the operator, which may be a permuted subset
590  // of the local Map corresponding to Y.getMap(). numRows_ here
591  // gives the number of rows in the row Map of the local operator.
592  //
593  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
594  // here that the row Map and the range Map of that operator are
595  // the same.
596  //
597  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
598  // really belongs in Tpetra.
599 
600  const size_t numRows = this->blockRows_[blockIndex];
601 
602  if(X_local.size() == 0)
603  {
604  //create all X_local and Y_local managed Views at once, are
605  //reused in subsequent apply() calls
606  for(int i = 0; i < this->numBlocks_; i++)
607  {
608  X_local.emplace_back("", this->blockRows_[i], numVecs);
609  }
610  for(int i = 0; i < this->numBlocks_; i++)
611  {
612  Y_local.emplace_back("", this->blockRows_[i], numVecs);
613  }
614  }
615 
616  HostViewLocal D_local("", numRows, 1);
617  HostViewLocal X_scaled("", numRows, numVecs);
618 
619  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
620  mvgs.gatherViewToView (X_local[blockIndex], X, localRows);
621 
622  // We must gather the output multivector Y even on input to
623  // applyImpl(), since the local operator might use it as an initial
624  // guess for a linear solve. We have no way of knowing whether it
625  // does or does not.
626 
627  mvgs.gatherViewToView (Y_local[blockIndex], Y, localRows);
628 
629  // Apply the diagonal scaling D to the input X. It's our choice
630  // whether the result has the original input Map of X, or the
631  // permuted subset Map of X_local. If the latter, we also need to
632  // gather D into the permuted subset Map. We choose the latter, to
633  // save memory and computation. Thus, we do the following:
634  //
635  // 1. Gather D into a temporary vector D_local.
636  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
637  // 3. Compute X_scaled := diag(D_loca) * X_local.
638 
639  mvgs.gatherViewToView (D_local, D, localRows);
640 
641  for(size_t j = 0; j < numVecs; j++)
642  for(size_t i = 0; i < numRows; i++)
643  X_scaled(i, j) = X_local[blockIndex](i, j) * D_local(i, 0);
644 
645  // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
646  // can write the result of Inverse_->apply() directly to Y_local, so
647  // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
648  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
649  // different than Y_local.
650  Ptr<HostViewLocal> Y_temp;
651  bool deleteYT = false;
652  if(beta == zero)
653  Y_temp = ptr(&Y_local[blockIndex]);
654  else
655  {
656  Y_temp = ptr(new HostViewLocal("", numRows, numVecs));
657  deleteYT = true;
658  }
659 
660  // Apply the local operator: Y_temp := M^{-1} * X_scaled
661  applyImpl(X_scaled, *Y_temp, blockIndex, stride, mode, one, one);
662  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
663  //
664  // Note that we still use the permuted subset scaling D_local here,
665  // because Y_temp has the same permuted subset Map. That's good, in
666  // fact, because it's a subset: less data to read and multiply.
667 
668  for(size_t j = 0; j < numVecs; j++)
669  for(size_t i = 0; i < numRows; i++)
670  Y_local[blockIndex](i, j) = Y_local[blockIndex](i, j) * (local_impl_scalar_type) beta + (local_impl_scalar_type) alpha * (*Y_temp)(i, j) * D_local(i, 0);
671 
672  if(deleteYT)
673  delete Y_temp.get();
674 
675  // Copy the permuted subset output vector Y_local into the original
676  // output multivector Y.
677  mvgs.scatterViewToView (Y, Y_local[blockIndex], localRows);
678 
679 #endif // 0
680 }
681 
682 template<class MatrixType, class LocalScalarType>
683 std::ostream&
684 BandedContainer<MatrixType, LocalScalarType, true>::
685 print (std::ostream& os) const
686 {
687  Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os));
688  fos.setOutputToRootOnly (0);
689  describe (fos);
690  return os;
691 }
692 
693 template<class MatrixType, class LocalScalarType>
694 std::string
695 BandedContainer<MatrixType, LocalScalarType, true>::
696 description () const
697 {
698  std::ostringstream oss;
700  if (isInitialized()) {
701  if (isComputed()) {
702  oss << "{status = initialized, computed";
703  }
704  else {
705  oss << "{status = initialized, not computed";
706  }
707  }
708  else {
709  oss << "{status = not initialized, not computed";
710  }
711  oss << "}";
712  return oss.str();
713 }
714 
715 template<class MatrixType, class LocalScalarType>
716 void
717 BandedContainer<MatrixType, LocalScalarType, true>::
718 describe (Teuchos::FancyOStream& os,
719  const Teuchos::EVerbosityLevel verbLevel) const
720 {
721  if(verbLevel==Teuchos::VERB_NONE) return;
722  os << "================================================================================" << std::endl;
723  os << "Ifpack2::BandedContainer" << std::endl;
724  for(int i = 0; i < this->numBlocks_; i++)
725  {
726  os << "Block " << i << ": Number of rows = " << this->blockRows_[i] << std::endl;
727  os << "Block " << i << ": Number of subdiagonals = " << diagBlocks_[i].lowerBandwidth() << std::endl;
728  os << "Block " << i << ": Number of superdiagonals = " << diagBlocks_[i].upperBandwidth() << std::endl;
729  }
730  os << "isInitialized() = " << IsInitialized_ << std::endl;
731  os << "isComputed() = " << IsComputed_ << std::endl;
732  os << "================================================================================" << std::endl;
733  os << std::endl;
734 }
735 
736 template<class MatrixType, class LocalScalarType>
737 void
738 BandedContainer<MatrixType, LocalScalarType, true>::
739 extract ()
740 {
741  using Teuchos::Array;
742  using Teuchos::ArrayView;
743  using Teuchos::toString;
744  auto& A = *this->inputMatrix_;
745  const size_t inputMatrixNumRows = A.getNodeNumRows ();
746  // We only use the rank of the calling process and the number of MPI
747  // processes for generating error messages. Extraction itself is
748  // entirely local to each participating MPI process.
749  const int myRank = A.getRowMap()->getComm()->getRank();
750  const int numProcs = A.getRowMap()->getComm()->getSize();
751 
752  for(int blockIndex = 0; blockIndex < this->numBlocks_; blockIndex++)
753  {
754  const local_ordinal_type numRows_ = this->blockRows_[blockIndex];
755  // Sanity check that the local row indices to extract fall within
756  // the valid range of local row indices for the input matrix.
757  ArrayView<const local_ordinal_type> localRows = this->getLocalRows(blockIndex);
758  for(local_ordinal_type j = 0; j < numRows_; j++)
759  {
761  localRows[j] < 0 ||
762  static_cast<size_t> (localRows[j]) >= inputMatrixNumRows,
763  std::runtime_error, "Ifpack2::BandedContainer::extract: On process " <<
764  myRank << " of " << numProcs << ", localRows[j=" << j << "] = " <<
765  localRows[j] << ", which is out of the valid range of local row indices "
766  "indices [0, " << (inputMatrixNumRows - 1) << "] for the input matrix.");
767  }
768 
769  // Convert the local row indices we want into local column indices.
770  // For every local row ii_local = localRows[i] we take, we also want
771  // to take the corresponding column. To find the corresponding
772  // column, we use the row Map to convert the local row index
773  // ii_local into a global index ii_global, and then use the column
774  // Map to convert ii_global into a local column index jj_local. If
775  // the input matrix doesn't have a column Map, we need to be using
776  // global indices anyway...
777 
778  // We use the domain Map to exclude off-process global entries.
779  const map_type& globalRowMap = *(A.getRowMap ());
780  const map_type& globalColMap = *(A.getColMap ());
781  const map_type& globalDomMap = *(A.getDomainMap ());
782 
783  bool rowIndsValid = true;
784  bool colIndsValid = true;
785  Array<local_ordinal_type> localCols (numRows_);
786  // For error messages, collect the sets of invalid row indices and
787  // invalid column indices. They are otherwise not useful.
788  Array<local_ordinal_type> invalidLocalRowInds;
789  Array<global_ordinal_type> invalidGlobalColInds;
790  for(local_ordinal_type i = 0; i < numRows_; i++)
791  {
792  // ii_local is the (local) row index we want to look up.
793  const local_ordinal_type ii_local = localRows[i];
794  // Find the global index jj_global corresponding to ii_local.
795  // Global indices are the same (rather, are required to be the
796  // same) in all three Maps, which is why we use jj (suggesting a
797  // column index, which is how we will use it below).
798  const global_ordinal_type jj_global = globalRowMap.getGlobalElement(ii_local);
800  {
801  // If ii_local is not a local index in the row Map on the
802  // calling process, that means localRows is incorrect. We've
803  // already checked for this in the constructor, but we might as
804  // well check again here, since it's cheap to do so (just an
805  // integer comparison, since we need jj_global anyway).
806  rowIndsValid = false;
807  invalidLocalRowInds.push_back(ii_local);
808  break;
809  }
810  // Exclude "off-process" entries: that is, those in the column Map
811  // on this process that are not in the domain Map on this process.
812  if(globalDomMap.isNodeGlobalElement(jj_global))
813  {
814  // jj_global is not an off-process entry. Look up its local
815  // index in the column Map; we want to extract this column index
816  // from the input matrix. If jj_global is _not_ in the column
817  // Map on the calling process, that could mean that the column
818  // in question is empty on this process. That would be bad for
819  // solving linear systems with the extract submatrix. We could
820  // solve the resulting singular linear systems in a minimum-norm
821  // least-squares sense, but for now we simply raise an exception.
822  const local_ordinal_type jj_local = globalColMap.getLocalElement(jj_global);
824  {
825  colIndsValid = false;
826  invalidGlobalColInds.push_back(jj_global);
827  break;
828  }
829  localCols[i] = jj_local;
830  }
831  }
833  ! rowIndsValid, std::logic_error, "Ifpack2::BandedContainer::extract: "
834  "On process " << myRank << ", at least one row index in the set of local "
835  "row indices given to the constructor is not a valid local row index in "
836  "the input matrix's row Map on this process. This should be impossible "
837  "because the constructor checks for this case. Here is the complete set "
838  "of invalid local row indices: " << toString(invalidLocalRowInds) << ". "
839  "Please report this bug to the Ifpack2 developers.");
841  ! colIndsValid, std::runtime_error, "Ifpack2::BandedContainer::extract: "
842  "On process " << myRank << ", "
843  "At least one row index in the set of row indices given to the constructor "
844  "does not have a corresponding column index in the input matrix's column "
845  "Map. This probably means that the column(s) in question is/are empty on "
846  "this process, which would make the submatrix to extract structurally "
847  "singular. Here is the compete set of invalid global column indices: "
848  << toString(invalidGlobalColInds) << ".");
849 
850  const size_t maxNumEntriesInRow = A.getNodeMaxNumRowEntries();
851  Array<scalar_type> val(maxNumEntriesInRow);
852  Array<local_ordinal_type> ind(maxNumEntriesInRow);
853 
854  const local_ordinal_type INVALID = Teuchos::OrdinalTraits<local_ordinal_type>::invalid();
855  for (local_ordinal_type i = 0; i < numRows_; i++)
856  {
857  const local_ordinal_type localRow = this->partitions_[this->partitionIndices_[blockIndex] + i];
858  size_t numEntries;
859  A.getLocalRowCopy(localRow, ind(), val(), numEntries);
860  for (size_t k = 0; k < numEntries; ++k)
861  {
862  const local_ordinal_type localCol = ind[k];
863  // Skip off-process elements
864  //
865  // FIXME (mfh 24 Aug 2013) This assumes the following:
866  //
867  // 1. The column and row Maps begin with the same set of
868  // on-process entries, in the same order. That is,
869  // on-process row and column indices are the same.
870  // 2. All off-process indices in the column Map of the input
871  // matrix occur after that initial set.
872  if(localCol >= 0 && static_cast<size_t>(localCol) < inputMatrixNumRows)
873  {
874  // for local column IDs, look for each ID in the list
875  // of columns hosted by this object
876  local_ordinal_type jj = INVALID;
877  for (size_t kk = 0; kk < (size_t) numRows_; kk++)
878  {
879  if(localRows[kk] == localCol)
880  jj = kk;
881  }
882  if (jj != INVALID)
883  diagBlocks_[blockIndex](i, jj) += val[k]; // ???
884  }
885  }
886  }
887  }
888 }
889 
890 template<class MatrixType, class LocalScalarType>
891 std::string BandedContainer<MatrixType, LocalScalarType, true>::getName()
892 {
893  return "Banded";
894 }
895 
896 template<class MatrixType, class LocalScalarType>
897 BandedContainer<MatrixType, LocalScalarType, false>::
898 BandedContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
900  const Teuchos::RCP<const import_type>& importer,
901  int OverlapLevel,
902  scalar_type DampingFactor) :
903  Container<MatrixType>(matrix, partitions, importer, OverlapLevel, DampingFactor)
904 {
906  (true, std::logic_error, "Ifpack2::BandedContainer: Not implemented for "
907  "LocalScalarType = " << Teuchos::TypeNameTraits<LocalScalarType>::name ()
908  << ".");
909 }
910 
911 template<class MatrixType, class LocalScalarType>
912 BandedContainer<MatrixType, LocalScalarType, false>::
913 BandedContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
914  const Teuchos::Array<local_ordinal_type>& localRows) :
915  Container<MatrixType>(matrix, localRows)
916 {
918  (true, std::logic_error, "Ifpack2::BandedContainer: Not implemented for "
919  "LocalScalarType = " << Teuchos::TypeNameTraits<LocalScalarType>::name ()
920  << ".");
921 }
922 
923 template<class MatrixType, class LocalScalarType>
924 BandedContainer<MatrixType, LocalScalarType, false>::
925 ~BandedContainer () {}
926 
927 template<class MatrixType, class LocalScalarType>
928 void BandedContainer<MatrixType, LocalScalarType, false>::
929 setParameters (const Teuchos::ParameterList& List) {}
930 
931 template<class MatrixType, class LocalScalarType>
932 void
933 BandedContainer<MatrixType, LocalScalarType, false>::
934 initialize () {}
935 
936 template<class MatrixType, class LocalScalarType>
937 void
938 BandedContainer<MatrixType, LocalScalarType, false>::
939 compute () {}
940 
941 template<class MatrixType, class LocalScalarType>
942 void
943 BandedContainer<MatrixType, LocalScalarType, false>::
944 clearBlocks () {}
945 
946 template<class MatrixType, class LocalScalarType>
947 void
948 BandedContainer<MatrixType, LocalScalarType, false>::
949 factor () {}
950 
951 template<class MatrixType, class LocalScalarType>
952 void
953 BandedContainer<MatrixType, LocalScalarType, false>::
954 applyImpl (HostViewLocal& X,
955  HostViewLocal& Y,
956  int blockIndex,
957  int stride,
958  Teuchos::ETransp mode,
959  const local_scalar_type alpha,
960  const local_scalar_type beta) const {}
961 
962 template<class MatrixType, class LocalScalarType>
963 void
964 BandedContainer<MatrixType, LocalScalarType, false>::
965 apply (HostView& X,
966  HostView& Y,
967  int blockIndex,
968  int stride,
969  Teuchos::ETransp mode,
970  scalar_type alpha,
971  scalar_type beta) const {}
972 
973 template<class MatrixType, class LocalScalarType>
974 void
975 BandedContainer<MatrixType, LocalScalarType, false>::
976 weightedApply (HostView& X,
977  HostView& Y,
978  HostView& D,
979  int blockIndex,
980  int stride,
981  Teuchos::ETransp mode,
982  scalar_type alpha,
983  scalar_type beta) const {}
984 
985 template<class MatrixType, class LocalScalarType>
986 std::ostream&
987 BandedContainer<MatrixType, LocalScalarType, false>::
988 print (std::ostream& os) const
989 {
990  return os;
991 }
992 
993 template<class MatrixType, class LocalScalarType>
994 std::string
995 BandedContainer<MatrixType, LocalScalarType, false>::
996 description () const
997 {
998  return "";
999 }
1000 
1001 template<class MatrixType, class LocalScalarType>
1002 void
1003 BandedContainer<MatrixType, LocalScalarType, false>::
1004 describe (Teuchos::FancyOStream& os,
1005  const Teuchos::EVerbosityLevel verbLevel) const {}
1006 
1007 template<class MatrixType, class LocalScalarType>
1008 void
1009 BandedContainer<MatrixType, LocalScalarType, false>::
1010 extract () {}
1011 
1012 template<class MatrixType, class LocalScalarType>
1013 std::string BandedContainer<MatrixType, LocalScalarType, false>::getName()
1014 {
1015  return "";
1016 }
1017 
1018 } // namespace Ifpack2
1019 
1020 #define IFPACK2_BANDEDCONTAINER_INSTANT(S,LO,GO,N) \
1021  template class Ifpack2::BandedContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
1022 
1023 #endif // IFPACK2_BANDEDCONTAINER_HPP
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
void GBTRF(const OrdinalType &m, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual std::string description() const
TEUCHOSCORE_LIB_DLL_EXPORT std::string toString(const EVerbosityLevel verbLevel)
void GBTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
TypeTo as(const TypeFrom &t)
std::string toString(const T &t)