Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Experimental_RBILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45 
46 #include "Tpetra_BlockMultiVector.hpp"
47 #include "Tpetra_BlockView.hpp"
48 #include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
49 #include "Ifpack2_OverlappingRowMatrix.hpp"
50 #include "Ifpack2_Details_getCrsMatrix.hpp"
51 #include "Ifpack2_LocalFilter.hpp"
52 #include "Ifpack2_Utilities.hpp"
53 #include "Ifpack2_RILUK.hpp"
54 #include "KokkosSparse_trsv.hpp"
55 
56 //#define IFPACK2_RBILUK_INITIAL
57 //#define IFPACK2_RBILUK_INITIAL_NOKK
58 
59 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
60 #include "KokkosBatched_Gemm_Decl.hpp"
61 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
62 #include "KokkosBatched_Util.hpp"
63 #endif
64 
65 namespace Ifpack2 {
66 
67 namespace Experimental {
68 
69 namespace
70 {
71 template<class MatrixType>
72 struct LocalRowHandler
73 {
74  using LocalOrdinal = typename MatrixType::local_ordinal_type;
75  using row_matrix_type = Tpetra::RowMatrix<
76  typename MatrixType::scalar_type,
77  LocalOrdinal,
78  typename MatrixType::global_ordinal_type,
79  typename MatrixType::node_type>;
80  using inds_type = typename row_matrix_type::local_inds_host_view_type;
81  using vals_type = typename row_matrix_type::values_host_view_type;
82 
83  LocalRowHandler(Teuchos::RCP<const row_matrix_type> A)
84  : A_(std::move(A))
85  {
86  if (!A_->supportsRowViews())
87  {
88  const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
89  const auto blockSize = A_->getBlockSize();
90  ind_nc_ = inds_type_nc("Ifpack2::RBILUK::LocalRowHandler::indices",maxNumRowEntr);
91  val_nc_ = vals_type_nc("Ifpack2::RBILUK::LocalRowHandler::values",maxNumRowEntr*blockSize*blockSize);
92  }
93  }
94 
95  void getLocalRow(LocalOrdinal local_row, inds_type & InI, vals_type & InV, LocalOrdinal & NumIn)
96  {
97  if (A_->supportsRowViews())
98  {
99  A_->getLocalRowView(local_row,InI,InV);
100  NumIn = (LocalOrdinal)InI.size();
101  }
102  else
103  {
104  size_t cnt;
105  A_->getLocalRowCopy(local_row,ind_nc_,val_nc_,cnt);
106  InI = ind_nc_;
107  InV = val_nc_;
108  NumIn = (LocalOrdinal)cnt;
109  }
110  }
111 
112 private:
113 
114  using inds_type_nc = typename row_matrix_type::nonconst_local_inds_host_view_type;
115  using vals_type_nc = typename row_matrix_type::nonconst_values_host_view_type;
116 
117  Teuchos::RCP<const row_matrix_type> A_;
118  inds_type_nc ind_nc_;
119  vals_type_nc val_nc_;
120 };
121 
122 } // namespace
123 
124 template<class MatrixType>
125 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
126  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) )
127 {}
128 
129 template<class MatrixType>
130 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
131  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) )
132 {}
133 
134 
135 template<class MatrixType>
136 RBILUK<MatrixType>::~RBILUK() {}
137 
138 
139 template<class MatrixType>
140 void
141 RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
142 {
143  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
144 
145  // It's legal for A to be null; in that case, you may not call
146  // initialize() until calling setMatrix() with a nonnull input.
147  // Regardless, setting the matrix invalidates any previous
148  // factorization.
149  if (A.getRawPtr () != this->A_.getRawPtr ())
150  {
151  this->isAllocated_ = false;
152  this->isInitialized_ = false;
153  this->isComputed_ = false;
154  this->Graph_ = Teuchos::null;
155  L_block_ = Teuchos::null;
156  U_block_ = Teuchos::null;
157  D_block_ = Teuchos::null;
158  }
159 }
160 
161 
162 
163 template<class MatrixType>
164 const typename RBILUK<MatrixType>::block_crs_matrix_type&
165 RBILUK<MatrixType>::getLBlock () const
166 {
167  TEUCHOS_TEST_FOR_EXCEPTION(
168  L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
169  "is null. Please call initialize() (and preferably also compute()) "
170  "before calling this method. If the input matrix has not yet been set, "
171  "you must first call setMatrix() with a nonnull input matrix before you "
172  "may call initialize() or compute().");
173  return *L_block_;
174 }
175 
176 
177 template<class MatrixType>
178 const typename RBILUK<MatrixType>::block_crs_matrix_type&
179 RBILUK<MatrixType>::getDBlock () const
180 {
181  TEUCHOS_TEST_FOR_EXCEPTION(
182  D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
183  "(of diagonal entries) is null. Please call initialize() (and "
184  "preferably also compute()) before calling this method. If the input "
185  "matrix has not yet been set, you must first call setMatrix() with a "
186  "nonnull input matrix before you may call initialize() or compute().");
187  return *D_block_;
188 }
189 
190 
191 template<class MatrixType>
192 const typename RBILUK<MatrixType>::block_crs_matrix_type&
193 RBILUK<MatrixType>::getUBlock () const
194 {
195  TEUCHOS_TEST_FOR_EXCEPTION(
196  U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
197  "is null. Please call initialize() (and preferably also compute()) "
198  "before calling this method. If the input matrix has not yet been set, "
199  "you must first call setMatrix() with a nonnull input matrix before you "
200  "may call initialize() or compute().");
201  return *U_block_;
202 }
203 
204 template<class MatrixType>
205 void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
206 {
207  using Teuchos::null;
208  using Teuchos::rcp;
209 
210  if (! this->isAllocated_) {
211  // Deallocate any existing storage. This avoids storing 2x
212  // memory, since RCP op= does not deallocate until after the
213  // assignment.
214  this->L_ = null;
215  this->U_ = null;
216  this->D_ = null;
217  L_block_ = null;
218  U_block_ = null;
219  D_block_ = null;
220 
221  // Allocate Matrix using ILUK graphs
222  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
223  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
224  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
225  blockSize_) );
226  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
227  U_block_->setAllToScalar (STM::zero ());
228  D_block_->setAllToScalar (STM::zero ());
229 
230  }
231  this->isAllocated_ = true;
232 }
233 
234 namespace
235 {
236 template<class MatrixType>
237 Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>
238 makeLocalFilter (const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A)
239 {
240  using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
241  using Teuchos::RCP;
242  using Teuchos::rcp;
243  using Teuchos::rcp_dynamic_cast;
244  using Teuchos::rcp_implicit_cast;
245 
246  // If A_'s communicator only has one process, or if its column and
247  // row Maps are the same, then it is already local, so use it
248  // directly.
249  if (A->getRowMap ()->getComm ()->getSize () == 1 ||
250  A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
251  return A;
252  }
253 
254  // If A_ is already a LocalFilter, then use it directly. This
255  // should be the case if RILUK is being used through
256  // AdditiveSchwarz, for example.
257  RCP<const LocalFilter<row_matrix_type> > A_lf_r =
258  rcp_dynamic_cast<const LocalFilter<row_matrix_type> > (A);
259  if (! A_lf_r.is_null ()) {
260  return rcp_implicit_cast<const row_matrix_type> (A_lf_r);
261  }
262  else {
263  // A_'s communicator has more than one process, its row Map and
264  // its column Map differ, and A_ is not a LocalFilter. Thus, we
265  // have to wrap it in a LocalFilter.
266  return rcp (new LocalFilter<row_matrix_type> (A));
267  }
268 }
269 
270 template<class MatrixType>
271 Teuchos::RCP<const typename RBILUK<MatrixType>::crs_graph_type>
272 getBlockCrsGraph(const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A)
273 {
274  using local_ordinal_type = typename MatrixType::local_ordinal_type;
275  using Teuchos::RCP;
276  using Teuchos::rcp;
277  using Teuchos::rcp_dynamic_cast;
278  using Teuchos::rcp_const_cast;
279  using Teuchos::rcpFromRef;
280  using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
281  using crs_graph_type = typename RBILUK<MatrixType>::crs_graph_type;
282  using block_crs_matrix_type = typename RBILUK<MatrixType>::block_crs_matrix_type;
283 
284  auto A_local = makeLocalFilter<MatrixType>(A);
285 
286  {
287  RCP<const LocalFilter<row_matrix_type> > filteredA =
288  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_local);
289  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
290  RCP<const block_crs_matrix_type > A_block = Teuchos::null;
291  if (!filteredA.is_null ())
292  {
293  overlappedA = rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
294  }
295 
296  if (! overlappedA.is_null ()) {
297  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
298  }
299  else if (!filteredA.is_null ()){
300  //If there is no overlap, filteredA could be the block CRS matrix
301  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
302  }
303  else
304  {
305  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(A_local);
306  }
307 
308  if (!A_block.is_null()){
309  return rcpFromRef(A_block->getCrsGraph());
310  }
311  }
312 
313  // Could not extract block crs, make graph manually
314  {
315  local_ordinal_type numRows = A_local->getLocalNumRows();
316  Teuchos::Array<size_t> entriesPerRow(numRows);
317  for(local_ordinal_type i = 0; i < numRows; i++) {
318  entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
319  }
320  RCP<crs_graph_type> A_local_crs_nc =
321  rcp (new crs_graph_type (A_local->getRowMap (),
322  A_local->getColMap (),
323  entriesPerRow()));
324 
325  {
326  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
327  LocalRowHandler_t localRowHandler(A_local);
328  typename LocalRowHandler_t::inds_type indices;
329  typename LocalRowHandler_t::vals_type values;
330  for(local_ordinal_type i = 0; i < numRows; i++) {
331  local_ordinal_type numEntries = 0;
332  localRowHandler.getLocalRow(i, indices, values, numEntries);
333  A_local_crs_nc->insertLocalIndices(i, numEntries,indices.data());
334  }
335  }
336 
337  A_local_crs_nc->fillComplete (A_local->getDomainMap (), A_local->getRangeMap ());
338  return rcp_const_cast<const crs_graph_type> (A_local_crs_nc);
339  }
340 
341 }
342 
343 
344 } // namespace
345 
346 template<class MatrixType>
347 void RBILUK<MatrixType>::initialize ()
348 {
349  using Teuchos::RCP;
350  using Teuchos::rcp;
351  using Teuchos::rcp_dynamic_cast;
352  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
353 
354  TEUCHOS_TEST_FOR_EXCEPTION
355  (this->A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
356  "RowMatrix) is null. Please call setMatrix() with a nonnull input "
357  "before calling this method.");
358  TEUCHOS_TEST_FOR_EXCEPTION
359  (! this->A_->isFillComplete (), std::runtime_error, prefix << "The matrix "
360  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
361  "initialize() or compute() with this matrix until the matrix is fill "
362  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
363  "underlying graph is fill complete.");
364 
365  blockSize_ = this->A_->getBlockSize();
366  this->A_local_ = makeLocalFilter<MatrixType>(this->A_);
367 
368  Teuchos::Time timer ("RBILUK::initialize");
369  double startTime = timer.wallTime();
370  { // Start timing
371  Teuchos::TimeMonitor timeMon (timer);
372 
373  // Calling initialize() means that the user asserts that the graph
374  // of the sparse matrix may have changed. We must not just reuse
375  // the previous graph in that case.
376  //
377  // Regarding setting isAllocated_ to false: Eventually, we may want
378  // some kind of clever memory reuse strategy, but it's always
379  // correct just to blow everything away and start over.
380  this->isInitialized_ = false;
381  this->isAllocated_ = false;
382  this->isComputed_ = false;
383  this->Graph_ = Teuchos::null;
384 
385  RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
386  this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (matrixCrsGraph,
387  this->LevelOfFill_, 0));
388 
389  if (this->isKokkosKernelsSpiluk_) {
390  this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
391  KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
392  this->A_local_->getLocalNumRows(),
393  2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
394  2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
395  blockSize_);
396  this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic
397  }
398  else {
399  this->Graph_->initialize ();
400  }
401 
402  allocate_L_and_U_blocks ();
403 
404 #ifdef IFPACK2_RBILUK_INITIAL
405  initAllValues ();
406 #endif
407  } // Stop timing
408 
409  this->isInitialized_ = true;
410  this->numInitialize_ += 1;
411  this->initializeTime_ += (timer.wallTime() - startTime);
412 }
413 
414 
415 template<class MatrixType>
416 void
417 RBILUK<MatrixType>::
418 initAllValues ()
419 {
420  using Teuchos::RCP;
421  typedef Tpetra::Map<LO,GO,node_type> map_type;
422 
423  LO NumIn = 0, NumL = 0, NumU = 0;
424  bool DiagFound = false;
425  size_t NumNonzeroDiags = 0;
426  size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
427  LO blockMatSize = blockSize_*blockSize_;
428 
429  // First check that the local row map ordering is the same as the local portion of the column map.
430  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
431  // implicitly assume that this is the case.
432  Teuchos::ArrayView<const GO> rowGIDs = this->A_->getRowMap()->getLocalElementList();
433  Teuchos::ArrayView<const GO> colGIDs = this->A_->getColMap()->getLocalElementList();
434  bool gidsAreConsistentlyOrdered=true;
435  GO indexOfInconsistentGID=0;
436  for (GO i=0; i<rowGIDs.size(); ++i) {
437  if (rowGIDs[i] != colGIDs[i]) {
438  gidsAreConsistentlyOrdered=false;
439  indexOfInconsistentGID=i;
440  break;
441  }
442  }
443  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
444  "The ordering of the local GIDs in the row and column maps is not the same"
445  << std::endl << "at index " << indexOfInconsistentGID
446  << ". Consistency is required, as all calculations are done with"
447  << std::endl << "local indexing.");
448 
449  // Allocate temporary space for extracting the strictly
450  // lower and upper parts of the matrix A.
451  Teuchos::Array<LO> LI(MaxNumEntries);
452  Teuchos::Array<LO> UI(MaxNumEntries);
453  Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
454  Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
455 
456  Teuchos::Array<scalar_type> diagValues(blockMatSize);
457 
458  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
459  U_block_->setAllToScalar (STM::zero ());
460  D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
461 
462  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
463  // host, so sync to host first. The const_cast is unfortunate but
464  // is our only option to make this correct.
465 
466  /*
467  const_cast<block_crs_matrix_type&> (A).sync_host ();
468  L_block_->sync_host ();
469  U_block_->sync_host ();
470  D_block_->sync_host ();
471  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
472  L_block_->modify_host ();
473  U_block_->modify_host ();
474  D_block_->modify_host ();
475  */
476 
477  RCP<const map_type> rowMap = L_block_->getRowMap ();
478 
479  // First we copy the user's matrix into L and U, regardless of fill level.
480  // It is important to note that L and U are populated using local indices.
481  // This means that if the row map GIDs are not monotonically increasing
482  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
483  // matrix is not the one that you would get if you based L (U) on GIDs.
484  // This is ok, as the *order* of the GIDs in the rowmap is a better
485  // expression of the user's intent than the GIDs themselves.
486 
487  //TODO BMK: Revisit this fence when BlockCrsMatrix is refactored.
488  Kokkos::fence();
489  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
490  LocalRowHandler_t localRowHandler(this->A_);
491  typename LocalRowHandler_t::inds_type InI;
492  typename LocalRowHandler_t::vals_type InV;
493  for (size_t myRow=0; myRow<this->A_->getLocalNumRows(); ++myRow) {
494  LO local_row = myRow;
495 
496  localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
497 
498  // Split into L and U (we don't assume that indices are ordered).
499  NumL = 0;
500  NumU = 0;
501  DiagFound = false;
502 
503  for (LO j = 0; j < NumIn; ++j) {
504  const LO k = InI[j];
505  const LO blockOffset = blockMatSize*j;
506 
507  if (k == local_row) {
508  DiagFound = true;
509  // Store perturbed diagonal in Tpetra::Vector D_
510  for (LO jj = 0; jj < blockMatSize; ++jj)
511  diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
512  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
513  }
514  else if (k < 0) { // Out of range
515  TEUCHOS_TEST_FOR_EXCEPTION(
516  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
517  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
518  "probably an artifact of the undocumented assumptions of the "
519  "original implementation (likely copied and pasted from Ifpack). "
520  "Nevertheless, the code I found here insisted on this being an error "
521  "state, so I will throw an exception here.");
522  }
523  else if (k < local_row) {
524  LI[NumL] = k;
525  const LO LBlockOffset = NumL*blockMatSize;
526  for (LO jj = 0; jj < blockMatSize; ++jj)
527  LV[LBlockOffset+jj] = InV[blockOffset+jj];
528  NumL++;
529  }
530  else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
531  UI[NumU] = k;
532  const LO UBlockOffset = NumU*blockMatSize;
533  for (LO jj = 0; jj < blockMatSize; ++jj)
534  UV[UBlockOffset+jj] = InV[blockOffset+jj];
535  NumU++;
536  }
537  }
538 
539  // Check in things for this row of L and U
540 
541  if (DiagFound) {
542  ++NumNonzeroDiags;
543  } else
544  {
545  for (LO jj = 0; jj < blockSize_; ++jj)
546  diagValues[jj*(blockSize_+1)] = this->Athresh_;
547  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
548  }
549 
550  if (NumL) {
551  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
552  }
553 
554  if (NumU) {
555  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
556  }
557  }
558 
559  // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
560  // ever gets a device implementation.
561  /*
562  {
563  typedef typename block_crs_matrix_type::device_type device_type;
564  const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
565  L_block_->template sync<device_type> ();
566  U_block_->template sync<device_type> ();
567  D_block_->template sync<device_type> ();
568  }
569  */
570  this->isInitialized_ = true;
571 }
572 
573 namespace { // (anonymous)
574 
575 // For a given Kokkos::View type, possibly unmanaged, get the
576 // corresponding managed Kokkos::View type. This is handy for
577 // translating from little_block_type or little_host_vec_type (both
578 // possibly unmanaged) to their managed versions.
579 template<class LittleBlockType>
580 struct GetManagedView {
581  static_assert (Kokkos::is_view<LittleBlockType>::value,
582  "LittleBlockType must be a Kokkos::View.");
583  typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
584  typename LittleBlockType::array_layout,
585  typename LittleBlockType::device_type> managed_non_const_type;
586  static_assert (static_cast<int> (managed_non_const_type::rank) ==
587  static_cast<int> (LittleBlockType::rank),
588  "managed_non_const_type::rank != LittleBlock::rank. "
589  "Please report this bug to the Ifpack2 developers.");
590 };
591 
592 } // namespace (anonymous)
593 
594 template<class MatrixType>
595 void RBILUK<MatrixType>::compute ()
596 {
597  using Teuchos::RCP;
598  using Teuchos::rcp;
599  using Teuchos::Array;
600 
601  typedef impl_scalar_type IST;
602  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
603 
604  // initialize() checks this too, but it's easier for users if the
605  // error shows them the name of the method that they actually
606  // called, rather than the name of some internally called method.
607  TEUCHOS_TEST_FOR_EXCEPTION
608  (this->A_.is_null (), std::runtime_error, prefix << "The matrix (A_, "
609  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
610  "input before calling this method.");
611  TEUCHOS_TEST_FOR_EXCEPTION
612  (! this->A_->isFillComplete (), std::runtime_error, prefix << "The matrix "
613  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
614  "initialize() or compute() with this matrix until the matrix is fill "
615  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
616  "underlying graph is fill complete.");
617 
618  if (! this->isInitialized ()) {
619  initialize (); // Don't count this in the compute() time
620  }
621 
622  // // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
623  // // host, so sync to host first. The const_cast is unfortunate but
624  // // is our only option to make this correct.
625  // if (! A_block_.is_null ()) {
626  // Teuchos::RCP<block_crs_matrix_type> A_nc =
627  // Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
628  // // A_nc->sync_host ();
629  // }
630  /*
631  L_block_->sync_host ();
632  U_block_->sync_host ();
633  D_block_->sync_host ();
634  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
635  L_block_->modify_host ();
636  U_block_->modify_host ();
637  D_block_->modify_host ();
638  */
639 
640  Teuchos::Time timer ("RBILUK::compute");
641  double startTime = timer.wallTime();
642  { // Start timing
643  Teuchos::TimeMonitor timeMon (timer);
644  this->isComputed_ = false;
645 
646  // MinMachNum should be officially defined, for now pick something a little
647  // bigger than IEEE underflow value
648 
649 // const scalar_type MinDiagonalValue = STS::rmin ();
650 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
651  if (!this->isKokkosKernelsSpiluk_) {
652  initAllValues ();
653  size_t NumIn;
654  LO NumL, NumU, NumURead;
655 
656  // Get Maximum Row length
657  const size_t MaxNumEntries =
658  L_block_->getLocalMaxNumRowEntries () + U_block_->getLocalMaxNumRowEntries () + 1;
659 
660  const LO blockMatSize = blockSize_*blockSize_;
661 
662  // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
663  // expressing these strides explicitly, in order to finish #177
664  // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
665  const LO rowStride = blockSize_;
666 
667  Teuchos::Array<int> ipiv_teuchos(blockSize_);
668  Kokkos::View<int*, Kokkos::HostSpace,
669  Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
670  Teuchos::Array<IST> work_teuchos(blockSize_);
671  Kokkos::View<IST*, Kokkos::HostSpace,
672  Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
673 
674  size_t num_cols = U_block_->getColMap()->getLocalNumElements();
675  Teuchos::Array<int> colflag(num_cols);
676 
677  typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock ("diagModBlock", blockSize_, blockSize_);
678  typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp ("matTmp", blockSize_, blockSize_);
679  typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier ("multiplier", blockSize_, blockSize_);
680 
681 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
682 
683  // Now start the factorization.
684 
685  // Need some integer workspace and pointers
686  LO NumUU;
687  for (size_t j = 0; j < num_cols; ++j) {
688  colflag[j] = -1;
689  }
690  Teuchos::Array<LO> InI(MaxNumEntries, 0);
691  Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
692 
693  const LO numLocalRows = L_block_->getLocalNumRows ();
694  for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
695 
696  // Fill InV, InI with current row of L, D and U combined
697 
698  NumIn = MaxNumEntries;
699  local_inds_host_view_type colValsL;
700  values_host_view_type valsL;
701 
702  L_block_->getLocalRowView(local_row, colValsL, valsL);
703  NumL = (LO) colValsL.size();
704  for (LO j = 0; j < NumL; ++j)
705  {
706  const LO matOffset = blockMatSize*j;
707  little_block_host_type lmat((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
708  little_block_host_type lmatV((typename little_block_host_type::value_type*) &InV[matOffset],blockSize_,rowStride);
709  //lmatV.assign(lmat);
710  Tpetra::COPY (lmat, lmatV);
711  InI[j] = colValsL[j];
712  }
713 
714  little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
715  little_block_host_type dmatV((typename little_block_host_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
716  //dmatV.assign(dmat);
717  Tpetra::COPY (dmat, dmatV);
718  InI[NumL] = local_row;
719 
720  local_inds_host_view_type colValsU;
721  values_host_view_type valsU;
722  U_block_->getLocalRowView(local_row, colValsU, valsU);
723  NumURead = (LO) colValsU.size();
724  NumU = 0;
725  for (LO j = 0; j < NumURead; ++j)
726  {
727  if (!(colValsU[j] < numLocalRows)) continue;
728  InI[NumL+1+j] = colValsU[j];
729  const LO matOffset = blockMatSize*(NumL+1+j);
730  little_block_host_type umat((typename little_block_host_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
731  little_block_host_type umatV((typename little_block_host_type::value_type*) &InV[matOffset], blockSize_, rowStride);
732  //umatV.assign(umat);
733  Tpetra::COPY (umat, umatV);
734  NumU += 1;
735  }
736  NumIn = NumL+NumU+1;
737 
738  // Set column flags
739  for (size_t j = 0; j < NumIn; ++j) {
740  colflag[InI[j]] = j;
741  }
742 
743 #ifndef IFPACK2_RBILUK_INITIAL
744  for (LO i = 0; i < blockSize_; ++i)
745  for (LO j = 0; j < blockSize_; ++j){
746  {
747  diagModBlock(i,j) = 0;
748  }
749  }
750 #else
751  scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
752  Kokkos::deep_copy (diagModBlock, diagmod);
753 #endif
754 
755  for (LO jj = 0; jj < NumL; ++jj) {
756  LO j = InI[jj];
757  little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride); // current_mults++;
758  //multiplier.assign(currentVal);
759  Tpetra::COPY (currentVal, multiplier);
760 
761  const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j,j);
762  // alpha = 1, beta = 0
763 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
764  KokkosBatched::SerialGemm
765  <KokkosBatched::Trans::NoTranspose,
766  KokkosBatched::Trans::NoTranspose,
767  KokkosBatched::Algo::Gemm::Blocked>::invoke
768  (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
769 #else
770  Tpetra::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
771  STS::zero (), matTmp);
772 #endif
773  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
774  //currentVal.assign(matTmp);
775  Tpetra::COPY (matTmp, currentVal);
776  local_inds_host_view_type UUI;
777  values_host_view_type UUV;
778 
779  U_block_->getLocalRowView(j, UUI, UUV);
780  NumUU = (LO) UUI.size();
781 
782  if (this->RelaxValue_ == STM::zero ()) {
783  for (LO k = 0; k < NumUU; ++k) {
784  if (!(UUI[k] < numLocalRows)) continue;
785  const int kk = colflag[UUI[k]];
786  if (kk > -1) {
787  little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
788  little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
789 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
790  KokkosBatched::SerialGemm
791  <KokkosBatched::Trans::NoTranspose,
792  KokkosBatched::Trans::NoTranspose,
793  KokkosBatched::Algo::Gemm::Blocked>::invoke
794  ( magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
795 #else
796  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
797  STM::one (), kkval);
798 #endif
799  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
800  }
801  }
802  }
803  else {
804  for (LO k = 0; k < NumUU; ++k) {
805  if (!(UUI[k] < numLocalRows)) continue;
806  const int kk = colflag[UUI[k]];
807  little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
808  if (kk > -1) {
809  little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
810 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
811  KokkosBatched::SerialGemm
812  <KokkosBatched::Trans::NoTranspose,
813  KokkosBatched::Trans::NoTranspose,
814  KokkosBatched::Algo::Gemm::Blocked>::invoke
815  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
816 #else
817  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
818  STM::one (), kkval);
819 #endif
820  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
821  }
822  else {
823 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
824  KokkosBatched::SerialGemm
825  <KokkosBatched::Trans::NoTranspose,
826  KokkosBatched::Trans::NoTranspose,
827  KokkosBatched::Algo::Gemm::Blocked>::invoke
828  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
829 #else
830  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
831  STM::one (), diagModBlock);
832 #endif
833  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
834  }
835  }
836  }
837  }
838  if (NumL) {
839  // Replace current row of L
840  L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
841  }
842 
843  // dmat.assign(dmatV);
844  Tpetra::COPY (dmatV, dmat);
845 
846  if (this->RelaxValue_ != STM::zero ()) {
847  //dmat.update(this->RelaxValue_, diagModBlock);
848  Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
849  }
850 
851 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
852 // if (STS::real (DV[i]) < STM::zero ()) {
853 // DV[i] = -MinDiagonalValue;
854 // }
855 // else {
856 // DV[i] = MinDiagonalValue;
857 // }
858 // }
859 // else
860  {
861  int lapackInfo = 0;
862  for (int k = 0; k < blockSize_; ++k) {
863  ipiv[k] = 0;
864  }
865 
866  Tpetra::GETF2 (dmat, ipiv, lapackInfo);
867  //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
868  TEUCHOS_TEST_FOR_EXCEPTION(
869  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
870  "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
871 
872  Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
873  //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
874  TEUCHOS_TEST_FOR_EXCEPTION(
875  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
876  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
877  }
878 
879  for (LO j = 0; j < NumU; ++j) {
880  little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride); // current_mults++;
881  // scale U by the diagonal inverse
882 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
883  KokkosBatched::SerialGemm
884  <KokkosBatched::Trans::NoTranspose,
885  KokkosBatched::Trans::NoTranspose,
886  KokkosBatched::Algo::Gemm::Blocked>::invoke
887  (STS::one (), dmat, currentVal, STS::zero (), matTmp);
888 #else
889  Tpetra::GEMM ("N", "N", STS::one (), dmat, currentVal,
890  STS::zero (), matTmp);
891 #endif
892  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
893  //currentVal.assign(matTmp);
894  Tpetra::COPY (matTmp, currentVal);
895  }
896 
897  if (NumU) {
898  // Replace current row of L and U
899  U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
900  }
901 
902 #ifndef IFPACK2_RBILUK_INITIAL
903  // Reset column flags
904  for (size_t j = 0; j < NumIn; ++j) {
905  colflag[InI[j]] = -1;
906  }
907 #else
908  // Reset column flags
909  for (size_t j = 0; j < num_cols; ++j) {
910  colflag[j] = -1;
911  }
912 #endif
913  }
914  } // !this->isKokkosKernelsSpiluk_
915  else {
916  RCP<const block_crs_matrix_type> A_local_bcrs = Details::getBcrsMatrix(this->A_local_);
917  RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
918  if (A_local_bcrs.is_null()) {
919  if (A_local_crs.is_null()) {
920  local_ordinal_type numRows = this->A_local_->getLocalNumRows();
921  Array<size_t> entriesPerRow(numRows);
922  for(local_ordinal_type i = 0; i < numRows; i++) {
923  entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
924  }
925  RCP<crs_matrix_type> A_local_crs_nc =
926  rcp (new crs_matrix_type (this->A_local_->getRowMap (),
927  this->A_local_->getColMap (),
928  entriesPerRow()));
929  // copy entries into A_local_crs
930  nonconst_local_inds_host_view_type indices("indices",this->A_local_->getLocalMaxNumRowEntries());
931  nonconst_values_host_view_type values("values",this->A_local_->getLocalMaxNumRowEntries());
932  for(local_ordinal_type i = 0; i < numRows; i++) {
933  size_t numEntries = 0;
934  this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
935  A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
936  }
937  A_local_crs_nc->fillComplete (this->A_local_->getDomainMap (), this->A_local_->getRangeMap ());
938  A_local_crs = Teuchos::rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
939  }
940  // Create bcrs from crs
941  // We can skip fillLogicalBlocks if we know the input is already filled
942  if (blockSize_ > 1) {
943  auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
944  A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
945  }
946  else {
947  A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
948  }
949  }
950 
951  TEUCHOS_TEST_FOR_EXCEPTION(
952  this->isKokkosKernelsStream_, std::runtime_error, "Ifpack2::RBILUK::compute: "
953  "streams are not yet supported.");
954 
955  auto lclMtx = A_local_bcrs->getLocalMatrixDevice();
956  this->A_local_rowmap_ = lclMtx.graph.row_map;
957  this->A_local_entries_ = lclMtx.graph.entries;
958  this->A_local_values_ = lclMtx.values;
959 
960  // L_block_->resumeFill ();
961  // U_block_->resumeFill ();
962 
963  if (L_block_->isLocallyIndexed ()) {
964  L_block_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
965  U_block_->setAllToScalar (STS::zero ());
966  }
967 
968  using row_map_type = typename local_matrix_device_type::row_map_type;
969 
970  auto lclL = L_block_->getLocalMatrixDevice();
971  row_map_type L_rowmap = lclL.graph.row_map;
972  auto L_entries = lclL.graph.entries;
973  auto L_values = lclL.values;
974 
975  auto lclU = U_block_->getLocalMatrixDevice();
976  row_map_type U_rowmap = lclU.graph.row_map;
977  auto U_entries = lclU.graph.entries;
978  auto U_values = lclU.values;
979 
980  KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_,
981  this->A_local_rowmap_, this->A_local_entries_, this->A_local_values_,
982  L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
983  }
984  } // Stop timing
985 
986  // Sync everything back to device, for efficient solves.
987  /*
988  {
989  typedef typename block_crs_matrix_type::device_type device_type;
990  if (! A_block_.is_null ()) {
991  Teuchos::RCP<block_crs_matrix_type> A_nc =
992  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
993  A_nc->template sync<device_type> ();
994  }
995  L_block_->template sync<device_type> ();
996  U_block_->template sync<device_type> ();
997  D_block_->template sync<device_type> ();
998  }
999  */
1000 
1001  this->isComputed_ = true;
1002  this->numCompute_ += 1;
1003  this->computeTime_ += (timer.wallTime() - startTime);
1004 }
1005 
1006 
1007 template<class MatrixType>
1008 void
1009 RBILUK<MatrixType>::
1010 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1011  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1012  Teuchos::ETransp mode,
1013  scalar_type alpha,
1014  scalar_type beta) const
1015 {
1016  using Teuchos::RCP;
1017  typedef Tpetra::BlockMultiVector<scalar_type,
1018  local_ordinal_type, global_ordinal_type, node_type> BMV;
1019 
1020  TEUCHOS_TEST_FOR_EXCEPTION(
1021  this->A_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
1022  "null. Please call setMatrix() with a nonnull input, then initialize() "
1023  "and compute(), before calling this method.");
1024  TEUCHOS_TEST_FOR_EXCEPTION(
1025  ! this->isComputed (), std::runtime_error,
1026  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
1027  "you must call compute() before calling this method.");
1028  TEUCHOS_TEST_FOR_EXCEPTION(
1029  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1030  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
1031  "X.getNumVectors() = " << X.getNumVectors ()
1032  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
1033  TEUCHOS_TEST_FOR_EXCEPTION(
1034  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1035  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1036  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1037  "fixed. There is a FIXME in this file about this very issue.");
1038 
1039  const LO blockMatSize = blockSize_*blockSize_;
1040 
1041  const LO rowStride = blockSize_;
1042 
1043  BMV yBlock (Y, * (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_);
1044  const BMV xBlock (X, * (this->A_->getColMap ()), blockSize_);
1045 
1046  Teuchos::Array<scalar_type> lclarray(blockSize_);
1047  little_host_vec_type lclvec((typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
1048  const scalar_type one = STM::one ();
1049  const scalar_type zero = STM::zero ();
1050 
1051  Teuchos::Time timer ("RBILUK::apply");
1052  double startTime = timer.wallTime();
1053  { // Start timing
1054  Teuchos::TimeMonitor timeMon (timer);
1055  if (!this->isKokkosKernelsSpiluk_) {
1056  if (alpha == one && beta == zero) {
1057  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1058  // Start by solving L C = X for C. C must have the same Map
1059  // as D. We have to use a temp multivector, since our
1060  // implementation of triangular solves does not allow its
1061  // input and output to alias one another.
1062  //
1063  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
1064  const LO numVectors = xBlock.getNumVectors();
1065  BMV cBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1066  BMV rBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1067  for (LO imv = 0; imv < numVectors; ++imv)
1068  {
1069  for (size_t i = 0; i < D_block_->getLocalNumRows(); ++i)
1070  {
1071  LO local_row = i;
1072  const_host_little_vec_type xval =
1073  xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1074  little_host_vec_type cval =
1075  cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1076  //cval.assign(xval);
1077  Tpetra::COPY (xval, cval);
1078 
1079  local_inds_host_view_type colValsL;
1080  values_host_view_type valsL;
1081  L_block_->getLocalRowView(local_row, colValsL, valsL);
1082  LO NumL = (LO) colValsL.size();
1083 
1084  for (LO j = 0; j < NumL; ++j)
1085  {
1086  LO col = colValsL[j];
1087  const_host_little_vec_type prevVal =
1088  cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1089 
1090  const LO matOffset = blockMatSize*j;
1091  little_block_host_type lij((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
1092 
1093  //cval.matvecUpdate(-one, lij, prevVal);
1094  Tpetra::GEMV (-one, lij, prevVal, cval);
1095  }
1096  }
1097  }
1098 
1099  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
1100  D_block_->applyBlock(cBlock, rBlock);
1101 
1102  // Solve U Y = R.
1103  for (LO imv = 0; imv < numVectors; ++imv)
1104  {
1105  const LO numRows = D_block_->getLocalNumRows();
1106  for (LO i = 0; i < numRows; ++i)
1107  {
1108  LO local_row = (numRows-1)-i;
1109  const_host_little_vec_type rval =
1110  rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1111  little_host_vec_type yval =
1112  yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1113  //yval.assign(rval);
1114  Tpetra::COPY (rval, yval);
1115 
1116  local_inds_host_view_type colValsU;
1117  values_host_view_type valsU;
1118  U_block_->getLocalRowView(local_row, colValsU, valsU);
1119  LO NumU = (LO) colValsU.size();
1120 
1121  for (LO j = 0; j < NumU; ++j)
1122  {
1123  LO col = colValsU[NumU-1-j];
1124  const_host_little_vec_type prevVal =
1125  yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1126 
1127  const LO matOffset = blockMatSize*(NumU-1-j);
1128  little_block_host_type uij((typename little_block_host_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
1129 
1130  //yval.matvecUpdate(-one, uij, prevVal);
1131  Tpetra::GEMV (-one, uij, prevVal, yval);
1132  }
1133  }
1134  }
1135  }
1136  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1137  TEUCHOS_TEST_FOR_EXCEPTION(
1138  true, std::runtime_error,
1139  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm without KokkosKernels. ");
1140  }
1141  }
1142  else { // alpha != 1 or beta != 0
1143  if (alpha == zero) {
1144  if (beta == zero) {
1145  Y.putScalar (zero);
1146  } else {
1147  Y.scale (beta);
1148  }
1149  } else { // alpha != zero
1150  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1151  apply (X, Y_tmp, mode);
1152  Y.update (alpha, Y_tmp, beta);
1153  }
1154  }
1155  }
1156  else {
1157  // Kokkos kernels impl. For now, the only block trsv available is Sequential
1158  // and must be done on host.
1159  using row_map_type = typename local_matrix_host_type::row_map_type;
1160  using index_type = typename local_matrix_host_type::index_type;
1161  using values_type = typename local_matrix_host_type::values_type;
1162 
1163  auto X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
1164  auto Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
1165 
1166  auto L_row_ptrs_host = L_block_->getCrsGraph().getLocalRowPtrsHost();
1167  auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost();
1168  auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost();
1169  auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost();
1170  auto L_values_host = L_block_->getValuesHost();
1171  auto U_values_host = U_block_->getValuesHost();
1172 
1173  row_map_type* L_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&L_row_ptrs_host);
1174  index_type* L_entries_host_ri = reinterpret_cast<index_type*>(&L_entries_host);
1175  row_map_type* U_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&U_row_ptrs_host);
1176  index_type* U_entries_host_ri = reinterpret_cast<index_type*>(&U_entries_host);
1177  values_type* L_values_host_ri = reinterpret_cast<values_type*>(&L_values_host);
1178  values_type* U_values_host_ri = reinterpret_cast<values_type*>(&U_values_host);
1179 
1180  const auto numRows = L_block_->getLocalNumRows();
1181  local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_);
1182  local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_);
1183 
1184  if (mode == Teuchos::NO_TRANS) {
1185  KokkosSparse::trsv("L", "N", "N", L_block_local_host, X_view, Y_view);
1186  KokkosSparse::trsv("U", "N", "N", U_block_local_host, Y_view, Y_view);
1187  KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
1188  }
1189  else {
1190  KokkosSparse::trsv("U", "T", "N", U_block_local_host, X_view, Y_view);
1191  KokkosSparse::trsv("L", "T", "N", L_block_local_host, Y_view, Y_view);
1192  KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
1193  }
1194 
1195  //Y.getWrappedDualView().sync();
1196  }
1197  } // Stop timing
1198 
1199  this->numApply_ += 1;
1200  this->applyTime_ += (timer.wallTime() - startTime);
1201 }
1202 
1203 
1204 template<class MatrixType>
1205 std::string RBILUK<MatrixType>::description () const
1206 {
1207  std::ostringstream os;
1208 
1209  // Output is a valid YAML dictionary in flow style. If you don't
1210  // like everything on a single line, you should call describe()
1211  // instead.
1212  os << "\"Ifpack2::Experimental::RBILUK\": {";
1213  os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
1214  << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
1215 
1216  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
1217 
1218  if (this->A_.is_null ()) {
1219  os << "Matrix: null";
1220  }
1221  else {
1222  os << "Global matrix dimensions: ["
1223  << this->A_->getGlobalNumRows () << ", " << this->A_->getGlobalNumCols () << "]"
1224  << ", Global nnz: " << this->A_->getGlobalNumEntries();
1225  }
1226 
1227  os << "}";
1228  return os.str ();
1229 }
1230 
1231 } // namespace Experimental
1232 
1233 } // namespace Ifpack2
1234 
1235 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
1236 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
1237 // handled internally via dynamic cast.
1238 
1239 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
1240  template class Ifpack2::Experimental::RBILUK< Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1241  template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
1242 
1243 #endif
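For readers of the classic (non-KokkosKernels) branch of compute() above, the dense block operations follow a row-by-row block ILU(k) recurrence. The notation below is introduced here, not taken from the source: A~ is the working copy of the current block row, P_jj the pivot block of row j, omega the relax value RelaxValue_, and Delta_i the fill dropped from row i. As a sketch only, each pass over block row i performs approximately

\[
  L_{ij} = \tilde{A}_{ij}\, P_{jj}^{-1}, \qquad
  \tilde{A}_{ik} \leftarrow \tilde{A}_{ik} - \tilde{A}_{ij}\, U_{jk}
  \quad (j < i,\ k > j),
\]
\[
  P_{ii} = \tilde{A}_{ii} + \omega\, \Delta_i, \qquad
  U_{ik} = P_{ii}^{-1}\, \tilde{A}_{ik} \quad (k > i),
\]

where an update whose column k falls outside row i's pattern is either dropped or, when omega is nonzero, accumulated into Delta_i. D_block_ stores P_ii^{-1} (formed by the GETRF/GETRI pair) rather than P_ii itself, which is why apply() can use D_block_->applyBlock() as a plain block-diagonal multiply between the unit-diagonal forward solve with L_block_ and the backward solve with U_block_.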
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:347
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:146
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:136
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:193
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:136
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1010
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:153
Ifpack2::RILUK
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:245
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:150
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:165
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:142
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:179
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:595
Ifpack2_Utilities.hpp
File for utility functions.
Ifpack2::IlukGraph
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:100
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:141
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:165
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:159
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1205
Ifpack2::Experimental::RBILUK
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:128
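For orientation, here is a minimal usage sketch of the class defined above, driven through the standard Ifpack2 preconditioner interface (constructor, setParameters(), initialize(), compute(), apply()). The matrix assembly is elided; the typedefs, the helper-function name, and the convenience header Ifpack2_Experimental_RBILUK.hpp are assumptions based on the usual Trilinos packaging, and the parameter name follows the RILUK base class, so treat this as a sketch rather than a verified example.

  #include "Ifpack2_Experimental_RBILUK.hpp"   // assumed wrapper header for decl/def
  #include "Teuchos_ParameterList.hpp"
  #include "Teuchos_RCP.hpp"
  #include "Tpetra_BlockCrsMatrix.hpp"
  #include "Tpetra_MultiVector.hpp"

  // Default Tpetra template parameters; any enabled Scalar/LO/GO/Node combination
  // works the same way.
  using block_matrix_type = Tpetra::BlockCrsMatrix<>;
  using mv_type           = Tpetra::MultiVector<>;

  // Apply M^{-1} to b, writing the result into x.  A is assumed fill complete.
  void applyRBILUK (const Teuchos::RCP<const block_matrix_type>& A,
                    const mv_type& b, mv_type& x)
  {
    Ifpack2::Experimental::RBILUK<block_matrix_type> prec (A);

    Teuchos::ParameterList params;
    params.set ("fact: iluk level-of-fill", 1);  // forwarded to the RILUK base class
    prec.setParameters (params);

    prec.initialize (); // symbolic factorization (graph only)
    prec.compute ();    // numeric factorization of the blocks
    prec.apply (b, x);  // x = M^{-1} b (mode, alpha, beta take their defaults)
  }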