43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
46 #include "Tpetra_BlockMultiVector.hpp"
47 #include "Tpetra_BlockView.hpp"
48 #include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
49 #include "Ifpack2_OverlappingRowMatrix.hpp"
50 #include "Ifpack2_Details_getCrsMatrix.hpp"
51 #include "Ifpack2_LocalFilter.hpp"
53 #include "Ifpack2_RILUK.hpp"
54 #include "KokkosSparse_trsv.hpp"
59 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
60 #include "KokkosBatched_Gemm_Decl.hpp"
61 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
62 #include "KokkosBatched_Util.hpp"
67 namespace Experimental {
71 template<
class MatrixType>
72 struct LocalRowHandler
74 using LocalOrdinal =
typename MatrixType::local_ordinal_type;
75 using row_matrix_type = Tpetra::RowMatrix<
76 typename MatrixType::scalar_type,
78 typename MatrixType::global_ordinal_type,
79 typename MatrixType::node_type>;
80 using inds_type =
typename row_matrix_type::local_inds_host_view_type;
81 using vals_type =
typename row_matrix_type::values_host_view_type;
86 if (!A_->supportsRowViews())
88 const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
89 const auto blockSize = A_->getBlockSize();
90 ind_nc_ = inds_type_nc(
"Ifpack2::RBILUK::LocalRowHandler::indices",maxNumRowEntr);
91 val_nc_ = vals_type_nc(
"Ifpack2::RBILUK::LocalRowHandler::values",maxNumRowEntr*blockSize*blockSize);
95 void getLocalRow(LocalOrdinal local_row, inds_type & InI, vals_type & InV, LocalOrdinal & NumIn)
97 if (A_->supportsRowViews())
99 A_->getLocalRowView(local_row,InI,InV);
100 NumIn = (LocalOrdinal)InI.size();
105 A_->getLocalRowCopy(local_row,ind_nc_,val_nc_,cnt);
108 NumIn = (LocalOrdinal)cnt;
114 using inds_type_nc =
typename row_matrix_type::nonconst_local_inds_host_view_type;
115 using vals_type_nc =
typename row_matrix_type::nonconst_values_host_view_type;
118 inds_type_nc ind_nc_;
119 vals_type_nc val_nc_;
124 template<
class MatrixType>
129 template<
class MatrixType>
135 template<
class MatrixType>
139 template<
class MatrixType>
151 this->isAllocated_ =
false;
152 this->isInitialized_ =
false;
153 this->isComputed_ =
false;
154 this->Graph_ = Teuchos::null;
155 L_block_ = Teuchos::null;
156 U_block_ = Teuchos::null;
157 D_block_ = Teuchos::null;
163 template<
class MatrixType>
164 const typename RBILUK<MatrixType>::block_crs_matrix_type&
168 L_block_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getL: The L factor "
169 "is null. Please call initialize() (and preferably also compute()) "
170 "before calling this method. If the input matrix has not yet been set, "
171 "you must first call setMatrix() with a nonnull input matrix before you "
172 "may call initialize() or compute().");
177 template<
class MatrixType>
178 const typename RBILUK<MatrixType>::block_crs_matrix_type&
182 D_block_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getD: The D factor "
183 "(of diagonal entries) is null. Please call initialize() (and "
184 "preferably also compute()) before calling this method. If the input "
185 "matrix has not yet been set, you must first call setMatrix() with a "
186 "nonnull input matrix before you may call initialize() or compute().");
191 template<
class MatrixType>
192 const typename RBILUK<MatrixType>::block_crs_matrix_type&
196 U_block_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getU: The U factor "
197 "is null. Please call initialize() (and preferably also compute()) "
198 "before calling this method. If the input matrix has not yet been set, "
199 "you must first call setMatrix() with a nonnull input matrix before you "
200 "may call initialize() or compute().");
204 template<
class MatrixType>
210 if (! this->isAllocated_) {
222 L_block_ =
rcp(
new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
223 U_block_ =
rcp(
new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
224 D_block_ =
rcp(
new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
226 L_block_->setAllToScalar (STM::zero ());
227 U_block_->setAllToScalar (STM::zero ());
228 D_block_->setAllToScalar (STM::zero ());
231 this->isAllocated_ =
true;
236 template<
class MatrixType>
238 makeLocalFilter (
const Teuchos::RCP<
const typename RBILUK<MatrixType>::row_matrix_type>& A)
240 using row_matrix_type =
typename RBILUK<MatrixType>::row_matrix_type;
243 using Teuchos::rcp_dynamic_cast;
244 using Teuchos::rcp_implicit_cast;
249 if (A->getRowMap ()->getComm ()->getSize () == 1 ||
250 A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
257 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
258 rcp_dynamic_cast<
const LocalFilter<row_matrix_type> > (A);
259 if (! A_lf_r.is_null ()) {
260 return rcp_implicit_cast<
const row_matrix_type> (A_lf_r);
266 return rcp (
new LocalFilter<row_matrix_type> (A));
270 template<
class MatrixType>
272 getBlockCrsGraph(
const Teuchos::RCP<
const typename RBILUK<MatrixType>::row_matrix_type>& A)
274 using local_ordinal_type =
typename MatrixType::local_ordinal_type;
277 using Teuchos::rcp_dynamic_cast;
278 using Teuchos::rcp_const_cast;
279 using Teuchos::rcpFromRef;
280 using row_matrix_type =
typename RBILUK<MatrixType>::row_matrix_type;
281 using crs_graph_type =
typename RBILUK<MatrixType>::crs_graph_type;
282 using block_crs_matrix_type =
typename RBILUK<MatrixType>::block_crs_matrix_type;
284 auto A_local = makeLocalFilter<MatrixType>(A);
287 RCP<const LocalFilter<row_matrix_type> > filteredA =
288 rcp_dynamic_cast<
const LocalFilter<row_matrix_type> >(A_local);
289 RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
290 RCP<const block_crs_matrix_type > A_block = Teuchos::null;
291 if (!filteredA.is_null ())
293 overlappedA = rcp_dynamic_cast<
const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
296 if (! overlappedA.is_null ()) {
297 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
299 else if (!filteredA.is_null ()){
301 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
305 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(A_local);
308 if (!A_block.is_null()){
309 return rcpFromRef(A_block->getCrsGraph());
315 local_ordinal_type numRows = A_local->getLocalNumRows();
317 for(local_ordinal_type i = 0; i < numRows; i++) {
318 entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
320 RCP<crs_graph_type> A_local_crs_nc =
321 rcp (
new crs_graph_type (A_local->getRowMap (),
322 A_local->getColMap (),
326 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
327 LocalRowHandler_t localRowHandler(A_local);
328 typename LocalRowHandler_t::inds_type indices;
329 typename LocalRowHandler_t::vals_type values;
330 for(local_ordinal_type i = 0; i < numRows; i++) {
331 local_ordinal_type numEntries = 0;
332 localRowHandler.getLocalRow(i, indices, values, numEntries);
333 A_local_crs_nc->insertLocalIndices(i, numEntries,indices.data());
337 A_local_crs_nc->fillComplete (A_local->getDomainMap (), A_local->getRangeMap ());
338 return rcp_const_cast<
const crs_graph_type> (A_local_crs_nc);
346 template<
class MatrixType>
351 using Teuchos::rcp_dynamic_cast;
352 const char prefix[] =
"Ifpack2::Experimental::RBILUK::initialize: ";
355 (this->A_.
is_null (), std::runtime_error, prefix <<
"The matrix (A_, the "
356 "RowMatrix) is null. Please call setMatrix() with a nonnull input "
357 "before calling this method.");
359 (! this->A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix "
360 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
361 "initialize() or compute() with this matrix until the matrix is fill "
362 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
363 "underlying graph is fill complete.");
365 blockSize_ = this->A_->getBlockSize();
366 this->A_local_ = makeLocalFilter<MatrixType>(this->A_);
369 double startTime = timer.wallTime();
380 this->isInitialized_ =
false;
381 this->isAllocated_ =
false;
382 this->isComputed_ =
false;
383 this->Graph_ = Teuchos::null;
385 RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
387 this->LevelOfFill_, 0));
389 if (this->isKokkosKernelsSpiluk_) {
390 this->KernelHandle_ =
Teuchos::rcp (
new kk_handle_type ());
391 KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
392 this->A_local_->getLocalNumRows(),
393 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
394 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
396 this->Graph_->initialize(KernelHandle_);
399 this->Graph_->initialize ();
402 allocate_L_and_U_blocks ();
404 #ifdef IFPACK2_RBILUK_INITIAL
409 this->isInitialized_ =
true;
410 this->numInitialize_ += 1;
411 this->initializeTime_ += (timer.wallTime() - startTime);
415 template<
class MatrixType>
421 typedef Tpetra::Map<LO,GO,node_type> map_type;
423 LO NumIn = 0, NumL = 0, NumU = 0;
424 bool DiagFound =
false;
425 size_t NumNonzeroDiags = 0;
426 size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
427 LO blockMatSize = blockSize_*blockSize_;
434 bool gidsAreConsistentlyOrdered=
true;
435 GO indexOfInconsistentGID=0;
436 for (GO i=0; i<rowGIDs.
size(); ++i) {
437 if (rowGIDs[i] != colGIDs[i]) {
438 gidsAreConsistentlyOrdered=
false;
439 indexOfInconsistentGID=i;
444 "The ordering of the local GIDs in the row and column maps is not the same"
445 << std::endl <<
"at index " << indexOfInconsistentGID
446 <<
". Consistency is required, as all calculations are done with"
447 << std::endl <<
"local indexing.");
458 L_block_->setAllToScalar (STM::zero ());
459 U_block_->setAllToScalar (STM::zero ());
460 D_block_->setAllToScalar (STM::zero ());
477 RCP<const map_type> rowMap = L_block_->getRowMap ();
489 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
490 LocalRowHandler_t localRowHandler(this->A_);
491 typename LocalRowHandler_t::inds_type InI;
492 typename LocalRowHandler_t::vals_type InV;
493 for (
size_t myRow=0; myRow<this->A_->getLocalNumRows(); ++myRow) {
494 LO local_row = myRow;
496 localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
503 for (LO j = 0; j < NumIn; ++j) {
505 const LO blockOffset = blockMatSize*j;
507 if (k == local_row) {
510 for (LO jj = 0; jj < blockMatSize; ++jj)
511 diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
512 D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
516 true, std::runtime_error,
"Ifpack2::RILUK::initAllValues: current "
517 "GID k = " << k <<
" < 0. I'm not sure why this is an error; it is "
518 "probably an artifact of the undocumented assumptions of the "
519 "original implementation (likely copied and pasted from Ifpack). "
520 "Nevertheless, the code I found here insisted on this being an error "
521 "state, so I will throw an exception here.");
523 else if (k < local_row) {
525 const LO LBlockOffset = NumL*blockMatSize;
526 for (LO jj = 0; jj < blockMatSize; ++jj)
527 LV[LBlockOffset+jj] = InV[blockOffset+jj];
530 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
532 const LO UBlockOffset = NumU*blockMatSize;
533 for (LO jj = 0; jj < blockMatSize; ++jj)
534 UV[UBlockOffset+jj] = InV[blockOffset+jj];
545 for (LO jj = 0; jj < blockSize_; ++jj)
546 diagValues[jj*(blockSize_+1)] = this->Athresh_;
547 D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
551 L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
555 U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
570 this->isInitialized_ =
true;
579 template<
class LittleBlockType>
580 struct GetManagedView {
581 static_assert (Kokkos::is_view<LittleBlockType>::value,
582 "LittleBlockType must be a Kokkos::View.");
583 typedef Kokkos::View<
typename LittleBlockType::non_const_data_type,
584 typename LittleBlockType::array_layout,
585 typename LittleBlockType::device_type> managed_non_const_type;
586 static_assert (static_cast<int> (managed_non_const_type::rank) ==
587 static_cast<int> (LittleBlockType::rank),
588 "managed_non_const_type::rank != LittleBlock::rank. "
589 "Please report this bug to the Ifpack2 developers.");
594 template<
class MatrixType>
601 typedef impl_scalar_type IST;
602 const char prefix[] =
"Ifpack2::Experimental::RBILUK::compute: ";
608 (this->A_.
is_null (), std::runtime_error, prefix <<
"The matrix (A_, "
609 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
610 "input before calling this method.");
612 (! this->A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix "
613 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
614 "initialize() or compute() with this matrix until the matrix is fill "
615 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
616 "underlying graph is fill complete.");
618 if (! this->isInitialized ()) {
641 double startTime = timer.
wallTime();
644 this->isComputed_ =
false;
651 if (!this->isKokkosKernelsSpiluk_) {
654 LO NumL, NumU, NumURead;
657 const size_t MaxNumEntries =
658 L_block_->getLocalMaxNumRowEntries () + U_block_->getLocalMaxNumRowEntries () + 1;
660 const LO blockMatSize = blockSize_*blockSize_;
665 const LO rowStride = blockSize_;
668 Kokkos::View<
int*, Kokkos::HostSpace,
669 Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.
getRawPtr (), blockSize_);
671 Kokkos::View<IST*, Kokkos::HostSpace,
672 Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
674 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
677 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock (
"diagModBlock", blockSize_, blockSize_);
678 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp (
"matTmp", blockSize_, blockSize_);
679 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier (
"multiplier", blockSize_, blockSize_);
687 for (
size_t j = 0; j < num_cols; ++j) {
693 const LO numLocalRows = L_block_->getLocalNumRows ();
694 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
698 NumIn = MaxNumEntries;
699 local_inds_host_view_type colValsL;
700 values_host_view_type valsL;
702 L_block_->getLocalRowView(local_row, colValsL, valsL);
703 NumL = (LO) colValsL.size();
704 for (LO j = 0; j < NumL; ++j)
706 const LO matOffset = blockMatSize*j;
707 little_block_host_type lmat((
typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
708 little_block_host_type lmatV((
typename little_block_host_type::value_type*) &InV[matOffset],blockSize_,rowStride);
710 Tpetra::COPY (lmat, lmatV);
711 InI[j] = colValsL[j];
714 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
715 little_block_host_type dmatV((
typename little_block_host_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
717 Tpetra::COPY (dmat, dmatV);
718 InI[NumL] = local_row;
720 local_inds_host_view_type colValsU;
721 values_host_view_type valsU;
722 U_block_->getLocalRowView(local_row, colValsU, valsU);
723 NumURead = (LO) colValsU.
size();
725 for (LO j = 0; j < NumURead; ++j)
727 if (!(colValsU[j] < numLocalRows))
continue;
728 InI[NumL+1+j] = colValsU[j];
729 const LO matOffset = blockMatSize*(NumL+1+j);
730 little_block_host_type umat((
typename little_block_host_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
731 little_block_host_type umatV((
typename little_block_host_type::value_type*) &InV[matOffset], blockSize_, rowStride);
733 Tpetra::COPY (umat, umatV);
739 for (
size_t j = 0; j < NumIn; ++j) {
743 #ifndef IFPACK2_RBILUK_INITIAL
744 for (LO i = 0; i < blockSize_; ++i)
745 for (LO j = 0; j < blockSize_; ++j){
747 diagModBlock(i,j) = 0;
752 Kokkos::deep_copy (diagModBlock, diagmod);
755 for (LO jj = 0; jj < NumL; ++jj) {
757 little_block_host_type currentVal((
typename little_block_host_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride);
759 Tpetra::COPY (currentVal, multiplier);
761 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j,j);
763 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
764 KokkosBatched::SerialGemm
765 <KokkosBatched::Trans::NoTranspose,
766 KokkosBatched::Trans::NoTranspose,
767 KokkosBatched::Algo::Gemm::Blocked>::invoke
768 (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
770 Tpetra::GEMM (
"N",
"N", STS::one (), currentVal, dmatInverse,
771 STS::zero (), matTmp);
775 Tpetra::COPY (matTmp, currentVal);
776 local_inds_host_view_type UUI;
777 values_host_view_type UUV;
779 U_block_->getLocalRowView(j, UUI, UUV);
780 NumUU = (LO) UUI.size();
782 if (this->RelaxValue_ == STM::zero ()) {
783 for (LO k = 0; k < NumUU; ++k) {
784 if (!(UUI[k] < numLocalRows))
continue;
785 const int kk = colflag[UUI[k]];
787 little_block_host_type kkval((
typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
788 little_block_host_type uumat((
typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
789 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
790 KokkosBatched::SerialGemm
791 <KokkosBatched::Trans::NoTranspose,
792 KokkosBatched::Trans::NoTranspose,
793 KokkosBatched::Algo::Gemm::Blocked>::invoke
794 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
796 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
804 for (LO k = 0; k < NumUU; ++k) {
805 if (!(UUI[k] < numLocalRows))
continue;
806 const int kk = colflag[UUI[k]];
807 little_block_host_type uumat((
typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
809 little_block_host_type kkval((
typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
810 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
811 KokkosBatched::SerialGemm
812 <KokkosBatched::Trans::NoTranspose,
813 KokkosBatched::Trans::NoTranspose,
814 KokkosBatched::Algo::Gemm::Blocked>::invoke
815 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
817 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
823 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
824 KokkosBatched::SerialGemm
825 <KokkosBatched::Trans::NoTranspose,
826 KokkosBatched::Trans::NoTranspose,
827 KokkosBatched::Algo::Gemm::Blocked>::invoke
828 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
830 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
831 STM::one (), diagModBlock);
844 Tpetra::COPY (dmatV, dmat);
846 if (this->RelaxValue_ != STM::zero ()) {
848 Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
862 for (
int k = 0; k < blockSize_; ++k) {
866 Tpetra::GETF2 (dmat, ipiv, lapackInfo);
869 lapackInfo != 0, std::runtime_error,
"Ifpack2::Experimental::RBILUK::compute: "
870 "lapackInfo = " << lapackInfo <<
" which indicates an error in the factorization GETRF.");
872 Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
875 lapackInfo != 0, std::runtime_error,
"Ifpack2::Experimental::RBILUK::compute: "
876 "lapackInfo = " << lapackInfo <<
" which indicates an error in the matrix inverse GETRI.");
879 for (LO j = 0; j < NumU; ++j) {
880 little_block_host_type currentVal((
typename little_block_host_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride);
882 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
883 KokkosBatched::SerialGemm
884 <KokkosBatched::Trans::NoTranspose,
885 KokkosBatched::Trans::NoTranspose,
886 KokkosBatched::Algo::Gemm::Blocked>::invoke
887 (STS::one (), dmat, currentVal, STS::zero (), matTmp);
889 Tpetra::GEMM (
"N",
"N", STS::one (), dmat, currentVal,
890 STS::zero (), matTmp);
894 Tpetra::COPY (matTmp, currentVal);
899 U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
902 #ifndef IFPACK2_RBILUK_INITIAL
904 for (
size_t j = 0; j < NumIn; ++j) {
905 colflag[InI[j]] = -1;
909 for (
size_t j = 0; j < num_cols; ++j) {
916 RCP<const block_crs_matrix_type> A_local_bcrs = Details::getBcrsMatrix(this->A_local_);
917 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
918 if (A_local_bcrs.is_null()) {
919 if (A_local_crs.is_null()) {
921 Array<size_t> entriesPerRow(numRows);
923 entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
925 RCP<crs_matrix_type> A_local_crs_nc =
927 this->A_local_->getColMap (),
930 nonconst_local_inds_host_view_type indices(
"indices",this->A_local_->getLocalMaxNumRowEntries());
931 nonconst_values_host_view_type values(
"values",this->A_local_->getLocalMaxNumRowEntries());
933 size_t numEntries = 0;
934 this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
935 A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
937 A_local_crs_nc->fillComplete (this->A_local_->getDomainMap (), this->A_local_->getRangeMap ());
938 A_local_crs = Teuchos::rcp_const_cast<
const crs_matrix_type> (A_local_crs_nc);
942 if (blockSize_ > 1) {
943 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
944 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
947 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
952 this->isKokkosKernelsStream_, std::runtime_error,
"Ifpack2::RBILUK::compute: "
953 "streams are not yet supported.");
955 auto lclMtx = A_local_bcrs->getLocalMatrixDevice();
956 this->A_local_rowmap_ = lclMtx.graph.row_map;
957 this->A_local_entries_ = lclMtx.graph.entries;
958 this->A_local_values_ = lclMtx.values;
963 if (L_block_->isLocallyIndexed ()) {
964 L_block_->setAllToScalar (STS::zero ());
965 U_block_->setAllToScalar (STS::zero ());
968 using row_map_type =
typename local_matrix_device_type::row_map_type;
970 auto lclL = L_block_->getLocalMatrixDevice();
971 row_map_type L_rowmap = lclL.graph.row_map;
972 auto L_entries = lclL.graph.entries;
973 auto L_values = lclL.values;
975 auto lclU = U_block_->getLocalMatrixDevice();
976 row_map_type U_rowmap = lclU.graph.row_map;
977 auto U_entries = lclU.graph.entries;
978 auto U_values = lclU.values;
980 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_,
981 this->A_local_rowmap_, this->A_local_entries_, this->A_local_values_,
982 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
1001 this->isComputed_ =
true;
1002 this->numCompute_ += 1;
1003 this->computeTime_ += (timer.
wallTime() - startTime);
1007 template<
class MatrixType>
1010 apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1011 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1021 this->A_.
is_null (), std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: The matrix is "
1022 "null. Please call setMatrix() with a nonnull input, then initialize() "
1023 "and compute(), before calling this method.");
1025 ! this->isComputed (), std::runtime_error,
1026 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
1027 "you must call compute() before calling this method.");
1028 TEUCHOS_TEST_FOR_EXCEPTION(
1029 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1030 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
1031 "X.getNumVectors() = " << X.getNumVectors ()
1032 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
1033 TEUCHOS_TEST_FOR_EXCEPTION(
1035 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1036 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1037 "fixed. There is a FIXME in this file about this very issue.");
1039 const LO blockMatSize = blockSize_*blockSize_;
1041 const LO rowStride = blockSize_;
1043 BMV yBlock (Y, * (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_);
1044 const BMV xBlock (X, * (this->A_->getColMap ()), blockSize_);
1047 little_host_vec_type lclvec((
typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
1048 const scalar_type one = STM::one ();
1049 const scalar_type zero = STM::zero ();
1052 double startTime = timer.
wallTime();
1055 if (!this->isKokkosKernelsSpiluk_) {
1056 if (alpha == one && beta == zero) {
1064 const LO numVectors = xBlock.getNumVectors();
1065 BMV cBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1066 BMV rBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1067 for (LO imv = 0; imv < numVectors; ++imv)
1069 for (
size_t i = 0; i < D_block_->getLocalNumRows(); ++i)
1072 const_host_little_vec_type xval =
1073 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1074 little_host_vec_type cval =
1075 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1077 Tpetra::COPY (xval, cval);
1079 local_inds_host_view_type colValsL;
1080 values_host_view_type valsL;
1081 L_block_->getLocalRowView(local_row, colValsL, valsL);
1082 LO NumL = (LO) colValsL.size();
1084 for (LO j = 0; j < NumL; ++j)
1086 LO col = colValsL[j];
1087 const_host_little_vec_type prevVal =
1088 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1090 const LO matOffset = blockMatSize*j;
1091 little_block_host_type lij((
typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
1094 Tpetra::GEMV (-one, lij, prevVal, cval);
1100 D_block_->applyBlock(cBlock, rBlock);
1103 for (LO imv = 0; imv < numVectors; ++imv)
1105 const LO numRows = D_block_->getLocalNumRows();
1106 for (LO i = 0; i < numRows; ++i)
1108 LO local_row = (numRows-1)-i;
1109 const_host_little_vec_type rval =
1110 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1111 little_host_vec_type yval =
1112 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1114 Tpetra::COPY (rval, yval);
1116 local_inds_host_view_type colValsU;
1117 values_host_view_type valsU;
1118 U_block_->getLocalRowView(local_row, colValsU, valsU);
1119 LO NumU = (LO) colValsU.size();
1121 for (LO j = 0; j < NumU; ++j)
1123 LO col = colValsU[NumU-1-j];
1124 const_host_little_vec_type prevVal =
1125 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1127 const LO matOffset = blockMatSize*(NumU-1-j);
1128 little_block_host_type uij((
typename little_block_host_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
1131 Tpetra::GEMV (-one, uij, prevVal, yval);
1137 TEUCHOS_TEST_FOR_EXCEPTION(
1138 true, std::runtime_error,
1139 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm without KokkosKernels. ");
1143 if (alpha == zero) {
1150 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1151 apply (X, Y_tmp, mode);
1152 Y.update (alpha, Y_tmp, beta);
1159 using row_map_type =
typename local_matrix_host_type::row_map_type;
1160 using index_type =
typename local_matrix_host_type::index_type;
1161 using values_type =
typename local_matrix_host_type::values_type;
1163 auto X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
1164 auto Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
1166 auto L_row_ptrs_host = L_block_->getCrsGraph().getLocalRowPtrsHost();
1167 auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost();
1168 auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost();
1169 auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost();
1170 auto L_values_host = L_block_->getValuesHost();
1171 auto U_values_host = U_block_->getValuesHost();
1173 row_map_type* L_row_ptrs_host_ri =
reinterpret_cast<row_map_type*
>(&L_row_ptrs_host);
1174 index_type* L_entries_host_ri =
reinterpret_cast<index_type*
>(&L_entries_host);
1175 row_map_type* U_row_ptrs_host_ri =
reinterpret_cast<row_map_type*
>(&U_row_ptrs_host);
1176 index_type* U_entries_host_ri =
reinterpret_cast<index_type*
>(&U_entries_host);
1177 values_type* L_values_host_ri =
reinterpret_cast<values_type*
>(&L_values_host);
1178 values_type* U_values_host_ri =
reinterpret_cast<values_type*
>(&U_values_host);
1180 const auto numRows = L_block_->getLocalNumRows();
1181 local_matrix_host_type L_block_local_host(
"L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_);
1182 local_matrix_host_type U_block_local_host(
"U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_);
1185 KokkosSparse::trsv(
"L",
"N",
"N", L_block_local_host, X_view, Y_view);
1186 KokkosSparse::trsv(
"U",
"N",
"N", U_block_local_host, Y_view, Y_view);
1187 KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
1190 KokkosSparse::trsv(
"U",
"T",
"N", U_block_local_host, X_view, Y_view);
1191 KokkosSparse::trsv(
"L",
"T",
"N", L_block_local_host, Y_view, Y_view);
1192 KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
1199 this->numApply_ += 1;
1200 this->applyTime_ += (timer.
wallTime() - startTime);
1204 template<
class MatrixType>
1207 std::ostringstream os;
1212 os <<
"\"Ifpack2::Experimental::RBILUK\": {";
1213 os <<
"Initialized: " << (this->isInitialized () ?
"true" :
"false") <<
", "
1214 <<
"Computed: " << (this->isComputed () ?
"true" :
"false") <<
", ";
1216 os <<
"Level-of-fill: " << this->getLevelOfFill() <<
", ";
1219 os <<
"Matrix: null";
1222 os <<
"Global matrix dimensions: ["
1223 << this->A_->getGlobalNumRows () <<
", " << this->A_->getGlobalNumCols () <<
"]"
1224 <<
", Global nnz: " << this->A_->getGlobalNumEntries();
1239 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
1240 template class Ifpack2::Experimental::RBILUK< Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1241 template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:347
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:146
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:136
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:193
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:136
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1010
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:153
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:245
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:150
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:165
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:142
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:179
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:595
File for utility functions.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:100
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:141
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:165
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:159
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1205
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:128