41 #ifndef TPETRA_DETAILS_RESIDUAL_HPP
42 #define TPETRA_DETAILS_RESIDUAL_HPP
44 #include "TpetraCore_config.h"
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Tpetra_LocalCrsMatrixOperator.hpp"
47 #include "Tpetra_MultiVector.hpp"
48 #include "Teuchos_RCP.hpp"
49 #include "Teuchos_ScalarTraits.hpp"
52 #include "KokkosSparse_spmv_impl.hpp"
// ---------------------------------------------------------------------------
// LocalResidualFunctor fragment.  NOTE(review): this extraction is missing
// several original lines (the struct declaration itself, most member
// declarations, parts of the constructor, and closing braces), so the code
// below is not complete; comments describe only what the visible lines
// establish.
//
// Visible behavior: a Kokkos team-level functor that, for each local row,
// reduces A_x = sum_j A(row,j) * X(col(j), v) over the row's entries and
// then has one thread per row write R(row,v) = B(row,v) - A_x.
// ---------------------------------------------------------------------------
70 template<
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV,
bool restrictedMode,
bool skipOffRank>
// Type aliases derived from the matrix type; LO is the local ordinal type
// used for row/entry indexing below.
73 using execution_space =
typename AMatrix::execution_space;
74 using LO =
typename AMatrix::non_const_ordinal_type;
75 using value_type =
typename AMatrix::non_const_value_type;
76 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
77 using team_member =
typename team_policy::member_type;
78 using ATV = Kokkos::ArithTraits<value_type>;
// Domain-map view of X; read only on the restrictedMode code path below.
86 ConstMV X_domainmap_lcl;
// Constructor fragment (other parameters and initializers are not visible
// in this extraction).
90 const ConstMV& X_colmap_lcl_,
91 const ConstMV& B_lcl_,
93 const int rows_per_team_,
95 const ConstMV& X_domainmap_lcl_) :
97 X_colmap_lcl(X_colmap_lcl_),
100 rows_per_team(rows_per_team_),
102 X_domainmap_lcl(X_domainmap_lcl_)
// Team operator: each league member handles rows_per_team consecutive rows.
105 KOKKOS_INLINE_FUNCTION
106 void operator() (
const team_member& dev)
const
109 Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (
const LO& loop) {
// Map (league_rank, loop) to a local row index.
110 const LO lclRow =
static_cast<LO
> (dev.league_rank ()) * rows_per_team + loop;
// Guard: the last team may run past the end of the local matrix.
112 if (lclRow >= A_lcl.numRows ()) {
// --- Single-vector path (the is_MV branch structure is only partially
// visible in this extraction) ---
118 value_type A_x = ATV::zero ();
120 if (!restrictedMode) {
// Plain path: vector-reduce over every entry of the row against the
// column-map view of X.
121 const auto A_row = A_lcl.rowConst(lclRow);
122 const LO row_length =
static_cast<LO
> (A_row.length);
124 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (
const LO iEntry, value_type& lsum) {
125 const auto A_val = A_row.value(iEntry);
126 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),0);
// Restricted path: entries with iEntry < offsets(lclRow) read the
// domain-map view of X; later entries read the column-map view unless
// skipOffRank suppresses them (they are then applied elsewhere —
// see OffRankUpdateFunctor usage later in this header).
132 const LO offRankOffset = offsets(lclRow);
133 const size_t start = A_lcl.graph.row_map(lclRow);
134 const size_t end = A_lcl.graph.row_map(lclRow+1);
136 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (
const LO iEntry, value_type& lsum) {
137 const auto A_val = A_lcl.values(iEntry);
138 const auto lclCol = A_lcl.graph.entries(iEntry);
139 if (iEntry < offRankOffset)
140 lsum += A_val * X_domainmap_lcl(lclCol,0);
141 else if (!skipOffRank)
142 lsum += A_val * X_colmap_lcl(lclCol,0);
// One thread writes the residual for this row: R = B - A*x.
146 Kokkos::single(Kokkos::PerThread(dev),[&] () {
147 R_lcl(lclRow,0) = B_lcl(lclRow,0) - A_x;
// --- Multivector path: identical computation repeated per column v ---
152 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
154 for(LO v=0; v<numVectors; v++) {
156 value_type A_x = ATV::zero ();
158 if (!restrictedMode) {
160 const auto A_row = A_lcl.rowConst(lclRow);
161 const LO row_length =
static_cast<LO
> (A_row.length);
163 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, row_length), [&] (
const LO iEntry, value_type& lsum) {
164 const auto A_val = A_row.value(iEntry);
165 lsum += A_val * X_colmap_lcl(A_row.colidx(iEntry),v);
169 const LO offRankOffset = offsets(lclRow);
170 const size_t start = A_lcl.graph.row_map(lclRow);
171 const size_t end = A_lcl.graph.row_map(lclRow+1);
173 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, start, end), [&] (
const LO iEntry, value_type& lsum) {
174 const auto A_val = A_lcl.values(iEntry);
175 const auto lclCol = A_lcl.graph.entries(iEntry);
176 if (iEntry < offRankOffset)
177 lsum += A_val * X_domainmap_lcl(lclCol,v);
178 else if (!skipOffRank)
179 lsum += A_val * X_colmap_lcl(lclCol,v);
183 Kokkos::single(Kokkos::PerThread(dev),[&] () {
184 R_lcl(lclRow,v) = B_lcl(lclRow,v) - A_x;
// ---------------------------------------------------------------------------
// OffRankUpdateFunctor fragment.  NOTE(review): the struct declaration,
// remaining member declarations, constructor pieces, and closing braces are
// missing from this extraction.
//
// Per the documentation elsewhere in this header ("Functor for computing
// R -= A_offRank*X_colmap"), this functor applies only the off-rank column
// contributions — entries at or past offsets(lclRow) — and SUBTRACTS them
// from R, complementing a prior skipOffRank residual pass.
// ---------------------------------------------------------------------------
195 template<
class AMatrix,
class MV,
class ConstMV,
class Offsets,
bool is_MV>
198 using execution_space =
typename AMatrix::execution_space;
199 using LO =
typename AMatrix::non_const_ordinal_type;
200 using value_type =
typename AMatrix::non_const_value_type;
201 using team_policy =
typename Kokkos::TeamPolicy<execution_space>;
202 using team_member =
typename team_policy::member_type;
203 using ATV = Kokkos::ArithTraits<value_type>;
// Column-map view of X; the off-rank entries index into this view.
206 ConstMV X_colmap_lcl;
// Constructor fragment (other parameters/initializers not visible here).
213 const ConstMV& X_colmap_lcl_,
215 const int rows_per_team_,
218 X_colmap_lcl(X_colmap_lcl_),
220 rows_per_team(rows_per_team_),
224 KOKKOS_INLINE_FUNCTION
225 void operator() (
const team_member& dev)
const
228 Kokkos::parallel_for(Kokkos::TeamThreadRange (dev, 0, rows_per_team),[&] (
const LO& loop) {
// Map (league_rank, loop) to a local row index; skip out-of-range rows.
229 const LO lclRow =
static_cast<LO
> (dev.league_rank ()) * rows_per_team + loop;
231 if (lclRow >= A_lcl.numRows ()) {
// Off-rank entries of this row occupy [offsets(lclRow), row_map(lclRow+1)).
235 const LO offRankOffset = offsets(lclRow);
236 const size_t end = A_lcl.graph.row_map(lclRow+1);
// Single-vector path: reduce the off-rank product, then subtract.
240 value_type A_x = ATV::zero ();
242 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (
const LO iEntry, value_type& lsum) {
243 const auto A_val = A_lcl.values(iEntry);
244 const auto lclCol = A_lcl.graph.entries(iEntry);
245 lsum += A_val * X_colmap_lcl(lclCol,0);
// One thread applies the correction: R -= A_offRank * X.
248 Kokkos::single(Kokkos::PerThread(dev),[&] () {
249 R_lcl(lclRow,0) -= A_x;
// Multivector path: same off-rank correction per column v.
254 const LO numVectors =
static_cast<LO
>(X_colmap_lcl.extent(1));
256 for(LO v=0; v<numVectors; v++) {
258 value_type A_x = ATV::zero ();
260 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange (dev, offRankOffset, end), [&] (
const LO iEntry, value_type& lsum) {
261 const auto A_val = A_lcl.values(iEntry);
262 const auto lclCol = A_lcl.graph.entries(iEntry);
263 lsum += A_val * X_colmap_lcl(lclCol,v);
266 Kokkos::single(Kokkos::PerThread(dev),[&] () {
267 R_lcl(lclRow,v) -= A_x;
// ---------------------------------------------------------------------------
// localResidual fragment.  NOTE(review): the function signature and many
// body lines (branch guards, braces) are missing from this extraction.
//
// Visible behavior: validate X/R/B against the matrix's maps, fall back to
// A.localApply when the fused-residual behavior flag is off, size a
// Kokkos::TeamPolicy via KokkosSparse's SpMV launch heuristics, and launch
// the LocalResidualFunctor variant selected by (is_MV, restrictedMode).
// ---------------------------------------------------------------------------
276 template<
class SC,
class LO,
class GO,
class NO>
281 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
284 using Teuchos::NO_TRANS;
290 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
// Read-only device view of column-map X; X_domainmap_lcl stays empty and is
// only assigned below when a domain-map X is supplied.
293 const_local_view_device_type X_colmap_lcl = X_colmap.
getLocalViewDevice(Access::ReadOnly);
296 const_local_view_device_type X_domainmap_lcl;
// Input validation (presumably under a debug guard — the guard line is not
// visible here): vector counts and local lengths must match A's maps,
// A must be fill complete, and the multivectors must be constant stride.
300 TEUCHOS_TEST_FOR_EXCEPTION
302 "X.getNumVectors() = " << X_colmap.
getNumVectors () <<
" != "
304 TEUCHOS_TEST_FOR_EXCEPTION
306 A.
getColMap ()->getLocalNumElements (), std::runtime_error,
307 "X has the wrong number of local rows. "
309 "A.getColMap()->getLocalNumElements() = " <<
310 A.
getColMap ()->getLocalNumElements () <<
".");
311 TEUCHOS_TEST_FOR_EXCEPTION
313 A.
getRowMap ()->getLocalNumElements (), std::runtime_error,
314 "R has the wrong number of local rows. "
316 "A.getRowMap()->getLocalNumElements() = " <<
317 A.
getRowMap ()->getLocalNumElements () <<
".");
318 TEUCHOS_TEST_FOR_EXCEPTION
320 A.
getRowMap ()->getLocalNumElements (), std::runtime_error,
321 "B has the wrong number of local rows. "
323 "A.getRowMap()->getLocalNumElements() = " <<
324 A.
getRowMap ()->getLocalNumElements () <<
".");
326 TEUCHOS_TEST_FOR_EXCEPTION
327 (! A.
isFillComplete (), std::runtime_error,
"The matrix A is not "
328 "fill complete. You must call fillComplete() (possibly with "
329 "domain and range Map arguments) without an intervening "
330 "resumeFill() call before you may call this method.");
331 TEUCHOS_TEST_FOR_EXCEPTION
333 std::runtime_error,
"X, Y and B must be constant stride.");
// X may not alias R or B: the fused kernel reads X while writing R.
336 TEUCHOS_TEST_FOR_EXCEPTION
337 ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () !=
nullptr) ||
338 (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () !=
nullptr),
339 std::runtime_error,
"X, Y and R may not alias one another.");
// Non-fused fallback: compute R = A*X via localApply (the B update that
// completes the residual presumably happens elsewhere — not visible here).
343 if (!fusedResidual) {
344 SC one = Teuchos::ScalarTraits<SC>::one();
346 SC zero = Teuchos::ScalarTraits<SC>::zero();
349 A.
localApply(X_colmap,R,Teuchos::NO_TRANS, one, zero);
// Nothing to do locally when the matrix has no rows on this process.
354 if (A_lcl.numRows() == 0) {
// Choose rows-per-team / vector length using KokkosSparse's SpMV launch
// heuristics, then build the team policy (the branch choosing AUTO vs an
// explicit team_size is only partially visible).
358 int64_t numLocalRows = A_lcl.numRows ();
359 int64_t myNnz = A_lcl.nnz();
362 int vector_length = -1;
363 int64_t rows_per_thread = -1;
365 using execution_space =
typename CrsMatrix<SC,LO,GO,NO>::execution_space;
366 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
368 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
369 int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;
371 policy_type policy (1, 1);
373 policy = policy_type (worksets, Kokkos::AUTO, vector_length);
376 policy = policy_type (worksets, team_size, vector_length);
// Dispatch: LocalResidualFunctor's flags are <is_MV, restrictedMode,
// skipOffRank>; skipOffRank is always false on this (non-overlapped) path.
379 bool is_vector = (X_colmap_lcl.extent(1) == 1);
383 if (X_domainmap ==
nullptr) {
// Single vector, no domain-map X: <false,false,false>.
385 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,false,false>;
386 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
387 Kokkos::parallel_for(
"residual-vector",policy,func);
// Single vector, restricted mode with a domain-map X: <false,true,false>.
392 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
393 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,false>;
394 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
395 Kokkos::parallel_for(
"residual-vector",policy,func);
// Multivector variants: <true,false,false> and <true,true,false>.
401 if (X_domainmap ==
nullptr) {
403 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,false,false>;
404 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
405 Kokkos::parallel_for(
"residual-multivector",policy,func);
410 X_domainmap_lcl = X_domainmap->getLocalViewDevice(Access::ReadOnly);
411 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,false>;
412 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
413 Kokkos::parallel_for(
"residual-multivector",policy,func);
// ---------------------------------------------------------------------------
// localResidualWithCommCompOverlap fragment.  NOTE(review): several body
// lines (braces, else-branches, and at least one branch guard) are missing
// from this extraction.
//
// Visible strategy: compute the on-rank part of R = B - A*X with a
// skipOffRank=true LocalResidualFunctor while the import of X is still in
// flight, then finish the import (endImport + fence) and apply the
// off-rank correction R -= A_offRank * X with OffRankUpdateFunctor.
// ---------------------------------------------------------------------------
420 template<
class SC,
class LO,
class GO,
class NO>
421 void localResidualWithCommCompOverlap(
const CrsMatrix<SC,LO,GO,NO> & A,
422 MultiVector<SC,LO,GO,NO> & X_colmap,
423 const MultiVector<SC,LO,GO,NO> & B,
424 MultiVector<SC,LO,GO,NO> & R,
425 const Kokkos::View<const size_t*, typename NO::device_type>& offsets,
426 const MultiVector<SC,LO,GO,NO> & X_domainmap) {
428 using Teuchos::NO_TRANS;
430 using import_type =
typename CrsMatrix<SC,LO,GO,NO>::import_type;
432 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localResidualWithCommCompOverlap");
434 using local_matrix_device_type =
typename CrsMatrix<SC,LO,GO,NO>::local_matrix_device_type;
435 using local_view_device_type =
typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::non_const_type;
436 using const_local_view_device_type =
typename MultiVector<SC,LO,GO,NO>::dual_view_type::t_dev::const_type;
437 using offset_type = Kokkos::View<const size_t*, typename NO::device_type>;
// Device views: R is overwritten wholesale (OverwriteAll); all other
// multivectors are read-only.
439 local_matrix_device_type A_lcl = A.getLocalMatrixDevice ();
440 const_local_view_device_type X_colmap_lcl = X_colmap.getLocalViewDevice(Access::ReadOnly);
441 const_local_view_device_type B_lcl = B.getLocalViewDevice(Access::ReadOnly);
442 local_view_device_type R_lcl = R.getLocalViewDevice(Access::OverwriteAll);
443 const_local_view_device_type X_domainmap_lcl = X_domainmap.getLocalViewDevice(Access::ReadOnly);;
// Input validation (presumably debug-only; the guard is not visible):
// matching vector counts and local lengths, fill complete, constant
// stride, and no aliasing between X and R/B.
447 TEUCHOS_TEST_FOR_EXCEPTION
448 (X_colmap.getNumVectors () != R.getNumVectors (), std::runtime_error,
449 "X.getNumVectors() = " << X_colmap.getNumVectors () <<
" != "
450 "R.getNumVectors() = " << R.getNumVectors () <<
".");
451 TEUCHOS_TEST_FOR_EXCEPTION
452 (X_colmap.getLocalLength () !=
453 A.getColMap ()->getLocalNumElements (), std::runtime_error,
454 "X has the wrong number of local rows. "
455 "X.getLocalLength() = " << X_colmap.getLocalLength () <<
" != "
456 "A.getColMap()->getLocalNumElements() = " <<
457 A.getColMap ()->getLocalNumElements () <<
".");
458 TEUCHOS_TEST_FOR_EXCEPTION
459 (R.getLocalLength () !=
460 A.getRowMap ()->getLocalNumElements (), std::runtime_error,
461 "R has the wrong number of local rows. "
462 "R.getLocalLength() = " << R.getLocalLength () <<
" != "
463 "A.getRowMap()->getLocalNumElements() = " <<
464 A.getRowMap ()->getLocalNumElements () <<
".");
465 TEUCHOS_TEST_FOR_EXCEPTION
466 (B.getLocalLength () !=
467 A.getRowMap ()->getLocalNumElements (), std::runtime_error,
468 "B has the wrong number of local rows. "
469 "B.getLocalLength() = " << B.getLocalLength () <<
" != "
470 "A.getRowMap()->getLocalNumElements() = " <<
471 A.getRowMap ()->getLocalNumElements () <<
".");
473 TEUCHOS_TEST_FOR_EXCEPTION
474 (! A.isFillComplete (), std::runtime_error,
"The matrix A is not "
475 "fill complete. You must call fillComplete() (possibly with "
476 "domain and range Map arguments) without an intervening "
477 "resumeFill() call before you may call this method.");
478 TEUCHOS_TEST_FOR_EXCEPTION
479 (! X_colmap.isConstantStride () || ! R.isConstantStride () || ! B.isConstantStride (),
480 std::runtime_error,
"X, Y and B must be constant stride.");
483 TEUCHOS_TEST_FOR_EXCEPTION
484 ((X_colmap_lcl.data () == R_lcl.data () && X_colmap_lcl.data () !=
nullptr) ||
485 (X_colmap_lcl.data () == B_lcl.data () && X_colmap_lcl.data () !=
nullptr),
486 std::runtime_error,
"X, Y and R may not alias one another.");
// Nothing to do locally when the matrix has no rows on this process.
489 if (A_lcl.numRows() == 0) {
// Policy sizing mirrors localResidual: KokkosSparse SpMV heuristics.
493 int64_t numLocalRows = A_lcl.numRows ();
494 int64_t myNnz = A_lcl.nnz();
497 int vector_length = -1;
498 int64_t rows_per_thread = -1;
500 using execution_space =
typename CrsMatrix<SC,LO,GO,NO>::execution_space;
501 using policy_type =
typename Kokkos::TeamPolicy<execution_space>;
503 int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(numLocalRows, myNnz, rows_per_thread, team_size, vector_length);
504 int64_t worksets = (B_lcl.extent (0) + rows_per_team - 1) / rows_per_team;
506 policy_type policy (1, 1);
508 policy = policy_type (worksets, Kokkos::AUTO, vector_length);
511 policy = policy_type (worksets, team_size, vector_length);
// Single-vector branch (the `if (is_vector)` guard itself is not visible):
// on-rank residual with <false,true,true>, finish the import, fence, then
// off-rank correction with OffRankUpdateFunctor<...,false>.
514 bool is_vector = (X_colmap_lcl.extent(1) == 1);
518 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false,true,true>;
519 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
520 Kokkos::parallel_for(
"residual-vector",policy,func);
522 RCP<const import_type> importer = A.getGraph ()->getImporter ();
523 X_colmap.endImport (X_domainmap, *importer,
INSERT,
true);
// Fence so the imported X data is visible before the off-rank pass.
525 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-1");
527 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,false>;
528 functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
529 Kokkos::parallel_for(
"residual-vector-offrank",policy,func2);
// Multivector branch: same overlap pattern with is_MV=true functors.
534 using functor_type = LocalResidualFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true,true,true>;
535 functor_type func (A_lcl, X_colmap_lcl, B_lcl, R_lcl, rows_per_team, offsets, X_domainmap_lcl);
536 Kokkos::parallel_for(
"residual-multivector",policy,func);
538 RCP<const import_type> importer = A.getGraph ()->getImporter ();
539 X_colmap.endImport (X_domainmap, *importer,
INSERT,
true);
541 Kokkos::fence(
"Tpetra::localResidualWithCommCompOverlap-2");
543 using functor_type2 = OffRankUpdateFunctor<local_matrix_device_type,local_view_device_type,const_local_view_device_type,offset_type,true>;
544 functor_type2 func2 (A_lcl, X_colmap_lcl, R_lcl, rows_per_team, offsets);
545 Kokkos::parallel_for(
"residual-vector-offrank",policy,func2);
// ---------------------------------------------------------------------------
// residual fragment.  NOTE(review): the function signature and many body
// lines (guards, braces, else-branches) are missing from this extraction.
// Per the declaration documented elsewhere in this header, this function
// computes R = B - A * X.
//
// Visible flow: a generic-operator fallback (apply + update), then the
// CrsMatrix path that imports X into the column map (optionally in
// "restricted" mode when the maps are locally fitted), routes B and R
// through the row map when an exporter exists, and dispatches to
// localResidual / localResidualWithCommCompOverlap.
// ---------------------------------------------------------------------------
552 template<
class SC,
class LO,
class GO,
class NO>
560 using Teuchos::rcp_const_cast;
561 using Teuchos::rcpFromRef;
// Overlapping communication with computation requires the
// skip-copyAndPermute optimization to be enabled as well.
566 if (overlapCommunicationAndComputation)
567 TEUCHOS_ASSERT(skipCopyAndPermuteIfPossible);
574 bool restrictedMode =
false;
// Generic-operator fallback (its guard is not visible here):
// R = A*X, then R = B - R via update(one, B, -one).
579 SC one = Teuchos::ScalarTraits<SC>::one(), negone = -one, zero = Teuchos::ScalarTraits<SC>::zero();
580 Aop.
apply(X_in,R_in,Teuchos::NO_TRANS, one, zero);
581 R_in.
update(one,B_in,negone);
590 using offset_type =
typename graph_type::offset_device_view_type;
593 const bool R_is_replicated =
// The graph's importer/exporter describe the column-map and row-map
// redistributions needed before the local kernel can run.
602 RCP<const import_type> importer = A.
getGraph ()->getImporter ();
603 RCP<const export_type> exporter = A.
getGraph ()->getExporter ();
// No importer: X_in already has column-map layout, so use it in place
// (const_cast is confined to wrapping it in a nonconst RCP).
609 if (importer.is_null ()) {
624 X_colMap = rcp_const_cast<MV> (rcpFromRef (X_in) );
// Restricted mode: skip copyAndPermute in the import when the importer's
// maps are locally fitted; verified below in debug mode.
636 restrictedMode = skipCopyAndPermuteIfPossible && importer->isLocallyFitted();
638 if (debug && restrictedMode) {
639 TEUCHOS_TEST_FOR_EXCEPTION
640 (!importer->getTargetMap()->isLocallyFitted(*importer->getSourceMap()), std::runtime_error,
641 "Source map and target map are not locally fitted, but Tpetra::residual thinks they are.");
// Start the import of X into the column map; it is finished further
// below, possibly overlapped with computation.
645 X_colMap->beginImport (X_in, *importer,
INSERT, restrictedMode);
// R can live in the caller's row-map vector when there is no exporter.
656 if(exporter.is_null()) {
661 R_rowMap = rcpFromRef (R_in);
// Likewise B: used in place without an exporter; otherwise imported into
// a row-map vector with ADD combine before forming the residual.
670 RCP<const MV> B_rowMap;
671 if(exporter.is_null()) {
677 B_rowMap = rcp_const_cast<
const MV> (B_rowMapNonConst);
680 B_rowMap = rcpFromRef (B_in);
686 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::residual: B Import");
688 B_rowMapNonConst->doImport(B_in, *exporter,
ADD);
689 B_rowMap = rcp_const_cast<
const MV> (B_rowMapNonConst);
// Exporter present: finish the X import first, compute the local
// residual, then export R (export code not fully visible here).
697 if (! exporter.is_null ()) {
698 if ( ! importer.is_null ())
699 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
700 if (restrictedMode && !importer.is_null ())
701 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
703 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
706 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::residual: R Export");
// No exporter: either overlap the remaining import with computation, or
// finish the import and run the fused local residual directly.
715 if (overlapCommunicationAndComputation) {
716 localResidualWithCommCompOverlap (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, X_in);
718 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
719 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets, &X_in);
722 if ( ! importer.is_null ())
723 X_colMap->endImport (X_in, *importer,
INSERT, restrictedMode);
724 localResidual (A, *X_colMap, *B_rowMap, *R_rowMap, offsets);
// Locally replicated R: sum contributions across processes.
741 if (R_is_replicated) {
742 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::residual: Reduce Y");
754 #endif // TPETRA_DETAILS_RESIDUAL_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
virtual void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const =0
Computes the operator-multivector application.
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
Functor for computing R -= A_offRank*X_colmap.
offsets_view_type offsets_
Offsets (output argument)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Abstract interface for operators (e.g., matrices and preconditioners).
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Functor for computing the residual.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
void start()
Start the deep_copy counter.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
void reduce()
Sum values of a locally replicated multivector across all processes.
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
void residual(const Operator< SC, LO, GO, NO > &A, const MultiVector< SC, LO, GO, NO > &X, const MultiVector< SC, LO, GO, NO > &B, MultiVector< SC, LO, GO, NO > &R)
Computes R = B - A * X.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.