41 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
42 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
50 #include "Tpetra_BlockMultiVector.hpp"
53 #include "Teuchos_TimeMonitor.hpp"
54 #ifdef HAVE_TPETRA_DEBUG
56 #endif // HAVE_TPETRA_DEBUG
58 #include "KokkosSparse_spmv.hpp"
65 struct BlockCrsRowStruct {
66 T totalNumEntries, totalNumBytes, maxRowLength;
67 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
68 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
69 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
70 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
75 KOKKOS_INLINE_FUNCTION
76 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
77 a.totalNumEntries += b.totalNumEntries;
78 a.totalNumBytes += b.totalNumBytes;
79 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
82 template<
typename T,
typename ExecSpace>
83 struct BlockCrsReducer {
84 typedef BlockCrsReducer reducer;
86 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
89 KOKKOS_INLINE_FUNCTION
90 BlockCrsReducer(value_type &val) : value(&val) {}
92 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
93 KOKKOS_INLINE_FUNCTION
void join(value_type &dst,
const value_type &src)
const { dst += src; }
94 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
95 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
96 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
105 template<
class Scalar,
class LO,
class GO,
class Node>
106 class GetLocalDiagCopy {
108 typedef typename Node::device_type device_type;
109 typedef size_t diag_offset_type;
110 typedef Kokkos::View<
const size_t*, device_type,
111 Kokkos::MemoryUnmanaged> diag_offsets_type;
112 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
114 typedef typename local_graph_device_type::row_map_type row_offsets_type;
115 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
116 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
117 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
120 GetLocalDiagCopy (
const diag_type& diag,
121 const values_type& val,
122 const diag_offsets_type& diagOffsets,
123 const row_offsets_type& ptr,
124 const LO blockSize) :
126 diagOffsets_ (diagOffsets),
128 blockSize_ (blockSize),
129 offsetPerBlock_ (blockSize_*blockSize_),
133 KOKKOS_INLINE_FUNCTION
void
134 operator() (
const LO& lclRowInd)
const
139 const size_t absOffset = ptr_[lclRowInd];
142 const size_t relOffset = diagOffsets_[lclRowInd];
145 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
150 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
151 const_little_block_type;
152 const_little_block_type D_in (val_.data () + pointOffset,
153 blockSize_, blockSize_);
154 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
160 diag_offsets_type diagOffsets_;
161 row_offsets_type ptr_;
168 template<
class Scalar,
class LO,
class GO,
class Node>
170 BlockCrsMatrix<Scalar, LO, GO, Node>::
171 markLocalErrorAndGetStream ()
173 * (this->localError_) =
true;
174 if ((*errs_).is_null ()) {
175 *errs_ = Teuchos::rcp (
new std::ostringstream ());
180 template<
class Scalar,
class LO,
class GO,
class Node>
184 graph_ (Teuchos::rcp (new
map_type ()), 0),
185 blockSize_ (static_cast<LO> (0)),
186 X_colMap_ (new Teuchos::RCP<
BMV> ()),
187 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
188 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
190 localError_ (new bool (false)),
191 errs_ (new Teuchos::RCP<std::ostringstream> ())
195 template<
class Scalar,
class LO,
class GO,
class Node>
198 const LO blockSize) :
201 rowMeshMap_ (* (graph.getRowMap ())),
202 blockSize_ (blockSize),
203 X_colMap_ (new Teuchos::RCP<
BMV> ()),
204 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
205 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
206 offsetPerBlock_ (blockSize * blockSize),
207 localError_ (new bool (false)),
208 errs_ (new Teuchos::RCP<std::ostringstream> ())
212 TEUCHOS_TEST_FOR_EXCEPTION(
213 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
214 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
215 "rows (isSorted() is false). This class assumes sorted rows.");
217 graphRCP_ = Teuchos::rcpFromRef(graph_);
223 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
224 TEUCHOS_TEST_FOR_EXCEPTION(
225 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
226 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
227 " <= 0. The block size must be positive.");
235 const auto colPointMap = Teuchos::rcp
237 *pointImporter_ = Teuchos::rcp
241 auto local_graph_h = graph.getLocalGraphHost ();
242 auto ptr_h = local_graph_h.row_map;
243 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
246 auto ind_h = local_graph_h.entries;
247 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
251 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
252 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
256 template<
class Scalar,
class LO,
class GO,
class Node>
259 const typename local_matrix_device_type::values_type& values,
260 const LO blockSize) :
263 rowMeshMap_ (* (graph.getRowMap ())),
264 blockSize_ (blockSize),
265 X_colMap_ (new Teuchos::RCP<
BMV> ()),
266 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
267 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
268 offsetPerBlock_ (blockSize * blockSize),
269 localError_ (new bool (false)),
270 errs_ (new Teuchos::RCP<std::ostringstream> ())
273 TEUCHOS_TEST_FOR_EXCEPTION(
274 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
275 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
276 "rows (isSorted() is false). This class assumes sorted rows.");
278 graphRCP_ = Teuchos::rcpFromRef(graph_);
284 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
285 TEUCHOS_TEST_FOR_EXCEPTION(
286 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
287 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
288 " <= 0. The block size must be positive.");
296 const auto colPointMap = Teuchos::rcp
298 *pointImporter_ = Teuchos::rcp
302 auto local_graph_h = graph.getLocalGraphHost ();
303 auto ptr_h = local_graph_h.row_map;
304 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
307 auto ind_h = local_graph_h.entries;
308 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
311 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
312 TEUCHOS_ASSERT_EQUALITY(numValEnt, values.size());
313 val_ = decltype (val_) (values);
317 template<
class Scalar,
class LO,
class GO,
class Node>
322 const LO blockSize) :
325 rowMeshMap_ (* (graph.getRowMap ())),
326 domainPointMap_ (domainPointMap),
327 rangePointMap_ (rangePointMap),
328 blockSize_ (blockSize),
329 X_colMap_ (new Teuchos::RCP<
BMV> ()),
330 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
331 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
332 offsetPerBlock_ (blockSize * blockSize),
333 localError_ (new bool (false)),
334 errs_ (new Teuchos::RCP<std::ostringstream> ())
336 TEUCHOS_TEST_FOR_EXCEPTION(
337 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
338 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
339 "rows (isSorted() is false). This class assumes sorted rows.");
341 graphRCP_ = Teuchos::rcpFromRef(graph_);
347 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
348 TEUCHOS_TEST_FOR_EXCEPTION(
349 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
350 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
351 " <= 0. The block size must be positive.");
355 const auto colPointMap = Teuchos::rcp
357 *pointImporter_ = Teuchos::rcp
361 auto local_graph_h = graph.getLocalGraphHost ();
362 auto ptr_h = local_graph_h.row_map;
363 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
367 auto ind_h = local_graph_h.entries;
368 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
372 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
373 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
378 template<
class Scalar,
class LO,
class GO,
class Node>
379 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
384 return Teuchos::rcp (
new map_type (domainPointMap_));
387 template<
class Scalar,
class LO,
class GO,
class Node>
388 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
393 return Teuchos::rcp (
new map_type (rangePointMap_));
396 template<
class Scalar,
class LO,
class GO,
class Node>
397 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
401 return graph_.getRowMap();
404 template<
class Scalar,
class LO,
class GO,
class Node>
405 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
409 return graph_.getColMap();
412 template<
class Scalar,
class LO,
class GO,
class Node>
417 return graph_.getGlobalNumRows();
420 template<
class Scalar,
class LO,
class GO,
class Node>
425 return graph_.getLocalNumRows();
428 template<
class Scalar,
class LO,
class GO,
class Node>
433 return graph_.getLocalMaxNumRowEntries();
436 template<
class Scalar,
class LO,
class GO,
class Node>
441 Teuchos::ETransp mode,
446 TEUCHOS_TEST_FOR_EXCEPTION(
447 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
448 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
449 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
450 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
452 TEUCHOS_TEST_FOR_EXCEPTION(
454 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
455 "X and Y must both be constant stride");
459 const LO blockSize = getBlockSize ();
461 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
462 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
464 catch (std::invalid_argument& e) {
465 TEUCHOS_TEST_FOR_EXCEPTION(
466 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
467 "apply: Either the input MultiVector X or the output MultiVector Y "
468 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
469 "graph. BlockMultiVector's constructor threw the following exception: "
478 const_cast<this_BCRS_type&
> (*this).applyBlock (X_view, Y_view, mode, alpha, beta);
479 }
catch (std::invalid_argument& e) {
480 TEUCHOS_TEST_FOR_EXCEPTION(
481 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
482 "apply: The implementation method applyBlock complained about having "
483 "an invalid argument. It reported the following: " << e.what ());
484 }
catch (std::logic_error& e) {
485 TEUCHOS_TEST_FOR_EXCEPTION(
486 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
487 "apply: The implementation method applyBlock complained about a "
488 "possible bug in its implementation. It reported the following: "
490 }
catch (std::exception& e) {
491 TEUCHOS_TEST_FOR_EXCEPTION(
492 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
493 "apply: The implementation method applyBlock threw an exception which "
494 "is neither std::invalid_argument nor std::logic_error, but is a "
495 "subclass of std::exception. It reported the following: "
498 TEUCHOS_TEST_FOR_EXCEPTION(
499 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
500 "apply: The implementation method applyBlock threw an exception which "
501 "is not an instance of a subclass of std::exception. This probably "
502 "indicates a bug. Please report this to the Tpetra developers.");
506 template<
class Scalar,
class LO,
class GO,
class Node>
511 Teuchos::ETransp mode,
515 TEUCHOS_TEST_FOR_EXCEPTION(
517 "Tpetra::BlockCrsMatrix::applyBlock: "
518 "X and Y have different block sizes. X.getBlockSize() = "
522 if (mode == Teuchos::NO_TRANS) {
523 applyBlockNoTrans (X, Y, alpha, beta);
524 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
525 applyBlockTrans (X, Y, mode, alpha, beta);
527 TEUCHOS_TEST_FOR_EXCEPTION(
528 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
529 "applyBlock: Invalid 'mode' argument. Valid values are "
530 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
534 template<
class Scalar,
class LO,
class GO,
class Node>
545 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
546 "destMatrix is required to be null.");
550 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
551 RCP<crs_graph_type> destGraph = importAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, importer,
552 srcGraph->getDomainMap(),
553 srcGraph->getRangeMap());
557 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
562 template<
class Scalar,
class LO,
class GO,
class Node>
573 TEUCHOS_TEST_FOR_EXCEPTION(!destMatrix.is_null(), std::invalid_argument,
574 "destMatrix is required to be null.");
578 RCP<crs_graph_type> srcGraph = rcp (
new crs_graph_type(this->getCrsGraph()));
579 RCP<crs_graph_type> destGraph = exportAndFillCompleteCrsGraph<crs_graph_type>(srcGraph, exporter,
580 srcGraph->getDomainMap(),
581 srcGraph->getRangeMap());
585 destMatrix = rcp (
new this_BCRS_type (*destGraph, getBlockSize()));
590 template<
class Scalar,
class LO,
class GO,
class Node>
595 auto val_d = val_.getDeviceView(Access::OverwriteAll);
599 template<
class Scalar,
class LO,
class GO,
class Node>
605 const LO numColInds)
const
607 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
608 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
609 ptrdiff_t * offsets = offsets_host_view.data();
610 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
611 const LO validCount = this->replaceLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
615 template <
class Scalar,
class LO,
class GO,
class Node>
619 Kokkos::MemoryUnmanaged>& offsets)
const
621 graph_.getLocalDiagOffsets (offsets);
624 template <
class Scalar,
class LO,
class GO,
class Node>
628 Kokkos::MemoryUnmanaged>& diag,
629 const Kokkos::View<
const size_t*, device_type,
630 Kokkos::MemoryUnmanaged>& offsets)
const
632 using Kokkos::parallel_for;
633 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
635 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getLocalNumElements ());
636 const LO blockSize = this->getBlockSize ();
637 TEUCHOS_TEST_FOR_EXCEPTION
638 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
639 static_cast<LO> (diag.extent (1)) < blockSize ||
640 static_cast<LO> (diag.extent (2)) < blockSize,
641 std::invalid_argument, prefix <<
642 "The input Kokkos::View is not big enough to hold all the data.");
643 TEUCHOS_TEST_FOR_EXCEPTION
644 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
645 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
646 "diagonal blocks " << lclNumMeshRows <<
".");
648 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
649 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
655 auto val_d = val_.getDeviceView(Access::ReadOnly);
656 parallel_for (policy_type (0, lclNumMeshRows),
657 functor_type (diag, val_d, offsets,
658 graph_.getLocalGraphDevice ().row_map, blockSize_));
661 template<
class Scalar,
class LO,
class GO,
class Node>
667 const LO numColInds)
const
669 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
670 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
671 ptrdiff_t * offsets = offsets_host_view.data();
672 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
673 const LO validCount = this->absMaxLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
678 template<
class Scalar,
class LO,
class GO,
class Node>
684 const LO numColInds)
const
686 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
687 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
688 ptrdiff_t * offsets = offsets_host_view.data();
689 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
690 const LO validCount = this->sumIntoLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
693 template<
class Scalar,
class LO,
class GO,
class Node>
697 nonconst_local_inds_host_view_type &Indices,
698 nonconst_values_host_view_type &Values,
699 size_t& NumEntries)
const
701 auto vals = getValuesHost(LocalRow);
702 const LO numInds = ptrHost_(LocalRow+1) - ptrHost_(LocalRow);
703 if (numInds > (LO)Indices.extent(0) || numInds*blockSize_*blockSize_ > (LO)Values.extent(0)) {
704 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
705 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
706 << numInds <<
" row entries");
708 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
709 for (LO i=0; i<numInds; ++i) {
710 Indices[i] = colInds[i];
712 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
715 NumEntries = numInds;
718 template<
class Scalar,
class LO,
class GO,
class Node>
724 const LO numColInds)
const
726 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
731 return static_cast<LO
> (0);
734 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
738 for (LO k = 0; k < numColInds; ++k) {
739 const LO relBlockOffset =
740 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
741 if (relBlockOffset != LINV) {
742 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
743 hint = relBlockOffset + 1;
751 template<
class Scalar,
class LO,
class GO,
class Node>
755 const ptrdiff_t offsets[],
757 const LO numOffsets)
const
759 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
764 return static_cast<LO
> (0);
771 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
772 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
773 size_t pointOffset = 0;
776 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
777 const size_t blockOffset = offsets[k]*perBlockSize;
778 if (offsets[k] != STINV) {
780 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
789 template<
class Scalar,
class LO,
class GO,
class Node>
793 const ptrdiff_t offsets[],
795 const LO numOffsets)
const
797 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
802 return static_cast<LO
> (0);
804 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
805 auto val_out = getValuesHost(localRowInd);
806 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
808 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
809 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
810 size_t pointOffset = 0;
813 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
814 const size_t blockOffset = offsets[k]*perBlockSize;
815 if (blockOffset != STINV) {
816 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
817 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
826 template<
class Scalar,
class LO,
class GO,
class Node>
830 const ptrdiff_t offsets[],
832 const LO numOffsets)
const
834 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
839 return static_cast<LO
> (0);
847 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
848 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
849 size_t pointOffset = 0;
852 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
853 const size_t blockOffset = offsets[k]*perBlockSize;
854 if (blockOffset != STINV) {
856 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
857 AXPY (ONE, A_new, A_old);
864 template<
class Scalar,
class LO,
class GO,
class Node>
866 impl_scalar_type_dualview::t_host::const_type
870 return val_.getHostView(Access::ReadOnly);
873 template<
class Scalar,
class LO,
class GO,
class Node>
874 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
875 impl_scalar_type_dualview::t_dev::const_type
876 BlockCrsMatrix<Scalar, LO, GO, Node>::
877 getValuesDevice ()
const
879 return val_.getDeviceView(Access::ReadOnly);
882 template<
class Scalar,
class LO,
class GO,
class Node>
883 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
884 impl_scalar_type_dualview::t_host
888 return val_.getHostView(Access::ReadWrite);
891 template<
class Scalar,
class LO,
class GO,
class Node>
893 impl_scalar_type_dualview::t_dev
897 return val_.getDeviceView(Access::ReadWrite);
900 template<
class Scalar,
class LO,
class GO,
class Node>
901 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
902 impl_scalar_type_dualview::t_host::const_type
903 BlockCrsMatrix<Scalar, LO, GO, Node>::
904 getValuesHost (
const LO& lclRow)
const
906 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
907 auto val = val_.getHostView(Access::ReadOnly);
908 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
912 template<
class Scalar,
class LO,
class GO,
class Node>
914 impl_scalar_type_dualview::t_dev::const_type
918 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
919 auto val = val_.getDeviceView(Access::ReadOnly);
920 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
924 template<
class Scalar,
class LO,
class GO,
class Node>
929 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
930 auto val = val_.getHostView(Access::ReadWrite);
931 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
935 template<
class Scalar,
class LO,
class GO,
class Node>
940 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
941 auto val = val_.getDeviceView(Access::ReadWrite);
942 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
946 template<
class Scalar,
class LO,
class GO,
class Node>
951 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
952 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
953 return static_cast<LO
> (0);
955 return static_cast<LO
> (numEntInGraph);
959 template<
class Scalar,
class LO,
class GO,
class Node>
960 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
964 auto numCols = this->graph_.getColMap()->getLocalNumElements();
965 auto val = val_.getDeviceView(Access::ReadWrite);
966 const LO blockSize = this->getBlockSize ();
967 const auto graph = this->graph_.getLocalGraphDevice ();
969 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
976 template<
class Scalar,
class LO,
class GO,
class Node>
981 const Teuchos::ETransp mode,
991 TEUCHOS_TEST_FOR_EXCEPTION(
992 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
993 "transpose and conjugate transpose modes are not yet implemented.");
996 template<
class Scalar,
class LO,
class GO,
class Node>
998 BlockCrsMatrix<Scalar, LO, GO, Node>::
999 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1000 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1006 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1007 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1008 const Scalar zero = STS::zero ();
1009 const Scalar one = STS::one ();
1010 RCP<const import_type>
import = graph_.getImporter ();
1012 RCP<const export_type> theExport = graph_.getExporter ();
1013 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1015 if (alpha == zero) {
1019 else if (beta != one) {
1024 const BMV* X_colMap = NULL;
1025 if (
import.is_null ()) {
1029 catch (std::exception& e) {
1030 TEUCHOS_TEST_FOR_EXCEPTION
1031 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1032 "operator= threw an exception: " << std::endl << e.what ());
1042 if ((*X_colMap_).is_null () ||
1043 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1044 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1045 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1046 static_cast<LO
> (X.getNumVectors ())));
1048 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1052 X_colMap = &(**X_colMap_);
1054 catch (std::exception& e) {
1055 TEUCHOS_TEST_FOR_EXCEPTION
1056 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1057 "operator= threw an exception: " << std::endl << e.what ());
1061 BMV* Y_rowMap = NULL;
1062 if (theExport.is_null ()) {
1065 else if ((*Y_rowMap_).is_null () ||
1066 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1067 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1068 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1069 static_cast<LO
> (X.getNumVectors ())));
1071 Y_rowMap = &(**Y_rowMap_);
1073 catch (std::exception& e) {
1074 TEUCHOS_TEST_FOR_EXCEPTION(
1075 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1076 "operator= threw an exception: " << std::endl << e.what ());
1081 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1083 catch (std::exception& e) {
1084 TEUCHOS_TEST_FOR_EXCEPTION
1085 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1086 "an exception: " << e.what ());
1089 TEUCHOS_TEST_FOR_EXCEPTION
1090 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1091 "an exception not a subclass of std::exception.");
1094 if (! theExport.is_null ()) {
1101 template <
class Scalar,
class LO,
class GO,
class Node>
1102 void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1103 const BlockMultiVector<Scalar, LO, GO, Node> &X,
1104 BlockMultiVector<Scalar, LO, GO, Node> &Y,
const Scalar alpha,
1105 const Scalar beta) {
1107 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1108 const impl_scalar_type alpha_impl = alpha;
1109 const auto graph = this->graph_.getLocalGraphDevice();
1111 mv_type X_mv = X.getMultiVectorView();
1112 mv_type Y_mv = Y.getMultiVectorView();
1113 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1114 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1116 #if KOKKOSKERNELS_VERSION >= 40299
1117 auto A_lcl = getLocalMatrixDevice();
1118 if(!applyHelper.get()) {
1120 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
1122 if(applyHelper->shouldUseIntRowptrs())
1124 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
1126 &applyHelper->handle_int, KokkosSparse::NoTranspose,
1127 alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
1132 &applyHelper->handle, KokkosSparse::NoTranspose,
1133 alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
1136 auto A_lcl = getLocalMatrixDevice();
1137 KokkosSparse::spmv(KokkosSparse::NoTranspose, alpha_impl, A_lcl, X_lcl, beta,
1143 template<
class Scalar,
class LO,
class GO,
class Node>
1145 BlockCrsMatrix<Scalar, LO, GO, Node>::
1146 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1147 const LO colIndexToFind,
1148 const LO hint)
const
1150 const size_t absStartOffset = ptrHost_[localRowIndex];
1151 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1152 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1154 const LO*
const curInd = indHost_.data () + absStartOffset;
1157 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1164 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1169 const LO maxNumEntriesForLinearSearch = 32;
1170 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1175 const LO* beg = curInd;
1176 const LO* end = curInd + numEntriesInRow;
1177 std::pair<const LO*, const LO*> p =
1178 std::equal_range (beg, end, colIndexToFind);
1179 if (p.first != p.second) {
1181 relOffset =
static_cast<LO
> (p.first - beg);
1185 for (LO k = 0; k < numEntriesInRow; ++k) {
1186 if (colIndexToFind == curInd[k]) {
1196 template<
class Scalar,
class LO,
class GO,
class Node>
1198 BlockCrsMatrix<Scalar, LO, GO, Node>::
1199 offsetPerBlock ()
const
1201 return offsetPerBlock_;
1204 template<
class Scalar,
class LO,
class GO,
class Node>
1205 typename BlockCrsMatrix<Scalar, LO, GO, Node>::const_little_block_type
1206 BlockCrsMatrix<Scalar, LO, GO, Node>::
1207 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1208 const size_t pointOffset)
const
1211 const LO rowStride = blockSize_;
1212 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1215 template<
class Scalar,
class LO,
class GO,
class Node>
1216 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1217 BlockCrsMatrix<Scalar, LO, GO, Node>::
1218 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1219 const size_t pointOffset)
const
1222 const LO rowStride = blockSize_;
1223 return little_block_type (val + pointOffset, blockSize_, rowStride);
1226 template<
class Scalar,
class LO,
class GO,
class Node>
1227 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1228 BlockCrsMatrix<Scalar, LO, GO, Node>::
1229 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1230 const size_t pointOffset)
const
1233 const LO rowStride = blockSize_;
1234 const size_t bs2 = blockSize_ * blockSize_;
1235 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1238 template<
class Scalar,
class LO,
class GO,
class Node>
1239 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_type
1240 BlockCrsMatrix<Scalar, LO, GO, Node>::
1241 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1243 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1245 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1246 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1247 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1248 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1249 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1250 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1254 return little_block_type ();
1258 template<
class Scalar,
class LO,
class GO,
class Node>
1259 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1260 BlockCrsMatrix<Scalar, LO, GO, Node>::
1261 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1263 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1265 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1266 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1267 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1268 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1269 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1270 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1274 return little_block_host_type ();
1279 template<
class Scalar,
class LO,
class GO,
class Node>
1281 BlockCrsMatrix<Scalar, LO, GO, Node>::
1282 checkSizes (const ::Tpetra::SrcDistObject& source)
1285 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1286 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1289 std::ostream& err = markLocalErrorAndGetStream ();
1290 err <<
"checkSizes: The source object of the Import or Export "
1291 "must be a BlockCrsMatrix with the same template parameters as the "
1292 "target object." << endl;
1297 if (src->getBlockSize () != this->getBlockSize ()) {
1298 std::ostream& err = markLocalErrorAndGetStream ();
1299 err <<
"checkSizes: The source and target objects of the Import or "
1300 <<
"Export must have the same block sizes. The source's block "
1301 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1302 <<
"size = " << this->getBlockSize () <<
"." << endl;
1304 if (! src->graph_.isFillComplete ()) {
1305 std::ostream& err = markLocalErrorAndGetStream ();
1306 err <<
"checkSizes: The source object of the Import or Export is "
1307 "not fill complete. Both source and target objects must be fill "
1308 "complete." << endl;
1310 if (! this->graph_.isFillComplete ()) {
1311 std::ostream& err = markLocalErrorAndGetStream ();
1312 err <<
"checkSizes: The target object of the Import or Export is "
1313 "not fill complete. Both source and target objects must be fill "
1314 "complete." << endl;
1316 if (src->graph_.getColMap ().is_null ()) {
1317 std::ostream& err = markLocalErrorAndGetStream ();
1318 err <<
"checkSizes: The source object of the Import or Export does "
1319 "not have a column Map. Both source and target objects must have "
1320 "column Maps." << endl;
1322 if (this->graph_.getColMap ().is_null ()) {
1323 std::ostream& err = markLocalErrorAndGetStream ();
1324 err <<
"checkSizes: The target object of the Import or Export does "
1325 "not have a column Map. Both source and target objects must have "
1326 "column Maps." << endl;
1330 return ! (* (this->localError_));
1333 template<
class Scalar,
class LO,
class GO,
class Node>
1337 (const ::Tpetra::SrcDistObject& source,
1338 const size_t numSameIDs,
1345 using ::Tpetra::Details::Behavior;
1347 using ::Tpetra::Details::ProfilingRegion;
1351 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1352 const bool debug = Behavior::debug();
1353 const bool verbose = Behavior::verbose();
1358 std::ostringstream os;
1359 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1360 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
1365 if (* (this->localError_)) {
1366 std::ostream& err = this->markLocalErrorAndGetStream ();
1368 <<
"The target object of the Import or Export is already in an error state."
1377 std::ostringstream os;
1378 os << prefix << endl
1381 std::cerr << os.str ();
1387 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
1388 std::ostream& err = this->markLocalErrorAndGetStream ();
1390 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
1391 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
1395 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
1396 std::ostream& err = this->markLocalErrorAndGetStream ();
1398 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1403 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1405 std::ostream& err = this->markLocalErrorAndGetStream ();
1407 <<
"The source (input) object of the Import or "
1408 "Export is either not a BlockCrsMatrix, or does not have the right "
1409 "template parameters. checkSizes() should have caught this. "
1410 "Please report this bug to the Tpetra developers." << endl;
1414 bool lclErr =
false;
1415 #ifdef HAVE_TPETRA_DEBUG
1416 std::set<LO> invalidSrcCopyRows;
1417 std::set<LO> invalidDstCopyRows;
1418 std::set<LO> invalidDstCopyCols;
1419 std::set<LO> invalidDstPermuteCols;
1420 std::set<LO> invalidPermuteFromRows;
1421 #endif // HAVE_TPETRA_DEBUG
1430 #ifdef HAVE_TPETRA_DEBUG
1431 const map_type& srcRowMap = * (src->graph_.getRowMap ());
1432 #endif // HAVE_TPETRA_DEBUG
1433 const map_type& dstRowMap = * (this->graph_.getRowMap ());
1434 const map_type& srcColMap = * (src->graph_.getColMap ());
1435 const map_type& dstColMap = * (this->graph_.getColMap ());
1436 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
1438 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
1440 std::ostringstream os;
1442 <<
"canUseLocalColumnIndices: "
1443 << (canUseLocalColumnIndices ?
"true" :
"false")
1444 <<
", numPermute: " << numPermute
1446 std::cerr << os.str ();
1449 const auto permuteToLIDsHost = permuteToLIDs.view_host();
1450 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
1452 if (canUseLocalColumnIndices) {
1454 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1455 #ifdef HAVE_TPETRA_DEBUG
1458 invalidSrcCopyRows.insert (localRow);
1461 #endif // HAVE_TPETRA_DEBUG
1463 local_inds_host_view_type lclSrcCols;
1464 values_host_view_type vals;
1468 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1469 if (numEntries > 0) {
1470 LO err = this->replaceLocalValues (localRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1471 if (err != numEntries) {
1474 #ifdef HAVE_TPETRA_DEBUG
1475 invalidDstCopyRows.insert (localRow);
1476 #endif // HAVE_TPETRA_DEBUG
1484 for (LO k = 0; k < numEntries; ++k) {
1487 #ifdef HAVE_TPETRA_DEBUG
1488 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
1489 #endif // HAVE_TPETRA_DEBUG
1498 for (
size_t k = 0; k < numPermute; ++k) {
1499 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1500 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1502 local_inds_host_view_type lclSrcCols;
1503 values_host_view_type vals;
1505 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1506 if (numEntries > 0) {
1507 LO err = this->replaceLocalValues (dstLclRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1508 if (err != numEntries) {
1510 #ifdef HAVE_TPETRA_DEBUG
1511 for (LO c = 0; c < numEntries; ++c) {
1513 invalidDstPermuteCols.insert (lclSrcCols[c]);
1516 #endif // HAVE_TPETRA_DEBUG
1523 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries ();
1524 Teuchos::Array<LO> lclDstCols (maxNumEnt);
1527 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1528 local_inds_host_view_type lclSrcCols;
1529 values_host_view_type vals;
1535 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1536 }
catch (std::exception& e) {
1538 std::ostringstream os;
1539 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1540 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1541 << localRow <<
", src->getLocalRowView() threw an exception: "
1543 std::cerr << os.str ();
1548 if (numEntries > 0) {
1549 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1552 std::ostringstream os;
1553 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1554 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1555 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1556 << maxNumEnt << endl;
1557 std::cerr << os.str ();
1563 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1564 for (LO j = 0; j < numEntries; ++j) {
1566 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1568 #ifdef HAVE_TPETRA_DEBUG
1569 invalidDstCopyCols.insert (lclDstColsView[j]);
1570 #endif // HAVE_TPETRA_DEBUG
1575 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1576 }
catch (std::exception& e) {
1578 std::ostringstream os;
1579 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1580 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1581 << localRow <<
", this->replaceLocalValues() threw an exception: "
1583 std::cerr << os.str ();
1587 if (err != numEntries) {
1590 std::ostringstream os;
1591 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1592 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
1593 "localRow " << localRow <<
", this->replaceLocalValues "
1594 "returned " << err <<
" instead of numEntries = "
1595 << numEntries << endl;
1596 std::cerr << os.str ();
1604 for (
size_t k = 0; k < numPermute; ++k) {
1605 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1606 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1608 local_inds_host_view_type lclSrcCols;
1609 values_host_view_type vals;
1613 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1614 }
catch (std::exception& e) {
1616 std::ostringstream os;
1617 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1618 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
1619 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
1620 <<
", src->getLocalRowView() threw an exception: " << e.what ();
1621 std::cerr << os.str ();
1626 if (numEntries > 0) {
1627 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1633 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1634 for (LO j = 0; j < numEntries; ++j) {
1636 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1638 #ifdef HAVE_TPETRA_DEBUG
1639 invalidDstPermuteCols.insert (lclDstColsView[j]);
1640 #endif // HAVE_TPETRA_DEBUG
1643 LO err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1644 if (err != numEntries) {
1653 std::ostream& err = this->markLocalErrorAndGetStream ();
1654 #ifdef HAVE_TPETRA_DEBUG
1655 err <<
"copyAndPermute: The graph structure of the source of the "
1656 "Import or Export must be a subset of the graph structure of the "
1658 err <<
"invalidSrcCopyRows = [";
1659 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
1660 it != invalidSrcCopyRows.end (); ++it) {
1662 typename std::set<LO>::const_iterator itp1 = it;
1664 if (itp1 != invalidSrcCopyRows.end ()) {
1668 err <<
"], invalidDstCopyRows = [";
1669 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
1670 it != invalidDstCopyRows.end (); ++it) {
1672 typename std::set<LO>::const_iterator itp1 = it;
1674 if (itp1 != invalidDstCopyRows.end ()) {
1678 err <<
"], invalidDstCopyCols = [";
1679 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
1680 it != invalidDstCopyCols.end (); ++it) {
1682 typename std::set<LO>::const_iterator itp1 = it;
1684 if (itp1 != invalidDstCopyCols.end ()) {
1688 err <<
"], invalidDstPermuteCols = [";
1689 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
1690 it != invalidDstPermuteCols.end (); ++it) {
1692 typename std::set<LO>::const_iterator itp1 = it;
1694 if (itp1 != invalidDstPermuteCols.end ()) {
1698 err <<
"], invalidPermuteFromRows = [";
1699 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
1700 it != invalidPermuteFromRows.end (); ++it) {
1702 typename std::set<LO>::const_iterator itp1 = it;
1704 if (itp1 != invalidPermuteFromRows.end ()) {
1710 err <<
"copyAndPermute: The graph structure of the source of the "
1711 "Import or Export must be a subset of the graph structure of the "
1713 #endif // HAVE_TPETRA_DEBUG
1717 std::ostringstream os;
1718 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1719 const bool lclSuccess = ! (* (this->localError_));
1720 os <<
"*** Proc " << myRank <<
": copyAndPermute "
1721 << (lclSuccess ?
"succeeded" :
"FAILED");
1725 os <<
": error messages: " << this->errorMessages ();
1727 std::cerr << os.str ();
1751 template<
class LO,
class GO>
1753 packRowCount (
const size_t numEnt,
1754 const size_t numBytesPerValue,
1755 const size_t blkSize)
1757 using ::Tpetra::Details::PackTraits;
1768 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1769 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1770 return numEntLen + gidsLen + valsLen;
1784 template<
class ST,
class LO,
class GO>
1786 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1787 const size_t offset,
1788 const size_t numBytes,
1791 using Kokkos::subview;
1792 using ::Tpetra::Details::PackTraits;
1794 if (numBytes == 0) {
1796 return static_cast<size_t> (0);
1801 TEUCHOS_TEST_FOR_EXCEPTION
1802 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1803 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
1805 const char*
const inBuf = imports.data () + offset;
1807 TEUCHOS_TEST_FOR_EXCEPTION
1808 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
1809 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
1811 return static_cast<size_t> (numEntLO);
1818 template<
class ST,
class LO,
class GO>
1820 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1821 const size_t offset,
1822 const size_t numEnt,
1823 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1824 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1825 const size_t numBytesPerValue,
1826 const size_t blockSize)
1828 using Kokkos::subview;
1829 using ::Tpetra::Details::PackTraits;
1835 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1838 const LO numEntLO =
static_cast<size_t> (numEnt);
1840 const size_t numEntBeg = offset;
1842 const size_t gidsBeg = numEntBeg + numEntLen;
1843 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1844 const size_t valsBeg = gidsBeg + gidsLen;
1845 const size_t valsLen = numScalarEnt * numBytesPerValue;
1847 char*
const numEntOut = exports.data () + numEntBeg;
1848 char*
const gidsOut = exports.data () + gidsBeg;
1849 char*
const valsOut = exports.data () + valsBeg;
1851 size_t numBytesOut = 0;
1856 Kokkos::pair<int, size_t> p;
1857 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
1858 errorCode += p.first;
1859 numBytesOut += p.second;
1861 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
1862 errorCode += p.first;
1863 numBytesOut += p.second;
1866 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1867 TEUCHOS_TEST_FOR_EXCEPTION
1868 (numBytesOut != expectedNumBytes, std::logic_error,
1869 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1870 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1872 TEUCHOS_TEST_FOR_EXCEPTION
1873 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
1874 "PackTraits::packArray returned a nonzero error code");
1880 template<
class ST,
class LO,
class GO>
1882 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1883 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1884 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1885 const size_t offset,
1886 const size_t numBytes,
1887 const size_t numEnt,
1888 const size_t numBytesPerValue,
1889 const size_t blockSize)
1891 using ::Tpetra::Details::PackTraits;
1893 if (numBytes == 0) {
1897 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1898 TEUCHOS_TEST_FOR_EXCEPTION
1899 (static_cast<size_t> (imports.extent (0)) <= offset,
1900 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1901 << imports.extent (0) <<
" <= offset = " << offset <<
".");
1902 TEUCHOS_TEST_FOR_EXCEPTION
1903 (static_cast<size_t> (imports.extent (0)) < offset + numBytes,
1904 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
1905 << imports.extent (0) <<
" < offset + numBytes = "
1906 << (offset + numBytes) <<
".");
1911 const size_t numEntBeg = offset;
1913 const size_t gidsBeg = numEntBeg + numEntLen;
1914 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
1915 const size_t valsBeg = gidsBeg + gidsLen;
1916 const size_t valsLen = numScalarEnt * numBytesPerValue;
1918 const char*
const numEntIn = imports.data () + numEntBeg;
1919 const char*
const gidsIn = imports.data () + gidsBeg;
1920 const char*
const valsIn = imports.data () + valsBeg;
1922 size_t numBytesOut = 0;
1926 TEUCHOS_TEST_FOR_EXCEPTION
1927 (static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
1928 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1929 <<
" != actual number of entries " << numEntOut <<
".");
1932 Kokkos::pair<int, size_t> p;
1933 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
1934 errorCode += p.first;
1935 numBytesOut += p.second;
1937 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
1938 errorCode += p.first;
1939 numBytesOut += p.second;
1942 TEUCHOS_TEST_FOR_EXCEPTION
1943 (numBytesOut != numBytes, std::logic_error,
1944 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1945 <<
" != numBytes = " << numBytes <<
".");
1947 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1948 TEUCHOS_TEST_FOR_EXCEPTION
1949 (numBytesOut != expectedNumBytes, std::logic_error,
1950 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1951 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1953 TEUCHOS_TEST_FOR_EXCEPTION
1954 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
1955 "PackTraits::unpackArray returned a nonzero error code");
1961 template<
class Scalar,
class LO,
class GO,
class Node>
1965 (const ::Tpetra::SrcDistObject& source,
1970 Kokkos::DualView<
size_t*,
1972 size_t& constantNumPackets)
1974 using ::Tpetra::Details::Behavior;
1976 using ::Tpetra::Details::ProfilingRegion;
1977 using ::Tpetra::Details::PackTraits;
1979 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
1983 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1985 const bool debug = Behavior::debug();
1986 const bool verbose = Behavior::verbose();
1991 std::ostringstream os;
1992 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1993 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1998 if (* (this->localError_)) {
1999 std::ostream& err = this->markLocalErrorAndGetStream ();
2001 <<
"The target object of the Import or Export is already in an error state."
2010 std::ostringstream os;
2011 os << prefix << std::endl
2015 std::cerr << os.str ();
2021 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2022 std::ostream& err = this->markLocalErrorAndGetStream ();
2024 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
2025 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2026 <<
"." << std::endl;
2029 if (exportLIDs.need_sync_host ()) {
2030 std::ostream& err = this->markLocalErrorAndGetStream ();
2031 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2035 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
2037 std::ostream& err = this->markLocalErrorAndGetStream ();
2039 <<
"The source (input) object of the Import or "
2040 "Export is either not a BlockCrsMatrix, or does not have the right "
2041 "template parameters. checkSizes() should have caught this. "
2042 "Please report this bug to the Tpetra developers." << std::endl;
2054 constantNumPackets = 0;
2058 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2059 const size_t numExportLIDs = exportLIDs.extent (0);
2060 size_t numBytesPerValue(0);
2062 auto val_host = val_.getHostView(Access::ReadOnly);
2064 PackTraits<impl_scalar_type>
2072 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2075 auto exportLIDsHost = exportLIDs.view_host();
2076 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2077 numPacketsPerLID.modify_host ();
2079 using reducer_type = Impl::BlockCrsReducer<Impl::BlockCrsRowStruct<size_t>,host_exec>;
2080 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs);
2081 Kokkos::parallel_reduce
2083 [=](
const size_t &i,
typename reducer_type::value_type &update) {
2084 const LO lclRow = exportLIDsHost(i);
2086 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2088 const size_t numBytes =
2089 packRowCount<LO, GO> (numEnt, numBytesPerValue, blockSize);
2090 numPacketsPerLIDHost(i) = numBytes;
2091 update +=
typename reducer_type::value_type(numEnt, numBytes, numEnt);
2092 }, rowReducerStruct);
2098 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2099 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2100 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2103 std::ostringstream os;
2105 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2107 std::cerr << os.str ();
2113 if(exports.extent(0) != totalNumBytes)
2115 const std::string oldLabel = exports.d_view.label ();
2116 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2117 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2119 if (totalNumEntries > 0) {
2121 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2123 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numExportLIDs+1);
2124 Kokkos::parallel_scan
2126 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2127 if (
final) offset(i) = update;
2128 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2131 if (offset(numExportLIDs) != totalNumBytes) {
2132 std::ostream& err = this->markLocalErrorAndGetStream ();
2134 <<
"At end of method, the final offset (in bytes) "
2135 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2136 << totalNumBytes <<
". "
2137 <<
"Please report this bug to the Tpetra developers." << std::endl;
2151 exports.modify_host();
2153 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2155 policy_type(numExportLIDs, 1, 1)
2156 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2161 Kokkos::parallel_for
2163 [=](
const typename policy_type::member_type &member) {
2164 const size_t i = member.league_rank();
2165 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2166 gblColInds(member.team_scratch(0), maxRowLength);
2168 const LO lclRowInd = exportLIDsHost(i);
2169 local_inds_host_view_type lclColInds;
2170 values_host_view_type vals;
2175 src->getLocalRowView (lclRowInd, lclColInds, vals);
2176 const size_t numEnt = lclColInds.extent(0);
2179 for (
size_t j = 0; j < numEnt; ++j)
2186 const size_t numBytes =
2187 packRowForBlockCrs<impl_scalar_type, LO, GO>
2188 (exports.view_host(),
2191 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2198 const size_t offsetDiff = offset(i+1) - offset(i);
2199 if (numBytes != offsetDiff) {
2200 std::ostringstream os;
2202 <<
"numBytes computed from packRowForBlockCrs is different from "
2203 <<
"precomputed offset values, LID = " << i << std::endl;
2204 std::cerr << os.str ();
2213 std::ostringstream os;
2214 const bool lclSuccess = ! (* (this->localError_));
2216 << (lclSuccess ?
"succeeded" :
"FAILED")
2218 std::cerr << os.str ();
2222 template<
class Scalar,
class LO,
class GO,
class Node>
2230 Kokkos::DualView<
size_t*,
2235 using ::Tpetra::Details::Behavior;
2237 using ::Tpetra::Details::ProfilingRegion;
2238 using ::Tpetra::Details::PackTraits;
2241 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2243 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2244 const bool verbose = Behavior::verbose ();
2249 std::ostringstream os;
2250 auto map = this->graph_.getRowMap ();
2251 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
2252 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2253 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2256 os <<
"Start" << endl;
2257 std::cerr << os.str ();
2262 if (* (this->localError_)) {
2263 std::ostream& err = this->markLocalErrorAndGetStream ();
2264 std::ostringstream os;
2265 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
2267 std::cerr << os.str ();
2276 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2277 std::ostream& err = this->markLocalErrorAndGetStream ();
2278 std::ostringstream os;
2279 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
2280 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2283 std::cerr << os.str ();
2289 if (combineMode !=
ADD && combineMode !=
INSERT &&
2291 combineMode !=
ZERO) {
2292 std::ostream& err = this->markLocalErrorAndGetStream ();
2293 std::ostringstream os;
2295 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
2296 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2299 std::cerr << os.str ();
2305 if (this->graph_.getColMap ().is_null ()) {
2306 std::ostream& err = this->markLocalErrorAndGetStream ();
2307 std::ostringstream os;
2308 os << prefix <<
"Target matrix's column Map is null." << endl;
2310 std::cerr << os.str ();
2319 const map_type& tgtColMap = * (this->graph_.getColMap ());
2322 const size_t blockSize = this->getBlockSize ();
2323 const size_t numImportLIDs = importLIDs.extent(0);
2330 size_t numBytesPerValue(0);
2332 auto val_host = val_.getHostView(Access::ReadOnly);
2334 PackTraits<impl_scalar_type>::packValueCount
2337 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries ();
2338 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2341 std::ostringstream os;
2342 os << prefix <<
"combineMode: "
2343 << ::Tpetra::combineModeToString (combineMode)
2344 <<
", blockSize: " << blockSize
2345 <<
", numImportLIDs: " << numImportLIDs
2346 <<
", numBytesPerValue: " << numBytesPerValue
2347 <<
", maxRowNumEnt: " << maxRowNumEnt
2348 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
2349 std::cerr << os.str ();
2352 if (combineMode ==
ZERO || numImportLIDs == 0) {
2354 std::ostringstream os;
2355 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
2356 std::cerr << os.str ();
2363 if (imports.need_sync_host ()) {
2364 imports.sync_host ();
2374 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
2375 importLIDs.need_sync_host ()) {
2376 std::ostream& err = this->markLocalErrorAndGetStream ();
2377 std::ostringstream os;
2378 os << prefix <<
"All input DualViews must be sync'd to host by now. "
2379 <<
"imports_nc.need_sync_host()="
2380 << (imports.need_sync_host () ?
"true" :
"false")
2381 <<
", numPacketsPerLID.need_sync_host()="
2382 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
2383 <<
", importLIDs.need_sync_host()="
2384 << (importLIDs.need_sync_host () ?
"true" :
"false")
2387 std::cerr << os.str ();
2393 const auto importLIDsHost = importLIDs.view_host ();
2394 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
2400 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
2402 const auto policy = Kokkos::RangePolicy<host_exec>(size_t(0), numImportLIDs+1);
2403 Kokkos::parallel_scan
2404 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
2405 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
2406 if (
final) offset(i) = update;
2407 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
2417 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2418 errorDuringUnpack (
"errorDuringUnpack");
2419 errorDuringUnpack () = 0;
2421 using policy_type = Kokkos::TeamPolicy<host_exec>;
2422 size_t scratch_per_row =
sizeof(GO) * maxRowNumEnt +
sizeof (LO) * maxRowNumEnt + numBytesPerValue * maxRowNumScalarEnt
2425 const auto policy = policy_type (numImportLIDs, 1, 1)
2426 .set_scratch_size (0, Kokkos::PerTeam (scratch_per_row));
2427 using host_scratch_space =
typename host_exec::scratch_memory_space;
2429 using pair_type = Kokkos::pair<size_t, size_t>;
2432 getValuesHostNonConst();
2434 Kokkos::parallel_for
2435 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
2436 [=] (
const typename policy_type::member_type& member) {
2437 const size_t i = member.league_rank();
2438 Kokkos::View<GO*, host_scratch_space> gblColInds
2439 (member.team_scratch (0), maxRowNumEnt);
2440 Kokkos::View<LO*, host_scratch_space> lclColInds
2441 (member.team_scratch (0), maxRowNumEnt);
2442 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
2443 (member.team_scratch (0), maxRowNumScalarEnt);
2446 const size_t offval = offset(i);
2447 const LO lclRow = importLIDsHost(i);
2448 const size_t numBytes = numPacketsPerLIDHost(i);
2449 const size_t numEnt =
2450 unpackRowCount<impl_scalar_type, LO, GO>
2451 (imports.view_host (), offval, numBytes, numBytesPerValue);
2454 if (numEnt > maxRowNumEnt) {
2455 errorDuringUnpack() = 1;
2457 std::ostringstream os;
2459 <<
"At i = " << i <<
", numEnt = " << numEnt
2460 <<
" > maxRowNumEnt = " << maxRowNumEnt
2462 std::cerr << os.str ();
2467 const size_t numScalarEnt = numEnt*blockSize*blockSize;
2468 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
2469 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
2470 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
2475 size_t numBytesOut = 0;
2478 unpackRowForBlockCrs<impl_scalar_type, LO, GO>
2479 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
2480 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
2481 imports.view_host(),
2482 offval, numBytes, numEnt,
2483 numBytesPerValue, blockSize);
2485 catch (std::exception& e) {
2486 errorDuringUnpack() = 1;
2488 std::ostringstream os;
2489 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
2490 << e.what () << endl;
2491 std::cerr << os.str ();
2496 if (numBytes != numBytesOut) {
2497 errorDuringUnpack() = 1;
2499 std::ostringstream os;
2501 <<
"At i = " << i <<
", numBytes = " << numBytes
2502 <<
" != numBytesOut = " << numBytesOut <<
"."
2504 std::cerr << os.str();
2510 for (
size_t k = 0; k < numEnt; ++k) {
2512 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2514 std::ostringstream os;
2516 <<
"At i = " << i <<
", GID " << gidsOut(k)
2517 <<
" is not owned by the calling process."
2519 std::cerr << os.str();
2527 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
2528 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
2530 if (combineMode ==
ADD) {
2531 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2532 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
2533 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2534 }
else if (combineMode ==
ABSMAX) {
2535 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2538 if (static_cast<LO> (numEnt) != numCombd) {
2539 errorDuringUnpack() = 1;
2541 std::ostringstream os;
2542 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2543 <<
" != numCombd = " << numCombd <<
"."
2545 std::cerr << os.str ();
2553 if (errorDuringUnpack () != 0) {
2554 std::ostream& err = this->markLocalErrorAndGetStream ();
2555 err << prefix <<
"Unpacking failed.";
2557 err <<
" Please run again with the environment variable setting "
2558 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2564 std::ostringstream os;
2565 const bool lclSuccess = ! (* (this->localError_));
2567 << (lclSuccess ?
"succeeded" :
"FAILED")
2569 std::cerr << os.str ();
2573 template<
class Scalar,
class LO,
class GO,
class Node>
2577 using Teuchos::TypeNameTraits;
2578 std::ostringstream os;
2579 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2580 <<
"Template parameters: { "
2581 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
2582 <<
"LO: " << TypeNameTraits<LO>::name ()
2583 <<
"GO: " << TypeNameTraits<GO>::name ()
2584 <<
"Node: " << TypeNameTraits<Node>::name ()
2586 <<
", Label: \"" << this->getObjectLabel () <<
"\""
2587 <<
", Global dimensions: ["
2588 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2589 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2590 <<
", Block size: " << getBlockSize ()
2596 template<
class Scalar,
class LO,
class GO,
class Node>
2600 const Teuchos::EVerbosityLevel verbLevel)
const
2602 using Teuchos::ArrayRCP;
2603 using Teuchos::CommRequest;
2604 using Teuchos::FancyOStream;
2605 using Teuchos::getFancyOStream;
2606 using Teuchos::ireceive;
2607 using Teuchos::isend;
2608 using Teuchos::outArg;
2609 using Teuchos::TypeNameTraits;
2610 using Teuchos::VERB_DEFAULT;
2611 using Teuchos::VERB_NONE;
2612 using Teuchos::VERB_LOW;
2613 using Teuchos::VERB_MEDIUM;
2615 using Teuchos::VERB_EXTREME;
2617 using Teuchos::wait;
2621 const Teuchos::EVerbosityLevel vl =
2622 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2624 if (vl == VERB_NONE) {
2629 Teuchos::OSTab tab0 (out);
2631 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
2632 Teuchos::OSTab tab1 (out);
2634 out <<
"Template parameters:" << endl;
2636 Teuchos::OSTab tab2 (out);
2637 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
2638 <<
"LO: " << TypeNameTraits<LO>::name () << endl
2639 <<
"GO: " << TypeNameTraits<GO>::name () << endl
2640 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
2642 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
2643 <<
"Global dimensions: ["
2644 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2645 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
2647 const LO blockSize = getBlockSize ();
2648 out <<
"Block size: " << blockSize << endl;
2651 if (vl >= VERB_MEDIUM) {
2652 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2653 const int myRank = comm.getRank ();
2655 out <<
"Row Map:" << endl;
2657 getRowMap()->describe (out, vl);
2659 if (! getColMap ().is_null ()) {
2660 if (getColMap() == getRowMap()) {
2662 out <<
"Column Map: same as row Map" << endl;
2667 out <<
"Column Map:" << endl;
2669 getColMap ()->describe (out, vl);
2672 if (! getDomainMap ().is_null ()) {
2673 if (getDomainMap () == getRowMap ()) {
2675 out <<
"Domain Map: same as row Map" << endl;
2678 else if (getDomainMap () == getColMap ()) {
2680 out <<
"Domain Map: same as column Map" << endl;
2685 out <<
"Domain Map:" << endl;
2687 getDomainMap ()->describe (out, vl);
2690 if (! getRangeMap ().is_null ()) {
2691 if (getRangeMap () == getDomainMap ()) {
2693 out <<
"Range Map: same as domain Map" << endl;
2696 else if (getRangeMap () == getRowMap ()) {
2698 out <<
"Range Map: same as row Map" << endl;
2703 out <<
"Range Map: " << endl;
2705 getRangeMap ()->describe (out, vl);
2710 if (vl >= VERB_EXTREME) {
2711 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
2712 const int myRank = comm.getRank ();
2713 const int numProcs = comm.getSize ();
2716 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
2717 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
2718 FancyOStream& os = *osPtr;
2719 os <<
"Process " << myRank <<
":" << endl;
2720 Teuchos::OSTab tab2 (os);
2722 const map_type& meshRowMap = * (graph_.getRowMap ());
2723 const map_type& meshColMap = * (graph_.getColMap ());
2728 os <<
"Row " << meshGblRow <<
": {";
2730 local_inds_host_view_type lclColInds;
2731 values_host_view_type vals;
2733 this->getLocalRowView (meshLclRow, lclColInds, vals); numInds = lclColInds.extent(0);
2735 for (LO k = 0; k < numInds; ++k) {
2738 os <<
"Col " << gblCol <<
": [";
2739 for (LO i = 0; i < blockSize; ++i) {
2740 for (LO j = 0; j < blockSize; ++j) {
2741 os << vals[blockSize*blockSize*k + i*blockSize + j];
2742 if (j + 1 < blockSize) {
2746 if (i + 1 < blockSize) {
2751 if (k + 1 < numInds) {
2761 out << lclOutStrPtr->str ();
2762 lclOutStrPtr = Teuchos::null;
2765 const int sizeTag = 1337;
2766 const int dataTag = 1338;
2768 ArrayRCP<char> recvDataBuf;
2772 for (
int p = 1; p < numProcs; ++p) {
2775 ArrayRCP<size_t> recvSize (1);
2777 RCP<CommRequest<int> > recvSizeReq =
2778 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
2779 wait<int> (comm, outArg (recvSizeReq));
2780 const size_t numCharsToRecv = recvSize[0];
2787 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
2788 recvDataBuf.resize (numCharsToRecv + 1);
2790 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
2792 RCP<CommRequest<int> > recvDataReq =
2793 ireceive<int, char> (recvData, p, dataTag, comm);
2794 wait<int> (comm, outArg (recvDataReq));
2799 recvDataBuf[numCharsToRecv] =
'\0';
2800 out << recvDataBuf.getRawPtr ();
2802 else if (myRank == p) {
2806 const std::string stringToSend = lclOutStrPtr->str ();
2807 lclOutStrPtr = Teuchos::null;
2810 const size_t numCharsToSend = stringToSend.size ();
2811 ArrayRCP<size_t> sendSize (1);
2812 sendSize[0] = numCharsToSend;
2813 RCP<CommRequest<int> > sendSizeReq =
2814 isend<int, size_t> (sendSize, 0, sizeTag, comm);
2815 wait<int> (comm, outArg (sendSizeReq));
2823 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
2824 RCP<CommRequest<int> > sendDataReq =
2825 isend<int, char> (sendData, 0, dataTag, comm);
2826 wait<int> (comm, outArg (sendDataReq));
2832 template<
class Scalar,
class LO,
class GO,
class Node>
2833 Teuchos::RCP<const Teuchos::Comm<int> >
2837 return graph_.getComm();
2841 template<
class Scalar,
class LO,
class GO,
class Node>
2846 return graph_.getGlobalNumCols();
2850 template<
class Scalar,
class LO,
class GO,
class Node>
2855 return graph_.getLocalNumCols();
2858 template<
class Scalar,
class LO,
class GO,
class Node>
2863 return graph_.getIndexBase();
2866 template<
class Scalar,
class LO,
class GO,
class Node>
2871 return graph_.getGlobalNumEntries();
2874 template<
class Scalar,
class LO,
class GO,
class Node>
2879 return graph_.getLocalNumEntries();
2882 template<
class Scalar,
class LO,
class GO,
class Node>
2887 return graph_.getNumEntriesInGlobalRow(globalRow);
2891 template<
class Scalar,
class LO,
class GO,
class Node>
2896 return graph_.getGlobalMaxNumRowEntries();
2899 template<
class Scalar,
class LO,
class GO,
class Node>
2904 return graph_.hasColMap();
2908 template<
class Scalar,
class LO,
class GO,
class Node>
2913 return graph_.isLocallyIndexed();
2916 template<
class Scalar,
class LO,
class GO,
class Node>
2921 return graph_.isGloballyIndexed();
2924 template<
class Scalar,
class LO,
class GO,
class Node>
2929 return graph_.isFillComplete ();
2932 template<
class Scalar,
class LO,
class GO,
class Node>
2940 template<
class Scalar,
class LO,
class GO,
class Node>
2944 nonconst_global_inds_host_view_type &,
2945 nonconst_values_host_view_type &,
2948 TEUCHOS_TEST_FOR_EXCEPTION(
2949 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2950 "This class doesn't support global matrix indexing.");
2954 template<
class Scalar,
class LO,
class GO,
class Node>
2958 global_inds_host_view_type &,
2959 values_host_view_type &)
const
2961 TEUCHOS_TEST_FOR_EXCEPTION(
2962 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
2963 "This class doesn't support global matrix indexing.");
2967 template<
class Scalar,
class LO,
class GO,
class Node>
2971 local_inds_host_view_type &colInds,
2972 values_host_view_type &vals)
const
2974 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2975 colInds = local_inds_host_view_type();
2976 vals = values_host_view_type();
2979 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
2980 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
2981 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
2983 vals = getValuesHost (localRowInd);
2987 template<
class Scalar,
class LO,
class GO,
class Node>
2991 local_inds_host_view_type &colInds,
2992 nonconst_values_host_view_type &vals)
const
2994 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
2995 colInds = nonconst_local_inds_host_view_type();
2996 vals = nonconst_values_host_view_type();
2999 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
3000 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
3001 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
3008 template<
class Scalar,
class LO,
class GO,
class Node>
3013 const size_t lclNumMeshRows = graph_.getLocalNumRows ();
3015 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3016 graph_.getLocalDiagOffsets (diagOffsets);
3019 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3023 auto vals_host_out = val_.getHostView(Access::ReadOnly);
3027 size_t rowOffset = 0;
3029 LO bs = getBlockSize();
3030 for(
size_t r=0; r<getLocalNumRows(); r++)
3033 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3034 for(
int b=0; b<bs; b++)
3039 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3045 template<
class Scalar,
class LO,
class GO,
class Node>
3050 TEUCHOS_TEST_FOR_EXCEPTION(
3051 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3052 "not implemented.");
3056 template<
class Scalar,
class LO,
class GO,
class Node>
3061 TEUCHOS_TEST_FOR_EXCEPTION(
3062 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3063 "not implemented.");
3067 template<
class Scalar,
class LO,
class GO,
class Node>
3068 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3075 template<
class Scalar,
class LO,
class GO,
class Node>
3076 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3080 TEUCHOS_TEST_FOR_EXCEPTION(
3081 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3082 "not implemented.");
3092 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3093 template class BlockCrsMatrix< S, LO, GO, NODE >;
3095 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
std::string description() const override
One-line description of this object.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
LO local_ordinal_type
The type of local indices.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
One or more distributed dense vectors.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Linear algebra kernels for small dense matrices and vectors.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Scalar scalar_type
The type of entries in the matrix (that is, of each entry in each block).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t getLocalNumRows() const override
get the local number of block rows
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
MultiVector for multiple degrees of freedom per mesh point.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
size_t global_size_t
Global size_t object.
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
global_size_t getGlobalNumRows() const override
get the global number of block rows
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value)
Replace current value at the specified location with specified values.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
CombineMode
Rule for combining data in an Import or Export.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
Replace old value with maximum of magnitudes of old and new values.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
Replace existing values with new values.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Replace old values with zero.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
char packet_type
Implementation detail; tells.
local_matrix_device_type getLocalMatrixDevice() const
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Base class for distributed Tpetra objects that support data redistribution.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row. ...