40 #ifndef TPETRA_CRSMATRIX_DEF_HPP
41 #define TPETRA_CRSMATRIX_DEF_HPP
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_LocalCrsMatrixOperator.hpp"
62 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
70 #include "Tpetra_Details_packCrsMatrix.hpp"
71 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
73 #include "Teuchos_FancyOStream.hpp"
74 #include "Teuchos_RCP.hpp"
75 #include "Teuchos_DataAccess.hpp"
76 #include "Teuchos_SerialDenseMatrix.hpp"
77 #include "KokkosBlas1_scal.hpp"
78 #include "KokkosSparse_getDiagCopy.hpp"
79 #include "KokkosSparse_spmv.hpp"
// Atomic read-modify-write helper (only part of the body is visible here).
// The visible tail computes newVal = f(assume, inputVal), tries to install it
// at dest with Kokkos::atomic_compare_exchange, and repeats for as long as the
// value returned by the exchange (oldVal) differs from the assumed value.
91 template<
class T,
class BinaryFunction>
92 T atomic_binary_function_update (
volatile T*
const dest,
106 T newVal = f (assume, inputVal);
107 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
108 }
while (assume != oldVal);
// Returns the larger of STS::magnitude(x) and STS::magnitude(y), where STS is
// Teuchos::ScalarTraits<Scalar>.
128 template<
class Scalar>
132 typedef Teuchos::ScalarTraits<Scalar> STS;
133 return std::max (STS::magnitude (x), STS::magnitude (y));
// Constructor taking a row Map, a single per-row entry count
// (maxNumEntriesPerRow) and a ParameterList.
// It creates a new crs_graph_type from rowMap and maxNumEntriesPerRow; a
// std::exception raised while doing so is reported as std::runtime_error.
// The last visible statement makes staticGraph_ refer to the same graph as
// myGraph_.
142 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
143 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
144 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
145 size_t maxNumEntriesPerRow,
146 const Teuchos::RCP<Teuchos::ParameterList>& params) :
149 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
150 "[, RCP<ParameterList>]): ";
151 Teuchos::RCP<crs_graph_type> graph;
153 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
// Translate any std::exception from the graph constructor into a
// std::runtime_error.
156 catch (std::exception& e) {
157 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
158 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
159 "size_t [, RCP<ParameterList>]) threw an exception: "
166 staticGraph_ = myGraph_;
// Constructor variant whose visible parameters are a per-row allocation array
// (numEntPerRowToAlloc) and a ParameterList.
// A std::exception caught in the visible handler is reported as
// std::runtime_error; the message attributes it to the CrsGraph constructor.
// The last visible statement stores the local `graph` in staticGraph_.
171 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
174 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
175 const Teuchos::RCP<Teuchos::ParameterList>& params) :
178 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
179 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
180 Teuchos::RCP<crs_graph_type> graph;
186 catch (std::exception& e) {
187 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
188 (
true, std::runtime_error,
"CrsGraph constructor "
189 "(RCP<const Map>, ArrayView<const size_t>"
190 "[, RCP<ParameterList>]) threw an exception: "
197 staticGraph_ = graph;
// Constructor variant whose visible parameters are a column Map, a single
// per-row entry count (maxNumEntPerRow) and a ParameterList.
// It first checks that staticGraph_ and myGraph_ are both still null
// (std::logic_error otherwise). A std::exception caught in the visible handler
// is reported as std::runtime_error carrying e.what(). The last visible
// statement makes staticGraph_ refer to the same graph as myGraph_.
202 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
205 const Teuchos::RCP<const map_type>& colMap,
206 const size_t maxNumEntPerRow,
207 const Teuchos::RCP<Teuchos::ParameterList>& params) :
210 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
211 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
212 const char suffix[] =
213 " Please report this bug to the Tpetra developers.";
// Internal consistency checks: neither graph pointer may be set yet.
216 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
217 (! staticGraph_.is_null (), std::logic_error,
218 "staticGraph_ is not null at the beginning of the constructor."
220 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
221 (! myGraph_.is_null (), std::logic_error,
222 "myGraph_ is not null at the beginning of the constructor."
224 Teuchos::RCP<crs_graph_type> graph;
230 catch (std::exception& e) {
231 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
232 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
233 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
234 "exception: " << e.what ());
240 staticGraph_ = myGraph_;
// Constructor variant whose visible parameters are a column Map, a per-row
// allocation array (numEntPerRowToAlloc) and a ParameterList.
// A std::exception caught in the visible handler is reported as
// std::runtime_error carrying e.what(). The last visible statement stores the
// local `graph` in staticGraph_.
245 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
248 const Teuchos::RCP<const map_type>& colMap,
249 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
250 const Teuchos::RCP<Teuchos::ParameterList>& params) :
253 const char tfecfFuncName[] =
254 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
255 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
256 Teuchos::RCP<crs_graph_type> graph;
262 catch (std::exception& e) {
263 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
264 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
265 "RCP<const Map>, ArrayView<const size_t>[, "
266 "RCP<ParameterList>]) threw an exception: " << e.what ());
272 staticGraph_ = graph;
// Constructor taking an existing const graph; the ParameterList argument is
// unnamed and therefore unused.
// staticGraph_ is initialized from `graph` and storageStatus_ is set to
// Details::STORAGE_1D_PACKED.
// Throws std::runtime_error if `graph` is null or not fill complete.
// The value storage is sized from graph->lclIndsPacked_wdv.extent(0), and
// valuesUnpacked_wdv is then set equal to valuesPacked_wdv.
// The "Start" / "Allocate values" / "Done" messages go to std::cerr; the
// conditions guarding them are outside the visible lines.
278 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
280 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
281 const Teuchos::RCP<Teuchos::ParameterList>& ) :
283 staticGraph_ (graph),
284 storageStatus_ (Details::STORAGE_1D_PACKED)
287 typedef typename local_matrix_device_type::values_type values_type;
288 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
289 "RCP<ParameterList>]): ";
292 std::unique_ptr<std::string> prefix;
294 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
295 std::ostringstream os;
296 os << *prefix <<
"Start" << endl;
297 std::cerr << os.str ();
// Input validation: the graph must exist and must be fill complete.
300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
301 (graph.is_null (), std::runtime_error,
"Input graph is null.");
302 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
303 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
304 "is not fill complete. You must call fillComplete on the "
305 "graph before using it to construct a CrsMatrix. Note that "
306 "calling resumeFill on the graph makes it not fill complete, "
307 "even if you had previously called fillComplete. In that "
308 "case, you must call fillComplete on the graph again.");
// One value slot per packed local index of the graph.
316 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
318 std::ostringstream os;
319 os << *prefix <<
"Allocate values: " << numEnt << endl;
320 std::cerr << os.str ();
323 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
325 valuesUnpacked_wdv = valuesPacked_wdv;
330 std::ostringstream os;
331 os << *prefix <<
"Done" << endl;
332 std::cerr << os.str ();
// Constructor that pairs the value storage of an existing `matrix` with a
// supplied const graph.
// staticGraph_ is initialized from `graph`; storageStatus_ is copied from
// matrix.storageStatus_.
// Throws std::runtime_error if `graph` is null or not fill complete.
// valuesPacked_wdv / valuesUnpacked_wdv are built from the corresponding
// members of `matrix`, with offset 0 and a length taken from the extent of the
// graph's packed / unpacked local index arrays.
// NOTE(review): the tfecfFuncName text below names a
// "local_matrix_device_type::values_type" argument, but the visible body reads
// from `matrix` instead -- the prefix looks copied from another overload;
// confirm against the full signature.
336 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
339 const Teuchos::RCP<const crs_graph_type>& graph,
340 const Teuchos::RCP<Teuchos::ParameterList>& params) :
342 staticGraph_ (graph),
343 storageStatus_ (matrix.storageStatus_)
345 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
346 "local_matrix_device_type::values_type, "
347 "[,RCP<ParameterList>]): ";
348 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
349 (graph.is_null (), std::runtime_error,
"Input graph is null.");
350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
351 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
352 "is not fill complete. You must call fillComplete on the "
353 "graph before using it to construct a CrsMatrix. Note that "
354 "calling resumeFill on the graph makes it not fill complete, "
355 "even if you had previously called fillComplete. In that "
356 "case, you must call fillComplete on the graph again.");
// Packed values: length follows the graph's packed local indices.
358 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
359 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
// Unpacked values: length follows the graph's unpacked local indices.
361 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
362 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
// Constructor taking an existing const graph and a caller-supplied `values`
// view; the ParameterList argument is unnamed and therefore unused.
// staticGraph_ is initialized from `graph` and storageStatus_ is set to
// Details::STORAGE_1D_PACKED.
// Throws std::runtime_error if `graph` is null or not fill complete.
// The last visible statement sets valuesUnpacked_wdv equal to
// valuesPacked_wdv.
368 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
370 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
371 const typename local_matrix_device_type::values_type& values,
372 const Teuchos::RCP<Teuchos::ParameterList>& ) :
374 staticGraph_ (graph),
375 storageStatus_ (Details::STORAGE_1D_PACKED)
377 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
378 "local_matrix_device_type::values_type, "
379 "[,RCP<ParameterList>]): ";
// Input validation: the graph must exist and must be fill complete.
380 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
381 (graph.is_null (), std::runtime_error,
"Input graph is null.");
382 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
383 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
384 "is not fill complete. You must call fillComplete on the "
385 "graph before using it to construct a CrsMatrix. Note that "
386 "calling resumeFill on the graph makes it not fill complete, "
387 "even if you had previously called fillComplete. In that "
388 "case, you must call fillComplete on the graph again.");
397 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor from raw CRS arrays held in Kokkos views: rowPointers,
// columnIndices and values, plus a column Map and a ParameterList.
// storageStatus_ is set to Details::STORAGE_1D_PACKED.
// Checks performed in the visible lines:
//   * values and columnIndices must have the same extent
//     (std::invalid_argument);
//   * when `debug` is set and rowPointers is non-empty, the last row offset
//     must equal both extents (std::invalid_argument);
//   * after the graph is built, its local device graph must have the same
//     row_map / entries extents as the inputs (std::logic_error).
// A std::exception thrown by the crs_graph_type constructor is reported as
// std::runtime_error.
// The new graph becomes staticGraph_, and valuesUnpacked_wdv is set equal to
// valuesPacked_wdv.
// NOTE(review): `suffix` here starts with ".", and the first std::logic_error
// message below already ends in ".", so that message contains "..".
408 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
411 const Teuchos::RCP<const map_type>& colMap,
412 const typename local_graph_device_type::row_map_type& rowPointers,
413 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
414 const typename local_matrix_device_type::values_type& values,
415 const Teuchos::RCP<Teuchos::ParameterList>& params) :
417 storageStatus_ (Details::STORAGE_1D_PACKED)
419 using Details::getEntryOnHost;
422 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
423 "RCP<const Map>, ptr, ind, val[, params]): ";
424 const char suffix[] =
425 ". Please report this bug to the Tpetra developers.";
429 std::unique_ptr<std::string> prefix;
431 prefix = this->createPrefix(
432 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
433 std::ostringstream os;
434 os << *prefix <<
"Start" << endl;
435 std::cerr << os.str ();
// The value and column-index arrays must be the same length.
442 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
443 (values.extent(0) != columnIndices.extent(0),
444 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
445 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
// Debug-only cross-check: the final row offset (read back on the host) is the
// total entry count and must match both array lengths.
447 if (debug && rowPointers.extent(0) != 0) {
448 const size_t numEnt =
449 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
450 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
451 (numEnt !=
size_t(columnIndices.extent(0)) ||
452 numEnt !=
size_t(values.extent(0)),
453 std::invalid_argument,
"Last entry of rowPointers says that "
454 "the matrix has " << numEnt <<
" entr"
455 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
456 "columnIndices and values don't match this. "
457 "columnIndices.extent(0)=" << columnIndices.extent (0)
458 <<
" and values.extent(0)=" << values.extent (0) <<
".");
461 RCP<crs_graph_type> graph;
463 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
464 columnIndices, params));
466 catch (std::exception& e) {
467 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
468 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
469 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
// Sanity-check the graph that was just built against the input arrays.
477 auto lclGraph = graph->getLocalGraphDevice ();
478 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
479 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
480 lclGraph.entries.extent (0) != columnIndices.extent (0),
481 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
482 "ind[, params]) did not set the local graph correctly." << suffix);
483 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
484 (lclGraph.entries.extent (0) != values.extent (0),
485 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
486 "params]) did not set the local graph correctly. "
487 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
488 <<
" != values.extent(0) = " << values.extent (0) << suffix);
494 staticGraph_ = graph;
504 valuesUnpacked_wdv = valuesPacked_wdv;
513 std::ostringstream os;
514 os << *prefix <<
"Done" << endl;
515 std::cerr << os.str();
// Constructor from raw CRS arrays held in Teuchos::ArrayRCP objects
// (ptr, ind, val), plus a column Map and a ParameterList.
// storageStatus_ is set to Details::STORAGE_1D_PACKED.
// A std::exception caught in the visible handler is reported as
// std::runtime_error carrying e.what(). The new graph becomes staticGraph_.
// Its local device graph is then checked against ptr.size() and ind.size()
// (std::logic_error on mismatch).
// The values are passed through
// getKokkosViewDeepCopy<device_type>(av_reinterpret_cast<IST>(val())), and
// valuesUnpacked_wdv is set equal to valuesPacked_wdv.
519 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
522 const Teuchos::RCP<const map_type>& colMap,
523 const Teuchos::ArrayRCP<size_t>& ptr,
524 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
525 const Teuchos::ArrayRCP<Scalar>& val,
526 const Teuchos::RCP<Teuchos::ParameterList>& params) :
528 storageStatus_ (Details::STORAGE_1D_PACKED)
530 using Kokkos::Compat::getKokkosViewDeepCopy;
531 using Teuchos::av_reinterpret_cast;
533 using values_type =
typename local_matrix_device_type::values_type;
535 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
536 "RCP<const Map>, ptr, ind, val[, params]): ";
538 RCP<crs_graph_type> graph;
543 catch (std::exception& e) {
544 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
545 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
546 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
547 "RCP<ParameterList>]) threw an exception: " << e.what ());
553 staticGraph_ = graph;
// The graph's local arrays must be as long as the arrays the caller passed in.
566 auto lclGraph = staticGraph_->getLocalGraphDevice ();
567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
568 (
size_t (lclGraph.row_map.extent (0)) !=
size_t (ptr.size ()) ||
569 size_t (lclGraph.entries.extent (0)) !=
size_t (ind.size ()),
570 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
571 "ptr, ind[, params]) did not set the local graph correctly. "
572 "Please report this bug to the Tpetra developers.");
575 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
577 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor that wraps an existing local matrix (lclMatrix) together with a
// column Map and a ParameterList.
// storageStatus_ is set to Details::STORAGE_1D_PACKED.
// A graph is created using lclMatrix.graph and params; a std::exception caught
// in the visible handler is reported as std::runtime_error carrying e.what().
// The graph must come back fill complete (std::logic_error otherwise).
// It becomes staticGraph_, and valuesUnpacked_wdv is set equal to
// valuesPacked_wdv.
// The two trailing checks report, per their messages, a matrix that is still
// fill-active or not fill complete at the end of construction; their
// conditions are outside the visible lines.
587 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
590 const Teuchos::RCP<const map_type>& colMap,
592 const Teuchos::RCP<Teuchos::ParameterList>& params) :
594 storageStatus_ (Details::STORAGE_1D_PACKED),
597 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
598 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
599 const char suffix[] =
600 " Please report this bug to the Tpetra developers.";
602 Teuchos::RCP<crs_graph_type> graph;
605 lclMatrix.graph, params));
607 catch (std::exception& e) {
608 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
609 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
610 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
611 "exception: " << e.what ());
613 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
614 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
615 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
616 "did not produce a fill-complete graph. Please report this bug to the "
617 "Tpetra developers.");
622 staticGraph_ = graph;
625 valuesUnpacked_wdv = valuesPacked_wdv;
627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
629 "At the end of a CrsMatrix constructor that should produce "
630 "a fillComplete matrix, isFillActive() is true." << suffix);
631 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
633 "CrsMatrix constructor that should produce a fillComplete "
634 "matrix, isFillComplete() is false." << suffix);
// Constructor that wraps an existing local matrix and takes all four Maps
// (row, column, domain, range) explicitly, plus a ParameterList.
// storageStatus_ is set to Details::STORAGE_1D_PACKED.
// A crs_graph_type is created from lclMatrix.graph and the four Maps; a
// std::exception thrown while doing so is reported as std::runtime_error
// carrying e.what(). The graph must come back fill complete
// (std::logic_error otherwise).
// It becomes staticGraph_, and valuesUnpacked_wdv is set equal to
// valuesPacked_wdv.
// The two trailing checks report, per their messages, a matrix that is still
// fill-active or not fill complete at the end of construction; their
// conditions are outside the visible lines.
638 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
641 const Teuchos::RCP<const map_type>& rowMap,
642 const Teuchos::RCP<const map_type>& colMap,
643 const Teuchos::RCP<const map_type>& domainMap,
644 const Teuchos::RCP<const map_type>& rangeMap,
645 const Teuchos::RCP<Teuchos::ParameterList>& params) :
647 storageStatus_ (Details::STORAGE_1D_PACKED),
650 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
651 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
652 "local_matrix_device_type[, RCP<ParameterList>]): ";
653 const char suffix[] =
654 " Please report this bug to the Tpetra developers.";
656 Teuchos::RCP<crs_graph_type> graph;
658 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
659 domainMap, rangeMap, params));
661 catch (std::exception& e) {
662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
663 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
664 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
665 "RCP<ParameterList>]) threw an exception: " << e.what ());
667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
668 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
669 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
670 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
671 "not produce a fillComplete graph." << suffix);
676 staticGraph_ = graph;
679 valuesUnpacked_wdv = valuesPacked_wdv;
681 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
683 "At the end of a CrsMatrix constructor that should produce "
684 "a fillComplete matrix, isFillActive() is true." << suffix);
685 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
687 "CrsMatrix constructor that should produce a fillComplete "
688 "matrix, isFillComplete() is false." << suffix);
// Constructor that takes the four Maps plus a prebuilt Import and Export
// object and a ParameterList; per the tfecfFuncName text it also takes a local
// matrix, whose parameter line is outside the visible lines.
// storageStatus_ is set to Details::STORAGE_1D_PACKED.
// A std::exception caught in the visible handler is reported as
// std::runtime_error carrying e.what(). The graph must come back fill complete
// (std::logic_error otherwise).
// It becomes staticGraph_, and valuesUnpacked_wdv is set equal to
// valuesPacked_wdv.
// The two trailing checks report, per their messages, a matrix that is still
// fill-active or not fill complete at the end of construction; their
// conditions are outside the visible lines.
692 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
695 const Teuchos::RCP<const map_type>& rowMap,
696 const Teuchos::RCP<const map_type>& colMap,
697 const Teuchos::RCP<const map_type>& domainMap,
698 const Teuchos::RCP<const map_type>& rangeMap,
699 const Teuchos::RCP<const import_type>& importer,
700 const Teuchos::RCP<const export_type>& exporter,
701 const Teuchos::RCP<Teuchos::ParameterList>& params) :
703 storageStatus_ (Details::STORAGE_1D_PACKED),
707 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
708 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
709 const char suffix[] =
710 " Please report this bug to the Tpetra developers.";
712 Teuchos::RCP<crs_graph_type> graph;
715 domainMap, rangeMap, importer,
718 catch (std::exception& e) {
719 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
720 (
true, std::runtime_error,
"CrsGraph constructor "
721 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
722 "params) threw: " << e.what ());
724 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
725 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
726 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
727 "Export, params) did not produce a fill-complete graph. "
728 "Please report this bug to the Tpetra developers.");
733 staticGraph_ = graph;
736 valuesUnpacked_wdv = valuesPacked_wdv;
738 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
740 "At the end of a CrsMatrix constructor that should produce "
741 "a fillComplete matrix, isFillActive() is true." << suffix);
742 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
744 "CrsMatrix constructor that should produce a fillComplete "
745 "matrix, isFillComplete() is false." << suffix);
// Constructor that builds a matrix from another one (`source`), selected by
// copyOrView.
// staticGraph_ is taken from source.getCrsGraph() and storageStatus_ from
// source.storageStatus_.
// The first check reports "Source graph must be fillComplete()."; its
// condition is outside the visible lines.
//   * Teuchos::Copy: a new values_type view labelled "val" is allocated
//     without initialization, and valuesUnpacked_wdv is set equal to
//     valuesPacked_wdv.
//   * Teuchos::View: handled by its own branch (body outside the visible
//     lines).
// The std::invalid_argument report at the end appears to be the fallback for
// any other copyOrView value; its enclosing branch is outside the visible
// lines.
749 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
752 const Teuchos::DataAccess copyOrView):
754 staticGraph_ (source.getCrsGraph()),
755 storageStatus_ (source.storageStatus_)
757 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
758 "const CrsMatrix&, const Teuchos::DataAccess): ";
759 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
761 "Source graph must be fillComplete().");
763 if (copyOrView == Teuchos::Copy) {
764 using values_type =
typename local_matrix_device_type::values_type;
766 using Kokkos::view_alloc;
767 using Kokkos::WithoutInitializing;
768 values_type newvals (view_alloc (
"val", WithoutInitializing),
773 valuesUnpacked_wdv = valuesPacked_wdv;
776 else if (copyOrView == Teuchos::View) {
782 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
783 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
784 "has an invalid value " << copyOrView <<
". Valid values "
785 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
786 "Teuchos::View = " << Teuchos::View <<
".");
// Exchanges state between this matrix and crs_matrix using std::swap on each
// member. The visible lines swap importMV_, exportMV_, staticGraph_, myGraph_,
// valuesPacked_wdv, valuesUnpacked_wdv and nonlocals_; further members may be
// swapped in lines outside this view.
791 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
796 std::swap(crs_matrix.
importMV_, this->importMV_);
797 std::swap(crs_matrix.
exportMV_, this->exportMV_);
798 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
799 std::swap(crs_matrix.myGraph_, this->myGraph_);
800 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
801 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
804 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
// Returns the communicator (RCP<const Teuchos::Comm<int>>) reported by the
// graph obtained from getCrsGraphRef().
807 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
808 Teuchos::RCP<const Teuchos::Comm<int> >
811 return getCrsGraphRef ().getComm ();
// Returns the member flag fillComplete_ unchanged.
814 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
818 return fillComplete_;
// Returns the logical negation of the member flag fillComplete_.
821 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
825 return ! fillComplete_;
// Forwards to the graph: returns getCrsGraphRef().isStorageOptimized().
828 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
832 return this->getCrsGraphRef ().isStorageOptimized ();
// Forwards to the graph: returns getCrsGraphRef().isLocallyIndexed().
835 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
839 return getCrsGraphRef ().isLocallyIndexed ();
// Forwards to the graph: returns getCrsGraphRef().isGloballyIndexed().
842 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
846 return getCrsGraphRef ().isGloballyIndexed ();
// Forwards to the graph: returns getCrsGraphRef().hasColMap().
849 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
853 return getCrsGraphRef ().hasColMap ();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumEntries().
856 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
860 return getCrsGraphRef ().getGlobalNumEntries ();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumEntries().
863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
867 return getCrsGraphRef ().getLocalNumEntries ();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumRows().
870 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
874 return getCrsGraphRef ().getGlobalNumRows ();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumCols().
877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
881 return getCrsGraphRef ().getGlobalNumCols ();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumRows().
884 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 return getCrsGraphRef ().getLocalNumRows ();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumCols().
892 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
896 return getCrsGraphRef ().getLocalNumCols ();
// Forwards to the graph: returns
// getCrsGraphRef().getNumEntriesInGlobalRow(globalRow).
900 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
904 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
// Forwards to the graph: returns
// getCrsGraphRef().getNumEntriesInLocalRow(localRow).
907 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
911 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
// Forwards to the graph: returns getCrsGraphRef().getGlobalMaxNumRowEntries().
914 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
918 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
// Forwards to the graph: returns getCrsGraphRef().getLocalMaxNumRowEntries().
921 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
925 return getCrsGraphRef ().getLocalMaxNumRowEntries ();
// Returns the index base reported by the row Map (getRowMap()->getIndexBase()).
928 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
932 return getRowMap ()->getIndexBase ();
// Forwards to the graph: returns getCrsGraphRef().getRowMap() as an
// RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>.
935 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
936 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
939 return getCrsGraphRef ().getRowMap ();
// Forwards to the graph: returns getCrsGraphRef().getColMap() as an
// RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>.
942 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
943 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
946 return getCrsGraphRef ().getColMap ();
// Forwards to the graph: returns getCrsGraphRef().getDomainMap() as an
// RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>.
949 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
950 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
953 return getCrsGraphRef ().getDomainMap ();
// Forwards to the graph: returns getCrsGraphRef().getRangeMap() as an
// RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>>.
956 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
957 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
960 return getCrsGraphRef ().getRangeMap ();
// Returns an RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node>>.
// The visible part branches on whether staticGraph_ is non-null; both branch
// bodies are outside the visible lines.
963 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
964 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
967 if (staticGraph_ != Teuchos::null) {
// Returns an RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>.
// The visible part branches on whether staticGraph_ is non-null; both branch
// bodies are outside the visible lines.
973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
974 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
977 if (staticGraph_ != Teuchos::null) {
// Returns a reference to the matrix's graph.
// If staticGraph_ is non-null it is dereferenced and returned; the later
// visible return dereferences myGraph_ instead.
// The visible check throws std::logic_error when myGraph_ is null as well; the
// `debug` constant is set from HAVE_TPETRA_DEBUG, and any guard around that
// check is outside the visible lines.
983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Compile-time debug switch driven by HAVE_TPETRA_DEBUG.
988 #ifdef HAVE_TPETRA_DEBUG
989 constexpr
bool debug =
true;
991 constexpr
bool debug =
false;
992 #endif // HAVE_TPETRA_DEBUG
994 if (! this->staticGraph_.is_null ()) {
995 return * (this->staticGraph_);
999 const char tfecfFuncName[] =
"getCrsGraphRef: ";
1000 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1001 (this->myGraph_.is_null (), std::logic_error,
1002 "Both staticGraph_ and myGraph_ are null. "
1003 "Please report this bug to the Tpetra developers.");
1005 return * (this->myGraph_);
// Produces a local_matrix_device_type for this matrix.
// Visible inputs: the column count (local element count of staticGraph_'s
// column Map), the device view of valuesPacked_wdv requested with
// Access::ReadWrite, and staticGraph_'s local device graph.
1009 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1010 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
1014 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
1017 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
1018 staticGraph_->getLocalGraphDevice());
// Returns a local_matrix_host_type labelled "Tpetra::CrsMatrix::lclMatrixHost".
// It is built from the column count (local element count of staticGraph_'s
// column Map), the host view of valuesPacked_wdv requested with
// Access::ReadWrite, and staticGraph_'s local host graph.
1021 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1022 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
1026 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
1027 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
1028 valuesPacked_wdv.getHostView(Access::ReadWrite),
1029 staticGraph_->getLocalGraphHost());
// Compiled only when KOKKOSKERNELS_VERSION < 40299 (the matching #endif is
// outside the visible lines).
// Returns a std::shared_ptr<local_multiply_op_type> wrapping a copy of the
// local device matrix from getLocalMatrixDevice().
// When one of the cuSPARSE / rocSPARSE / MKL TPL macros is defined and the
// local entry count does not exceed the maximum LocalOrdinal value:
//   * ordinalRowptrs is filled on first use (its data pointer is null) by
//     copying every row offset of the local graph in a Kokkos::parallel_for;
//   * the operator is constructed with both the matrix and ordinalRowptrs.
// The final visible return constructs the operator from the matrix alone; the
// branch structure leading to it is outside the visible lines.
1032 #if KOKKOSKERNELS_VERSION < 40299
1034 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1035 std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1039 auto localMatrix = getLocalMatrixDevice();
1040 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
1041 if(this->getLocalNumEntries() <=
size_t(Teuchos::OrdinalTraits<LocalOrdinal>::max()))
1043 if(this->ordinalRowptrs.data() ==
nullptr)
1045 auto originalRowptrs = localMatrix.graph.row_map;
1048 this->ordinalRowptrs = ordinal_rowptrs_type(
1049 Kokkos::ViewAllocateWithoutInitializing(
"CrsMatrix::ordinalRowptrs"), originalRowptrs.extent(0));
// Local copy of the view handle so the lambda does not reference `this`.
1050 auto ordinalRowptrs_ = this->ordinalRowptrs;
1051 Kokkos::parallel_for(
"CrsMatrix::getLocalMultiplyOperator::convertRowptrs",
1052 Kokkos::RangePolicy<execution_space>(0, originalRowptrs.extent(0)),
1053 KOKKOS_LAMBDA(LocalOrdinal i)
1055 ordinalRowptrs_(i) = originalRowptrs(i);
1059 return std::make_shared<local_multiply_op_type>(
1060 std::make_shared<local_matrix_device_type>(localMatrix), this->ordinalRowptrs);
1064 return std::make_shared<local_multiply_op_type>(
1065 std::make_shared<local_matrix_device_type>(localMatrix));
// Returns true exactly when myGraph_ is null.
1069 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1073 return myGraph_.is_null ();
1076 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1083 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Allocates the matrix's value storage to match the graph's index storage.
//
// Inputs used in the visible lines:
//   lg  - LocalIndices or GlobalIndices; forwarded to
//         myGraph_->allocateIndices when the graph still has to allocate.
//   gas - GraphAlreadyAllocated or GraphNotYetAllocated; the caller's claim
//         about whether the graph's indices are already allocated.
//
// Errors raised in the visible lines:
//   std::logic_error   - staticGraph_ is null; `gas` disagrees with
//                        staticGraph_->indicesAreAllocated(); the indices are
//                        unallocated but the matrix does not own the graph
//                        (myGraph_ null); or the last unpacked row offset does
//                        not equal the graph's local allocation size.
//   std::runtime_error - CrsGraph::allocateIndices threw.
//
// The value array is a values_type labelled "Tpetra::CrsMatrix::values" with
// staticGraph_->getLocalAllocationSize() entries. The diagnostic messages go
// to std::cerr; the conditions guarding them are outside the visible lines.
//
// Fix: the row-offset consistency message named "rowPtrsUnapcked_host"; the
// spelling now matches the accessor family it refers to
// ("rowPtrsUnpacked_host").
1090 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1099 const char tfecfFuncName[] =
"allocateValues: ";
1100 const char suffix[] =
1101 " Please report this bug to the Tpetra developers.";
1102 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
1104 std::unique_ptr<std::string> prefix;
1106 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1107 std::ostringstream os;
1108 os << *prefix <<
"lg: "
1109 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1111 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1112 <<
"Allocated" << endl;
1113 std::cerr << os.str();
1116 const bool debug = Behavior::debug(
"CrsMatrix");
1118 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1119 (this->staticGraph_.is_null (), std::logic_error,
1120 "staticGraph_ is null." << suffix);
// The caller's claim (gas) must agree with what the graph itself reports. The
// two tests below pick the message matching the direction of the mismatch.
1125 if ((gas == GraphAlreadyAllocated) !=
1126 staticGraph_->indicesAreAllocated ()) {
1127 const char err1[] =
"The caller has asserted that the graph "
1129 const char err2[] =
"already allocated, but the static graph "
1130 "says that its indices are ";
1131 const char err3[] =
"already allocated. ";
1132 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1133 (gas == GraphAlreadyAllocated &&
1134 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1135 err1 << err2 <<
"not " << err3 << suffix);
1136 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1137 (gas != GraphAlreadyAllocated &&
1138 staticGraph_->indicesAreAllocated (), std::logic_error,
1139 err1 <<
"not " << err2 << err3 << suffix);
// Unallocated indices are only acceptable when the matrix owns the graph.
1147 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1148 (! this->staticGraph_->indicesAreAllocated () &&
1149 this->myGraph_.is_null (), std::logic_error,
1150 "The static graph says that its indices are not allocated, "
1151 "but the graph is not owned by the matrix." << suffix);
1154 if (gas == GraphNotYetAllocated) {
1156 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1157 (this->myGraph_.is_null (), std::logic_error,
1158 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1161 this->myGraph_->allocateIndices (lg, verbose);
1163 catch (std::exception& e) {
1164 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1165 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1166 "threw an exception: " << e.what ());
1169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1170 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1171 "threw an exception not a subclass of std::exception.");
// Size of the value array: one slot per allocated graph index.
1176 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
1178 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
1179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1180 (this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1181 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnpacked_host." << suffix);
1185 using values_type =
typename local_matrix_device_type::values_type;
1187 std::ostringstream os;
1188 os << *prefix <<
"Allocate values_wdv: Pre "
1189 << valuesUnpacked_wdv.extent(0) <<
", post "
1190 << lclTotalNumEntries << endl;
1191 std::cerr << os.str();
1195 values_type(
"Tpetra::CrsMatrix::values",
1196 lclTotalNumEntries));
// fillLocalGraphAndMatrix -- per its own tfecfFuncName string, called from
// fillComplete or expertStaticFillComplete.
//
// NOTE: this excerpt is missing a number of original lines (signature,
// braces, some conditions); the comments below describe only what the
// visible statements do.
//
// Visible behavior:
//  * Throws std::logic_error if myGraph_ is null (the matrix has a const
//    graph), or if the unpacked row offsets / index / value extents are
//    inconsistent with each other.
//  * If the graph's local entry count differs from its allocation size,
//    allocates packed row offsets, packed column indices and packed values,
//    and copies the data over with two Kokkos::parallel_for launches using
//    pack_functor.
//  * Otherwise the packed row offsets / indices / values are set to alias
//    the unpacked ones.
//  * If optimized storage is requested ("Optimize Storage" parameter, or
//    the default), k_numRowEntries_ is released, the unpacked members are
//    set to alias the packed ones, and both the graph's and the matrix's
//    storageStatus_ become Details::STORAGE_1D_PACKED.
1200 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1206 using ::Tpetra::Details::getEntryOnHost;
1207 using Teuchos::arcp_const_cast;
1208 using Teuchos::Array;
1209 using Teuchos::ArrayRCP;
1210 using Teuchos::null;
1214 using row_map_type =
typename local_graph_device_type::row_map_type;
1215 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1216 using values_type =
typename local_matrix_device_type::values_type;
1218 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
// Prefix and suffix used by every exception message raised below.
1220 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1221 "fillComplete or expertStaticFillComplete): ";
1222 const char suffix[] =
1223 " Please report this bug to the Tpetra developers.";
// Prefix for diagnostic output written to std::cerr (the condition that
// guards its creation is not visible in this excerpt).
1227 std::unique_ptr<std::string> prefix;
1229 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1230 std::ostringstream os;
1231 os << *prefix << endl;
1232 std::cerr << os.str ();
// This routine needs the matrix-owned (nonconst) graph.
1238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1239 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1240 "(myGraph_) is null.  This means that the matrix has a "
1241 "const (a.k.a. \"static\") graph.  fillComplete or "
1242 "expertStaticFillComplete should never call "
1243 "fillLocalGraphAndMatrix in that case." << suffix);
1246 const size_t lclNumRows = this->getLocalNumRows ();
// Current (unpacked) row offsets; must have exactly lclNumRows + 1 entries.
1261 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1262 myGraph_->rowPtrsUnpacked_dev_;
1265 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1266 (curRowOffsets.extent (0) == 0, std::logic_error,
1267 "curRowOffsets.extent(0) == 0.");
1268 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1269 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1270 "curRowOffsets.extent(0) = "
1271 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1272 << (lclNumRows + 1) <<
".");
1273 const size_t numOffsets = curRowOffsets.extent (0);
// valToCheck is the LAST row offset, read at index numOffsets - 1; the
// message below therefore reports that same index.
1274 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1277 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1278 std::logic_error,
"numOffsets = " <<
1279 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1280 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1281 << (numOffsets - 1) <<
") = " << valToCheck <<
".");
// Storage is "unpacked" when fewer entries are used than were allocated.
1284 if (myGraph_->getLocalNumEntries() !=
1285 myGraph_->getLocalAllocationSize()) {
1289 typename row_map_type::non_const_type k_ptrs;
1290 row_map_type k_ptrs_const;
1291 lclinds_1d_type k_inds;
1295 std::ostringstream os;
1296 const auto numEnt = myGraph_->getLocalNumEntries();
1297 const auto allocSize = myGraph_->getLocalAllocationSize();
1298 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1299 <<
", allocSize=" << allocSize << endl;
1300 std::cerr << os.str ();
// Debug-only consistency checks on the unpacked arrays before packing.
1308 if (debug && curRowOffsets.extent (0) != 0) {
1309 const size_t numOffsets =
1310 static_cast<size_t> (curRowOffsets.extent (0));
1311 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1312 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1313 (static_cast<size_t> (valToCheck) !=
1314 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1315 std::logic_error,
"(unpacked branch) Before "
1316 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1317 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1318 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1319 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1320 (static_cast<size_t> (valToCheck) !=
1321 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1322 std::logic_error,
"(unpacked branch) Before "
1323 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1324 <<
") = " << valToCheck
1325 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1326 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
// Total number of entries after packing; the expression that computes it
// (original lines 1352-1354) is not visible in this excerpt.
1334 size_t lclTotalNumEntries = 0;
1340 std::ostringstream os;
1341 os << *prefix <<
"Allocate packed row offsets: "
1342 << (lclNumRows+1) << endl;
1343 std::cerr << os.str ();
1345 typename row_map_type::non_const_type
1346 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1347 typename row_entries_type::const_type numRowEnt_h =
1348 myGraph_->k_numRowEntries_;
1351 lclTotalNumEntries =
1355 k_ptrs = packedRowOffsets;
1356 k_ptrs_const = k_ptrs;
// Sanity checks on the freshly built packed row offsets.
1360 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1361 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1363 "(unpacked branch) After packing k_ptrs, "
1364 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1365 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1366 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1368 (valToCheck != lclTotalNumEntries, std::logic_error,
1369 "(unpacked branch) After filling k_ptrs, "
1370 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1371 <<
" != total number of entries on the calling process = "
1372 << lclTotalNumEntries <<
".");
// Allocate the packed column-index and value arrays.
1377 std::ostringstream os;
1378 os << *prefix <<
"Allocate packed local column indices: "
1379 << lclTotalNumEntries << endl;
1380 std::cerr << os.str ();
1382 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1384 std::ostringstream os;
1385 os << *prefix <<
"Allocate packed values: "
1386 << lclTotalNumEntries << endl;
1387 std::cerr << os.str ();
1389 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
// Pack the column indices: one pack_functor invocation per local row,
// reading with the old offsets (curRowOffsets) and writing with the new
// ones (k_ptrs).
1401 using inds_packer_type = pack_functor<
1402 typename Graph::local_graph_device_type::entries_type::non_const_type,
1403 typename Graph::local_inds_dualv_type::t_dev::const_type,
1404 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1405 typename Graph::local_graph_device_type::row_map_type>;
1406 inds_packer_type indsPacker (
1408 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1409 k_ptrs, curRowOffsets);
1411 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1412 Kokkos::parallel_for
1413 (
"Tpetra::CrsMatrix pack column indices",
1414 range_type (0, lclNumRows), indsPacker);
// Pack the values the same way.
1418 using vals_packer_type = pack_functor<
1419 typename values_type::non_const_type,
1420 typename values_type::const_type,
1421 typename row_map_type::non_const_type,
1422 typename row_map_type::const_type>;
1423 vals_packer_type valsPacker (
1425 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1426 k_ptrs, curRowOffsets);
1427 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1428 range_type (0, lclNumRows), valsPacker);
// Post-packing checks: the last packed offset must equal both extents.
1431 const char myPrefix[] =
"(\"Optimize Storage\""
1432 "=true branch) After packing, ";
1433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1434 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1435 <<
"k_ptrs.extent(0) = 0.  This probably means that "
1436 "rowPtrsUnpacked_ was never allocated.");
1437 if (k_ptrs.extent (0) != 0) {
1438 const size_t numOffsets (k_ptrs.extent (0));
1439 const auto valToCheck =
1440 getEntryOnHost (k_ptrs, numOffsets - 1);
1441 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1442 (
size_t (valToCheck) != k_vals.extent (0),
1443 std::logic_error, myPrefix <<
1444 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1445 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1446 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1447 (
size_t (valToCheck) != k_inds.extent (0),
1448 std::logic_error, myPrefix <<
1449 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1450 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
// Publish the packed row offsets (and, on lines not visible here, the
// packed indices/values) to the graph.
1454 myGraph_->setRowPtrsPacked(k_ptrs_const);
1455 myGraph_->lclIndsPacked_wdv =
// Already-packed case: the packed members simply alias the unpacked ones.
1462 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1463 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1464 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1465 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1466 valuesPacked_wdv = valuesUnpacked_wdv;
1469 std::ostringstream os;
1470 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1471 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1472 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1473 << valuesUnpacked_wdv.extent(0) << endl;
1474 std::cerr << os.str();
// Consistency checks for the already-packed case.
1478 const char myPrefix[] =
1479 "(\"Optimize Storage\"=false branch) ";
1480 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1481 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1482 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1483 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0.  This probably means "
1484 "that rowPtrsUnpacked_ was never allocated.");
1485 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1486 const size_t numOffsets = rowPtrsUnpackedHost.extent (0);
1487 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
1488 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1489 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1490 std::logic_error, myPrefix <<
1491 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1492 <<
" != valuesPacked_wdv.extent(0) = "
1493 << valuesPacked_wdv.extent (0) <<
".");
1494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1495 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1496 std::logic_error, myPrefix <<
1497 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1498 <<
" != myGraph_->lclIndsPacked_wdv.extent(0) = "
1499 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Final checks on the packed row offsets as seen from the host.
1505 const char myPrefix[] =
"After packing, ";
1506 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1507 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1508 (
size_t (rowPtrsPackedHost.extent (0)) !=
size_t (lclNumRows + 1),
1509 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1510 << rowPtrsPackedHost.extent (0) <<
" != lclNumRows+1 = " <<
1511 (lclNumRows+1) <<
".");
1512 if (rowPtrsPackedHost.extent (0) != 0) {
1513 const size_t numOffsets (rowPtrsPackedHost.extent (0));
1514 const size_t valToCheck = rowPtrsPackedHost(numOffsets-1);
1515 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1516 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1517 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1518 (numOffsets-1) <<
") = " << valToCheck
1519 <<
" != valuesPacked_wdv.extent(0) = "
1520 << valuesPacked_wdv.extent (0) <<
".");
1521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1522 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1523 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1524 (numOffsets-1) <<
") = " << valToCheck
1525 <<
" != myGraph_->lclIndsPacked_wdv.extent(0) = "
1526 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Optimized storage is the default unless the matrix has a static graph
// whose storage is not optimized; the "Optimize Storage" parameter, when
// a ParameterList is given, overrides that default.
1534 const bool defaultOptStorage =
1535 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1536 const bool requestOptimizedStorage =
1537 (! params.is_null () &&
1538 params->get (
"Optimize Storage", defaultOptStorage)) ||
1539 (params.is_null () && defaultOptStorage);
1544 if (requestOptimizedStorage) {
1549 std::ostringstream os;
1550 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1551 << myGraph_->k_numRowEntries_.extent(0) << endl;
1552 std::cerr << os.str();
// Release the per-row entry counts and make the unpacked members alias
// the packed ones.
1555 myGraph_->k_numRowEntries_ = row_entries_type ();
1560 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1561 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1562 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1563 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1564 valuesUnpacked_wdv = valuesPacked_wdv;
1566 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1567 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1571 std::ostringstream os;
1572 os << *prefix <<
"User requested NOT to optimize storage"
1574 std::cerr << os.str();
// fillLocalMatrix (name taken from the profiling-region and createPrefix
// strings below).
//
// NOTE: several original lines are missing from this excerpt (signature,
// braces, some call heads); comments describe only the visible statements.
//
// Visible behavior:
//  * Reads the local entry count and allocation size from staticGraph_.
//  * Decides whether optimized storage is requested from the
//    "Optimize Storage" parameter (default depends on isStaticGraph() and
//    staticGraph_->isStorageOptimized()).
//  * If the entry count differs from the allocation size, allocates a
//    packed values array and copies values into it with pack_functor via
//    Kokkos::parallel_for; otherwise valuesPacked_wdv aliases
//    valuesUnpacked_wdv.
//  * If optimized storage is requested, valuesUnpacked_wdv is set to alias
//    valuesPacked_wdv and storageStatus_ becomes STORAGE_1D_PACKED.
1579 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1584 using ::Tpetra::Details::ProfilingRegion;
1585 using Teuchos::ArrayRCP;
1586 using Teuchos::Array;
1587 using Teuchos::null;
1591 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1592 using non_const_row_map_type =
typename row_map_type::non_const_type;
1593 using values_type =
typename local_matrix_device_type::values_type;
1594 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1595 const size_t lclNumRows = getLocalNumRows();
// Prefix for diagnostic output written to std::cerr (the guarding
// condition is not visible in this excerpt).
1598 std::unique_ptr<std::string> prefix;
1600 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1601 std::ostringstream os;
1602 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1603 std::cerr << os.str ();
// Entry count vs. allocation size decides below whether packing is needed.
1615 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1616 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1617 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1619 row_map_type k_ptrs;
// Optimized storage is requested unless a ParameterList is supplied and
// its "Optimize Storage" entry (with the default computed here) is false.
1625 bool requestOptimizedStorage =
true;
1626 const bool default_OptimizeStorage =
1627 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1628 if (! params.is_null() &&
1629 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1630 requestOptimizedStorage =
false;
// The graph's storage is not optimized but optimization was requested:
// a message is issued and the request is dropped.  The name of the
// macro/function receiving the argument list below (original line 1639)
// is not visible in this excerpt.
1637 if (! staticGraph_->isStorageOptimized () &&
1638 requestOptimizedStorage) {
1640 (
true, std::runtime_error,
"You requested optimized storage "
1641 "by setting the \"Optimize Storage\" flag to \"true\" in "
1642 "the ParameterList, or by virtue of default behavior.  "
1643 "However, the associated CrsGraph was filled separately and "
1644 "requested not to optimize storage.  Therefore, the "
1645 "CrsMatrix cannot optimize storage.");
1646 requestOptimizedStorage =
false;
// Unpacked case: fewer entries used than allocated, so values get packed.
1671 if (nodeNumEntries != nodeNumAllocated) {
1673 std::ostringstream os;
1674 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1675 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1677 std::cerr << os.str();
1682 std::ostringstream os;
1683 os << *prefix <<
"Allocate packed row offsets: "
1684 << (lclNumRows+1) << endl;
1685 std::cerr << os.str();
1687 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
// Total packed entry count; the expression computing it is not visible
// in this excerpt.
1692 size_t lclTotalNumEntries = 0;
1695 typename row_entries_type::const_type numRowEnt_h =
1696 staticGraph_->k_numRowEntries_;
1698 lclTotalNumEntries =
1705 std::ostringstream os;
1706 os << *prefix <<
"Allocate packed values: "
1707 << lclTotalNumEntries << endl;
1708 std::cerr << os.str ();
1710 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
// Functor that copies values row by row from the unpacked layout
// (offsets k_rowPtrs) into k_vals (offsets tmpk_ptrs).
1714 typename values_type::non_const_type,
1715 typename values_type::const_type,
1716 typename row_map_type::non_const_type,
1717 typename row_map_type::const_type> valsPacker
1718 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1719 tmpk_ptrs, k_rowPtrs);
1722 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1723 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1724 range_type (0, lclNumRows), valsPacker);
// Already-packed case: packed values alias the unpacked values.
1728 valuesPacked_wdv = valuesUnpacked_wdv;
1730 std::ostringstream os;
1731 os << *prefix <<
"Storage already packed: "
1732 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1733 std::cerr << os.str();
// With optimized storage, only the packed values are kept.
1738 if (requestOptimizedStorage) {
1741 valuesUnpacked_wdv = valuesPacked_wdv;
1743 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Inserts new column indices into a graph row and copies the matching
// values into the row's value storage.
//
// NOTE: the function name and its leading parameters (including `graph`
// and `rowInfo`, which are used below) are on lines missing from this
// excerpt.
//
// Visible parameters:
//   newInds    - indices to insert, forwarded to graph.insertIndices.
//   oldRowVals - destination value storage for the row.
//   newRowVals - values to copy in.
//   lg, I      - forwarded unchanged to graph.insertIndices.
1747 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1752 const typename crs_graph_type::SLocalGlobalViews& newInds,
1753 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1754 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1755 const ELocalGlobal lg,
1756 const ELocalGlobal I)
// Entry count before insertion; new values are written starting here.
1758 const size_t oldNumEnt = rowInfo.numEntries;
1759 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
// Byte-wise copy of the first numInserted new values to positions
// [oldNumEnt, oldNumEnt + numInserted) of oldRowVals.  Skipped when
// nothing was inserted, so element 0 of newRowVals is never touched then.
1765 if (numInserted > 0) {
1766 const size_t startOffset = oldNumEnt;
1767 memcpy (&oldRowVals[startOffset], &newRowVals[0],
1768 numInserted *
sizeof (impl_scalar_type));
// insertLocalValues (ArrayView overload; name taken from tfecfFuncName).
//
// NOTE: several original lines are missing from this excerpt (signature
// head, braces, some conditions); comments describe only the visible
// statements.
//
// Visible parameters:
//   indices - local column indices to insert.
//   values  - values to insert; must have the same size as `indices`.
//
// Visible behavior:
//  * Throws std::runtime_error if fill is not active, if the graph is
//    static, if the graph has no column Map, if the graph indices are
//    global, if values.size() != indices.size(), or if lclRow is not a
//    local row of the row Map.
//  * Allocates values (LocalIndices, GraphNotYetAllocated) when the graph's
//    indices are not yet allocated.
//  * In HAVE_TPETRA_DEBUG builds, collects column indices that fail a check
//    (the test itself is on a missing line) and throws
//    std::invalid_argument listing them.
//  * Calls graph.insertLocalIndicesImpl with a callback that either adds
//    (+=) or assigns (=) values[k] at the reported offset; the INSERT mode
//    selects assignment.  Any other CombineMode handled by the final branch
//    throws std::invalid_argument.
1772 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1776 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1777 const Teuchos::ArrayView<const Scalar>& values,
1781 const char tfecfFuncName[] =
"insertLocalValues: ";
// Precondition checks.
1783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1784 (! this->isFillActive (), std::runtime_error,
1785 "Fill is not active.  After calling fillComplete, you must call "
1786 "resumeFill before you may insert entries into the matrix again.");
1787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1788 (this->isStaticGraph (), std::runtime_error,
1789 "Cannot insert indices with static graph; use replaceLocalValues() "
1793 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1794 (graph.
colMap_.is_null (), std::runtime_error,
1795 "Cannot insert local indices without a column map.");
1796 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1798 std::runtime_error,
"Graph indices are global; use "
1799 "insertGlobalValues().");
1800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1801 (values.size () != indices.size (), std::runtime_error,
1802 "values.size() = " << values.size ()
1803 <<
" != indices.size() = " << indices.size () <<
".");
1804 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1805 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1806 "Local row index " << lclRow <<
" does not belong to this process.");
// Lazily allocate storage on first insertion.
1808 if (! graph.indicesAreAllocated ()) {
1812 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
// Debug-only validation of the input column indices.
1815 #ifdef HAVE_TPETRA_DEBUG
1816 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1821 using Teuchos::toString;
1824 Teuchos::Array<LocalOrdinal> badColInds;
1825 bool allInColMap =
true;
1826 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1828 allInColMap =
false;
1829 badColInds.push_back (indices[k]);
1832 if (! allInColMap) {
1833 std::ostringstream os;
1834 os <<
"You attempted to insert entries in owned row " << lclRow
1835 <<
", at the following column indices: " << toString (indices)
1837 os <<
"Of those, the following indices are not in the column Map on "
1838 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1839 "the matrix has a column Map already, it is invalid to insert "
1840 "entries at those locations.";
1841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1842 (
true, std::invalid_argument, os.str ());
1845 #endif // HAVE_TPETRA_DEBUG
// Host view of the row's values; the callbacks below write into it.
1849 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// Accumulating callback: adds values[k] at the offset reported by the
// graph (the condition selecting this branch is on a missing line).
1851 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1852 valsView[offset] += values[k]; };
1853 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1854 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1855 }
else if (CM ==
INSERT) {
// INSERT mode: overwrite instead of accumulate.
1856 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1857 valsView[offset] = values[k]; };
1858 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1859 graph.insertLocalIndicesImpl(lclRow, indices, cb);
// Unsupported CombineMode: report it.  The literal after the mode name
// starts with a space so the mode name and "but" do not run together.
1861 std::ostringstream os;
1862 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1863 <<
" but this has not been implemented." << endl;
1864 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1865 (
true, std::invalid_argument, os.str ());
// Raw-array overload: wraps `cols` and `vals` (each of length numEnt) in
// non-owning Teuchos::ArrayView objects and forwards to the ArrayView
// overload of insertLocalValues together with CM.
//
// NOTE: the function name line and the `localRow` / `CM` parameter lines
// are missing from this excerpt; both names are used in the call below.
1869 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1873 const LocalOrdinal numEnt,
1874 const Scalar vals[],
1875 const LocalOrdinal cols[],
1878 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1879 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1880 this->insertLocalValues (localRow, colsT, valsT, CM);
// insertGlobalValuesImpl (name taken from tfecfFuncName).
//
// NOTE: many original lines are missing from this excerpt (signature head,
// the call that performs the insertion and defines `numInserted`, braces);
// comments describe only the visible statements.
//
// Visible behavior:
//  * Allocates values (GlobalIndices, GraphNotYetAllocated) if the graph's
//    indices are not yet allocated, then refreshes rowInfo from the graph.
//  * Builds a callback that accumulates (+=) vals[k] into the row's host
//    value view at the offset supplied by the caller of the callback.
//  * In HAVE_TPETRA_DEBUG builds, compares the expected new entry count
//    (curNumEnt + numInserted) against a value obtained from the graph and,
//    on mismatch, throws std::logic_error with a detailed dump of the
//    inputs and of the row's current contents.
1883 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1888 const GlobalOrdinal gblColInds[],
1890 const size_t numInputEnt)
1892 #ifdef HAVE_TPETRA_DEBUG
1893 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
// Entry count of the row before insertion (debug builds only).
1895 const size_t curNumEnt = rowInfo.numEntries;
1896 #endif // HAVE_TPETRA_DEBUG
// Lazily allocate storage on first insertion.
1898 if (! graph.indicesAreAllocated ()) {
1901 using ::Tpetra::Details::Behavior;
1902 const bool verbose = Behavior::verbose(
"CrsMatrix");
1903 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
// Re-read the row info from the graph after allocation.
1908 rowInfo = graph.
getRowInfo (rowInfo.localRow);
// Accumulating callback writing into the row's host value view.
1911 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1912 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1913 valsView[offset] += vals[k];
1915 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1916 #ifdef HAVE_TPETRA_DEBUG
// Debug-only post-condition: the row's entry count must have grown by
// exactly numInserted.
1922 #ifdef HAVE_TPETRA_DEBUG
1923 size_t newNumEnt = curNumEnt + numInserted;
1924 const size_t chkNewNumEnt =
1926 if (chkNewNumEnt != newNumEnt) {
// Build a diagnostic dump of the inputs.
1927 std::ostringstream os;
1928 os << std::endl <<
"newNumEnt = " << newNumEnt
1929 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1930 <<
") = " << chkNewNumEnt <<
"." << std::endl
1931 <<
"\torigNumEnt: " << origNumEnt << std::endl
1932 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1933 <<
"\tgblColInds: [";
1934 for (
size_t k = 0; k < numInputEnt; ++k) {
1935 os << gblColInds[k];
1936 if (k +
size_t (1) < numInputEnt) {
1940 os <<
"]" << std::endl
1942 for (
size_t k = 0; k < numInputEnt; ++k) {
1944 if (k +
size_t (1) < numInputEnt) {
1948 os <<
"]" << std::endl;
// If row views are available, also dump the row's current indices and
// values (global or local, depending on how the matrix is indexed).
1950 if (this->supportsRowViews ()) {
1951 values_host_view_type vals2;
1952 if (this->isGloballyIndexed ()) {
1953 global_inds_host_view_type gblColInds2;
1954 const GlobalOrdinal gblRow =
1955 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1957 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1958 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
// getGlobalRowView may throw; record that in the dump instead of
// propagating the exception.
1962 bool getViewThrew =
false;
1964 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1966 catch (std::exception& e) {
1967 getViewThrew =
true;
1968 os <<
"getGlobalRowView threw exception:" << std::endl
1969 << e.what () << std::endl;
1971 if (! getViewThrew) {
1972 os <<
"\tNew global column indices: ";
1973 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1974 os << gblColInds2[jjj] <<
" ";
1976 os <<
"\tNew values: ";
1977 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1978 os << vals2[jjj] <<
" ";
1983 else if (this->isLocallyIndexed ()) {
1984 local_inds_host_view_type lclColInds2;
1985 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1986 os <<
"\tNew local column indices: ";
1987 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1988 os << lclColInds2[jjj] <<
" ";
1990 os <<
"\tNew values: ";
1991 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1992 os << vals2[jjj] <<
" ";
// Always throws once the mismatch has been detected.
1997 os <<
"Please report this bug to the Tpetra developers.";
1998 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1999 (
true, std::logic_error, os.str ());
2001 #endif // HAVE_TPETRA_DEBUG
// insertGlobalValues (ArrayView overload; name taken from tfecfFuncName).
//
// NOTE: several original lines are missing from this excerpt (signature
// head with `gblRow`, the computation of `lclRow`, `graph`, `rowInfo`,
// braces); comments describe only the visible statements.
//
// Visible parameters:
//   indices - global column indices to insert.
//   values  - values to insert (size checked against `indices` only in
//             HAVE_TPETRA_DEBUG builds).
//
// Visible behavior:
//  * If the row is not owned by this process (lclRow is invalid), the
//    input is handed to insertNonownedGlobalValues.
//  * For an owned row with a static graph, throws std::runtime_error.
//  * If the graph has a column Map, the input indices are checked and
//    std::invalid_argument is thrown when any fails the check (the test
//    itself is on a missing line).
//  * Otherwise forwards the raw index/value pointers to
//    insertGlobalValuesImpl.
2004 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2008 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2009 const Teuchos::ArrayView<const Scalar>& values)
2011 using Teuchos::toString;
2014 typedef LocalOrdinal LO;
2015 typedef GlobalOrdinal GO;
2016 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2017 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
2018 const char tfecfFuncName[] =
"insertGlobalValues: ";
// Size consistency is only verified in debug builds.
2020 #ifdef HAVE_TPETRA_DEBUG
2021 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2022 (values.size () != indices.size (), std::runtime_error,
2023 "values.size() = " << values.size () <<
" != indices.size() = "
2024 << indices.size () <<
".");
2025 #endif // HAVE_TPETRA_DEBUG
2029 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
// Row not owned by the calling process: hand off to the non-owned path.
2032 if (lclRow == OTLO::invalid ()) {
2039 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row but static graph: inserting new entries is an error.
2042 if (this->isStaticGraph ()) {
2044 const int myRank = rowMap.getComm ()->getRank ();
2045 const int numProcs = rowMap.getComm ()->getSize ();
2046 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2047 (
true, std::runtime_error,
2048 "The matrix was constructed with a constant (\"static\") graph, "
2049 "yet the given global row index " << gblRow <<
" is in the row "
2050 "Map on the calling process (with rank " << myRank <<
", of " <<
2051 numProcs <<
" process(es)).  In this case, you may not insert "
2052 "new entries into rows owned by the calling process.");
// Raw pointers into the caller's arrays; the values are reinterpreted as
// IST (IST's definition is on a line not visible in this excerpt).
2056 const IST*
const inputVals =
2057 reinterpret_cast<const IST*
> (values.getRawPtr ());
2058 const GO*
const inputGblColInds = indices.getRawPtr ();
2059 const size_t numInputEnt = indices.size ();
// With a column Map present, validate every input column index.  The
// list of offending indices is only collected in debug builds.
2068 if (! graph.
colMap_.is_null ()) {
2074 #ifdef HAVE_TPETRA_DEBUG
2075 Teuchos::Array<GO> badColInds;
2076 #endif // HAVE_TPETRA_DEBUG
2077 const size_type numEntriesToInsert = indices.size ();
2078 bool allInColMap =
true;
2079 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2081 allInColMap =
false;
2082 #ifdef HAVE_TPETRA_DEBUG
2083 badColInds.push_back (indices[k]);
2086 #endif // HAVE_TPETRA_DEBUG
2089 if (! allInColMap) {
2090 std::ostringstream os;
2091 os <<
"You attempted to insert entries in owned row " << gblRow
2092 <<
", at the following column indices: " << toString (indices)
2094 #ifdef HAVE_TPETRA_DEBUG
2095 os <<
"Of those, the following indices are not in the column Map "
2096 "on this process: " << toString (badColInds) <<
"." << endl
2097 <<
"Since the matrix has a column Map already, it is invalid "
2098 "to insert entries at those locations.";
2100 os <<
"At least one of those indices is not in the column Map "
2101 "on this process." << endl <<
"It is invalid to insert into "
2102 "columns not in the column Map on the process that owns the "
2104 #endif // HAVE_TPETRA_DEBUG
2105 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2106 (
true, std::invalid_argument, os.str ());
// All checks passed: perform the insertion.
2110 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2111 inputVals, numInputEnt);
// Raw-array overload: wraps `inds` and `vals` (each of length numEnt) in
// non-owning Teuchos::ArrayView objects and forwards to the ArrayView
// overload of insertGlobalValues.
//
// NOTE: the function name line and the `globalRow` parameter line are
// missing from this excerpt; `globalRow` is used in the call below.
2116 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2120 const LocalOrdinal numEnt,
2121 const Scalar vals[],
2122 const GlobalOrdinal inds[])
2124 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2125 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2126 this->insertGlobalValues (globalRow, indsT, valsT);
// insertGlobalValuesFiltered (name taken from tfecfFuncName).
//
// NOTE: several original lines are missing from this excerpt (braces, loop
// increments, some conditions, the trailing parameter); comments describe
// only the visible statements.
//
// Visible parameters:
//   gblRow  - global row index.
//   indices - global column indices.
//   values  - values; must have the same size as `indices`.
//
// Visible behavior:
//  * Throws std::runtime_error when values.size() != indices.size().
//  * If gblRow is not in the row Map on this process, hands the input to
//    insertNonownedGlobalValues.
//  * For an owned row with a static graph, throws std::runtime_error.
//  * With a column Map, the input is processed as maximal runs of
//    consecutive entries whose global column index maps to a valid local
//    index; each run is inserted (via insertLocalValues when the graph is
//    locally indexed, via insertGlobalValuesImpl otherwise) and the entry
//    that ended the run is skipped.
//  * Without a column Map, everything is passed to insertGlobalValuesImpl.
2130 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2134 const GlobalOrdinal gblRow,
2135 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2136 const Teuchos::ArrayView<const Scalar>& values,
2139 typedef impl_scalar_type IST;
2140 typedef LocalOrdinal LO;
2141 typedef GlobalOrdinal GO;
2142 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2143 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
2146 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2147 (values.size () != indices.size (), std::runtime_error,
2148 "values.size() = " << values.size () <<
" != indices.size() = "
2149 << indices.size () <<
".");
// Translate the global row index; an invalid result means the row is not
// owned by the calling process.
2154 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2155 const LO lclRow = rowMap.getLocalElement (gblRow);
2156 if (lclRow == OTLO::invalid ()) {
2163 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row but static graph: inserting new entries is an error.
2166 if (this->isStaticGraph ()) {
2168 const int myRank = rowMap.getComm ()->getRank ();
2169 const int numProcs = rowMap.getComm ()->getSize ();
2170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2171 (
true, std::runtime_error,
2172 "The matrix was constructed with a constant (\"static\") graph, "
2173 "yet the given global row index " << gblRow <<
" is in the row "
2174 "Map on the calling process (with rank " << myRank <<
", of " <<
2175 numProcs <<
" process(es)).  In this case, you may not insert "
2176 "new entries into rows owned by the calling process.");
2179 crs_graph_type& graph = * (this->myGraph_);
2180 const IST*
const inputVals =
2181 reinterpret_cast<const IST*
> (values.getRawPtr ());
2182 const GO*
const inputGblColInds = indices.getRawPtr ();
2183 const size_t numInputEnt = indices.size ();
2184 RowInfo rowInfo = graph.getRowInfo (lclRow);
// Case 1: column Map present and graph locally indexed -- convert each
// run of valid indices to local indices and insert with insertLocalValues.
2186 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2193 const map_type& colMap = * (graph.colMap_);
2194 size_t curOffset = 0;
2195 while (curOffset < numInputEnt) {
// Collect local indices for the run starting at curOffset.  What happens
// for an index that maps to invalid (original lines 2205-2207) is not
// visible in this excerpt.
2199 Teuchos::Array<LO> lclIndices;
2200 size_t endOffset = curOffset;
2201 for ( ; endOffset < numInputEnt; ++endOffset) {
2202 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2203 if (lclIndex != OTLO::invalid())
2204 lclIndices.push_back(lclIndex);
// Insert the run [curOffset, endOffset) if it is non-empty.
2211 const LO numIndInSeq = (endOffset - curOffset);
2212 if (numIndInSeq != 0) {
2213 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
// The run must have ended at the end of the input or at an index that is
// not in the column Map.
2219 const bool invariant = endOffset == numInputEnt ||
2220 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2221 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2222 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
// Skip the entry that terminated the run.
2224 curOffset = endOffset + 1;
// Case 2: column Map present, graph not locally indexed -- same run
// detection, but insert by global index.
2227 else if (! graph.colMap_.is_null ()) {
2228 const map_type& colMap = * (graph.colMap_);
2229 size_t curOffset = 0;
2230 while (curOffset < numInputEnt) {
2234 size_t endOffset = curOffset;
2235 for ( ; endOffset < numInputEnt &&
2236 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2242 const LO numIndInSeq = (endOffset - curOffset);
2243 if (numIndInSeq != 0) {
// Refresh rowInfo before each insertion.
2244 rowInfo = graph.getRowInfo(lclRow);
2245 this->insertGlobalValuesImpl (graph, rowInfo,
2246 inputGblColInds + curOffset,
2247 inputVals + curOffset,
2254 const bool invariant = endOffset == numInputEnt ||
2255 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2256 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2257 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2259 curOffset = endOffset + 1;
// Case 3: no column Map -- nothing to filter; insert everything.
2263 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2264 inputVals, numInputEnt);
// insertGlobalValuesFilteredChecked: calls insertGlobalValuesFiltered and
// converts any std::exception it throws into a std::runtime_error whose
// message carries extra context.
//
// NOTE: several original lines are missing from this excerpt (return
// type, trailing parameters, braces, the branch condition between the two
// message forms); comments describe only the visible statements.
//
// Visible parameters:
//   gblRow  - global row index, forwarded and printed in the message.
//   indices - global column indices, forwarded.
//   values  - values, forwarded.
//   prefix  - NUL-terminated C string placed at the start of the detailed
//             message; must not be null.
//
// Throws std::runtime_error if insertGlobalValuesFiltered throws.
2269 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2271 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2272 insertGlobalValuesFilteredChecked(
2273 const GlobalOrdinal gblRow,
2274 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2275 const Teuchos::ArrayView<const Scalar>& values,
2276 const char*
const prefix,
2284 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2286 catch(std::exception& e) {
2287 std::ostringstream os;
2289 const size_t maxNumToPrint =
// Stream the whole prefix string: `prefix` is a `const char*`, so
// dereferencing it would emit only its first character.
2291 os << prefix <<
": insertGlobalValuesFiltered threw an "
2292 "exception: " << e.what() << endl
2293 <<
"Global row index: " << gblRow << endl;
// Short form of the message (used in the other branch, whose condition
// is not visible in this excerpt).
2301 os <<
": insertGlobalValuesFiltered threw an exception: "
2304 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// Replaces values in one row at the given local column indices.
//
// NOTE: many original lines are missing from this excerpt (function name,
// leading parameters such as `rowVals`, `graph`, `rowInfo`, `newVals`, the
// branch that selects between the two loops, the hit counter and the
// return statement); comments describe only the visible statements.
//
// Visible parameters:
//   inds    - local column indices whose values are to be replaced.
//   numElts - number of entries in `inds`.
//
// Visible behavior: for each input index, KokkosSparse::findRelOffset
// searches the row's stored column indices; a result different from
// rowInfo.numEntries is used as the position at which rowVals is
// overwritten with newVals[j]; otherwise the input entry is skipped.
2308 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2314 const LocalOrdinal inds[],
2316 const LocalOrdinal numElts)
2318 typedef LocalOrdinal LO;
2319 typedef GlobalOrdinal GO;
// Whether the graph reports sorted rows; passed on to findRelOffset.
2320 const bool sorted = graph.
isSorted ();
// First loop: the stored indices are searched directly for the local
// index lclColInd.
2330 for (LO j = 0; j < numElts; ++j) {
2331 const LO lclColInd = inds[j];
2332 const size_t offset =
2333 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2334 lclColInd, hint, sorted);
2335 if (offset != rowInfo.numEntries) {
2336 rowVals[offset] = newVals[j];
// Second path: needs the column Map; without one, report failure by
// returning the invalid LocalOrdinal.
2343 if (graph.
colMap_.is_null ()) {
2344 return Teuchos::OrdinalTraits<LO>::invalid ();
// Second loop: search by gblColInd (its computation is on a missing
// line); entries whose gblColInd is invalid are skipped.
2352 for (LO j = 0; j < numElts; ++j) {
2354 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2355 const size_t offset =
2356 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2357 gblColInd, hint, sorted);
2358 if (offset != rowInfo.numEntries) {
2359 rowVals[offset] = newVals[j];
// replaceLocalValues, Teuchos::ArrayView overload.
//
// NOTE: the function name line and the `localRow` parameter line are
// missing from this excerpt; `localRow` is used in the call below.
//
// Visible parameters:
//   lclCols - local column indices.
//   vals    - replacement values; must have the same size as lclCols.
//
// Returns the invalid LocalOrdinal when the two sizes differ; otherwise
// returns whatever the raw-pointer overload of replaceLocalValues returns.
2378 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2382 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2383 const Teuchos::ArrayView<const Scalar>& vals)
2385 typedef LocalOrdinal LO;
// Sizes are compared after conversion to LO.
2387 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
2388 if (static_cast<LO> (vals.size ()) != numInputEnt) {
2389 return Teuchos::OrdinalTraits<LO>::invalid ();
2391 const LO*
const inputInds = lclCols.getRawPtr ();
2392 const Scalar*
const inputVals = vals.getRawPtr ();
2393 return this->replaceLocalValues (localRow, numInputEnt,
2394 inputVals, inputInds);
// replaceLocalValues, Kokkos::View overload.
//
// NOTE: the function name line, the `localRow` parameter line and the
// definition of `LO` are missing from this excerpt.
//
// Visible parameters:
//   inputInds - 1-D view of local column indices.
//   inputVals - 1-D view of replacement values (impl_scalar_type); must
//               have the same extent as inputInds.
//
// Returns the invalid LocalOrdinal when the extents differ; otherwise
// returns whatever the raw-pointer overload of replaceLocalValues returns.
2397 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2403 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2404 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2407 const LO numInputEnt = inputInds.extent(0);
2408 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2409 return Teuchos::OrdinalTraits<LO>::invalid();
// The view's impl_scalar_type data is reinterpreted as Scalar to match
// the raw-pointer overload's signature.
2411 const Scalar*
const inVals =
2412 reinterpret_cast<const Scalar*
>(inputVals.data());
2413 return this->replaceLocalValues(localRow, numInputEnt,
2414 inVals, inputInds.data());
// replaceLocalValues, raw-pointer overload.
//
// NOTE: the function name line, the `localRow` parameter line, the
// definitions of `IST`, `graph` and `rowInfo`, and the braces are missing
// from this excerpt.
//
// Visible parameters:
//   numEnt    - number of entries in inputVals / inputCols.
//   inputVals - replacement values.
//   inputCols - local column indices.
//
// Visible return values:
//   * invalid LocalOrdinal if fill is not active or staticGraph_ is null;
//   * 0 if rowInfo.localRow is invalid;
//   * otherwise the result of replaceLocalValuesImpl.
2417 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2421 const LocalOrdinal numEnt,
2422 const Scalar inputVals[],
2423 const LocalOrdinal inputCols[])
2426 typedef LocalOrdinal LO;
2428 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2430 return Teuchos::OrdinalTraits<LO>::invalid ();
2435 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2438 return static_cast<LO
> (0);
// Host view of the row's current values, modified in place by the Impl
// routine; input values are reinterpreted as IST.
2440 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2441 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2442 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2443 inputCols, inVals, numEnt);
// Implementation helper taking global column indices.  The method-name lines
// are not visible in this excerpt; presumably replaceGlobalValuesImpl -- TODO
// confirm.  The visible part wraps `inds` in an ArrayView and builds a
// callback that overwrites rowVals[offset] with newVals[k]; the code that
// invokes the callback is not visible here.
2446 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2452 const GlobalOrdinal inds[],
2454 const LocalOrdinal numElts)
2456 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
// Callback arguments: k indexes the input arrays, the second argument is
// unused, and offset indexes the row's stored values.
2458 [&](
size_t const k,
size_t const ,
size_t const offset) {
2459 rowVals[offset] = newVals[k];
// std::ref wraps the lambda by reference inside the std::function.
2461 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// Teuchos::ArrayView overload.  The method-name lines are not visible in this
// excerpt; presumably replaceGlobalValues -- TODO confirm.
// Checks that the index and value arrays have equal length, then forwards raw
// pointers to this->replaceGlobalValues (globalRow, numInputEnt, vals, inds).
// Returns Teuchos::OrdinalTraits<LO>::invalid () on a length mismatch.
2465 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2469 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2470 const Teuchos::ArrayView<const Scalar>& inputVals)
2472 typedef LocalOrdinal LO;
2474 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2475 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2476 return Teuchos::OrdinalTraits<LO>::invalid ();
2478 return this->replaceGlobalValues (globalRow, numInputEnt,
2479 inputVals.getRawPtr (),
2480 inputGblColInds.getRawPtr ());
// Raw-pointer overload.  The method-name lines are not visible in this
// excerpt; presumably replaceGlobalValues -- TODO confirm.
// Visible behavior:
//   - returns the invalid-LO sentinel if fill is not active or the matrix has
//     no static graph;
//   - returns 0 if rowInfo.localRow is the invalid size_t sentinel;
//   - otherwise gets a nonconst host view of the row's values and delegates to
//     replaceGlobalValuesImpl, returning whatever it returns.
2483 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2487 const LocalOrdinal numEnt,
2488 const Scalar inputVals[],
2489 const GlobalOrdinal inputGblColInds[])
2492 typedef LocalOrdinal LO;
2494 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2496 return Teuchos::OrdinalTraits<LO>::invalid ();
// The definitions of `graph` and `rowInfo` are not visible in this excerpt.
2501 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2504 return static_cast<LO
> (0);
2507 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// Input values are reinterpreted from Scalar to IST before being handed to
// the implementation routine.
2508 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2509 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2510 inputGblColInds, inVals, numEnt);
// Kokkos::View overload.  The method-name lines are not visible in this
// excerpt; presumably replaceGlobalValues -- TODO confirm.
// Checks that both views have the same extent(0), reinterprets the
// impl_scalar_type data as Scalar, and forwards to this->replaceGlobalValues.
// Returns the invalid-LO sentinel on a length mismatch.
2513 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2519 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2520 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2529 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
2530 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2531 return Teuchos::OrdinalTraits<LO>::invalid();
2533 const Scalar*
const inVals =
2534 reinterpret_cast<const Scalar*
>(inputVals.data());
// The final argument of this call is not visible in this excerpt.
2535 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
// Implementation helper that adds input values into one row's stored values,
// given global column indices.  The method-name lines are not visible in this
// excerpt; presumably sumIntoGlobalValuesImpl -- TODO confirm.
// Two loops are visible: one looks up entries by lclColInd (skipping values
// equal to LINV) and one looks up entries by the input global index directly.
// In both, an entry is updated only when findRelOffset returns something
// other than rowInfo.numEntries.  The conditions choosing between the loops,
// and between the atomic and non-atomic update, are not visible here.
2539 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2545 const GlobalOrdinal inds[],
2547 const LocalOrdinal numElts,
2550 typedef LocalOrdinal LO;
2551 typedef GlobalOrdinal GO;
// `sorted` is passed to findRelOffset along with the search hint.
2553 const bool sorted = graph.
isSorted ();
2562 if (graph.
colMap_.is_null ()) {
2573 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2575 for (LO j = 0; j < numElts; ++j) {
// The definition of lclColInd is not visible in this excerpt.
2577 if (lclColInd != LINV) {
2578 const size_t offset =
2579 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2580 lclColInd, hint, sorted);
// offset == numEntries is treated as "not found in this row".
2581 if (offset != rowInfo.numEntries) {
2583 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2586 rowVals[offset] += newVals[j];
2599 for (LO j = 0; j < numElts; ++j) {
2600 const GO gblColInd = inds[j];
2601 const size_t offset =
2602 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2603 gblColInd, hint, sorted);
2604 if (offset != rowInfo.numEntries) {
2606 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2609 rowVals[offset] += newVals[j];
// Teuchos::ArrayView overload.  The method-name lines are not visible in this
// excerpt; presumably sumIntoGlobalValues -- TODO confirm.
// Checks that the index and value arrays have equal length, then forwards raw
// pointers to this->sumIntoGlobalValues.  Returns the invalid-LO sentinel on
// a length mismatch.
2623 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2627 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2628 const Teuchos::ArrayView<const Scalar>& inputVals,
2631 typedef LocalOrdinal LO;
2633 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2634 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2635 return Teuchos::OrdinalTraits<LO>::invalid ();
// The final argument of this call is not visible in this excerpt.
2637 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2638 inputVals.getRawPtr (),
2639 inputGblColInds.getRawPtr (),
// Raw-pointer overload.  The method-name lines are not visible in this
// excerpt; presumably sumIntoGlobalValues -- TODO confirm.
// Visible behavior:
//   - returns the invalid-LO sentinel if fill is not active or the matrix has
//     no static graph;
//   - when rowInfo.localRow is the invalid size_t sentinel, wraps the inputs
//     in ArrayViews and calls insertNonownedGlobalValues (what is returned on
//     that path is not visible here);
//   - otherwise delegates to sumIntoGlobalValuesImpl on the row's host values.
2643 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2647 const LocalOrdinal numInputEnt,
2648 const Scalar inputVals[],
2649 const GlobalOrdinal inputGblColInds[],
2653 typedef LocalOrdinal LO;
2654 typedef GlobalOrdinal GO;
2656 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2658 return Teuchos::OrdinalTraits<LO>::invalid ();
2663 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2668 using Teuchos::ArrayView;
// For zero entries the views are built from nullptr rather than from the
// caller's pointers.
2669 ArrayView<const GO> inputGblColInds_av(
2670 numInputEnt == 0 ?
nullptr : inputGblColInds,
2672 ArrayView<const Scalar> inputVals_av(
2673 numInputEnt == 0 ?
nullptr :
2674 inputVals, numInputEnt);
2679 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2690 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2691 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2692 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2693 inputGblColInds, inVals,
2694 numInputEnt, atomic);
// Row-level entry point that applies the binary function f to entries of local
// row lclRow.  The method-name lines are not visible in this excerpt;
// presumably transformLocalValues -- TODO confirm.
// Visible behavior:
//   - returns the invalid-LO sentinel if fill is not active or the matrix has
//     no static graph;
//   - returns 0 if graph.getRowInfo (lclRow) reports an invalid localRow;
//   - otherwise delegates to the transformLocalValues overload that takes the
//     row's value pointer, graph and rowInfo.
2698 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2702 const LocalOrdinal numInputEnt,
2703 const impl_scalar_type inputVals[],
2704 const LocalOrdinal inputCols[],
2705 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2708 using Tpetra::Details::OrdinalTraits;
2709 typedef LocalOrdinal LO;
2711 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2713 return Teuchos::OrdinalTraits<LO>::invalid ();
2715 const crs_graph_type& graph = * (this->staticGraph_);
2716 const RowInfo rowInfo = graph.getRowInfo (lclRow);
2718 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2721 return static_cast<LO
> (0);
2723 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2724 return this->transformLocalValues (curRowVals.data (), graph,
2725 rowInfo, inputCols, inputVals,
2726 numInputEnt, f, atomic);
// Row-level entry point that applies the binary function f to entries of
// global row gblRow.
// Visible behavior:
//   - returns the invalid-LO sentinel if fill is not active or the matrix has
//     no static graph;
//   - returns 0 if graph.getRowInfoFromGlobalRowIndex (gblRow) reports an
//     invalid localRow;
//   - otherwise delegates to the transformGlobalValues overload that takes the
//     row's value pointer, graph and rowInfo.
2729 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2731 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2732 transformGlobalValues (
const GlobalOrdinal gblRow,
2733 const LocalOrdinal numInputEnt,
2734 const impl_scalar_type inputVals[],
2735 const GlobalOrdinal inputCols[],
2736 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2739 using Tpetra::Details::OrdinalTraits;
2740 typedef LocalOrdinal LO;
2742 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2744 return OrdinalTraits<LO>::invalid ();
2746 const crs_graph_type& graph = * (this->staticGraph_);
2747 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2749 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2752 return static_cast<LO
> (0);
2754 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2755 return this->transformGlobalValues (curRowVals.data (), graph,
2756 rowInfo, inputCols, inputVals,
2757 numInputEnt, f, atomic);
// Applies f to entries of one row, addressed by local column indices.
// For each input j the target entry is found with findRelOffset; when it is
// found, rowVals[offset] becomes f (rowVals[offset], newVals[j]).
//   - Locally indexed graph: inds[j] is searched for directly.
//   - Globally indexed graph: inds[j] is first converted with
//     colMap.getGlobalElement; inputs that map to GINV are skipped.
// Each branch has two update forms: one through atomic_binary_function_update
// and one plain assignment.  The condition selecting between them, the
// handling of a null column Map, and the return value are not visible in this
// excerpt.
2760 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2762 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2763 transformLocalValues (impl_scalar_type rowVals[],
2764 const crs_graph_type& graph,
2765 const RowInfo& rowInfo,
2766 const LocalOrdinal inds[],
2767 const impl_scalar_type newVals[],
2768 const LocalOrdinal numElts,
2769 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2772 typedef impl_scalar_type ST;
2773 typedef LocalOrdinal LO;
2774 typedef GlobalOrdinal GO;
// `sorted` is passed to findRelOffset along with the search hint.
2781 const bool sorted = graph.isSorted ();
2786 if (graph.isLocallyIndexed ()) {
2789 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2791 for (LO j = 0; j < numElts; ++j) {
2792 const LO lclColInd = inds[j];
2793 const size_t offset =
2794 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2795 lclColInd, hint, sorted);
// offset == numEntries is treated as "not found in this row".
2796 if (offset != rowInfo.numEntries) {
2805 volatile ST*
const dest = &rowVals[offset];
2806 (void) atomic_binary_function_update (dest, newVals[j], f);
2810 rowVals[offset] = f (rowVals[offset], newVals[j]);
2817 else if (graph.isGloballyIndexed ()) {
2821 if (graph.colMap_.is_null ()) {
2828 const map_type& colMap = * (graph.colMap_);
2831 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2833 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2834 for (LO j = 0; j < numElts; ++j) {
// Convert the caller's local index to the global index stored by the graph.
2835 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2836 if (gblColInd != GINV) {
2837 const size_t offset =
2838 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2839 gblColInd, hint, sorted);
2840 if (offset != rowInfo.numEntries) {
2849 volatile ST*
const dest = &rowVals[offset];
2850 (void) atomic_binary_function_update (dest, newVals[j], f);
2854 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Applies f to entries of one row, addressed by global column indices.
// For each input j the target entry is found with findRelOffset; when it is
// found, rowVals[offset] becomes f (rowVals[offset], newVals[j]).
//   - Globally indexed graph: inds[j] is searched for directly.
//   - Locally indexed graph: inds[j] is first converted with
//     colMap.getLocalElement; inputs that map to LINV are skipped.
// Each branch has two update forms: one through atomic_binary_function_update
// and one plain assignment.  The condition selecting between them, the
// handling of a null column Map, and the return value are not visible in this
// excerpt.
2869 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2871 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2872 transformGlobalValues (impl_scalar_type rowVals[],
2873 const crs_graph_type& graph,
2874 const RowInfo& rowInfo,
2875 const GlobalOrdinal inds[],
2876 const impl_scalar_type newVals[],
2877 const LocalOrdinal numElts,
2878 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2881 typedef impl_scalar_type ST;
2882 typedef LocalOrdinal LO;
2883 typedef GlobalOrdinal GO;
// `sorted` is passed to findRelOffset along with the search hint.
2890 const bool sorted = graph.isSorted ();
2895 if (graph.isGloballyIndexed ()) {
2898 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2900 for (LO j = 0; j < numElts; ++j) {
2901 const GO gblColInd = inds[j];
2902 const size_t offset =
2903 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2904 gblColInd, hint, sorted);
// offset == numEntries is treated as "not found in this row".
2905 if (offset != rowInfo.numEntries) {
2914 volatile ST*
const dest = &rowVals[offset];
2915 (void) atomic_binary_function_update (dest, newVals[j], f);
2919 rowVals[offset] = f (rowVals[offset], newVals[j]);
2926 else if (graph.isLocallyIndexed ()) {
2930 if (graph.colMap_.is_null ()) {
2936 const map_type& colMap = * (graph.colMap_);
2939 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2941 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2942 for (LO j = 0; j < numElts; ++j) {
// Convert the caller's global index to the local index stored by the graph.
2943 const LO lclColInd = colMap.getLocalElement (inds[j]);
2944 if (lclColInd != LINV) {
2945 const size_t offset =
2946 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2947 lclColInd, hint, sorted);
2948 if (offset != rowInfo.numEntries) {
2957 volatile ST*
const dest = &rowVals[offset];
2958 (void) atomic_binary_function_update (dest, newVals[j], f);
2962 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Implementation helper that adds input values into one row's stored values,
// given local column indices.  The method-name lines are not visible in this
// excerpt; presumably sumIntoLocalValuesImpl -- TODO confirm.
// Two loops are visible: one looks up entries by the input local index
// directly, and one looks up entries by gblColInd (skipping the invalid-GO
// sentinel).  In both, an entry is updated only when findRelOffset returns
// something other than rowInfo.numEntries.  The conditions choosing between
// the loops, and between the atomic and non-atomic update, are not visible.
2977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2983 const LocalOrdinal inds[],
2985 const LocalOrdinal numElts,
2988 typedef LocalOrdinal LO;
2989 typedef GlobalOrdinal GO;
// `sorted` is passed to findRelOffset along with the search hint.
2991 const bool sorted = graph.
isSorted ();
3001 for (LO j = 0; j < numElts; ++j) {
3002 const LO lclColInd = inds[j];
3003 const size_t offset =
3004 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3005 lclColInd, hint, sorted);
// offset == numEntries is treated as "not found in this row".
3006 if (offset != rowInfo.numEntries) {
3008 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3011 rowVals[offset] += newVals[j];
// Without a column Map the routine reports failure with the invalid-LO
// sentinel.
3019 if (graph.
colMap_.is_null ()) {
3020 return Teuchos::OrdinalTraits<LO>::invalid ();
3028 for (LO j = 0; j < numElts; ++j) {
// The definition of gblColInd is not visible in this excerpt.
3030 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3031 const size_t offset =
3032 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3033 gblColInd, hint, sorted);
3034 if (offset != rowInfo.numEntries) {
3036 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3039 rowVals[offset] += newVals[j];
// Teuchos::ArrayView overload.  The method-name lines are not visible in this
// excerpt; presumably sumIntoLocalValues -- TODO confirm.
// Checks that the index and value arrays have equal length, then forwards raw
// pointers and the `atomic` flag to this->sumIntoLocalValues.  Returns the
// invalid-LO sentinel on a length mismatch.
3059 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3063 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3064 const Teuchos::ArrayView<const Scalar>& values,
3068 const LO numInputEnt =
static_cast<LO
>(indices.size());
3069 if (static_cast<LO>(values.size()) != numInputEnt) {
3070 return Teuchos::OrdinalTraits<LO>::invalid();
3072 const LO*
const inputInds = indices.getRawPtr();
3073 const scalar_type*
const inputVals = values.getRawPtr();
3074 return this->sumIntoLocalValues(localRow, numInputEnt,
3075 inputVals, inputInds, atomic);
// Kokkos::View overload.  The method-name lines are not visible in this
// excerpt; presumably sumIntoLocalValues -- TODO confirm.
// Checks that both views have the same extent(0), reinterprets the
// impl_scalar_type data as scalar_type, and forwards raw pointers and the
// `atomic` flag to this->sumIntoLocalValues.  Returns the invalid-LO sentinel
// on a length mismatch.
3078 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3084 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3085 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3089 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
3090 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3091 return Teuchos::OrdinalTraits<LO>::invalid();
// The declaration that this cast initializes (used below as inVals) is not
// visible in this excerpt.
3094 reinterpret_cast<const scalar_type*
>(inputVals.data());
3095 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3096 inputInds.data(), atomic);
// Raw-pointer overload.  The method-name lines are not visible in this
// excerpt; presumably sumIntoLocalValues -- TODO confirm.
// Visible behavior:
//   - returns the invalid-LO sentinel if fill is not active or the matrix has
//     no static graph;
//   - returns 0 if rowInfo.localRow is the invalid size_t sentinel;
//   - otherwise gets a nonconst host view of the row's values and delegates to
//     sumIntoLocalValuesImpl, passing the `atomic` flag through.
3099 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3103 const LocalOrdinal numEnt,
3104 const Scalar vals[],
3105 const LocalOrdinal cols[],
3109 typedef LocalOrdinal LO;
3111 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3113 return Teuchos::OrdinalTraits<LO>::invalid ();
// The definitions of `graph` and `rowInfo` are not visible in this excerpt.
3118 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3121 return static_cast<LO
> (0);
3123 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// Input values are reinterpreted from Scalar to IST before being handed to
// the implementation routine.
3124 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3125 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3126 cols, inputVals, numEnt, atomic);
// Returns a const host view of one row's values.  The method-name lines are
// not visible in this excerpt; presumably getValuesViewHost -- TODO confirm.
// A default-constructed (empty) view is returned when the row has no
// allocation or the unpacked value storage has extent 0; otherwise a host
// subview starting at rowinfo.offset1D is returned (remaining subview
// arguments are not visible here).
3129 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3131 values_dualv_type::t_host::const_type
3135 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3136 return typename values_dualv_type::t_host::const_type ();
3138 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a nonconst host view of one row's values.  The method-name lines
// are not visible in this excerpt; presumably getValuesViewHostNonConst --
// TODO confirm.
// A default-constructed (empty) view is returned when the row has no
// allocation or the unpacked value storage has extent 0; otherwise a host
// subview starting at rowinfo.offset1D is returned (remaining subview
// arguments are not visible here).
3143 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3145 values_dualv_type::t_host
3149 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3150 return typename values_dualv_type::t_host ();
3152 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a const device view of one row's values.  The method-name lines are
// not visible in this excerpt; presumably getValuesViewDevice -- TODO confirm.
// A default-constructed (empty) view is returned when the row has no
// allocation or the unpacked value storage has extent 0; otherwise a device
// subview starting at rowinfo.offset1D is returned (remaining subview
// arguments are not visible here).
3157 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3159 values_dualv_type::t_dev::const_type
3163 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3164 return typename values_dualv_type::t_dev::const_type ();
3166 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Returns a nonconst device view of one row's values.  The method-name lines
// are not visible in this excerpt; presumably getValuesViewDeviceNonConst --
// TODO confirm.
// A default-constructed (empty) view is returned when the row has no
// allocation or the unpacked value storage has extent 0; otherwise a device
// subview starting at rowinfo.offset1D is returned (remaining subview
// arguments are not visible here).
3171 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3173 values_dualv_type::t_dev
3177 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3178 return typename values_dualv_type::t_dev ();
3180 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// getLocalRowCopy: copies one local row's column indices and values into the
// caller's host views and reports the entry count through numEntries.
// Throws std::runtime_error if the matrix has no column Map, or if either
// output view is smaller than the number of entries in the row.
// numEntries is set before the copy; the copy itself happens only when
// rowinfo.localRow is valid.
3186 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3190 nonconst_local_inds_host_view_type &indices,
3191 nonconst_values_host_view_type &values,
3192 size_t& numEntries)
const
3194 using Teuchos::ArrayView;
3195 using Teuchos::av_reinterpret_cast;
3196 const char tfecfFuncName[] =
"getLocalRowCopy: ";
3198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3199 (! this->hasColMap (), std::runtime_error,
3200 "The matrix does not have a column Map yet. This means we don't have "
3201 "local indices for columns yet, so it doesn't make sense to call this "
3202 "method. If the matrix doesn't have a column Map yet, you should call "
3203 "fillComplete on it first.");
3205 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3206 const size_t theNumEntries = rowinfo.numEntries;
// Both output views must be able to hold the whole row.
3207 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3208 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3209 static_cast<size_t> (values.size ()) < theNumEntries,
3210 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3211 theNumEntries <<
" entry/ies, but indices.size() = " <<
3212 indices.size () <<
" and values.size() = " << values.size () <<
".");
3213 numEntries = theNumEntries;
3215 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed storage: indices are copied as stored.
3216 if (staticGraph_->isLocallyIndexed ()) {
3217 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3218 auto curVals = getValuesViewHost(rowinfo);
3220 for (
size_t j = 0; j < theNumEntries; ++j) {
3221 values[j] = curVals[j];
3222 indices[j] = curLclInds(j);
// Globally indexed storage: the statement that fills indices[j] is not
// visible in this excerpt (presumably a colMap conversion -- TODO confirm).
3225 else if (staticGraph_->isGloballyIndexed ()) {
3227 const map_type& colMap = * (staticGraph_->colMap_);
3228 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3229 auto curVals = getValuesViewHost(rowinfo);
3231 for (
size_t j = 0; j < theNumEntries; ++j) {
3232 values[j] = curVals[j];
// getGlobalRowCopy: copies one global row's column indices and values into the
// caller's host views and reports the entry count through numEntries.
// Throws std::runtime_error if either output view is smaller than the number
// of entries in the row.  numEntries is set before the copy; the copy itself
// happens only when rowinfo.localRow is valid.
3240 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3244 nonconst_global_inds_host_view_type &indices,
3245 nonconst_values_host_view_type &values,
3246 size_t& numEntries)
const
3248 using Teuchos::ArrayView;
3249 using Teuchos::av_reinterpret_cast;
3250 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
// The declaration of `rowinfo` that this call initializes is not visible in
// this excerpt.
3253 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3254 const size_t theNumEntries = rowinfo.numEntries;
// Both output views must be able to hold the whole row.
3255 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3256 static_cast<size_t> (indices.size ()) < theNumEntries ||
3257 static_cast<size_t> (values.size ()) < theNumEntries,
3258 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3259 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3260 indices.size () <<
" and values.size() = " << values.size () <<
".");
3261 numEntries = theNumEntries;
3263 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed storage: the statement that fills indices[j] is not visible
// in this excerpt (presumably a colMap conversion -- TODO confirm).
3264 if (staticGraph_->isLocallyIndexed ()) {
3265 const map_type& colMap = * (staticGraph_->colMap_);
3266 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3267 auto curVals = getValuesViewHost(rowinfo);
3269 for (
size_t j = 0; j < theNumEntries; ++j) {
3270 values[j] = curVals[j];
// Globally indexed storage: indices are copied as stored.
3274 else if (staticGraph_->isGloballyIndexed ()) {
3275 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3276 auto curVals = getValuesViewHost(rowinfo);
3278 for (
size_t j = 0; j < theNumEntries; ++j) {
3279 values[j] = curVals[j];
3280 indices[j] = curGblInds(j);
// getLocalRowView: sets `indices` and `values` to host subviews of one local
// row's column indices and values (no copy is made in the visible code).
// Throws std::runtime_error if the matrix is globally indexed.
// If the row is invalid or has no entries, both outputs are set to
// default-constructed (empty) views.  In HAVE_TPETRA_DEBUG builds the result
// sizes are cross-checked and std::logic_error is thrown on inconsistency.
3287 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3291 local_inds_host_view_type &indices,
3292 values_host_view_type &values)
const
3294 const char tfecfFuncName[] =
"getLocalRowView: ";
3296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3297 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3298 "its indices as global indices, so you cannot get a view with local "
3299 "column indices. If the matrix has a column Map, you may call "
3300 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3301 "a view with global column indices by calling getGlobalRowCopy().");
3303 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
3304 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3305 rowInfo.numEntries > 0) {
// The remaining subview arguments are not visible in this excerpt.
3306 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3310 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Fallback: empty views.
3317 indices = local_inds_host_view_type();
3318 values = values_host_view_type();
3321 #ifdef HAVE_TPETRA_DEBUG
3322 const char suffix[] =
". This should never happen. Please report this "
3323 "bug to the Tpetra developers.";
// Debug check: indices and values must have the same length.
3324 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3325 (static_cast<size_t> (indices.size ()) !=
3326 static_cast<size_t> (values.size ()), std::logic_error,
3327 "At the end of this method, for local row " << localRow <<
", "
3328 "indices.size() = " << indices.size () <<
" != values.size () = "
3329 << values.size () << suffix);
// Debug check: the view length must equal rowInfo.numEntries.
3330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3331 (static_cast<size_t> (indices.size ()) !=
3332 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3333 "At the end of this method, for local row " << localRow <<
", "
3334 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3335 << rowInfo.numEntries << suffix);
// Debug check: rowInfo must agree with getNumEntriesInLocalRow.
3336 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3337 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3338 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3339 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3340 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3341 expectedNumEntries << suffix);
3342 #endif // HAVE_TPETRA_DEBUG
// getGlobalRowView: sets `indices` and `values` to host subviews of one global
// row's column indices and values (no copy is made in the visible code).
// Throws std::runtime_error if the matrix is locally indexed.
// If the row is invalid or has no entries, both outputs are set to
// default-constructed (empty) views.  In HAVE_TPETRA_DEBUG builds the result
// sizes are cross-checked and std::logic_error is thrown on inconsistency.
3346 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3350 global_inds_host_view_type &indices,
3351 values_host_view_type &values)
const
3353 const char tfecfFuncName[] =
"getGlobalRowView: ";
3355 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3356 isLocallyIndexed (), std::runtime_error,
3357 "The matrix is locally indexed, so we cannot return a view of the row "
3358 "with global column indices. Use getGlobalRowCopy() instead.");
// The declaration of `rowInfo` that this call initializes is not visible in
// this excerpt.
3363 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3364 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3365 rowInfo.numEntries > 0) {
// The remaining subview arguments are not visible in this excerpt.
3366 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3369 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Fallback: empty views.
3374 indices = global_inds_host_view_type();
3375 values = values_host_view_type();
3378 #ifdef HAVE_TPETRA_DEBUG
3379 const char suffix[] =
". This should never happen. Please report this "
3380 "bug to the Tpetra developers.";
// Debug check: indices and values must have the same length.
3381 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3382 (static_cast<size_t> (indices.size ()) !=
3383 static_cast<size_t> (values.size ()), std::logic_error,
3384 "At the end of this method, for global row " << globalRow <<
", "
3385 "indices.size() = " << indices.size () <<
" != values.size () = "
3386 << values.size () << suffix);
// Debug check: the view length must equal rowInfo.numEntries.
3387 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3388 (static_cast<size_t> (indices.size ()) !=
3389 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3390 "At the end of this method, for global row " << globalRow <<
", "
3391 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3392 << rowInfo.numEntries << suffix);
// Debug check: rowInfo must agree with getNumEntriesInGlobalRow.
3393 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3394 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3395 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3396 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3397 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3398 " " << expectedNumEntries << suffix);
3399 #endif // HAVE_TPETRA_DEBUG
// Scales the packed matrix values in place by theAlpha using KokkosBlas::scal.
// The method-name lines are not visible in this excerpt; presumably scale --
// TODO confirm.
3403 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3410 const size_t nlrs = staticGraph_->getLocalNumRows ();
3411 const size_t numEntries = staticGraph_->getLocalNumEntries ();
// Special case: indices not allocated, no local rows, or no local entries.
// The body of this branch is not visible in this excerpt.
3412 if (! staticGraph_->indicesAreAllocated () ||
3413 nlrs == 0 || numEntries == 0) {
// The same view is both output and input: vals = theAlpha * vals.
3418 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3419 KokkosBlas::scal(vals, theAlpha, vals);
// Name taken from the fence label below ("CrsMatrix::setAllToScalar"); the
// method-name lines and the statements that write the values are not visible
// in this excerpt.
3424 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3435 const size_t numEntries = staticGraph_->getLocalNumEntries();
// Special case: indices not allocated or no local entries.  The body of this
// branch is not visible in this excerpt.
3436 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
// Global Kokkos fence, labeled for profiling/debugging output.
3444 Kokkos::fence(
"CrsMatrix::setAllToScalar");
// setAllValues (device-view overload): installs row pointers, column indices
// and values supplied by the caller.
// Throws std::invalid_argument if columnIndices and values differ in size,
// and std::runtime_error if myGraph_ is null or myGraph_->setAllIndices
// throws.  After the graph is updated, the resulting local graph is checked
// against the inputs (std::logic_error on mismatch), the unpacked values are
// set equal to the packed values, storage is marked STORAGE_1D_PACKED, and
// checkInternalState () is called.
3448 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3451 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3452 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3453 const typename local_matrix_device_type::values_type& values)
3456 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3457 const char tfecfFuncName[] =
"setAllValues: ";
3458 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3459 (columnIndices.size () != values.size (), std::invalid_argument,
3460 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3461 " = " << values.size () <<
".");
3462 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3463 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
3466 myGraph_->setAllIndices (rowPointers, columnIndices);
// Any std::exception from setAllIndices is rethrown as std::runtime_error
// with the original message appended.
3468 catch (std::exception &e) {
3469 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3470 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3471 "exception: " << e.what ());
// Sanity check: the graph's local structure must match the inputs' extents.
3478 auto lclGraph = myGraph_->getLocalGraphDevice ();
3479 const size_t numEnt = lclGraph.entries.extent (0);
3480 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3481 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3482 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3483 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3484 "local graph. Please report this bug to the Tpetra developers.");
// The assignment to valuesPacked_wdv is not visible in this excerpt.
3487 valuesUnpacked_wdv = valuesPacked_wdv;
3491 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3493 checkInternalState ();
// setAllValues overload taking a local device matrix (per the profiling
// label, a KokkosSparse::CrsMatrix).  The signature lines are not visible in
// this excerpt.  Extracts the row map, column entries and values from
// localDeviceMatrix and forwards them to setAllValues (rows, columns, values).
3496 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3502 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3504 auto graph = localDeviceMatrix.graph;
3507 auto rows = graph.row_map;
3508 auto columns = graph.entries;
3509 auto values = localDeviceMatrix.values;
3511 setAllValues(rows,columns,values);
// setAllValues (ArrayRCP overload): builds Kokkos views from the caller's
// arrays and forwards them to setAllValues (ptrNative, indIn, valIn).
// ptrNative is a newly allocated row-offset view of length ptr.size ();
// ptrSizeT is an unmanaged view over ptr's existing storage.  The statement
// that fills ptrNative from ptrSizeT is not visible in this excerpt.
// Indices and values are converted with getKokkosViewDeepCopy.
3514 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3518 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3519 const Teuchos::ArrayRCP<Scalar>& val)
3521 using Kokkos::Compat::getKokkosViewDeepCopy;
3522 using Teuchos::ArrayRCP;
3523 using Teuchos::av_reinterpret_cast;
3526 typedef typename local_graph_device_type::row_map_type row_map_type;
3528 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3534 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3535 Kokkos::View<
const size_t*,
3536 typename row_map_type::array_layout,
3538 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
// Internal consistency check: both offset views must have the same length.
3541 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3542 (ptrNative.extent (0) != ptrSizeT.extent (0),
3543 std::logic_error,
"ptrNative.extent(0) = " <<
3544 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3545 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3546 "Tpetra developers.");
// Values are reinterpreted from Scalar to IST before conversion to a view.
3548 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3549 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3550 this->setAllValues (ptrNative, indIn, valIn);
// getLocalDiagOffsets: fills `offsets` with per-row diagonal offsets obtained
// from staticGraph_->getLocalDiagOffsets.
// Throws std::runtime_error if the matrix has no graph.  `offsets` is grown to
// the local row count if it is smaller.  When memory_space is HostSpace the
// graph writes directly into an unmanaged view over `offsets`; otherwise it
// writes into a temporary device view (the copy back into `offsets` is not
// visible in this excerpt).
3553 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3558 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3560 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
3567 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
// Grow (never shrink) the output array to hold one offset per local row.
3568 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3569 offsets.resize (lclNumRows);
3575 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3580 Kokkos::MemoryUnmanaged> output_type;
3581 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3582 staticGraph_->getLocalDiagOffsets (offsetsOut);
3585 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3586 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3587 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3588 Kokkos::MemoryUnmanaged> output_type;
3589 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
// getLocalDiagCopy (1-arg): copies the matrix diagonal into the Vector `diag`.
// Throws std::runtime_error if the matrix has no graph or no column Map.
// In HAVE_TPETRA_DEBUG builds it also requires diag's Map to be compatible
// with the row Map.  Only the fill-complete path is partly visible in this
// excerpt; the handling of a null row Map/communicator and of a matrix that
// is not fill complete is not shown.
3595 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3600 using Teuchos::ArrayRCP;
3601 using Teuchos::ArrayView;
3602 using Teuchos::av_reinterpret_cast;
3603 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3607 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3608 staticGraph_.is_null (), std::runtime_error,
3609 "This method requires that the matrix have a graph.");
3610 auto rowMapPtr = this->getRowMap ();
// Special case: no row Map or no communicator.  The body of this branch is
// not visible in this excerpt.
3611 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3617 auto colMapPtr = this->getColMap ();
3618 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3619 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3620 "This method requires that the matrix have a column Map.");
3621 const map_type& rowMap = * rowMapPtr;
3622 const map_type& colMap = * colMapPtr;
3623 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3625 #ifdef HAVE_TPETRA_DEBUG
3628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3629 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3630 "The input Vector's Map must be compatible with the CrsMatrix's row "
3631 "Map. You may check this by using Map's isCompatible method: "
3632 "diag.getMap ()->isCompatible (A.getRowMap ());");
3633 #endif // HAVE_TPETRA_DEBUG
3635 if (this->isFillComplete ()) {
// 1-D view of column 0 of diag's local data, restricted to the local rows.
// The definition of D_lcl is not visible in this excerpt.
3638 const auto D_lcl_1d =
3639 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3641 const auto lclRowMap = rowMap.getLocalMap ();
3646 getLocalMatrixDevice ());
// getLocalDiagCopy (precomputed-offsets overload, Kokkos::View offsets):
// copies the diagonal into `diag` via KokkosSparse::getDiagCopy, using the
// caller's `offsets` and the local device matrix.
// In HAVE_TPETRA_DEBUG builds, throws std::runtime_error if diag's Map is not
// compatible with the row Map.
3654 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3659 Kokkos::MemoryUnmanaged>& offsets)
const
3661 typedef LocalOrdinal LO;
3663 #ifdef HAVE_TPETRA_DEBUG
3664 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3665 const map_type& rowMap = * (this->getRowMap ());
3668 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3669 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3670 "The input Vector's Map must be compatible with (in the sense of Map::"
3671 "isCompatible) the CrsMatrix's row Map.");
3672 #endif // HAVE_TPETRA_DEBUG
3682 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// Column 0 of diag's local data, restricted to the local rows.  The
// definitions of D_lcl and of the D_lcl_1d variable this initializes are not
// visible in this excerpt.
3685 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3687 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3688 getLocalMatrixDevice ());
// getLocalDiagCopy (precomputed-offsets overload, Teuchos::ArrayView offsets):
// fills diag's host data row by row on the default host execution space.
// Each row is first set to STS::zero (); if the row's offset is not the
// invalid size_t sentinel, the row is then set to the packed value at
// (row start + offset).
// In HAVE_TPETRA_DEBUG builds, throws std::runtime_error if diag's Map is not
// compatible with the row Map.
3691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3695 const Teuchos::ArrayView<const size_t>& offsets)
const
3697 using LO = LocalOrdinal;
3698 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
3701 #ifdef HAVE_TPETRA_DEBUG
3702 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3703 const map_type& rowMap = * (this->getRowMap ());
3706 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3707 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3708 "The input Vector's Map must be compatible with (in the sense of Map::"
3709 "isCompatible) the CrsMatrix's row Map.");
3710 #endif // HAVE_TPETRA_DEBUG
// The definition of lclVecHost is not visible in this excerpt.
3722 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
// Unmanaged host view over the caller's offsets array (no copy).
3724 using host_offsets_view_type =
3725 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3726 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3727 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3729 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3730 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3731 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3733 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3734 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
// The lambda captures INV and h_offsets by value and everything else by
// reference.
3735 Kokkos::parallel_for
3736 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3737 range_type (0, myNumRows),
3738 [&, INV, h_offsets] (
const LO lclRow) {
3739 lclVecHost1d(lclRow) = STS::zero ();
3740 if (h_offsets[lclRow] != INV) {
3741 auto curRowOffset = rowPtrsPackedHost (lclRow);
3742 lclVecHost1d(lclRow) =
3743 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
// leftScale: scales the matrix using the entries of Vector x.
// x's Map must be the same as either the range Map or the row Map of the
// matrix; otherwise std::invalid_argument is thrown.  When x is on the range
// Map and the graph has an exporter, a temporary Vector on the row Map is
// filled from x via doImport with REPLACE.  The scaling call itself is only
// partly visible in this excerpt; a matrix that is not fill complete causes
// std::runtime_error.
3750 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3755 using ::Tpetra::Details::ProfilingRegion;
3756 using Teuchos::ArrayRCP;
3757 using Teuchos::ArrayView;
3758 using Teuchos::null;
3761 using Teuchos::rcpFromRef;
3763 const char tfecfFuncName[] =
"leftScale: ";
3765 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
// xp ends up pointing either at x itself or at a redistributed temporary.
3767 RCP<const vec_type> xp;
3768 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3771 auto exporter = this->getCrsGraphRef ().getExporter ();
3772 if (exporter.get () !=
nullptr) {
3773 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3774 tempVec->doImport (x, *exporter,
REPLACE);
// No exporter: x is used directly, without copying.
3778 xp = rcpFromRef (x);
3781 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3782 xp = rcpFromRef (x);
3785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3786 (
true, std::invalid_argument,
"x's Map must be the same as "
3787 "either the row Map or the range Map of the CrsMatrix.");
3790 if (this->isFillComplete()) {
// Column 0 of xp's device data supplies the scaling factors.
3791 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3792 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3795 x_lcl_1d,
false,
false);
3799 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3800 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
// rightScale: scales the matrix using the entries of the input vector x.
// x must live on either the domain Map or the column Map of the matrix; the
// matrix must be fill complete.
// NOTE(review): the signature, several braces and the actual scaling call
// are not visible in this excerpt.
3805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3810 using ::Tpetra::Details::ProfilingRegion;
3811 using Teuchos::ArrayRCP;
3812 using Teuchos::ArrayView;
3813 using Teuchos::null;
3816 using Teuchos::rcpFromRef;
3818 const char tfecfFuncName[] =
"rightScale: ";
3820 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
// xp will refer to a vector laid out so that its local view can be used
// directly in the scaling step below.
3822 RCP<const vec_type> xp;
3823 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
// x is on the domain Map. If the graph has an Import object, x is imported
// into a temporary column-Map vector.
3826 auto importer = this->getCrsGraphRef ().getImporter ();
3827 if (importer.get () !=
nullptr) {
3828 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3829 tempVec->doImport (x, *importer,
REPLACE);
// No Import object: x is used as is (non-owning reference).
3833 xp = rcpFromRef (x);
3836 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
// x already lives on the column Map.
3837 xp = rcpFromRef (x);
// Neither Map matches: unconditional std::runtime_error.
// NOTE(review): leftScale throws std::invalid_argument for the analogous
// condition; confirm whether the difference is intended.
3839 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3840 (
true, std::runtime_error,
"x's Map must be the same as "
3841 "either the domain Map or the column Map of the CrsMatrix.");
3844 if (this->isFillComplete()) {
// Column 0 of the read-only device view of xp feeds the scaling call.
3845 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3846 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3849 x_lcl_1d,
false,
false);
// Not fill complete: unconditional std::runtime_error.
3853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3854 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
// Returns the square root of the sum, over all processes in the
// communicator, of the squared magnitudes of the locally stored values
// (presumably getFrobeniusNorm; the function name is not visible in this
// excerpt, nor are the declarations of mySum and totalSum).
3859 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3864 using Teuchos::ArrayView;
3865 using Teuchos::outArg;
3866 using Teuchos::REDUCE_SUM;
3867 using Teuchos::reduceAll;
// Local accumulation is skipped entirely when this process has no entries.
3875 if (getLocalNumEntries() > 0) {
3876 if (isStorageOptimized ()) {
// Optimized storage: walk the packed values array in one flat loop.
3879 const size_t numEntries = getLocalNumEntries ();
3880 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3881 for (
size_t k = 0; k < numEntries; ++k) {
3882 auto val = values[k];
3886 const mag_type val_abs = STS::abs (val);
3887 mySum += val_abs * val_abs;
// Otherwise: visit each local row through its RowInfo and host values view.
3891 const LocalOrdinal numRows =
3892 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3893 for (LocalOrdinal r = 0; r < numRows; ++r) {
3894 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3895 const size_t numEntries = rowInfo.numEntries;
3896 auto A_r = this->getValuesViewHost(rowInfo);
3897 for (
size_t k = 0; k < numEntries; ++k) {
3899 const mag_type val_abs = STS::abs (val);
3900 mySum += val_abs * val_abs;
// Sum the per-process partial sums, then take the square root.
3906 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3907 mySum, outArg (totalSum));
3908 return STM::sqrt (totalSum);
// replaceColMap: forwards newColMap to the graph owned by this matrix.
// Throws std::runtime_error when myGraph_ is null, which the error message
// describes as the matrix having a const graph.
// NOTE(review): the signature line is not visible in this excerpt.
3911 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3916 const char tfecfFuncName[] =
"replaceColMap: ";
3920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3921 myGraph_.is_null (), std::runtime_error,
3922 "This method does not work if the matrix has a const graph. The whole "
3923 "idea of a const graph is that you are not allowed to change it, but "
3924 "this method necessarily must modify the graph, since the graph owns "
3925 "the matrix's column Map.");
3926 myGraph_->replaceColMap (newColMap);
// reindexColumns: operates on the graph passed in, or on the graph owned by
// the matrix when the input graph pointer is null, and optionally sorts each
// row (column indices together with their values) afterwards.
// NOTE(review): the first parameter (graph), the call that performs the
// reindexing, and several braces are not visible in this excerpt.
3929 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3933 const Teuchos::RCP<const map_type>& newColMap,
3934 const Teuchos::RCP<const import_type>& newImport,
3935 const bool sortEachRow)
3937 const char tfecfFuncName[] =
"reindexColumns: ";
// A null input graph is only acceptable when the matrix owns its graph.
3938 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3939 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3940 "The input graph is null, but the matrix does not own its graph.");
3942 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
// The graph is not asked to sort on its own; sorting, when requested, is
// done below so that values are permuted along with the column indices.
3943 const bool sortGraph =
false;
3947 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3948 const LocalOrdinal lclNumRows =
3949 static_cast<LocalOrdinal
> (theGraph.getLocalNumRows ());
3951 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3953 const RowInfo rowInfo = theGraph.getRowInfo (row);
3954 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
3955 auto vals = this->getValuesViewHostNonConst (rowInfo);
// Sort the first numEntries column indices of the row.
3957 sort2 (lclColInds.data (),
3958 lclColInds.data () + rowInfo.numEntries,
// Record on the graph that its indices are now sorted.
3961 theGraph.indicesAreSorted_ =
true;
// replaceDomainMap: forwards newDomainMap to the graph owned by this matrix.
// Throws std::runtime_error when myGraph_ is null, which the error message
// describes as the matrix having a const graph.
// NOTE(review): the signature line is not visible in this excerpt.
3965 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3970 const char tfecfFuncName[] =
"replaceDomainMap: ";
3971 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3972 myGraph_.is_null (), std::runtime_error,
3973 "This method does not work if the matrix has a const graph. The whole "
3974 "idea of a const graph is that you are not allowed to change it, but this"
3975 " method necessarily must modify the graph, since the graph owns the "
3976 "matrix's domain Map and Import objects.");
3977 myGraph_->replaceDomainMap (newDomainMap);
// replaceDomainMapAndImporter: forwards newDomainMap and newImporter to the
// graph owned by this matrix. Throws std::runtime_error when myGraph_ is
// null, which the error message describes as the matrix having a const graph.
// NOTE(review): the first parameter line is not visible in this excerpt.
3980 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3984 Teuchos::RCP<const import_type>& newImporter)
3986 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3987 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3988 myGraph_.is_null (), std::runtime_error,
3989 "This method does not work if the matrix has a const graph. The whole "
3990 "idea of a const graph is that you are not allowed to change it, but this"
3991 " method necessarily must modify the graph, since the graph owns the "
3992 "matrix's domain Map and Import objects.");
3993 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
// replaceRangeMap: forwards newRangeMap to the graph owned by this matrix.
// Throws std::runtime_error when myGraph_ is null, which the error message
// describes as the matrix having a const graph.
// The error text names the range Map and Export objects, since that is
// what this method replaces (it previously repeated the domain Map wording).
// NOTE(review): the signature line is not visible in this excerpt.
3996 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4001 const char tfecfFuncName[] =
"replaceRangeMap: ";
4002 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4003 myGraph_.is_null (), std::runtime_error,
4004 "This method does not work if the matrix has a const graph. The whole "
4005 "idea of a const graph is that you are not allowed to change it, but this"
4006 " method necessarily must modify the graph, since the graph owns the "
4007 "matrix's range Map and Export objects.");
4008 myGraph_->replaceRangeMap (newRangeMap);
// replaceRangeMapAndExporter: forwards newRangeMap and newExporter to the
// graph owned by this matrix. Throws std::runtime_error when myGraph_ is
// null, which the error message describes as the matrix having a const graph.
// The error text names the range Map and Export objects, since that is
// what this method replaces (it previously repeated the domain Map wording).
// NOTE(review): the first parameter line is not visible in this excerpt.
4011 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4015 Teuchos::RCP<const export_type>& newExporter)
4017 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
4018 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4019 myGraph_.is_null (), std::runtime_error,
4020 "This method does not work if the matrix has a const graph. The whole "
4021 "idea of a const graph is that you are not allowed to change it, but this"
4022 " method necessarily must modify the graph, since the graph owns the "
4023 "matrix's range Map and Export objects.");
4024 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
// Appends the given (column index, value) pairs to the entry of nonlocals_
// keyed by globalRow, creating that entry if needed (operator[] on
// nonlocals_). Presumably insertNonownedGlobalValues; the function name and
// the globalRow parameter line are not visible in this excerpt.
4027 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4031 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4032 const Teuchos::ArrayView<const Scalar>& values)
4034 using Teuchos::Array;
4035 typedef GlobalOrdinal GO;
4036 typedef typename Array<GO>::size_type size_type;
4038 const size_type numToInsert = indices.size ();
4041 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4042 Array<GO>& curRowInds = curRow.first;
4043 Array<Scalar>& curRowVals = curRow.second;
// Reserve once up front so the push_back loop does not reallocate.
4044 const size_type newCapacity = curRowInds.size () + numToInsert;
4045 curRowInds.reserve (newCapacity);
4046 curRowVals.reserve (newCapacity);
// Entries are appended as given; duplicates are not merged here.
4047 for (size_type k = 0; k < numToInsert; ++k) {
4048 curRowInds.push_back (indices[k]);
4049 curRowVals.push_back (values[k]);
// globalAssemble: communicates the entries buffered in nonlocals_ to the
// processes whose row Map contains those rows, adding them into this matrix.
// Visible steps: (1) return path when no process has buffered rows,
// (2) build a Map and a temporary matrix holding the buffered rows,
// (3) Export into this matrix directly if the row Map is one-to-one,
// otherwise Export into a one-to-one matrix and Import from there,
// (4) release nonlocals_, (5) collectively verify every buffered row was
// found on some process.
// NOTE(review): many lines (signature, braces, if (verbose) guards, the
// creation of oneToOneRowMap) are not visible in this excerpt.
4053 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4060 using Teuchos::Comm;
4061 using Teuchos::outArg;
4064 using Teuchos::REDUCE_MAX;
4065 using Teuchos::REDUCE_MIN;
4066 using Teuchos::reduceAll;
4070 typedef GlobalOrdinal GO;
4071 typedef typename Teuchos::Array<GO>::size_type size_type;
4072 const char tfecfFuncName[] =
"globalAssemble: ";
4073 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
// Optional diagnostic output to std::cerr, controlled by Behavior::verbose.
4075 const bool verbose = Behavior::verbose(
"CrsMatrix");
4076 std::unique_ptr<std::string> prefix;
4078 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4079 std::ostringstream os;
4080 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4082 std::cerr << os.str();
4084 RCP<const Comm<int> > comm = getComm ();
4086 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4087 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4088 "you may call this method.");
4090 const size_t myNumNonlocalRows = nonlocals_.size ();
// Collective check (max-reduction of a 0/1 flag): does any process have
// buffered rows?
4097 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4098 int someoneHasNonlocalRows = 0;
4099 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4100 outArg (someoneHasNonlocalRows));
4101 if (someoneHasNonlocalRows == 0) {
// Build the row Map of the temporary matrix and count entries per row.
4115 RCP<const map_type> nonlocalRowMap;
4116 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4118 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4119 size_type curPos = 0;
4120 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4121 ++mapIter, ++curPos) {
4122 myNonlocalGblRows[curPos] = mapIter->first;
4125 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4126 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
// Sort each buffered row by column index, then merge duplicates and trim
// both arrays to the merged length.
4133 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4134 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4135 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4136 merge2 (gblCols_newEnd, vals_newEnd,
4137 gblCols.begin (), gblCols.end (),
4138 vals.begin (), vals.end ());
4139 gblCols.erase (gblCols_newEnd, gblCols.end ());
4140 vals.erase (vals_newEnd, vals.end ());
4141 numEntPerNonlocalRow[curPos] = gblCols.size ();
// The index base of the new Map is the global minimum buffered row index.
// A process with no buffered rows contributes the maximum GO value.
4152 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4154 auto iter = std::min_element (myNonlocalGblRows.begin (),
4155 myNonlocalGblRows.end ());
4156 if (iter != myNonlocalGblRows.end ()) {
4157 myMinNonlocalGblRow = *iter;
4160 GO gblMinNonlocalGblRow = 0;
4161 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4162 outArg (gblMinNonlocalGblRow));
4163 const GO indexBase = gblMinNonlocalGblRow;
4164 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4165 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4174 std::ostringstream os;
4175 os << *prefix <<
"Create nonlocal matrix" << endl;
4176 std::cerr << os.str();
// Temporary matrix holding exactly the buffered rows.
4178 RCP<crs_matrix_type> nonlocalMatrix =
4179 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4181 size_type curPos = 0;
4182 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4183 ++mapIter, ++curPos) {
4184 const GO gblRow = mapIter->first;
4186 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4187 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4189 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4201 auto origRowMap = this->getRowMap ();
4202 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
// Cleared below on a condition that is not visible in this excerpt; the
// final collective check uses it to detect rows owned by no process.
4204 int isLocallyComplete = 1;
4206 if (origRowMapIsOneToOne) {
4208 std::ostringstream os;
4209 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4210 std::cerr << os.str();
// One-to-one row Map: Export straight into this matrix, summing values.
4212 export_type exportToOrig (nonlocalRowMap, origRowMap);
4214 isLocallyComplete = 0;
4217 std::ostringstream os;
4218 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4219 std::cerr << os.str();
4221 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4226 std::ostringstream os;
4227 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4228 std::cerr << os.str();
// Row Map is not one-to-one: go through an intermediate one-to-one matrix.
4235 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4237 isLocallyComplete = 0;
4245 std::ostringstream os;
4246 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4248 std::cerr << os.str();
4250 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4252 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4258 std::ostringstream os;
4259 os << *prefix <<
"Free nonlocalMatrix" << endl;
4260 std::cerr << os.str();
// Release the temporary matrix before the Import below.
4262 nonlocalMatrix = Teuchos::null;
4266 std::ostringstream os;
4267 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4268 std::cerr << os.str();
4270 import_type importToOrig (oneToOneRowMap, origRowMap);
4271 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4279 std::ostringstream os;
4280 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4281 std::cerr << os.str();
// Swap with an empty container so the storage of nonlocals_ is released.
4283 decltype (nonlocals_) newNonlocals;
4284 std::swap (nonlocals_, newNonlocals);
// Min-reduction: the result is 1 only if every process stayed complete.
4293 int isGloballyComplete = 0;
4294 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4295 outArg (isGloballyComplete));
4296 TEUCHOS_TEST_FOR_EXCEPTION
4297 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4298 "you called insertGlobalValues with a global row index which is not in "
4299 "the matrix's row Map on any process in its communicator.");
// Puts the matrix back into a modifiable state: resumes fill on the owned
// graph (when the graph is not static), drops the cached apply helper on
// sufficiently new KokkosKernels, and clears fillComplete_.
// Presumably resumeFill; the signature line is not visible in this excerpt.
4302 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4307 if (! isStaticGraph ()) {
4308 myGraph_->resumeFill (params);
// The helper is reset so it is rebuilt by the next localApply call.
4310 #if KOKKOSKERNELS_VERSION >= 40299
4312 applyHelper.reset();
4314 fillComplete_ =
false;
// Returns whatever the graph reports from haveGlobalConstants().
// Presumably CrsMatrix::haveGlobalConstants; the signature line is not
// visible in this excerpt.
4317 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4321 return getCrsGraphRef ().haveGlobalConstants ();
// fillComplete(params): convenience overload that uses the row Map of the
// graph as both the range Map and the domain Map, then delegates to the
// three-argument fillComplete.
// NOTE(review): the signature and the declaration of the local named graph
// are not visible in this excerpt.
4324 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4329 const char tfecfFuncName[] =
"fillComplete(params): ";
// A null graph at this point is treated as an internal logic error.
4331 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4332 (this->getCrsGraph ().is_null (), std::logic_error,
4333 "getCrsGraph() returns null. This should not happen at this point. "
4334 "Please report this bug to the Tpetra developers.");
4344 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4345 Teuchos::RCP<const map_type> domainMap = rangeMap;
4346 this->fillComplete (domainMap, rangeMap, params);
// fillComplete(domainMap, rangeMap, params): finishes matrix assembly.
// Visible steps: validate the fill state, read options from params, allocate
// values if the graph has not allocated indices, run globalAssemble when
// needed, then either (static graph) verify the Maps in debug builds and fill
// the local matrix, or (owned graph) set the Maps, build the column Map if
// absent, convert indices to local, sort and merge, build Import/Export,
// fill the local graph and matrix, and compute graph constants. Finally mark
// the matrix fill complete and check internal state.
// NOTE(review): the first parameter line, many braces and else keywords are
// not visible in this excerpt, so branch boundaries are partly inferred.
4350 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4354 const Teuchos::RCP<const map_type>& rangeMap,
4355 const Teuchos::RCP<Teuchos::ParameterList>& params)
4359 using Teuchos::ArrayRCP;
4363 const char tfecfFuncName[] =
"fillComplete: ";
4364 ProfilingRegion regionFillComplete
4365 (
"Tpetra::CrsMatrix::fillComplete");
// Optional diagnostic output to std::cerr, controlled by Behavior::verbose.
4366 const bool verbose = Behavior::verbose(
"CrsMatrix");
4367 std::unique_ptr<std::string> prefix;
4369 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4370 std::ostringstream os;
4371 os << *prefix << endl;
4372 std::cerr << os.str ();
// NOTE(review): this label is spelled fillCompete (also near the end of the
// function); confirm whether tools depend on the spelling before changing.
4375 "Tpetra::CrsMatrix::fillCompete",
4378 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4379 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4380 "Matrix fill state must be active (isFillActive() "
4381 "must be true) before you may call fillComplete().");
4382 const int numProcs = this->getComm ()->getSize ();
// Defaults, possibly overridden by entries of params below.
4392 bool assertNoNonlocalInserts =
false;
4395 bool sortGhosts =
true;
4397 if (! params.is_null ()) {
4398 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4399 assertNoNonlocalInserts);
// Two spellings of the ghost-sorting option are accepted; the lower-case
// one is checked first.
4400 if (params->isParameter (
"sort column map ghost gids")) {
4401 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4403 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4404 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
// Communication is only needed on more than one process and when the caller
// has not asserted that there were no nonlocal changes.
4409 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4411 if (! this->myGraph_.is_null ()) {
4412 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
// Allocate value storage now if the graph has not allocated indices yet;
// local versus global indexing follows whether a column Map exists.
4415 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4416 if (this->hasColMap ()) {
4417 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4420 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4425 if (needGlobalAssemble) {
4426 this->globalAssemble ();
// On a single process any buffered nonlocal entry is an error.
4429 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4430 (numProcs == 1 && nonlocals_.size() > 0,
4431 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4432 "An invalid entry (i.e., with row index not in the row Map) must have "
4433 "been submitted to the CrsMatrix.");
4436 if (this->isStaticGraph ()) {
// Static graph, debug builds only: the requested Maps must match the Maps
// the graph already has, because the graph cannot be modified.
4444 #ifdef HAVE_TPETRA_DEBUG
4462 const bool domainMapsMatch =
4463 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4464 const bool rangeMapsMatch =
4465 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4467 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4468 (! domainMapsMatch, std::runtime_error,
4469 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4470 "The graph cannot be changed because it was given to the CrsMatrix "
4471 "constructor as const. You can fix this by passing in the graph's "
4472 "domain Map and range Map to the matrix's fillComplete call.");
4474 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4475 (! rangeMapsMatch, std::runtime_error,
4476 "The CrsMatrix's range Map does not match the graph's range Map. "
4477 "The graph cannot be changed because it was given to the CrsMatrix "
4478 "constructor as const. You can fix this by passing in the graph's "
4479 "domain Map and range Map to the matrix's fillComplete call.");
4480 #endif // HAVE_TPETRA_DEBUG
4484 this->fillLocalMatrix (params);
// Owned graph path (presumably the else branch): update the graph as well.
4492 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4495 Teuchos::Array<int> remotePIDs (0);
4496 const bool mustBuildColMap = ! this->hasColMap ();
4497 if (mustBuildColMap) {
4498 this->myGraph_->makeColMap (remotePIDs);
// makeIndicesLocal reports failure through a (count, message) pair; a
// nonzero count is turned into a std::runtime_error with that message.
4503 const std::pair<size_t, std::string> makeIndicesLocalResult =
4504 this->myGraph_->makeIndicesLocal(verbose);
4509 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4510 (makeIndicesLocalResult.first != 0, std::runtime_error,
4511 makeIndicesLocalResult.second);
4513 const bool sorted = this->myGraph_->isSorted ();
4514 const bool merged = this->myGraph_->isMerged ();
4515 this->sortAndMergeIndicesAndValues (sorted, merged);
4520 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4524 this->fillLocalGraphAndMatrix (params);
// Global constants are computed unless params disables them.
4526 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4527 params->get (
"compute global constants",
true);
4528 if (callGraphComputeGlobalConstants) {
4529 this->myGraph_->computeGlobalConstants ();
4532 this->myGraph_->computeLocalConstants ();
4534 this->myGraph_->fillComplete_ =
true;
4535 this->myGraph_->checkInternalState ();
4540 this->fillComplete_ =
true;
4543 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4545 this->checkInternalState ();
// expertStaticFillComplete: completes fill using caller-supplied Maps and
// Import/Export objects. It forwards them to the owned graph, fills the local
// graph and matrix, marks the matrix fill complete and checks internal state.
// Throws std::runtime_error if fill is not active or the matrix is already
// fill complete, and std::logic_error if myGraph_ is null.
// Fixes in this revision: the second debug check now reports the condition
// it actually tests (isFillComplete() being false), and the mis-decoded
// parameter text is restored to a reference named params.
// NOTE(review): the first parameter line, braces and some timer-related
// lines are not visible in this excerpt.
4549 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4553 const Teuchos::RCP<const map_type> & rangeMap,
4554 const Teuchos::RCP<const import_type>& importer,
4555 const Teuchos::RCP<const export_type>& exporter,
4556 const Teuchos::RCP<Teuchos::ParameterList> &params)
// Optional timers; the label prefix can be overridden through params.
4558 #ifdef HAVE_TPETRA_MMM_TIMINGS
4560 if(!params.is_null())
4561 label = params->get(
"Timer Label",label);
4562 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4563 using Teuchos::TimeMonitor;
4565 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4568 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4569 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4570 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4571 "must be true) before calling fillComplete().");
4572 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4573 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4576 #ifdef HAVE_TPETRA_MMM_TIMINGS
4577 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
// The graph completes its own fill with the same arguments first.
4580 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4584 #ifdef HAVE_TPETRA_MMM_TIMINGS
4585 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4588 fillLocalGraphAndMatrix (params);
4593 fillComplete_ =
true;
// Debug builds only: confirm the resulting fill state is consistent.
4596 #ifdef HAVE_TPETRA_DEBUG
4597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4598 ": We're at the end of fillComplete(), but isFillActive() is true. "
4599 "Please report this bug to the Tpetra developers.");
4600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4601 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4602 "Please report this bug to the Tpetra developers.");
4603 #endif // HAVE_TPETRA_DEBUG
4605 #ifdef HAVE_TPETRA_MMM_TIMINGS
4606 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4609 checkInternalState();
// Walks one row of column indices with a read cursor (cur) and a write
// cursor (newend), comparing each index with the last kept one, and returns
// the resulting row length (newend - beg).
// Presumably mergeRowIndicesAndValues, which combines duplicate column
// indices in a sorted row; the signature and the loop body that handles
// values are not visible in this excerpt - TODO confirm.
4613 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4619 LocalOrdinal* beg = cols;
4620 LocalOrdinal* end = cols + rowLen;
4621 LocalOrdinal* newend = beg;
4623 LocalOrdinal* cur = beg + 1;
4627 while (cur != end) {
// A differing index starts a new kept entry.
4628 if (*cur != *newend) {
4645 return newend - beg;
// sortAndMergeIndicesAndValues: when the rows are not already both sorted
// and merged, sorts each local row by column index (moving values along),
// merges duplicates, updates the stored row length, and counts how many
// duplicates were removed.
// Requires an owned (non-static) graph whose storage is not yet optimized.
// NOTE(review): the signature, the definitions of rowBegins_, rowLengths_,
// cols_ and vals, and the code after the reduction are not visible here.
4648 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4653 using ::Tpetra::Details::ProfilingRegion;
4654 typedef LocalOrdinal LO;
4655 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4656 host_execution_space;
4657 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4658 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4659 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
// Nothing to do when the caller reports the rows as sorted and merged.
4661 if (! sorted || ! merged) {
4662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4663 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4664 "\"static\" (const) graph, since the matrix does not own the graph.");
4665 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4666 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4667 "this matrix claims ! isStaticGraph(). "
4668 "Please report this bug to the Tpetra developers.");
4669 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4670 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4671 "this method if the graph's storage has already been optimized. "
4672 "Please report this bug to the Tpetra developers.");
4675 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4676 size_t totalNumDups = 0;
// Host view of the unpacked values, opened for reading and writing.
4681 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
// One iteration per local row on the host; the reduction sums the number of
// duplicates removed across rows.
4683 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4684 [=] (
const LO lclRow,
size_t& numDups) {
4685 size_t rowBegin = rowBegins_(lclRow);
4686 size_t rowLen = rowLengths_(lclRow);
4687 LO* cols = cols_.data() + rowBegin;
4690 sort2 (cols, cols + rowLen, vals);
4693 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4694 rowLengths_(lclRow) = newRowLength;
4695 numDups += rowLen - newRowLength;
// Non-transpose sparse matrix-vector multiply with optional Import of the
// input and Export of the output (presumably applyNonTranspose, which apply
// calls for NO_TRANS; the signature is not visible in this excerpt).
// Visible structure: special handling of alpha == 0 and of beta, Import of
// X_in into a column-Map multivector when an Import object exists, a local
// multiply, Export of the row-Map result when an Export object exists, and a
// final reduction when Y is locally replicated.
// NOTE(review): many lines, including the bodies of most branches, are not
// visible, so the comments below describe only what the visible lines show.
4708 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4719 using Teuchos::rcp_const_cast;
4720 using Teuchos::rcpFromRef;
4721 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4722 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4728 if (alpha == ZERO) {
4731 }
else if (beta != ONE) {
4745 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4746 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// beta == 0 means the prior contents of Y do not contribute to the result.
4752 const bool Y_is_overwritten = (beta ==
ZERO);
// Y is treated as replicated when it is not distributed but the
// communicator has more than one process.
4755 const bool Y_is_replicated =
4756 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4764 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X_colMap: the input as seen on the column Map.
4771 RCP<const MV> X_colMap;
4772 if (importer.is_null ()) {
4780 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4782 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4787 X_colMap = rcpFromRef (X_in);
// An Import object exists: import X_in into a column-Map multivector.
4791 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4797 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4800 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4801 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4808 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
// An Export object exists: multiply into the row-Map temporary with beta
// replaced by zero; the Export region follows.
4815 if (! exporter.is_null ()) {
4816 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4818 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4824 if (Y_is_overwritten) {
// No Export object: multiply either into a row-Map temporary or directly
// into Y_in (the selecting condition is not visible in this excerpt).
4850 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4857 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4861 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4869 if (Y_is_replicated) {
4870 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
// Transpose or conjugate-transpose multiply with optional communication
// (presumably applyTranspose, which apply calls for modes other than
// NO_TRANS; the signature is not visible in this excerpt).
// In this direction the Export object is used to bring X in through
// doImport, and the profiling label shows an Export step tied to the Import
// object, with importMV_ and exportMV_ as cached temporaries.
// NOTE(review): many lines, including the bodies of most branches, are not
// visible, so the comments below describe only what the visible lines show.
4875 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4880 const Teuchos::ETransp mode,
4885 using Teuchos::null;
4888 using Teuchos::rcp_const_cast;
4889 using Teuchos::rcpFromRef;
4890 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4893 if (alpha == ZERO) {
4906 else if (beta == ZERO) {
4924 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4925 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// Y is treated as replicated when it is not distributed but the
// communicator has more than one process.
4930 const bool Y_is_replicated = (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4931 const bool Y_is_overwritten = (beta ==
ZERO);
4932 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X is either a deep copy of X_in or a non-owning reference to it (the
// selecting condition is not visible in this excerpt).
4938 X = rcp (
new MV (X_in, Teuchos::Copy));
4940 X = rcpFromRef (X_in);
// (Re)create the cached temporaries when missing; the branch taken when the
// cached column count differs from numVectors is not visible here.
4944 if (importer != Teuchos::null) {
4945 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4948 if (importMV_ == null) {
4949 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
4952 if (exporter != Teuchos::null) {
4953 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4956 if (exportMV_ == null) {
4957 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
// Bring X_in into the row-Map temporary through the Export object.
4963 if (! exporter.is_null ()) {
4964 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4965 exportMV_->doImport (X_in, *exporter,
INSERT);
// With an Import object: multiply into the zeroed column-Map temporary.
4972 if (importer != Teuchos::null) {
4973 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
4980 importMV_->putScalar (ZERO);
4982 this->localApply (*X, *importMV_, mode, alpha, ZERO);
4984 if (Y_is_overwritten) {
// Without an Import object: multiply into a copy of Y_in or directly into
// Y_in (the selecting condition is not visible in this excerpt).
5001 MV Y (Y_in, Teuchos::Copy);
5002 this->localApply (*X, Y, mode, alpha, beta);
5005 this->localApply (*X, Y_in, mode, alpha, beta);
5012 if (Y_is_replicated) {
5013 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
// localApply: the process-local part of the multiply, with no communication.
// Validates the local dimensions of X and Y against the column and row Maps
// (which one depends on mode), requires fill completeness, constant stride,
// and that X and Y do not alias, then dispatches to the local kernel.
// Two code paths exist depending on the KokkosKernels version.
// NOTE(review): the signature start, the condition expressions of several
// checks, and the kernel calls themselves are partly not visible here.
5018 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5023 const Teuchos::ETransp mode,
5024 const Scalar& alpha,
5025 const Scalar& beta)
const
5028 using Teuchos::NO_TRANS;
5029 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5036 const char tfecfFuncName[] =
"localApply: ";
5037 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5041 const bool transpose = (mode != Teuchos::NO_TRANS);
// Dimension checks: for NO_TRANS, X is compared with the column Map and Y
// with the row Map; for the transposed modes the roles are swapped.
5042 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5044 getColMap ()->getLocalNumElements (), std::runtime_error,
5045 "NO_TRANS case: X has the wrong number of local rows. "
5047 "getColMap()->getLocalNumElements() = " <<
5048 getColMap ()->getLocalNumElements () <<
".");
5049 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5051 getRowMap ()->getLocalNumElements (), std::runtime_error,
5052 "NO_TRANS case: Y has the wrong number of local rows. "
5054 "getRowMap()->getLocalNumElements() = " <<
5055 getRowMap ()->getLocalNumElements () <<
".");
5056 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5058 getRowMap ()->getLocalNumElements (), std::runtime_error,
5059 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5061 <<
" != getRowMap()->getLocalNumElements() = "
5062 << getRowMap ()->getLocalNumElements () <<
".");
// NOTE(review): this check compares against the column Map, which is the Y
// side in the transposed case, yet the message text says X; the condition
// itself is not visible here, so confirm before changing the message.
5063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5065 getColMap ()->getLocalNumElements (), std::runtime_error,
5066 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5068 <<
" != getColMap()->getLocalNumElements() = "
5069 << getColMap ()->getLocalNumElements () <<
".");
5070 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5071 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5072 "fill complete. You must call fillComplete() (possibly with "
5073 "domain and range Map arguments) without an intervening "
5074 "resumeFill() call before you may call this method.");
5075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5077 std::runtime_error,
"X and Y must be constant stride.");
// Aliasing check: identical non-null data pointers with a nonzero extent.
5082 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5083 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5084 && X_lcl.extent(0) != 0,
5085 std::runtime_error,
"X and Y may not alias one another.");
// Newer KokkosKernels: create the apply helper on first use and reuse it on
// later calls.
5088 #if KOKKOSKERNELS_VERSION >= 40299
5089 auto A_lcl = getLocalMatrixDevice();
5090 if(!applyHelper.get()) {
5092 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
// Translate the Teuchos transpose mode into the KokkosSparse mode string.
5096 const char* modeKK =
nullptr;
5099 case Teuchos::NO_TRANS:
5100 modeKK = KokkosSparse::NoTranspose;
break;
5101 case Teuchos::TRANS:
5102 modeKK = KokkosSparse::Transpose;
break;
5103 case Teuchos::CONJ_TRANS:
5104 modeKK = KokkosSparse::ConjugateTranspose;
break;
5106 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
// The helper decides whether to use a variant of the matrix with int row
// pointers; each variant has its own handle.
5109 if(applyHelper->shouldUseIntRowptrs())
5111 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5113 &applyHelper->handle_int, modeKK,
5119 &applyHelper->handle, modeKK,
// Older KokkosKernels path: estimate row imbalance as the maximum row length
// minus the average row length, and choose between two local operators.
5123 LocalOrdinal nrows = getLocalNumRows();
5124 LocalOrdinal maxRowImbalance = 0;
5126 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5128 auto matrix_lcl = getLocalMultiplyOperator();
5130 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5132 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
// apply: public entry point for the matrix-vector multiply. Requires the
// matrix to be fill complete, then dispatches on mode: NO_TRANS goes to
// applyNonTranspose, every other mode goes to applyTranspose.
// NOTE(review): most of the signature is not visible in this excerpt.
5136 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5141 Teuchos::ETransp mode,
5146 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5148 TEUCHOS_TEST_FOR_EXCEPTION
5149 (! isFillComplete (), std::runtime_error,
5150 fnName <<
": Cannot call apply() until fillComplete() "
5151 "has been called.");
5153 if (mode == Teuchos::NO_TRANS) {
// The profiling region for this branch reuses fnName as its label.
5154 ProfilingRegion regionNonTranspose (fnName);
5155 this->applyNonTranspose (X, Y, alpha, beta);
5158 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5159 this->applyTranspose (X, Y, mode, alpha, beta);
// convert: returns a new CrsMatrix with scalar type T that shares the graph
// of this matrix and whose values are converted copies of the values of
// this matrix. The source matrix must be fill complete.
// NOTE(review): the inner template header for T, the function name line and
// the return statement are not visible in this excerpt.
5164 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5166 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5172 const char tfecfFuncName[] =
"convert: ";
5174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5175 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5176 "of the conversion) is not fill complete. You must first call "
5177 "fillComplete() (possibly with the domain and range Map) without an "
5178 "intervening call to resumeFill(), before you may call this method.");
// The new matrix is constructed from the graph of this matrix.
5180 RCP<output_matrix_type> newMatrix
5181 (
new output_matrix_type (this->getCrsGraph ()));
// Convert the device values element by element into the new matrix.
5185 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5186 this->getLocalMatrixDevice ().values);
// Complete fill on the new matrix with the same domain and range Maps.
5190 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
// checkInternalState: asserts a set of invariants about the matrix and its
// graph, throwing std::logic_error with a generic message on violation.
// NOTE(review): the signature and any enclosing conditional (for example a
// debug-mode guard) are not visible in this excerpt.
5196 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5203 const char tfecfFuncName[] =
"checkInternalState: ";
5204 const char err[] =
"Internal state is not consistent. "
5205 "Please report this bug to the Tpetra developers.";
// Invariant 1: the matrix always has a (static) graph pointer.
5209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5210 (staticGraph_.is_null (), std::logic_error, err);
// Invariant 2: an owned graph, when present, is the same object as the
// static graph.
5214 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5215 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5216 std::logic_error, err);
// Invariant 3: a fill-complete matrix implies a fill-complete graph.
5218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5219 (isFillComplete () && ! staticGraph_->isFillComplete (),
5220 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5221 "but its graph is NOT fill complete.");
// Invariant 4: if the graph has allocated a nonempty index structure with at
// least one local row, the unpacked values must not be empty.
5224 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5225 (staticGraph_->indicesAreAllocated () &&
5226 staticGraph_->getLocalAllocationSize() > 0 &&
5227 staticGraph_->getLocalNumRows() > 0 &&
5228 valuesUnpacked_wdv.extent (0) == 0,
5229 std::logic_error, err);
// CrsMatrix::description: builds a one-line, brace-delimited summary of
// the matrix in an std::ostringstream: optional object label, the
// fill-complete flag and the global dimensions. The global entry count
// is included only in the fill-complete branch.
5233 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5238 std::ostringstream os;
5240 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
// The label is printed only when it is non-empty.
5241 if (this->getObjectLabel () !=
"") {
5242 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
// Fill-complete matrices also report the global number of entries.
5244 if (isFillComplete ()) {
5245 os <<
"isFillComplete: true"
5246 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5247 << getGlobalNumCols () <<
"]"
5248 <<
", global number of entries: " << getGlobalNumEntries ()
// Otherwise only the dimensions are reported, and the brace is closed here.
5252 os <<
"isFillComplete: false"
5253 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5254 << getGlobalNumCols () <<
"]}";
// CrsMatrix::describe: prints a description of the matrix to `out`, with
// the amount of detail controlled by verbLevel:
//  - VERB_DEFAULT is treated as VERB_LOW;
//  - the header, template parameters, fill state and global dimensions
//    are printed first;
//  - Map information follows the `vl < VERB_MEDIUM` test, and per-process
//    entry counts are printed rank by rank;
//  - per-row output follows the `vl < VERB_HIGH` test, and the
//    (index, value) pairs are printed only at VERB_EXTREME.
// NOTE(review): the bodies of the verbosity guards are not visible in
// this chunk; presumably each one returns early -- confirm.
5259 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5263 const Teuchos::EVerbosityLevel verbLevel)
const
5267 using Teuchos::ArrayView;
5268 using Teuchos::Comm;
5270 using Teuchos::TypeNameTraits;
5271 using Teuchos::VERB_DEFAULT;
5272 using Teuchos::VERB_NONE;
5273 using Teuchos::VERB_LOW;
5274 using Teuchos::VERB_MEDIUM;
5275 using Teuchos::VERB_HIGH;
5276 using Teuchos::VERB_EXTREME;
// Resolve the default verbosity to VERB_LOW.
5278 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5280 if (vl == VERB_NONE) {
5285 Teuchos::OSTab tab0 (out);
5287 RCP<const Comm<int> > comm = this->getComm();
5288 const int myRank = comm->getRank();
5289 const int numProcs = comm->getSize();
// Column width for the per-row table: the loop steps through powers of
// ten below the global row count, and the width is then clamped to at
// least 11 and widened by 2.
5291 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5294 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
5304 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5306 Teuchos::OSTab tab1 (out);
// The label is printed only when it is non-empty.
5309 if (this->getObjectLabel () !=
"") {
5310 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
// Names of the four template parameters, via TypeNameTraits.
5313 out <<
"Template parameters:" << endl;
5314 Teuchos::OSTab tab2 (out);
5315 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5316 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5317 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5318 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
// Global statistics: entry counts are printed only when fill complete.
5320 if (isFillComplete()) {
5321 out <<
"isFillComplete: true" << endl
5322 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5323 << getGlobalNumCols () <<
"]" << endl
5324 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5325 << endl <<
"Global max number of entries in a row: "
5326 << getGlobalMaxNumRowEntries () << endl;
5329 out <<
"isFillComplete: false" << endl
5330 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5331 << getGlobalNumCols () <<
"]" << endl;
5335 if (vl < VERB_MEDIUM) {
// Row Map: "null" if absent, otherwise the Map describes itself.
5341 out << endl <<
"Row Map:" << endl;
5343 if (getRowMap ().is_null ()) {
5345 out <<
"null" << endl;
5352 getRowMap ()->describe (out, vl);
// Column Map: abbreviated when null or when it is the row Map object.
5357 out <<
"Column Map: ";
5359 if (getColMap ().is_null ()) {
5361 out <<
"null" << endl;
5363 }
else if (getColMap () == getRowMap ()) {
5365 out <<
"same as row Map" << endl;
5371 getColMap ()->describe (out, vl);
// Domain Map: abbreviated when null or identical to the row/column Map.
5376 out <<
"Domain Map: ";
5378 if (getDomainMap ().is_null ()) {
5380 out <<
"null" << endl;
5382 }
else if (getDomainMap () == getRowMap ()) {
5384 out <<
"same as row Map" << endl;
5386 }
else if (getDomainMap () == getColMap ()) {
5388 out <<
"same as column Map" << endl;
5394 getDomainMap ()->describe (out, vl);
// Range Map: abbreviated when null or identical to the domain/row Map.
5399 out <<
"Range Map: ";
5401 if (getRangeMap ().is_null ()) {
5403 out <<
"null" << endl;
5405 }
else if (getRangeMap () == getDomainMap ()) {
5407 out <<
"same as domain Map" << endl;
5409 }
else if (getRangeMap () == getRowMap ()) {
5411 out <<
"same as row Map" << endl;
5417 getRangeMap ()->describe (out, vl);
// Per-process summary: the loop visits every rank, and only the process
// whose rank matches curRank prints in that iteration.
5421 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5422 if (myRank == curRank) {
5423 out <<
"Process rank: " << curRank << endl;
5424 Teuchos::OSTab tab2 (out);
5425 if (! staticGraph_->indicesAreAllocated ()) {
5426 out <<
"Graph indices not allocated" << endl;
5429 out <<
"Number of allocated entries: "
5430 << staticGraph_->getLocalAllocationSize () << endl;
5432 out <<
"Number of entries: " << getLocalNumEntries () << endl
5433 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
5442 if (vl < VERB_HIGH) {
// Per-row table, again printed one rank at a time.
5447 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5448 if (myRank == curRank) {
5449 out << std::setw(width) <<
"Proc Rank"
5450 << std::setw(width) <<
"Global Row"
5451 << std::setw(width) <<
"Num Entries";
5452 if (vl == VERB_EXTREME) {
5453 out << std::setw(width) <<
"(Index,Value)";
// One table line per local row: rank, global row index, entry count.
5456 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5457 const size_t nE = getNumEntriesInLocalRow(r);
5458 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5459 out << std::setw(width) << myRank
5460 << std::setw(width) << gid
5461 << std::setw(width) << nE;
5462 if (vl == VERB_EXTREME) {
// Globally indexed: the row view already holds global column indices.
5463 if (isGloballyIndexed()) {
5464 global_inds_host_view_type rowinds;
5465 values_host_view_type rowvals;
5466 getGlobalRowView (gid, rowinds, rowvals);
5467 for (
size_t j = 0; j < nE; ++j) {
5468 out <<
" (" << rowinds[j]
5469 <<
", " << rowvals[j]
// Locally indexed: local column indices are mapped to global indices
// through the column Map before printing.
5473 else if (isLocallyIndexed()) {
5474 local_inds_host_view_type rowinds;
5475 values_host_view_type rowvals;
5476 getLocalRowView (r, rowinds, rowvals);
5477 for (
size_t j=0; j < nE; ++j) {
5478 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5479 <<
", " << rowvals[j]
// Compatibility check for a source object: the result is true exactly
// when srcRowMat is non-null.
// NOTE(review): srcRowMat is initialised on a line that is not visible
// in this chunk; presumably it is a dynamic_cast of the source object to
// the row-matrix interface -- confirm.
5495 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5509 return (srcRowMat !=
nullptr);
// CrsMatrix::applyCrsPadding: applies the given per-row `padding` to the
// unpacked storage of this matrix and of its graph (myGraph_).
// Visible steps:
//  1. allocate values/indices if the graph has not allocated them yet;
//  2. copy the graph's unpacked row pointers into row_ptr_beg;
//  3. build row_ptr_end, either from the graph's per-row entry counts
//     (when k_numRowEntries_ is non-empty) or from row_ptr_beg(i+1);
//  4. pad the column indices (global or local, depending on the graph's
//     indexing) together with valuesUnpacked_wdv, then verify that both
//     have the same length, throwing std::logic_error otherwise;
//  5. refill the per-row entry counts as row_ptr_end - row_ptr_beg when
//     they were in use;
//  6. install row_ptr_beg as the graph's unpacked row pointers.
// When `verbose` is set, progress messages are written to std::cerr.
5512 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5516 const typename crs_graph_type::padding_type& padding,
5522 using LO = local_ordinal_type;
5523 using row_ptrs_type =
5524 typename local_graph_device_type::row_map_type::non_const_type;
5525 using range_policy =
5526 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5527 const char tfecfFuncName[] =
"applyCrsPadding";
5528 const char suffix[] =
5529 ". Please report this bug to the Tpetra developers.";
5530 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
// Prefix string used by the verbose messages below.
5532 std::unique_ptr<std::string> prefix;
5534 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5535 std::ostringstream os;
5536 os << *prefix <<
"padding: ";
5539 std::cerr << os.str();
// The rank is only looked up in verbose mode; -1 is used otherwise.
5541 const int myRank = ! verbose ? -1 : [&] () {
5542 auto map = this->getMap();
5543 if (map.is_null()) {
5546 auto comm = map->getComm();
5547 if (comm.is_null()) {
5550 return comm->getRank();
// Make sure storage exists before it is padded.
5554 if (! myGraph_->indicesAreAllocated()) {
5556 std::ostringstream os;
5557 os << *prefix <<
"Call allocateIndices" << endl;
5558 std::cerr << os.str();
5560 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5572 std::ostringstream os;
5573 os << *prefix <<
"Allocate row_ptrs_beg: "
5574 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5575 std::cerr << os.str();
// row_ptr_beg is allocated uninitialised and then filled by deep_copy
// from the graph's unpacked device row pointers.
5577 using Kokkos::view_alloc;
5578 using Kokkos::WithoutInitializing;
5579 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5580 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5582 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
// N is one less than the row-pointer length (0 if there are no pointers).
5584 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5585 size_t(row_ptr_beg.extent(0) - 1);
5587 std::ostringstream os;
5588 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5589 std::cerr << os.str();
5591 row_ptrs_type row_ptr_end(
5592 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5594 row_ptrs_type num_row_entries_d;
// Per-row entry counts are only maintained when the graph stores them.
5596 const bool refill_num_row_entries =
5597 myGraph_->k_numRowEntries_.extent(0) != 0;
5599 if (refill_num_row_entries) {
// End of row i = beginning of row i + its current number of entries.
5602 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5603 myGraph_->k_numRowEntries_);
5604 Kokkos::parallel_for
5605 (
"Fill end row pointers", range_policy(0, N),
5606 KOKKOS_LAMBDA (
const size_t i) {
5607 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
// Without stored counts, row i ends where row i+1 begins.
5614 Kokkos::parallel_for
5615 (
"Fill end row pointers", range_policy(0, N),
5616 KOKKOS_LAMBDA (
const size_t i) {
5617 row_ptr_end(i) = row_ptr_beg(i+1);
// Globally indexed graph: pad the global column indices and the values.
5621 if (myGraph_->isGloballyIndexed()) {
5623 myGraph_->gblInds_wdv,
5624 valuesUnpacked_wdv, padding, myRank, verbose);
// Values and column indices must end up with the same length.
5625 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5626 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5628 (newValuesLen != newColIndsLen, std::logic_error,
5629 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5630 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
// Otherwise: pad the unpacked local column indices and the values.
5635 myGraph_->lclIndsUnpacked_wdv,
5636 valuesUnpacked_wdv, padding, myRank, verbose);
5637 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5638 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5640 (newValuesLen != newColIndsLen, std::logic_error,
5641 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5642 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
// Recompute the per-row counts from the row begin/end pointers.
5646 if (refill_num_row_entries) {
5647 Kokkos::parallel_for
5648 (
"Fill num entries", range_policy(0, N),
5649 KOKKOS_LAMBDA (
const size_t i) {
5650 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5656 std::ostringstream os;
5657 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5658 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5659 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5660 std::cerr << os.str();
// The number of row pointers must be unchanged.
5661 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5662 row_ptr_beg.extent(0) );
// Install the row pointers into the graph.
5664 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// CrsMatrix::copyAndPermuteStaticGraph: copy-and-permute phase of an
// Import/Export for a target matrix with a static graph.
//   srcMat           source row matrix
//   numSameIDs       number of leading rows whose source and target
//                    global row index are the same
//   permuteToLIDs    target local row indices of the permuted rows
//   permuteFromLIDs  source local row indices of the permuted rows
//   numPermutes      number of permuted rows
// Every row is obtained from the source with global column indices
// (copied into scratch buffers when the source is locally indexed,
// viewed otherwise) and handed to combineGlobalValues for the target row.
// Throws std::logic_error if the source reports inconsistent row lengths.
5667 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5669 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5670 copyAndPermuteStaticGraph(
5671 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5672 const size_t numSameIDs,
5673 const LocalOrdinal permuteToLIDs[],
5674 const LocalOrdinal permuteFromLIDs[],
5675 const size_t numPermutes)
5677 using Details::ProfilingRegion;
5678 using Teuchos::Array;
5679 using Teuchos::ArrayView;
5681 using LO = LocalOrdinal;
5682 using GO = GlobalOrdinal;
5683 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5684 const char suffix[] =
5685 " Please report this bug to the Tpetra developers.";
5686 ProfilingRegion regionCAP
5687 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
// NOTE(review): the verbose prefix is created with the label "CrsGraph"
// although this is a CrsMatrix method -- confirm that this is intended.
5691 std::unique_ptr<std::string> prefix;
5693 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5694 std::ostringstream os;
5695 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for the helpers; null when not verbose.
5697 const char*
const prefix_raw =
5698 verbose ? prefix.get()->c_str() :
nullptr;
5700 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
// Phase 1: rows that have the same global index in source and target.
// rowInds / rowVals are scratch buffers reused across rows.
5705 const map_type& srcRowMap = * (srcMat.getRowMap ());
5706 nonconst_global_inds_host_view_type rowInds;
5707 nonconst_values_host_view_type rowVals;
5708 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5709 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5713 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5714 const GO targetGID = sourceGID;
5716 ArrayView<const GO>rowIndsConstView;
5717 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: a view with global indices is not available,
// so the row is copied out with global indices instead.
5719 if (sourceIsLocallyIndexed) {
5720 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
// Grow the scratch buffers only when this row does not fit.
5721 if (rowLength > static_cast<size_t> (rowInds.size())) {
5722 Kokkos::resize(rowInds,rowLength);
5723 Kokkos::resize(rowVals,rowLength);
// Subviews of exactly rowLength entries over the scratch buffers.
5727 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5728 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5733 size_t checkRowLength = 0;
5734 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5735 rowValsView, checkRowLength);
// The copy must report the row length that was queried above.
5737 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5738 (rowLength != checkRowLength, std::logic_error,
"For "
5739 "global row index " << sourceGID <<
", the source "
5740 "matrix's getNumEntriesInGlobalRow returns a row length "
5741 "of " << rowLength <<
", but getGlobalRowCopy reports "
5742 "a row length of " << checkRowLength <<
"." << suffix);
// Wrap the copied data in non-owning ArrayViews; values are
// reinterpreted from the view's value type to Scalar.
5749 rowIndsConstView = Teuchos::ArrayView<const GO> (
5750 rowIndsView.data(), rowIndsView.extent(0),
5751 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5752 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5753 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5754 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Otherwise the source row is viewed directly, without copying.
5759 global_inds_host_view_type rowIndsView;
5760 values_host_view_type rowValsView;
5761 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5766 rowIndsConstView = Teuchos::ArrayView<const GO> (
5767 rowIndsView.data(), rowIndsView.extent(0),
5768 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5769 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5770 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5771 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Combine the source row into the target row.
5779 combineGlobalValues(targetGID, rowIndsConstView,
5781 prefix_raw, debug, verbose);
5785 std::ostringstream os;
5786 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows. Source and target global indices come from
// the respective row Maps via permuteFromLIDs / permuteToLIDs.
5789 const map_type& tgtRowMap = * (this->getRowMap ());
5790 for (
size_t p = 0; p < numPermutes; ++p) {
5791 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5792 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5794 ArrayView<const GO> rowIndsConstView;
5795 ArrayView<const Scalar> rowValsConstView;
// Same row access logic as in phase 1.
5797 if (sourceIsLocallyIndexed) {
5798 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5799 if (rowLength > static_cast<size_t> (rowInds.size ())) {
5800 Kokkos::resize(rowInds,rowLength);
5801 Kokkos::resize(rowVals,rowLength);
5805 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5806 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5811 size_t checkRowLength = 0;
5812 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5813 rowValsView, checkRowLength);
5815 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5816 (rowLength != checkRowLength, std::logic_error,
"For "
5817 "source matrix global row index " << sourceGID <<
", "
5818 "getNumEntriesInGlobalRow returns a row length of " <<
5819 rowLength <<
", but getGlobalRowCopy a row length of "
5820 << checkRowLength <<
"." << suffix);
5827 rowIndsConstView = Teuchos::ArrayView<const GO> (
5828 rowIndsView.data(), rowIndsView.extent(0),
5829 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5830 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5831 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5832 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5837 global_inds_host_view_type rowIndsView;
5838 values_host_view_type rowValsView;
5839 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5844 rowIndsConstView = Teuchos::ArrayView<const GO> (
5845 rowIndsView.data(), rowIndsView.extent(0),
5846 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5847 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5848 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5849 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5854 combineGlobalValues(targetGID, rowIndsConstView,
5856 prefix_raw, debug, verbose);
5860 std::ostringstream os;
5861 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermuteNonStaticGraph: copy-and-permute phase of an
// Import/Export for a target matrix whose graph is not static.
//   srcMat              source row matrix
//   numSameIDs          number of leading rows whose source and target
//                       global row index are the same
//   permuteToLIDs_dv    target local row indices of the permuted rows
//   permuteFromLIDs_dv  source local row indices of the permuted rows
//   numPermutes         number of permuted rows
// Padding is first computed from the source graph (computeCrsPadding)
// and applied (applyCrsPadding); then every incoming row is inserted
// with insertGlobalValuesFilteredChecked.
// Throws std::logic_error if the source reports inconsistent row lengths.
5865 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5867 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5868 copyAndPermuteNonStaticGraph(
5869 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5870 const size_t numSameIDs,
5871 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5872 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5873 const size_t numPermutes)
5875 using Details::ProfilingRegion;
5876 using Teuchos::Array;
5877 using Teuchos::ArrayView;
5879 using LO = LocalOrdinal;
5880 using GO = GlobalOrdinal;
5881 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5882 const char suffix[] =
5883 " Please report this bug to the Tpetra developers.";
5884 ProfilingRegion regionCAP
5885 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
// NOTE(review): the verbose prefix is created with the label "CrsGraph"
// although this is a CrsMatrix method -- confirm that this is intended.
5889 std::unique_ptr<std::string> prefix;
5891 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5892 std::ostringstream os;
5893 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for the helpers; null when not verbose.
5895 const char*
const prefix_raw =
5896 verbose ? prefix.get()->c_str() :
nullptr;
// Compute the padding from the source graph and apply it to this matrix
// before any row is inserted.
5899 using row_graph_type = RowGraph<LO, GO, Node>;
5900 const row_graph_type& srcGraph = *(srcMat.getGraph());
5902 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5903 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5904 applyCrsPadding(*padding, verbose);
5906 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5911 const map_type& srcRowMap = * (srcMat.getRowMap ());
5912 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5913 using gids_type = nonconst_global_inds_host_view_type;
5914 using vals_type = nonconst_values_host_view_type;
// Phase 1: rows that have the same global index in source and target.
5917 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5921 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5922 const GO targetGID = sourceGID;
5924 ArrayView<const GO> rowIndsConstView;
5925 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: copy the row out with global indices into the
// rowInds / rowVals scratch buffers, growing them only when needed.
5927 if (sourceIsLocallyIndexed) {
5929 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5930 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5931 Kokkos::resize(rowInds,rowLength);
5932 Kokkos::resize(rowVals,rowLength);
5936 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5937 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5942 size_t checkRowLength = 0;
5943 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
// The copy must report the row length that was queried above.
5946 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5947 (rowLength != checkRowLength, std::logic_error,
": For "
5948 "global row index " << sourceGID <<
", the source "
5949 "matrix's getNumEntriesInGlobalRow returns a row length "
5950 "of " << rowLength <<
", but getGlobalRowCopy reports "
5951 "a row length of " << checkRowLength <<
"." << suffix);
// Non-owning ArrayViews over the copied row.
5953 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5954 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
// Otherwise the source row is viewed directly, without copying.
5957 global_inds_host_view_type rowIndsView;
5958 values_host_view_type rowValsView;
5959 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5965 rowIndsConstView = Teuchos::ArrayView<const GO> (
5966 rowIndsView.data(), rowIndsView.extent(0),
5967 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5968 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5969 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5970 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Insert the source row into the target row.
5976 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5977 rowValsConstView, prefix_raw, debug, verbose);
5981 std::ostringstream os;
5982 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows, using the host views of the permutation lists.
5984 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5985 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5987 const map_type& tgtRowMap = * (this->getRowMap ());
5988 for (
size_t p = 0; p < numPermutes; ++p) {
5989 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5990 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5992 ArrayView<const GO> rowIndsConstView;
5993 ArrayView<const Scalar> rowValsConstView;
// Same row access logic as in phase 1.
5995 if (sourceIsLocallyIndexed) {
5996 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5997 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5998 Kokkos::resize(rowInds,rowLength);
5999 Kokkos::resize(rowVals,rowLength);
6003 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
6004 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
6009 size_t checkRowLength = 0;
6010 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6011 rowValsView, checkRowLength);
6013 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6014 (rowLength != checkRowLength, std::logic_error,
"For "
6015 "source matrix global row index " << sourceGID <<
", "
6016 "getNumEntriesInGlobalRow returns a row length of " <<
6017 rowLength <<
", but getGlobalRowCopy a row length of "
6018 << checkRowLength <<
"." << suffix);
6020 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6021 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6024 global_inds_host_view_type rowIndsView;
6025 values_host_view_type rowValsView;
6026 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6032 rowIndsConstView = Teuchos::ArrayView<const GO> (
6033 rowIndsView.data(), rowIndsView.extent(0),
6034 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6035 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6036 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6037 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6043 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6044 rowValsConstView, prefix_raw, debug, verbose);
6048 std::ostringstream os;
6049 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermute: copies rows shared by source and target and
// permutes the rest, dispatching on whether this matrix has a static
// graph.
//   numSameIDs       number of leading rows that are the same in both
//   permuteToLIDs    target local row indices of the permuted rows
//   permuteFromLIDs  source local row indices of the permuted rows
// Throws std::invalid_argument if the two permutation lists differ in
// length. The source object is dynamic_cast to RMT by reference.
// In verbose mode (Behavior::verbose("CrsMatrix")) progress is written
// to std::cerr.
6053 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6058 const size_t numSameIDs,
6059 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6060 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6069 const char tfecfFuncName[] =
"copyAndPermute: ";
6070 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6072 const bool verbose = Behavior::verbose(
"CrsMatrix");
6073 std::unique_ptr<std::string> prefix;
6075 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6076 std::ostringstream os;
6077 os << *prefix << endl
6078 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6079 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6088 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6090 std::cerr << os.str ();
// Both permutation lists must have the same number of entries.
6093 const auto numPermute = permuteToLIDs.extent (0);
6094 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6095 (numPermute != permuteFromLIDs.extent (0),
6096 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6097 << numPermute <<
"!= permuteFromLIDs.extent(0) = "
6098 << permuteFromLIDs.extent (0) <<
".");
6103 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
// Static graph: the helper takes raw host pointers, so both DualViews
// are asserted not to need a host sync before their host views are used.
6104 if (isStaticGraph ()) {
6105 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6106 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6107 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6108 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6110 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6111 permuteToLIDs_h.data(),
6112 permuteFromLIDs_h.data(),
// Non-static graph: the helper takes the DualViews themselves.
6116 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6117 permuteFromLIDs, numPermute);
6121 std::ostringstream os;
6122 os << *prefix <<
"Done" << endl;
6123 std::cerr << os.str();
// CrsMatrix::packAndPrepare: packs the rows listed in exportLIDs from the
// source object into `exports` for an Import/Export.
//   exportLIDs          local row indices to pack
//   exports             output byte buffer (reallocated if too small on
//                       the generic RowMatrix path)
//   numPacketsPerLID    output per-row packet counts
//   constantNumPackets  output, set by the packing routine
// Two paths:
//  - the source is a CrsMatrix with the same template parameters:
//    delegate to its packNew();
//  - otherwise the source must be a RowMatrix (std::invalid_argument if
//    it is not): call its pack() into a temporary Teuchos::Array and
//    move the result into `exports`.
// Exceptions thrown while packing are caught and recorded in `msg`
// together with this process' rank. The error state is combined over the
// communicator with a REDUCE_MAX all-reduce, and std::logic_error is
// thrown on failure.
6127 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6132 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6133 Kokkos::DualView<char*, buffer_device_type>& exports,
6134 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6135 size_t& constantNumPackets)
6140 using Teuchos::outArg;
6141 using Teuchos::REDUCE_MAX;
6142 using Teuchos::reduceAll;
6144 typedef LocalOrdinal LO;
6145 typedef GlobalOrdinal GO;
6146 const char tfecfFuncName[] =
"packAndPrepare: ";
6147 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6149 const bool debug = Behavior::debug(
"CrsMatrix");
6150 const bool verbose = Behavior::verbose(
"CrsMatrix");
// Without a communicator there is nothing to reduce over.
6153 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6154 if (pComm.is_null ()) {
6157 const Teuchos::Comm<int>& comm = *pComm;
// Rank of this process, used to tag the error messages below. This must
// be getRank(): getSize() would label every process with the size of
// the communicator instead of its own rank.
6158 const int myRank = comm.getRank ();
6160 std::unique_ptr<std::string> prefix;
6162 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6163 std::ostringstream os;
6164 os << *prefix <<
"Start" << endl
6174 std::cerr << os.str ();
// Collects this process' error message, if packing throws.
6197 std::ostringstream msg;
// Fast path: the source has exactly this CrsMatrix type.
6200 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6201 const crs_matrix_type* srcCrsMat =
6202 dynamic_cast<const crs_matrix_type*
> (&source);
6203 if (srcCrsMat !=
nullptr) {
6205 std::ostringstream os;
6206 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6207 "calling packNew" << endl;
6208 std::cerr << os.str ();
6211 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6212 constantNumPackets);
6214 catch (std::exception& e) {
6216 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Generic path: the source is only known through the RowMatrix interface.
6220 using Kokkos::HostSpace;
6221 using Kokkos::subview;
6222 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6223 using range_type = Kokkos::pair<size_t, size_t>;
6226 std::ostringstream os;
6227 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6229 std::cerr << os.str ();
6232 const row_matrix_type* srcRowMat =
6233 dynamic_cast<const row_matrix_type*
> (&source);
6234 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6235 (srcRowMat ==
nullptr, std::invalid_argument,
6236 "The source object of the Import or Export operation is neither a "
6237 "CrsMatrix (with the same template parameters as the target object), "
6238 "nor a RowMatrix (with the same first four template parameters as the "
// pack() takes Teuchos array types, so the host view of exportLIDs is
// wrapped in a non-owning ArrayView.
6249 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6250 auto exportLIDs_h = exportLIDs.view_host ();
6251 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6252 exportLIDs_h.size ());
// Temporary buffer that pack() fills.
6256 Teuchos::Array<char> exports_a;
// numPacketsPerLID is overwritten on host: reset its sync state and mark
// the host copy as modified before wrapping it.
6262 numPacketsPerLID.clear_sync_state ();
6263 numPacketsPerLID.modify_host ();
6264 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6265 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6266 numPacketsPerLID_h.size ());
6271 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6272 constantNumPackets);
6274 catch (std::exception& e) {
6276 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Reallocate `exports` only if it is too small, keeping its label (or
// using "exports" if it had none).
6280 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6281 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6282 const std::string oldLabel = exports.d_view.label ();
6283 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6284 exports = exports_type (newLabel, newAllocSize);
// The packed bytes are written to the host side of `exports`.
6289 exports.modify_host();
6291 auto exports_h = exports.view_host ();
6292 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
// Unmanaged host view over the temporary array's storage.
6296 typedef typename exports_type::t_host::execution_space HES;
6297 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6298 Kokkos::View<const char*, host_device_type>
6299 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
// Combine the local error flag over all processes (maximum).
6306 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6309 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6310 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6311 "one or more participating processes.");
6315 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6316 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6317 "or more participating processes. Here is this process' error "
6318 "message: " << msg.str ());
6322 std::ostringstream os;
6323 os << *prefix <<
"packAndPrepare: Done!" << endl
6333 std::cerr << os.str ();
// CrsMatrix::packRow: packs one row into the byte buffer `exports`,
// starting at byte `offset`.
//   exports           destination byte buffer
//   offset            byte position at which this row starts
//   numEnt            number of entries in the row
//   gidsIn            global column indices of the row (numEnt values)
//   valsIn            values of the row (numEnt values)
//   numBytesPerValue  packed size in bytes of one value
// Layout written at `offset`: the entry count, then the global indices,
// then the values, back to back.
// Throws std::logic_error if the number of bytes written differs from the
// expected total, and std::runtime_error if packing reported an error.
6337 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6339 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6340 packRow (
char exports[],
6341 const size_t offset,
6342 const size_t numEnt,
6343 const GlobalOrdinal gidsIn[],
6344 const impl_scalar_type valsIn[],
6345 const size_t numBytesPerValue)
const
6348 using Kokkos::subview;
6350 typedef LocalOrdinal LO;
6351 typedef GlobalOrdinal GO;
6352 typedef impl_scalar_type ST;
// The entry count is packed as an LO, so the cast targets LO. The old
// cast to size_t (numEnt's own type) was a no-op that left the narrowing
// to LO implicit; the resulting value is unchanged.
6360 const LO numEntLO =
static_cast<LO> (numEnt);
// Byte offsets and lengths of the three sections of the packed row.
6362 const size_t numEntBeg = offset;
6364 const size_t gidsBeg = numEntBeg + numEntLen;
6365 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6366 const size_t valsBeg = gidsBeg + gidsLen;
6367 const size_t valsLen = numEnt * numBytesPerValue;
6369 char*
const numEntOut = exports + numEntBeg;
6370 char*
const gidsOut = exports + gidsBeg;
6371 char*
const valsOut = exports + valsBeg;
// Running totals: bytes written, and the sum of the error codes returned.
6373 size_t numBytesOut = 0;
6378 Kokkos::pair<int, size_t> p;
6379 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6380 errorCode += p.first;
6381 numBytesOut += p.second;
6383 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6384 errorCode += p.first;
6385 numBytesOut += p.second;
// The bytes written must add up to the three section lengths.
6388 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6389 TEUCHOS_TEST_FOR_EXCEPTION
6390 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6391 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6392 << expectedNumBytes <<
".");
6393 TEUCHOS_TEST_FOR_EXCEPTION
6394 (errorCode != 0, std::runtime_error,
"packRow: "
6395 "PackTraits::packArray returned a nonzero error code");
// CrsMatrix::unpackRow: unpacks one row from the byte buffer `imports`,
// starting at byte `offset`.
//   gidsOut           output: global column indices (numEnt values)
//   valsOut           output: values (numEnt values)
//   imports           source byte buffer
//   offset            byte position at which this row starts
//   numBytes          number of bytes this row is expected to occupy
//   numEnt            number of entries expected in the row
//   numBytesPerValue  packed size in bytes of one value
// The layout read at `offset` is: the entry count, then the global
// indices, then the values.
// Throws std::logic_error when numBytes and numEnt disagree about the
// row being empty, when the unpacked entry count is zero or differs from
// numEnt, or when the number of bytes consumed differs from numBytes or
// from the expected total. Throws std::runtime_error if unpacking
// reported an error.
6400 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6402 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6403 unpackRow (GlobalOrdinal gidsOut[],
6404 impl_scalar_type valsOut[],
6405 const char imports[],
6406 const size_t offset,
6407 const size_t numBytes,
6408 const size_t numEnt,
6409 const size_t numBytesPerValue)
6412 using Kokkos::subview;
6414 typedef LocalOrdinal LO;
6415 typedef GlobalOrdinal GO;
6416 typedef impl_scalar_type ST;
6418 Details::ProfilingRegion region_upack_row(
6419 "Tpetra::CrsMatrix::unpackRow",
// Zero bytes to unpack: per the message below, this is an error when the
// expected entry count is nonzero.
6423 if (numBytes == 0) {
6426 const int myRank = this->getMap ()->getComm ()->getRank ();
6427 TEUCHOS_TEST_FOR_EXCEPTION
6428 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6429 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6430 "number of entries to unpack (as reported by numPacketsPerLID) "
6431 "for this row numEnt=" << numEnt <<
" != 0.");
// The converse: no entries expected, but bytes are present.
6436 if (numEnt == 0 && numBytes != 0) {
6437 const int myRank = this->getMap ()->getComm ()->getRank ();
6438 TEUCHOS_TEST_FOR_EXCEPTION
6439 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6440 "unpackRow: The number of entries to unpack (as reported by "
6441 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6442 "numBytes=" << numBytes <<
" != 0.");
// Byte offsets and lengths of the three sections of the packed row.
6448 const size_t numEntBeg = offset;
6450 const size_t gidsBeg = numEntBeg + numEntLen;
6451 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6452 const size_t valsBeg = gidsBeg + gidsLen;
6453 const size_t valsLen = numEnt * numBytesPerValue;
6455 const char*
const numEntIn = imports + numEntBeg;
6456 const char*
const gidsIn = imports + gidsBeg;
6457 const char*
const valsIn = imports + valsBeg;
// Running total of bytes consumed from the buffer.
6459 size_t numBytesOut = 0;
// The entry count read from the buffer (numEntOut) must match numEnt and
// must not be zero; the message is assembled from whichever condition(s)
// failed.
6463 if (static_cast<size_t> (numEntOut) != numEnt ||
6464 numEntOut == static_cast<LO> (0)) {
6465 const int myRank = this->getMap ()->getComm ()->getRank ();
6466 std::ostringstream os;
6467 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6468 bool firstErrorCondition =
false;
6469 if (static_cast<size_t> (numEntOut) != numEnt) {
6470 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6471 <<
" does not equal number of entries unpacked from imports "
6472 "buffer numEntOut=" << numEntOut <<
".";
6473 firstErrorCondition =
true;
6475 if (numEntOut == static_cast<LO> (0)) {
6476 if (firstErrorCondition) {
6479 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6480 "but number of bytes to unpack for this row numBytes=" << numBytes
6481 <<
" != 0. This should never happen, since packRow should only "
6482 "ever pack rows with a nonzero number of entries. In this case, "
6483 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6486 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
// Unpack the indices and then the values, accumulating the error codes
// and byte counts returned by each call.
6490 Kokkos::pair<int, size_t> p;
6491 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6492 errorCode += p.first;
6493 numBytesOut += p.second;
6495 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6496 errorCode += p.first;
6497 numBytesOut += p.second;
// The bytes consumed must match what the caller announced ...
6500 TEUCHOS_TEST_FOR_EXCEPTION
6501 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6502 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
// ... and the sum of the three section lengths.
6504 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6505 TEUCHOS_TEST_FOR_EXCEPTION
6506 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6507 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6508 << expectedNumBytes <<
".");
6510 TEUCHOS_TEST_FOR_EXCEPTION
6511 (errorCode != 0, std::runtime_error,
"unpackRow: "
6512 "PackTraits::unpackArray returned a nonzero error code");
// Size the pack buffer for the rows listed in exportLIDs.
//
// On output, totalNumEntries holds the sum of getNumEntriesInLocalRow() over
// those rows.  `exports` is replaced by a freshly constructed DualView when
// its current extent is smaller than the size computed below; otherwise it
// is left as it is.
//
// Diagnostic text is written to std::cerr before and after.
// NOTE(review): the guards on that output are not visible in this excerpt;
// presumably it is emitted only when `verbose` is true -- confirm.
6517 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6519 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6520 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6521 size_t& totalNumEntries,
6522 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6524 using Details::Behavior;
6527 typedef impl_scalar_type IST;
6528 typedef LocalOrdinal LO;
6529 typedef GlobalOrdinal GO;
6535 const bool verbose = Behavior::verbose(
"CrsMatrix");
6536 std::unique_ptr<std::string> prefix;
6538 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6539 std::ostringstream os;
6540 os << *prefix <<
"Before:"
6548 std::cerr << os.str ();
6553 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
// The host view of exportLIDs is read below, so it must not need a sync.
6555 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6556 auto exportLIDs_h = exportLIDs.view_host ();
// Count the entries in all rows to be exported.
6559 totalNumEntries = 0;
6560 for (LO i = 0; i < numExportLIDs; ++i) {
6561 const LO lclRow = exportLIDs_h[i];
6562 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
// NOTE(review): what happens for the invalid() sentinel is not visible in
// this excerpt.
6565 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6568 totalNumEntries += curNumEntries;
// Buffer size in bytes: sizeof(LO) per exported row plus
// sizeof(IST) + sizeof(GO) per entry.
6579 const size_t allocSize =
6580 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6581 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
// Only reallocate when the existing buffer is too small.
6582 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
6583 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
// Keep the existing label if there is one; otherwise use "exports".
6585 const std::string oldLabel = exports.d_view.label ();
6586 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6587 exports = exports_type (newLabel, allocSize);
6591 std::ostringstream os;
6592 os << *prefix <<
"After:"
6600 std::cerr << os.str ();
// Pack the rows listed in exportLIDs into `exports`, choosing the
// implementation according to isStaticGraph().
//
// The second visible call forwards all four arguments to packNonStaticNew.
// NOTE(review): the callee used when isStaticGraph() is true, and the `else`
// that presumably separates the two calls, are not visible in this excerpt.
6604 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6607 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6608 Kokkos::DualView<char*, buffer_device_type>& exports,
6609 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6610 size_t& constantNumPackets)
const
6614 if (this->isStaticGraph ()) {
6617 constantNumPackets);
6620 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6621 constantNumPackets);
// Pack the rows listed in exportLIDs into the host side of `exports`, one
// row at a time via packRow, and record each row's packed byte count in
// numPacketsPerLID.
//
// Throws std::invalid_argument if exportLIDs and numPacketsPerLID differ in
// length, and std::logic_error if a row would be written past the end of the
// buffer sized by allocatePackSpaceNew.
//
// NOTE(review): several lines of this function are not visible in this
// excerpt, including the declaration and update of `offset` and the guards
// around the diagnostic output.
6625 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6628 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6629 Kokkos::DualView<char*, buffer_device_type>& exports,
6630 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6631 size_t& constantNumPackets)
const
6639 using LO = LocalOrdinal;
6640 using GO = GlobalOrdinal;
6641 using ST = impl_scalar_type;
6642 const char tfecfFuncName[] =
"packNonStaticNew: ";
6644 const bool verbose = Behavior::verbose(
"CrsMatrix");
6645 std::unique_ptr<std::string> prefix;
6647 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6648 std::ostringstream os;
6649 os << *prefix <<
"Start" << endl;
6650 std::cerr << os.str ();
6653 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6655 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
6656 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6657 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
// NOTE(review): 0 presumably signals a variable number of packets per LID;
// confirm against the caller's contract.
6663 constantNumPackets = 0;
// Size (and if necessary reallocate) the export buffer.
6668 size_t totalNumEntries = 0;
6669 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6670 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
// Packing writes the host view, so reset the sync state and mark the host
// side as modified.
6673 exports.clear_sync_state();
6674 exports.modify_host();
6675 auto exports_h = exports.view_host ();
6677 std::ostringstream os;
6678 os << *prefix <<
"After marking exports as modified on host, "
6680 std::cerr << os.str ();
6684 auto exportLIDs_h = exportLIDs.view_host ();
// numPacketsPerLID arrives as a const reference; constness is cast away so
// that it can be marked as modified on host before it is written below.
6687 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6688 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6689 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6694 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
// Scratch storage for global column indices, set up only when the matrix is
// locally indexed.
6698 typename global_inds_host_view_type::non_const_type gidsIn_k;
6699 if (this->isLocallyIndexed()) {
6701 typename global_inds_host_view_type::non_const_type(
"packGids",
6706 for (
size_t i = 0; i < numExportLIDs; ++i) {
6707 const LO lclRow = exportLIDs_h[i];
6709 size_t numBytes = 0;
6710 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
// NOTE(review): presumably the empty-row case (guard not visible here).
6717 numPacketsPerLID_h[i] = 0;
6721 if (this->isLocallyIndexed ()) {
6722 typename global_inds_host_view_type::non_const_type gidsIn;
6723 values_host_view_type valsIn;
6727 local_inds_host_view_type lidsIn;
6728 this->getLocalRowView (lclRow, lidsIn, valsIn);
// Convert the row's local column indices to global indices through the
// column Map, writing them into the scratch buffer.
6729 const map_type& colMap = * (this->getColMap ());
6730 for (
size_t k = 0; k < numEnt; ++k) {
6731 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6733 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
// The packed size of one value is taken from the row's first value.
6735 const size_t numBytesPerValue =
6736 PackTraits<ST>::packValueCount (valsIn[0]);
6737 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6738 gidsIn.data (), valsIn.data (),
6741 else if (this->isGloballyIndexed ()) {
6742 global_inds_host_view_type gidsIn;
6743 values_host_view_type valsIn;
// The global row view is looked up by global row index, so convert first.
6749 const map_type& rowMap = * (this->getRowMap ());
6750 const GO gblRow = rowMap.getGlobalElement (lclRow);
6751 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6753 const size_t numBytesPerValue =
6754 PackTraits<ST>::packValueCount (valsIn[0]);
6755 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6756 gidsIn.data (), valsIn.data (),
// Verify that the row just packed lies entirely inside the buffer.
6763 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6764 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6765 "First invalid offset into 'exports' pack buffer at index i = " << i
6766 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6767 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
// The per-LID packet count for this row is its packed size in bytes.
6772 numPacketsPerLID_h[i] = numBytes;
6777 std::ostringstream os;
6778 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6785 std::cerr << os.str ();
// Raw-pointer front end for combineGlobalValues.
//
// Converts lclRow to a global row index through myGraph_->rowMap_, wraps
// `cols` and `vals` (numEnt elements each) in Teuchos::ArrayView objects,
// and forwards everything to combineGlobalValues.
//
// NOTE(review): myGraph_ is dereferenced here without a visible null check.
// NOTE(review): the return type and some parameters (combMode, debug,
// verbose) are declared on lines not visible in this excerpt.
6789 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6791 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6792 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6793 const LocalOrdinal numEnt,
6794 const impl_scalar_type vals[],
6795 const GlobalOrdinal cols[],
6797 const char*
const prefix,
6801 using GO = GlobalOrdinal;
6805 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
// For an empty row the views are built from nullptr rather than from the
// caller's pointers.
6806 Teuchos::ArrayView<const GO> cols_av
6807 (numEnt == 0 ?
nullptr : cols, numEnt);
// The values arrive as impl_scalar_type and are viewed as Scalar.
6808 Teuchos::ArrayView<const Scalar> vals_av
6809 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
6814 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6815 prefix, debug, verbose);
// Combine the given (columnIndices, values) pairs into row globalRowIndex
// according to combineMode.
//
// With a static graph:
//   ADD     -> sumIntoGlobalValues
//   REPLACE -> replaceGlobalValues
//   ABSMAX  -> transformGlobalValues with Details::AbsMax<Scalar>
//   INSERT  -> std::invalid_argument
//   other   -> std::logic_error
// With a non-static graph:
//   ADD or INSERT -> insertGlobalValuesFilteredChecked
//   ABSMAX        -> std::logic_error
//   REPLACE       -> std::logic_error
//   other         -> std::logic_error
//
// NOTE(review): the `else` separating the static and non-static halves is
// not visible in this excerpt; the split above is inferred from the
// "! isStaticGraph ()" conditions in the second half.
6819 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6821 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6822 combineGlobalValues(
6823 const GlobalOrdinal globalRowIndex,
6824 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6825 const Teuchos::ArrayView<const Scalar>& values,
6827 const char*
const prefix,
6831 const char tfecfFuncName[] =
"combineGlobalValues: ";
6833 if (isStaticGraph ()) {
6837 if (combineMode ==
ADD) {
6838 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6840 else if (combineMode ==
REPLACE) {
6841 replaceGlobalValues (globalRowIndex, columnIndices, values);
6843 else if (combineMode ==
ABSMAX) {
6844 using ::Tpetra::Details::AbsMax;
6846 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6850 else if (combineMode ==
INSERT) {
// Both operands of this condition are already implied by the enclosing
// branches, so the exception is always thrown when this point is reached.
6851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6852 (isStaticGraph() && combineMode ==
INSERT,
6853 std::invalid_argument,
"INSERT combine mode is forbidden "
6854 "if the matrix has a static (const) graph (i.e., was "
6855 "constructed with the CrsMatrix constructor that takes a "
6856 "const CrsGraph pointer).");
6859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6860 (
true, std::logic_error,
"Invalid combine mode; should "
6862 "Please report this bug to the Tpetra developers.");
// ADD and INSERT are handled by the same call.
6866 if (combineMode ==
ADD || combineMode ==
INSERT) {
6873 insertGlobalValuesFilteredChecked(globalRowIndex,
6874 columnIndices, values, prefix, debug, verbose);
6885 else if (combineMode ==
ABSMAX) {
6886 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6887 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6888 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6891 else if (combineMode ==
REPLACE) {
6892 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6893 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6894 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6898 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6899 true, std::logic_error,
"Should never get here! Please report this "
6900 "bug to the Tpetra developers.");
// Validate the arguments and then delegate to unpackAndCombineImpl.
//
// Throws std::invalid_argument when combineMode is not found in validModes
// or when importLIDs and numPacketsPerLID differ in length.
//
// unpackAndCombineImpl is called from two places.  In the first, the call
// sits in a try block; a flag is max-reduced over the communicator, the
// per-process messages are gathered with gathervPrint, and a
// std::logic_error is thrown.  The second call is made directly.
//
// NOTE(review): the function-name line, the combineMode parameter, the
// validModes array, the body of the ZERO branch, and the conditions that
// select between the two call sites are not visible in this excerpt.
// Presumably the first call site is the debug path -- confirm.
6905 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6909 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6910 Kokkos::DualView<char*, buffer_device_type> imports,
6911 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6912 const size_t constantNumPackets,
6919 const char tfecfFuncName[] =
"unpackAndCombine: ";
6920 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6922 const bool debug = Behavior::debug(
"CrsMatrix");
6923 const bool verbose = Behavior::verbose(
"CrsMatrix");
6924 constexpr
int numValidModes = 5;
// Names used only to build the "invalid combine mode" error message.
6927 const char* validModeNames[numValidModes] =
6928 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6930 std::unique_ptr<std::string> prefix;
6932 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6933 std::ostringstream os;
6934 os << *prefix <<
"Start:" << endl
6944 << *prefix <<
" constantNumPackets: " << constantNumPackets
6948 std::cerr << os.str ();
// Reject a combine mode that is not in the list of valid modes.
6952 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6953 validModes+numValidModes) {
6954 std::ostringstream os;
6955 os <<
"Invalid combine mode. Valid modes are {";
6956 for (
int k = 0; k < numValidModes; ++k) {
6957 os << validModeNames[k];
6958 if (k < numValidModes - 1) {
6963 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6964 (
true, std::invalid_argument, os.str ());
6966 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6967 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6968 std::invalid_argument,
"importLIDs.extent(0)="
6969 << importLIDs.extent(0)
6970 <<
" != numPacketsPerLID.extent(0)="
6971 << numPacketsPerLID.extent(0) <<
".");
6974 if (combineMode ==
ZERO) {
// First call site: exceptions are caught so that a failure on any process
// can be reported collectively below.
6979 using Teuchos::reduceAll;
6980 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6983 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6984 constantNumPackets, combineMode,
6986 }
catch (std::exception& e) {
// Max-reduce the local flag across the communicator.
6991 const Teuchos::Comm<int>& comm = * (this->getComm ());
6992 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6993 lclBad, Teuchos::outArg (gblBad));
// Gather every process's message into a fresh stream and throw.
6999 std::ostringstream os;
7000 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
7001 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
7002 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
7003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7004 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
7005 "threw an exception on one or more participating processes: "
7006 << endl << msg->str ());
// Second call site: no try/catch wrapper is visible around this call.
7010 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7011 constantNumPackets, combineMode,
7016 std::ostringstream os;
7017 os << *prefix <<
"Done!" << endl
7027 std::cerr << os.str ();
// Unpack the received data and combine it into this matrix.
//
// With a static graph the work is handed to
// Details::unpackCrsMatrixAndCombineNew.  Otherwise the code first asks
// myGraph_ to compute padding for the incoming entries, applies it with
// applyCrsPadding, and then calls unpackAndCombineImplNonStatic.
//
// An exception from computePaddingForCrsMatrixUnpack is caught and rethrown
// as std::runtime_error carrying this process's rank (-1 if no communicator
// is available).
//
// NOTE(review): the function-name line, some parameters, and the guards
// around the std::cerr output are not visible in this excerpt.
7031 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7035 const Kokkos::DualView<
const local_ordinal_type*,
7036 buffer_device_type>& importLIDs,
7037 Kokkos::DualView<char*, buffer_device_type> imports,
7038 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7039 const size_t constantNumPackets,
7044 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7048 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7049 std::unique_ptr<std::string> prefix;
7051 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7052 std::ostringstream os;
7053 os << *prefix <<
"isStaticGraph(): "
7054 << (isStaticGraph() ?
"true" :
"false")
7055 <<
", importLIDs.extent(0): "
7056 << importLIDs.extent(0)
7057 <<
", imports.extent(0): "
7058 << imports.extent(0)
7059 <<
", numPacketsPerLID.extent(0): "
7060 << numPacketsPerLID.extent(0)
7062 std::cerr << os.str();
// Static graph: delegate to the free function.
7065 if (isStaticGraph ()) {
7066 using Details::unpackCrsMatrixAndCombineNew;
7067 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7068 importLIDs, constantNumPackets,
// Non-static graph: compute the padding needed for the incoming entries.
7073 using padding_type =
typename crs_graph_type::padding_type;
7074 std::unique_ptr<padding_type> padding;
7076 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7077 importLIDs, imports, numPacketsPerLID, verbose);
7079 catch (std::exception& e) {
// Look up the rank defensively: the row Map or its communicator may be null.
7080 const auto rowMap = getRowMap();
7081 const auto comm = rowMap.is_null() ? Teuchos::null :
7083 const int myRank = comm.is_null() ? -1 : comm->getRank();
7084 TEUCHOS_TEST_FOR_EXCEPTION
7085 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7086 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7087 "threw an exception: " << e.what());
7090 std::ostringstream os;
7091 os << *prefix <<
"Call applyCrsPadding" << endl;
7092 std::cerr << os.str();
7094 applyCrsPadding(*padding, verbose);
7097 std::ostringstream os;
7098 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7099 std::cerr << os.str();
7101 unpackAndCombineImplNonStatic(importLIDs, imports,
7108 std::ostringstream os;
7109 os << *prefix <<
"Done" << endl;
7110 std::cerr << os.str();
// Host-side unpack-and-combine.
//
// The work is done in two loops over importLIDs:
//   1. Validate each row's byte counts against the `imports` buffer and find
//      the largest per-row entry count (maxRowNumEnt).
//   2. After allocating scratch arrays of that size, unpack each row with
//      unpackRow and combine it with combineGlobalValuesRaw.
// Rows whose byte count in numPacketsPerLID is zero are tested for in both
// loops.
//
// Any inconsistency detected in the packed data is reported as
// std::logic_error.
//
// NOTE(review): several lines are not visible in this excerpt, including
// the combineMode parameter, the declaration and advancement of `offset`,
// the extraction of numEntLO, the bodies of the early-exit branches, and
// the guards around the std::cerr output.
7114 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7116 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7117 unpackAndCombineImplNonStatic(
7118 const Kokkos::DualView<
const local_ordinal_type*,
7119 buffer_device_type>& importLIDs,
7120 Kokkos::DualView<char*, buffer_device_type> imports,
7121 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7122 const size_t constantNumPackets,
7126 using Kokkos::subview;
7127 using Kokkos::MemoryUnmanaged;
7128 using Details::Behavior;
7131 using Details::PackTraits;
7132 using Details::ScalarViewTraits;
7134 using LO = LocalOrdinal;
7135 using GO = GlobalOrdinal;
7136 using ST = impl_scalar_type;
7137 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7139 typename View<int*, device_type>::HostMirror::execution_space;
7140 using pair_type = std::pair<typename View<int*, HES>::size_type,
7141 typename View<int*, HES>::size_type>;
// Unmanaged view types for the per-row slices of the scratch arrays.
7142 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7143 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7144 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7146 const bool debug = Behavior::debug(
"CrsMatrix");
7147 const bool verbose = Behavior::verbose(
"CrsMatrix");
7148 std::unique_ptr<std::string> prefix;
7150 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7151 std::ostringstream os;
7152 os << *prefix << endl;
7153 std::cerr << os.str ();
// Raw C-string prefix handed to combineGlobalValuesRaw; null unless verbose.
7155 const char*
const prefix_raw =
7156 verbose ? prefix.get()->c_str() :
nullptr;
7158 const size_type numImportLIDs = importLIDs.extent (0);
// NOTE(review): presumably an early exit when there is nothing to do (the
// body of this branch is not visible here).
7159 if (combineMode ==
ZERO || numImportLIDs == 0) {
7163 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7164 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// Everything below reads host views, so bring them up to date first.
7169 if (imports.need_sync_host()) {
7170 imports.sync_host ();
7172 auto imports_h = imports.view_host();
7175 if (numPacketsPerLID.need_sync_host()) {
7176 numPacketsPerLID.sync_host ();
7178 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7180 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7181 auto importLIDs_h = importLIDs.view_host();
7183 size_t numBytesPerValue;
7194 numBytesPerValue = PackTraits<ST>::packValueCount (val);
// First loop: validate the packed data and find the largest row.
7199 size_t maxRowNumEnt = 0;
7200 for (size_type i = 0; i < numImportLIDs; ++i) {
7201 const size_t numBytes = numPacketsPerLID_h[i];
7202 if (numBytes == 0) {
// The row's bytes must lie entirely inside the import buffer.
7207 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7208 (offset + numBytes >
size_t(imports_h.extent (0)),
7209 std::logic_error,
": At local row index importLIDs_h[i="
7210 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7211 <<
") + numBytes (=" << numBytes <<
") > "
7212 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7217 const size_t theNumBytes =
7219 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7220 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7221 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7223 const char*
const inBuf = imports_h.data () + offset;
7224 const size_t actualNumBytes =
7228 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7229 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7230 <<
", actualNumBytes=" << actualNumBytes
7231 <<
" > numBytes=" << numBytes <<
".");
// A row with nonzero bytes must report a nonzero entry count.
7232 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7233 (numEntLO == 0, std::logic_error,
": At local row index "
7234 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7235 "the number of entries read from the packed data is "
7236 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7240 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch arrays sized for the largest row, reused for every row below.
7248 View<GO*, HES> gblColInds;
7249 View<LO*, HES> lclColInds;
7250 View<ST*, HES> vals;
7263 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7264 gid, maxRowNumEnt,
"gids");
7265 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7266 lid, maxRowNumEnt,
"lids");
7267 vals = ScalarViewTraits<ST, HES>::allocateArray(
7268 val, maxRowNumEnt,
"vals");
// Second loop: unpack each row and combine it into the matrix.
7272 for (size_type i = 0; i < numImportLIDs; ++i) {
7273 const size_t numBytes = numPacketsPerLID_h[i];
7274 if (numBytes == 0) {
7278 const char*
const inBuf = imports_h.data () + offset;
7281 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7282 const LO lclRow = importLIDs_h[i];
// Slice the first numEnt slots of the scratch arrays for this row.
7284 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7285 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7287 const size_t numBytesOut =
7288 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7289 offset, numBytes, numEnt, numBytesPerValue);
7290 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7291 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7292 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7293 << numBytesOut <<
".");
7295 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7296 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7297 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7298 combineMode, prefix_raw, debug, verbose);
7304 std::ostringstream os;
7305 os << *prefix <<
"Done" << endl;
7306 std::cerr << os.str();
// Return a MultiVector over this matrix's column Map, using importMV_ as a
// cache.
//
// Throws std::runtime_error if the matrix has no column Map or if its graph
// is not fill complete.
//
// When the graph has an Import object, or when `force` is true, a new
// MultiVector is created and stored in importMV_ if the cache is empty or
// holds a different number of vectors; the other visible assignment hands
// back importMV_ itself.
//
// NOTE(review): the function-name line, the origin of numVecs, the `else`
// between the two assignments, and the return statement are not visible in
// this excerpt.
7310 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7311 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7314 const bool force)
const
7316 using Teuchos::null;
7320 TEUCHOS_TEST_FOR_EXCEPTION(
7321 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7322 "MapMultiVector: You may only call this method if the matrix has a "
7323 "column Map. If the matrix does not yet have a column Map, you should "
7324 "first call fillComplete (with domain and range Map if necessary).");
7328 TEUCHOS_TEST_FOR_EXCEPTION(
7329 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7330 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7331 "this matrix's graph is fill complete.");
7334 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7335 RCP<const map_type> colMap = this->getColMap ();
// A column-Map vector is produced only if there is an Import or the caller
// forces it.
7348 if (! importer.is_null () || force) {
// (Re)create the cached vector if it is missing or has the wrong width.
7349 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7350 X_colMap = rcp (
new MV (colMap, numVecs));
7353 importMV_ = X_colMap;
7356 X_colMap = importMV_;
// Return a MultiVector over this matrix's row Map, using exportMV_ as a
// cache.
//
// Throws std::runtime_error if the matrix's graph is not fill complete.
//
// When the graph has an Export object, or when `force` is true, a new
// MultiVector is created and stored in exportMV_ if the cache is empty or
// holds a different number of vectors; the other visible assignment hands
// back exportMV_ itself.
//
// NOTE(review): the function-name line, the origin of numVecs, the `else`
// between the two assignments, and the return statement are not visible in
// this excerpt.
7367 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7368 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7371 const bool force)
const
7373 using Teuchos::null;
7379 TEUCHOS_TEST_FOR_EXCEPTION(
7380 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7381 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7382 "matrix's graph is fill complete.");
7385 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7389 RCP<const map_type> rowMap = this->getRowMap ();
// A row-Map vector is produced only if there is an Export or the caller
// forces it.
7401 if (! exporter.is_null () || force) {
// (Re)create the cached vector if it is missing or has the wrong width.
7402 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7403 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7404 exportMV_ = Y_rowMap;
7407 Y_rowMap = exportMV_;
// Forward removeEmptyProcessesInPlace to the matrix's own (non-const) graph
// and then refresh the members that mirror it.
//
// Throws std::logic_error if myGraph_ is null.  Per the message below, that
// is the case when the matrix was created with a constant graph.
//
// NOTE(review): the function-name line and the declaration of `newMap` are
// not visible in this excerpt.
7413 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7418 TEUCHOS_TEST_FOR_EXCEPTION(
7419 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7420 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7421 "was created with a constant graph (that is, when it was created using "
7422 "the version of its constructor that takes an RCP<const CrsGraph>). "
7423 "This is because the matrix is not allowed to modify the graph in that "
7424 "case, but removing empty processes requires modifying the graph.");
7425 myGraph_->removeEmptyProcessesInPlace (newMap);
// Re-read the row Map after the graph call and store it in map_.
7429 this->map_ = this->getRowMap ();
// Re-derive the const graph handle from myGraph_.
7433 staticGraph_ = Teuchos::rcp_const_cast<
const Graph> (myGraph_);
// Build and return a new matrix C = alpha*A + beta*B, where B is *this.
//
// Steps visible below:
//   1. Choose the domain and range Maps for C: the caller's if non-null,
//      otherwise B's, otherwise A's (std::invalid_argument if none exists).
//   2. Check that the Maps of A, B and the caller agree via isSameAs.
//   3. Read the optional parameters "Call fillComplete",
//      "Constructor parameters" and "fillComplete parameters".
//   4. If A and B have the same row Map, size C per row from the entry
//      counts of A (only when alpha is nonzero) and of B.
//   5. Copy the rows of A and of B into C with insertGlobalValues.
//   6. Call fillComplete on C if requested.
//
// NOTE(review): the function-name line, the alpha / A / beta parameters,
// several branch headers, the scaling of B's values, and the return
// statement are not visible in this excerpt.  Presumably the
// "row maps must be the same" exception is the alternative to step 4 --
// confirm.
7436 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7437 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7442 const Teuchos::RCP<const map_type>& domainMap,
7443 const Teuchos::RCP<const map_type>& rangeMap,
7444 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7446 using Teuchos::Array;
7447 using Teuchos::ArrayView;
7448 using Teuchos::ParameterList;
7451 using Teuchos::rcp_implicit_cast;
7452 using Teuchos::sublist;
7456 using crs_matrix_type =
7458 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7462 std::unique_ptr<std::string> prefix;
7464 prefix = this->createPrefix(
"CrsMatrix",
"add");
7465 std::ostringstream os;
7466 os << *prefix <<
"Start" << endl;
7467 std::cerr << os.str ();
// B is this matrix; A is the other operand.
7470 const crs_matrix_type& B = *
this;
7471 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7472 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7479 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7480 RCP<const map_type> B_domainMap = B.getDomainMap ();
7481 RCP<const map_type> B_rangeMap = B.getRangeMap ();
// Start from the caller's Maps; fall back to B's, then A's, when null.
7483 RCP<const map_type> theDomainMap = domainMap;
7484 RCP<const map_type> theRangeMap = rangeMap;
7486 if (domainMap.is_null ()) {
7487 if (B_domainMap.is_null ()) {
7488 TEUCHOS_TEST_FOR_EXCEPTION(
7489 A_domainMap.is_null (), std::invalid_argument,
7490 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7491 "then you must supply a nonnull domain Map to this method.");
7492 theDomainMap = A_domainMap;
7494 theDomainMap = B_domainMap;
7497 if (rangeMap.is_null ()) {
7498 if (B_rangeMap.is_null ()) {
7499 TEUCHOS_TEST_FOR_EXCEPTION(
7500 A_rangeMap.is_null (), std::invalid_argument,
7501 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7502 "then you must supply a nonnull range Map to this method.");
7503 theRangeMap = A_rangeMap;
7505 theRangeMap = B_rangeMap;
// Consistency checks between the Maps of A, B and the caller-supplied Maps.
7513 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7514 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7515 TEUCHOS_TEST_FOR_EXCEPTION
7516 (! B_domainMap->isSameAs(*A_domainMap),
7517 std::invalid_argument,
7518 errPfx <<
"The input RowMatrix A must have a domain Map "
7519 "which is the same as (isSameAs) this RowMatrix's "
7521 TEUCHOS_TEST_FOR_EXCEPTION
7522 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7523 errPfx <<
"The input RowMatrix A must have a range Map "
7524 "which is the same as (isSameAs) this RowMatrix's range "
7526 TEUCHOS_TEST_FOR_EXCEPTION
7527 (! domainMap.is_null() &&
7528 ! domainMap->isSameAs(*B_domainMap),
7529 std::invalid_argument,
7530 errPfx <<
"The input domain Map must be the same as "
7531 "(isSameAs) this RowMatrix's domain Map.");
7532 TEUCHOS_TEST_FOR_EXCEPTION
7533 (! rangeMap.is_null() &&
7534 ! rangeMap->isSameAs(*B_rangeMap),
7535 std::invalid_argument,
7536 errPfx <<
"The input range Map must be the same as "
7537 "(isSameAs) this RowMatrix's range Map.");
7540 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7541 TEUCHOS_TEST_FOR_EXCEPTION
7542 (! domainMap.is_null() &&
7543 ! domainMap->isSameAs(*B_domainMap),
7544 std::invalid_argument,
7545 errPfx <<
"The input domain Map must be the same as "
7546 "(isSameAs) this RowMatrix's domain Map.");
7547 TEUCHOS_TEST_FOR_EXCEPTION
7548 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7549 std::invalid_argument,
7550 errPfx <<
"The input range Map must be the same as "
7551 "(isSameAs) this RowMatrix's range Map.");
7554 TEUCHOS_TEST_FOR_EXCEPTION
7555 (domainMap.is_null() || rangeMap.is_null(),
7556 std::invalid_argument, errPfx <<
"If neither A nor B "
7557 "have a domain and range Map, then you must supply a "
7558 "nonnull domain and range Map to this method.");
// Optional parameters.  callFillComplete keeps its initial value of true
// when there is no parameter list.
7565 bool callFillComplete =
true;
7566 RCP<ParameterList> constructorSublist;
7567 RCP<ParameterList> fillCompleteSublist;
7568 if (! params.is_null()) {
7570 params->get(
"Call fillComplete", callFillComplete);
7571 constructorSublist = sublist(params,
"Constructor parameters");
7572 fillCompleteSublist = sublist(params,
"fillComplete parameters");
// C uses B's row Map.
7575 RCP<const map_type> A_rowMap = A.
getRowMap ();
7576 RCP<const map_type> B_rowMap = B.getRowMap ();
7577 RCP<const map_type> C_rowMap = B_rowMap;
7578 RCP<crs_matrix_type> C;
// Same row Maps: size each row of C as the sum of the entry counts of A's
// and B's corresponding rows.
7584 if (A_rowMap->isSameAs (*B_rowMap)) {
7585 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7586 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
// A contributes entries only when alpha is nonzero.
7589 if (alpha != ZERO) {
7590 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7592 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7597 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7598 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7599 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7603 if (constructorSublist.is_null ()) {
7604 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7606 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7607 constructorSublist));
7618 TEUCHOS_TEST_FOR_EXCEPTION
7619 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7620 "be the same for statically allocated matrices, to ensure "
7621 "that there is sufficient space to do the addition.");
7624 TEUCHOS_TEST_FOR_EXCEPTION
7625 (C.is_null (), std::logic_error,
7626 errPfx <<
"C should not be null at this point. "
7627 "Please report this bug to the Tpetra developers.");
7630 std::ostringstream os;
7631 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7632 std::cerr << os.str ();
7634 using gids_type = nonconst_global_inds_host_view_type;
7635 using vals_type = nonconst_values_host_view_type;
// Contribution of A, skipped entirely when alpha is zero.
7639 if (alpha != ZERO) {
7640 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7641 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7643 const GO globalRow = A_rowMap->getGlobalElement (localRow);
// Grow the scratch buffers when this row has more entries than they hold.
7644 if (A_numEntries > static_cast<size_t> (ind.size ())) {
7645 Kokkos::resize(ind,A_numEntries);
7646 Kokkos::resize(val,A_numEntries);
7648 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7649 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
// Scale the row's values by alpha in place.
7653 for (
size_t k = 0; k < A_numEntries; ++k) {
7654 valView[k] *= alpha;
7657 C->insertGlobalValues (globalRow, A_numEntries,
7658 reinterpret_cast<Scalar *>(valView.data()),
// Contribution of B (this matrix).
7664 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7665 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7666 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7667 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7668 if (B_numEntries > static_cast<size_t> (ind.size ())) {
7669 Kokkos::resize(ind,B_numEntries);
7670 Kokkos::resize(val,B_numEntries);
7672 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7673 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7674 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7677 for (
size_t k = 0; k < B_numEntries; ++k) {
7681 C->insertGlobalValues (globalRow, B_numEntries,
7682 reinterpret_cast<Scalar *>(valView.data()),
// Finish C with the chosen domain and range Maps if requested.
7687 if (callFillComplete) {
7689 std::ostringstream os;
7690 os << *prefix <<
"Call fillComplete on C" << endl;
7691 std::cerr << os.str ();
7693 if (fillCompleteSublist.is_null ()) {
7694 C->fillComplete (theDomainMap, theRangeMap);
7696 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7700 std::ostringstream os;
7701 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7702 std::cerr << os.str ();
7706 std::ostringstream os;
7707 os << *prefix <<
"Done" << endl;
7708 std::cerr << os.str ();
7715 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7719 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7720 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7721 const Teuchos::RCP<const map_type>& domainMap,
7722 const Teuchos::RCP<const map_type>& rangeMap,
7723 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7730 using Teuchos::ArrayRCP;
7731 using Teuchos::ArrayView;
7732 using Teuchos::Comm;
7733 using Teuchos::ParameterList;
7736 typedef LocalOrdinal LO;
7737 typedef GlobalOrdinal GO;
7738 typedef node_type NT;
7743 const bool debug = Behavior::debug(
"CrsMatrix");
7744 const bool verbose = Behavior::verbose(
"CrsMatrix");
7745 int MyPID = getComm ()->getRank ();
7747 std::unique_ptr<std::string> verbosePrefix;
7750 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7751 std::ostringstream os;
7752 os <<
"Start" << endl;
7753 std::cerr << os.str();
7760 bool reverseMode =
false;
7761 bool restrictComm =
false;
7763 int mm_optimization_core_count =
7764 Behavior::TAFC_OptimizationCoreCount();
7765 RCP<ParameterList> matrixparams;
7766 bool overrideAllreduce =
false;
7767 bool useKokkosPath =
false;
7768 if (! params.is_null ()) {
7769 matrixparams = sublist (params,
"CrsMatrix");
7770 reverseMode = params->get (
"Reverse Mode", reverseMode);
7771 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7772 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7773 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7774 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7775 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7777 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7778 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7779 if(reverseMode) isMM =
false;
7783 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7785 int reduced_mismatch = 0;
7786 if (isMM && !overrideAllreduce) {
7789 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7790 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7791 rowTransfer.getRemoteLIDs ().size() == 0);
7792 mismatch = (source_vals != target_vals) ? 1 : 0;
7795 Teuchos::REDUCE_MAX, * (getComm ()));
7798 #ifdef HAVE_TPETRA_MMM_TIMINGS
7799 using Teuchos::TimeMonitor;
7801 if(!params.is_null())
7802 label = params->get(
"Timer Label",label);
7803 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7806 std::ostringstream os;
7807 if(isMM) os<<
":MMOpt";
7808 else os<<
":MMLegacy";
7812 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7820 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7821 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7822 TEUCHOS_TEST_FOR_EXCEPTION(
7823 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7824 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7825 "argument must be either an Import or an Export, and its template "
7826 "parameters must match the corresponding template parameters of the "
7834 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type> (domainTransfer);
7835 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type> (domainTransfer);
7837 if(! domainTransfer.is_null()) {
7838 TEUCHOS_TEST_FOR_EXCEPTION(
7839 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7840 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7841 "argument must be either an Import or an Export, and its template "
7842 "parameters must match the corresponding template parameters of the "
7845 TEUCHOS_TEST_FOR_EXCEPTION(
7846 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7847 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7848 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7849 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7850 "arguments must be of the same type (either Import or Export).");
7852 TEUCHOS_TEST_FOR_EXCEPTION(
7853 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7854 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7855 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7856 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7857 "arguments must be of the same type (either Import or Export).");
7863 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7867 RCP<const map_type> MyRowMap = reverseMode ?
7868 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7869 RCP<const map_type> MyColMap;
7870 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7871 domainMap : getDomainMap ();
7872 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7873 rangeMap : getRangeMap ();
7874 RCP<const map_type> BaseRowMap = MyRowMap;
7875 RCP<const map_type> BaseDomainMap = MyDomainMap;
7883 if (! destMat.is_null ()) {
7894 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7895 ! destMat->getGraph ()->isGloballyIndexed ();
7896 TEUCHOS_TEST_FOR_EXCEPTION(
7897 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7898 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7899 "to be nonnull, if its graph is empty (neither locally nor globally "
7908 TEUCHOS_TEST_FOR_EXCEPTION(
7909 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7910 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7911 "input argument 'destMat' is not the same as the (row) Map specified "
7912 "by the input argument 'rowTransfer'.");
7913 TEUCHOS_TEST_FOR_EXCEPTION(
7914 ! destMat->checkSizes (*
this), std::invalid_argument,
7915 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7916 "destination matrix, but checkSizes() indicates that it is not a legal "
7917 "legal target for redistribution from the source matrix (*this). This "
7918 "may mean that they do not have the same dimensions.");
7932 TEUCHOS_TEST_FOR_EXCEPTION(
7933 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7934 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7935 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7936 TEUCHOS_TEST_FOR_EXCEPTION(
7937 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7938 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7939 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7942 TEUCHOS_TEST_FOR_EXCEPTION(
7943 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7944 std::invalid_argument,
7945 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7946 "argument must be the same as the rebalanced domain map 'domainMap'");
7948 TEUCHOS_TEST_FOR_EXCEPTION(
7949 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7950 std::invalid_argument,
7951 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7952 "argument must be the same as the rebalanced domain map 'domainMap'");
7965 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7966 ArrayView<const LO> ExportLIDs = reverseMode ?
7967 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7968 auto RemoteLIDs = reverseMode ?
7969 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7970 auto PermuteToLIDs = reverseMode ?
7971 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7972 auto PermuteFromLIDs = reverseMode ?
7973 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7974 Distributor& Distor = rowTransfer.getDistributor ();
7977 Teuchos::Array<int> SourcePids;
7980 RCP<const map_type> ReducedRowMap, ReducedColMap,
7981 ReducedDomainMap, ReducedRangeMap;
7982 RCP<const Comm<int> > ReducedComm;
7986 if (destMat.is_null ()) {
7987 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7994 #ifdef HAVE_TPETRA_MMM_TIMINGS
7995 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7997 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7998 ReducedComm = ReducedRowMap.is_null () ?
8000 ReducedRowMap->getComm ();
8001 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
8003 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
8005 MyDomainMap->replaceCommWithSubset (ReducedComm);
8006 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
8008 MyRangeMap->replaceCommWithSubset (ReducedComm);
8011 MyRowMap = ReducedRowMap;
8012 MyDomainMap = ReducedDomainMap;
8013 MyRangeMap = ReducedRangeMap;
8016 if (! ReducedComm.is_null ()) {
8017 MyPID = ReducedComm->getRank ();
8024 ReducedComm = MyRowMap->getComm ();
8033 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8036 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8038 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8039 #ifdef HAVE_TPETRA_MMM_TIMINGS
8040 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8048 Import_Util::getPids (*MyImporter, SourcePids,
false);
8050 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8053 #ifdef HAVE_TPETRA_MMM_TIMINGS
8054 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8056 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8057 IntVectorType SourceCol_pids(getColMap());
8059 SourceDomain_pids.putScalar(MyPID);
8061 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8062 SourcePids.resize (getColMap ()->getLocalNumElements ());
8063 SourceCol_pids.get1dCopy (SourcePids ());
8065 else if (MyImporter.is_null ()) {
8067 #ifdef HAVE_TPETRA_MMM_TIMINGS
8068 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8070 SourcePids.resize (getColMap ()->getLocalNumElements ());
8071 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8073 else if ( ! MyImporter.is_null () &&
8074 ! domainTransfer.is_null () ) {
8079 #ifdef HAVE_TPETRA_MMM_TIMINGS
8080 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8084 IntVectorType TargetDomain_pids (domainMap);
8085 TargetDomain_pids.putScalar (MyPID);
8088 IntVectorType SourceDomain_pids (getDomainMap ());
8091 IntVectorType SourceCol_pids (getColMap ());
8093 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8094 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8096 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8097 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8099 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8100 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8102 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8103 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8106 TEUCHOS_TEST_FOR_EXCEPTION(
8107 true, std::logic_error,
"Tpetra::CrsMatrix::"
8108 "transferAndFillComplete: Should never get here! "
8109 "Please report this bug to a Tpetra developer.");
8111 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8112 SourcePids.resize (getColMap ()->getLocalNumElements ());
8113 SourceCol_pids.get1dCopy (SourcePids ());
8115 else if ( ! MyImporter.is_null () &&
8116 BaseDomainMap->isSameAs (*BaseRowMap) &&
8117 getDomainMap ()->isSameAs (*getRowMap ())) {
8119 #ifdef HAVE_TPETRA_MMM_TIMINGS
8120 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8123 IntVectorType TargetRow_pids (domainMap);
8124 IntVectorType SourceRow_pids (getRowMap ());
8125 IntVectorType SourceCol_pids (getColMap ());
8127 TargetRow_pids.putScalar (MyPID);
8128 if (! reverseMode && xferAsImport !=
nullptr) {
8129 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8131 else if (reverseMode && xferAsExport !=
nullptr) {
8132 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8134 else if (! reverseMode && xferAsExport !=
nullptr) {
8135 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8137 else if (reverseMode && xferAsImport !=
nullptr) {
8138 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8141 TEUCHOS_TEST_FOR_EXCEPTION(
8142 true, std::logic_error,
"Tpetra::CrsMatrix::"
8143 "transferAndFillComplete: Should never get here! "
8144 "Please report this bug to a Tpetra developer.");
8147 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8148 SourcePids.resize (getColMap ()->getLocalNumElements ());
8149 SourceCol_pids.get1dCopy (SourcePids ());
8152 TEUCHOS_TEST_FOR_EXCEPTION(
8153 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8154 "transferAndFillComplete: This method only allows either domainMap == "
8155 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8156 "getDomainMap () == getRowMap ()).");
8160 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8162 #ifdef HAVE_TPETRA_MMM_TIMINGS
8163 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8165 if (constantNumPackets == 0) {
8166 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8167 RemoteLIDs.view_host().size ());
8174 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8175 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8181 #ifdef HAVE_TPETRA_MMM_TIMINGS
8182 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8185 using Teuchos::outArg;
8186 using Teuchos::REDUCE_MAX;
8187 using Teuchos::reduceAll;
8190 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8191 const int myRank = comm->getRank ();
8193 std::ostringstream errStrm;
8197 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8200 destMat->numExportPacketsPerLID_.modify_host ();
8201 numExportPacketsPerLID =
8204 catch (std::exception& e) {
8205 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8206 << e.what () << std::endl;
8210 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8211 "an exception not a subclass of std::exception" << std::endl;
8215 if (! comm.is_null ()) {
8216 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8219 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8220 TEUCHOS_TEST_FOR_EXCEPTION(
8221 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8222 "exception on at least one process.");
8226 std::ostringstream os;
8227 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8229 std::cerr << os.str ();
8234 numExportPacketsPerLID,
8237 constantNumPackets);
8239 catch (std::exception& e) {
8240 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8241 << e.what () << std::endl;
8245 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8246 "an exception not a subclass of std::exception" << std::endl;
8251 std::ostringstream os;
8252 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8254 std::cerr << os.str ();
8257 if (! comm.is_null ()) {
8258 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8261 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8262 TEUCHOS_TEST_FOR_EXCEPTION(
8263 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8264 "exception on at least one process.");
8269 destMat->numExportPacketsPerLID_.modify_host ();
8270 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8273 std::ostringstream os;
8274 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8276 std::cerr << os.str ();
8280 numExportPacketsPerLID,
8283 constantNumPackets);
8285 std::ostringstream os;
8286 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8288 std::cerr << os.str ();
8295 #ifdef HAVE_TPETRA_MMM_TIMINGS
8296 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8298 if (! communication_needed) {
8300 std::ostringstream os;
8301 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8302 std::cerr << os.str ();
8307 if (constantNumPackets == 0) {
8309 std::ostringstream os;
8310 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8312 std::cerr << os.str ();
8317 destMat->numExportPacketsPerLID_.sync_host ();
8318 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8320 destMat->numImportPacketsPerLID_.sync_host ();
8321 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8325 std::ostringstream os;
8326 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8328 std::cerr << os.str ();
8330 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8331 destMat->numImportPacketsPerLID_.view_host());
8333 std::ostringstream os;
8334 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8336 std::cerr << os.str ();
8339 size_t totalImportPackets = 0;
8340 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8341 totalImportPackets += numImportPacketsPerLID[i];
8346 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8347 verbosePrefix.get ());
8348 destMat->imports_.modify_host ();
8349 auto hostImports = destMat->imports_.view_host();
8352 destMat->exports_.sync_host ();
8353 auto hostExports = destMat->exports_.view_host();
8355 std::ostringstream os;
8356 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8358 std::cerr << os.str ();
8360 Distor.doReversePostsAndWaits (hostExports,
8361 numExportPacketsPerLID,
8363 numImportPacketsPerLID);
8365 std::ostringstream os;
8366 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8368 std::cerr << os.str ();
8373 std::ostringstream os;
8374 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8376 std::cerr << os.str ();
8378 destMat->imports_.modify_host ();
8379 auto hostImports = destMat->imports_.view_host();
8382 destMat->exports_.sync_host ();
8383 auto hostExports = destMat->exports_.view_host();
8385 std::ostringstream os;
8386 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8388 std::cerr << os.str ();
8390 Distor.doReversePostsAndWaits (hostExports,
8394 std::ostringstream os;
8395 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8397 std::cerr << os.str ();
8402 if (constantNumPackets == 0) {
8404 std::ostringstream os;
8405 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8407 std::cerr << os.str ();
8412 destMat->numExportPacketsPerLID_.sync_host ();
8413 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8415 destMat->numImportPacketsPerLID_.sync_host ();
8416 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8419 std::ostringstream os;
8420 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8422 std::cerr << os.str ();
8424 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8425 destMat->numImportPacketsPerLID_.view_host());
8427 std::ostringstream os;
8428 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8430 std::cerr << os.str ();
8433 size_t totalImportPackets = 0;
8434 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8435 totalImportPackets += numImportPacketsPerLID[i];
8440 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8441 verbosePrefix.get ());
8442 destMat->imports_.modify_host ();
8443 auto hostImports = destMat->imports_.view_host();
8446 destMat->exports_.sync_host ();
8447 auto hostExports = destMat->exports_.view_host();
8449 std::ostringstream os;
8450 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8452 std::cerr << os.str ();
8454 Distor.doPostsAndWaits (hostExports,
8455 numExportPacketsPerLID,
8457 numImportPacketsPerLID);
8459 std::ostringstream os;
8460 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8462 std::cerr << os.str ();
8467 std::ostringstream os;
8468 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8470 std::cerr << os.str ();
8472 destMat->imports_.modify_host ();
8473 auto hostImports = destMat->imports_.view_host();
8476 destMat->exports_.sync_host ();
8477 auto hostExports = destMat->exports_.view_host();
8479 std::ostringstream os;
8480 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8482 std::cerr << os.str ();
8484 Distor.doPostsAndWaits (hostExports,
8488 std::ostringstream os;
8489 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8491 std::cerr << os.str ();
8502 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8504 Teuchos::Array<int> RemotePids;
8506 Teuchos::Array<int> TargetPids;
8512 destMat->numImportPacketsPerLID_.modify_host();
8514 # ifdef HAVE_TPETRA_MMM_TIMINGS
8515 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8517 ArrayRCP<size_t> CSR_rowptr;
8518 ArrayRCP<GO> CSR_colind_GID;
8519 ArrayRCP<LO> CSR_colind_LID;
8520 ArrayRCP<Scalar> CSR_vals;
8522 destMat->imports_.sync_device ();
8523 destMat->numImportPacketsPerLID_.sync_device ();
8525 size_t N = BaseRowMap->getLocalNumElements ();
8527 auto RemoteLIDs_d = RemoteLIDs.view_device();
8528 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8529 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8534 destMat->imports_.view_device(),
8535 destMat->numImportPacketsPerLID_.view_device(),
8549 if (
typeid (LO) ==
typeid (GO)) {
8550 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8553 CSR_colind_LID.resize (CSR_colind_GID.size());
8555 CSR_colind_LID.resize (CSR_colind_GID.size());
8560 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8562 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8564 #ifdef HAVE_TPETRA_MMM_TIMINGS
8565 tmCopySPRdata = Teuchos::null;
8574 std::ostringstream os;
8575 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8577 std::cerr << os.str ();
8580 #ifdef HAVE_TPETRA_MMM_TIMINGS
8581 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8583 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8593 std::ostringstream os;
8594 os << *verbosePrefix <<
"restrictComm="
8595 << (restrictComm ?
"true" :
"false") << std::endl;
8596 std::cerr << os.str ();
8603 #ifdef HAVE_TPETRA_MMM_TIMINGS
8604 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8607 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8609 MyColMap->replaceCommWithSubset (ReducedComm);
8610 MyColMap = ReducedColMap;
8615 std::ostringstream os;
8616 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8617 std::cerr << os.str ();
8619 destMat->replaceColMap (MyColMap);
8626 if (ReducedComm.is_null ()) {
8628 std::ostringstream os;
8629 os << *verbosePrefix <<
"I am no longer in the communicator; "
8630 "returning" << std::endl;
8631 std::cerr << os.str ();
8640 if ((! reverseMode && xferAsImport !=
nullptr) ||
8641 (reverseMode && xferAsExport !=
nullptr)) {
8643 std::ostringstream os;
8644 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8645 std::cerr << os.str ();
8647 #ifdef HAVE_TPETRA_MMM_TIMINGS
8648 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8650 Import_Util::sortCrsEntries (CSR_rowptr(),
8654 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8655 (reverseMode && xferAsImport !=
nullptr)) {
8657 std::ostringstream os;
8658 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8660 std::cerr << os.str();
8662 #ifdef HAVE_TPETRA_MMM_TIMINGS
8663 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8665 Import_Util::sortAndMergeCrsEntries (CSR_rowptr(),
8668 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8669 CSR_colind_LID.resize (CSR_rowptr[N]);
8670 CSR_vals.resize (CSR_rowptr[N]);
8674 TEUCHOS_TEST_FOR_EXCEPTION(
8675 true, std::logic_error,
"Tpetra::CrsMatrix::"
8676 "transferAndFillComplete: Should never get here! "
8677 "Please report this bug to a Tpetra developer.");
8684 std::ostringstream os;
8685 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8686 std::cerr << os.str ();
8695 #ifdef HAVE_TPETRA_MMM_TIMINGS
8696 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8698 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8710 destMat->numImportPacketsPerLID_.modify_host();
8712 # ifdef HAVE_TPETRA_MMM_TIMINGS
8713 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8715 ArrayRCP<size_t> CSR_rowptr;
8716 ArrayRCP<GO> CSR_colind_GID;
8717 ArrayRCP<LO> CSR_colind_LID;
8718 ArrayRCP<Scalar> CSR_vals;
8720 destMat->imports_.sync_device ();
8721 destMat->numImportPacketsPerLID_.sync_device ();
8723 size_t N = BaseRowMap->getLocalNumElements ();
8725 auto RemoteLIDs_d = RemoteLIDs.view_device();
8726 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8727 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8729 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8730 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8731 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8732 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8733 Kokkos::View<int*,device_type> TargetPids_d;
8738 destMat->imports_.view_device(),
8739 destMat->numImportPacketsPerLID_.view_device(),
8751 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8753 #ifdef HAVE_TPETRA_MMM_TIMINGS
8754 tmCopySPRdata = Teuchos::null;
8763 std::ostringstream os;
8764 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8766 std::cerr << os.str ();
8769 #ifdef HAVE_TPETRA_MMM_TIMINGS
8770 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8772 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8782 std::ostringstream os;
8783 os << *verbosePrefix <<
"restrictComm="
8784 << (restrictComm ?
"true" :
"false") << std::endl;
8785 std::cerr << os.str ();
8792 #ifdef HAVE_TPETRA_MMM_TIMINGS
8793 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8796 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8798 MyColMap->replaceCommWithSubset (ReducedComm);
8799 MyColMap = ReducedColMap;
8804 std::ostringstream os;
8805 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8806 std::cerr << os.str ();
8808 destMat->replaceColMap (MyColMap);
8815 if (ReducedComm.is_null ()) {
8817 std::ostringstream os;
8818 os << *verbosePrefix <<
"I am no longer in the communicator; "
8819 "returning" << std::endl;
8820 std::cerr << os.str ();
8830 if ((! reverseMode && xferAsImport !=
nullptr) ||
8831 (reverseMode && xferAsExport !=
nullptr)) {
8833 std::ostringstream os;
8834 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8835 std::cerr << os.str ();
8837 #ifdef HAVE_TPETRA_MMM_TIMINGS
8838 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8840 Import_Util::sortCrsEntries (CSR_rowptr_d,
8844 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8845 (reverseMode && xferAsImport !=
nullptr)) {
8847 std::ostringstream os;
8848 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8850 std::cerr << os.str();
8852 #ifdef HAVE_TPETRA_MMM_TIMINGS
8853 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8855 Import_Util::sortAndMergeCrsEntries (CSR_rowptr_d,
8860 TEUCHOS_TEST_FOR_EXCEPTION(
8861 true, std::logic_error,
"Tpetra::CrsMatrix::"
8862 "transferAndFillComplete: Should never get here! "
8863 "Please report this bug to a Tpetra developer.");
8871 std::ostringstream os;
8872 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8873 std::cerr << os.str ();
8877 #ifdef HAVE_TPETRA_MMM_TIMINGS
8878 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8880 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8888 #ifdef HAVE_TPETRA_MMM_TIMINGS
8889 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8892 Teuchos::ParameterList esfc_params;
8894 RCP<import_type> MyImport;
8897 if (iallreduceRequest.get () !=
nullptr) {
8899 std::ostringstream os;
8900 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8902 std::cerr << os.str ();
8904 iallreduceRequest->wait ();
8905 if (reduced_mismatch != 0) {
8911 #ifdef HAVE_TPETRA_MMM_TIMINGS
8912 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8917 std::ostringstream os;
8918 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8919 std::cerr << os.str ();
8922 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8923 Teuchos::ArrayRCP<int> type3PIDs;
8924 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8925 auto colind = getCrsGraph()->getLocalIndicesHost();
8928 std::ostringstream os;
8929 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8930 std::cerr << os.str ();
8934 #ifdef HAVE_TPETRA_MMM_TIMINGS
8935 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8937 Import_Util::reverseNeighborDiscovery(*
this,
8949 std::ostringstream os;
8950 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8951 std::cerr << os.str ();
8954 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8955 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8957 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8958 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8960 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8962 std::vector<bool> IsOwned(numCols,
true);
8963 std::vector<int> SentTo(numCols,-1);
8964 if (! MyImporter.is_null ()) {
8965 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8966 IsOwned[rlid]=
false;
8970 std::vector<std::pair<int,GO> > usrtg;
8971 usrtg.reserve(TEPID2.size());
8974 const auto& colMap = * (this->getColMap ());
8976 const LO row = TELID2[i];
8977 const int pid = TEPID2[i];
8978 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8979 const int col = colind[j];
8980 if (IsOwned[col] && SentTo[col] != pid) {
8982 GO gid = colMap.getGlobalElement (col);
8983 usrtg.push_back (std::pair<int,GO> (pid, gid));
8991 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8993 usrtg.erase(eopg,usrtg.end());
8996 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8997 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
9000 for(
auto && p : usrtg) {
9001 EPID2[pos]= p.first;
9002 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
9006 Teuchos::ArrayView<int> EPID3 = type3PIDs();
9007 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
9008 GO InfGID = std::numeric_limits<GO>::max();
9009 int InfPID = INT_MAX;
9012 #endif // TPETRA_MIN3
9013 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9014 int i1=0, i2=0, i3=0;
9015 int Len1 = EPID1.size();
9016 int Len2 = EPID2.size();
9017 int Len3 = EPID3.size();
9019 int MyLen=Len1+Len2+Len3;
9020 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
9021 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
9024 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9025 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9026 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9027 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9029 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9030 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9031 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9033 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9034 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9037 #endif // TPETRA_MIN3
9038 bool added_entry=
false;
9040 if(PID1 == MIN_PID && GID1 == MIN_GID){
9041 userExportLIDs[iloc]=ELID1[i1];
9042 userExportPIDs[iloc]=EPID1[i1];
9047 if(PID2 == MIN_PID && GID2 == MIN_GID){
9049 userExportLIDs[iloc]=ELID2[i2];
9050 userExportPIDs[iloc]=EPID2[i2];
9056 if(PID3 == MIN_PID && GID3 == MIN_GID){
9058 userExportLIDs[iloc]=ELID3[i3];
9059 userExportPIDs[iloc]=EPID3[i3];
9067 std::ostringstream os;
9068 os << *verbosePrefix <<
"Create Import" << std::endl;
9069 std::cerr << os.str ();
9072 #ifdef HAVE_TPETRA_MMM_TIMINGS
9073 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9075 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9077 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9078 MyImport = rcp (
new import_type (MyDomainMap,
9081 userExportLIDs.view(0,iloc).getConst(),
9082 userExportPIDs.view(0,iloc).getConst(),
9087 std::ostringstream os;
9088 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9089 std::cerr << os.str ();
9093 #ifdef HAVE_TPETRA_MMM_TIMINGS
9094 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9095 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9097 if(!params.is_null())
9098 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9099 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9105 #ifdef HAVE_TPETRA_MMM_TIMINGS
9106 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9109 std::ostringstream os;
9110 os << *verbosePrefix <<
"Create Import" << std::endl;
9111 std::cerr << os.str ();
9114 #ifdef HAVE_TPETRA_MMM_TIMINGS
9115 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9117 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9118 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9119 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9120 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9123 std::ostringstream os;
9124 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9125 std::cerr << os.str ();
9128 #ifdef HAVE_TPETRA_MMM_TIMINGS
9129 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9130 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9132 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9135 if (!params.is_null ()) {
9136 esfc_params.set (
"compute global constants",
9137 params->get (
"compute global constants",
true));
9139 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9140 MyImport, Teuchos::null,
9141 rcp (
new Teuchos::ParameterList (esfc_params)));
9144 #ifdef HAVE_TPETRA_MMM_TIMINGS
9145 tmIESFC = Teuchos::null;
9149 std::ostringstream os;
9150 os << *verbosePrefix <<
"Done" << endl;
9151 std::cerr << os.str ();
// Thin forwarding wrapper for the single-importer case.
// NOTE(review): the line naming this member function is not visible in this
// listing; presumably this is CrsMatrix::importAndFillComplete -- confirm.
//
// Parameters visible here:
//   domainMap, rangeMap - Maps forwarded unchanged to transferAndFillComplete.
//   params              - ParameterList forwarded unchanged.
// destMatrix and importer are also forwarded; their declarations are on
// lines not shown in this listing.
9156 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9161 const Teuchos::RCP<const map_type>& domainMap,
9162 const Teuchos::RCP<const map_type>& rangeMap,
9163 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// Delegate all work; Teuchos::null fills the third argument, which the
// two-importer overload in this file uses for its domain importer.
9165 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// Thin forwarding wrapper for the two-importer (row + domain) case.
// NOTE(review): the line naming this member function is not visible in this
// listing; presumably this is the CrsMatrix::importAndFillComplete overload
// that takes both a row and a domain importer -- confirm.
//
// Parameters visible here:
//   domainMap, rangeMap - Maps forwarded unchanged to transferAndFillComplete.
//   params              - ParameterList forwarded unchanged.
// destMatrix, rowImporter and domainImporter are also forwarded; their
// declarations are on lines not shown in this listing.
9168 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9174 const Teuchos::RCP<const map_type>& domainMap,
9175 const Teuchos::RCP<const map_type>& rangeMap,
9176 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// domainImporter is wrapped with Teuchos::rcpFromRef so it can be passed in
// the RCP-typed third argument (presumably a non-owning RCP -- confirm
// against the Teuchos documentation).
9178 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// Thin forwarding wrapper for the single-exporter case.
// NOTE(review): the line naming this member function is not visible in this
// listing; presumably this is CrsMatrix::exportAndFillComplete -- confirm.
//
// Parameters visible here:
//   domainMap, rangeMap - Maps forwarded unchanged to transferAndFillComplete.
//   params              - ParameterList forwarded unchanged.
// destMatrix and exporter are also forwarded; their declarations are on
// lines not shown in this listing.
9181 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9186 const Teuchos::RCP<const map_type>& domainMap,
9187 const Teuchos::RCP<const map_type>& rangeMap,
9188 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// Delegate all work; Teuchos::null fills the third argument, which the
// two-exporter overload in this file uses for its domain exporter.
9190 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// Thin forwarding wrapper for the two-exporter (row + domain) case.
// NOTE(review): the line naming this member function is not visible in this
// listing; presumably this is the CrsMatrix::exportAndFillComplete overload
// that takes both a row and a domain exporter -- confirm.
//
// Parameters visible here:
//   domainMap, rangeMap - Maps forwarded unchanged to transferAndFillComplete.
//   params              - ParameterList forwarded unchanged.
// destMatrix, rowExporter and domainExporter are also forwarded; their
// declarations are on lines not shown in this listing.
9193 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9199 const Teuchos::RCP<const map_type>& domainMap,
9200 const Teuchos::RCP<const map_type>& rangeMap,
9201 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
// domainExporter is wrapped with Teuchos::rcpFromRef so it can be passed in
// the RCP-typed third argument (presumably a non-owning RCP -- confirm
// against the Teuchos documentation).
9203 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Explicit-instantiation macro for the class itself: expands to an explicit
// instantiation definition of CrsMatrix<SCALAR, LO, GO, NODE>.
// Must be expanded in a scope where the name CrsMatrix is visible.
9214 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9216 template class CrsMatrix< SCALAR , LO , GO , NODE >;
// Explicit-instantiation macro for the member template convert<>():
// expands to an explicit instantiation of
//   CrsMatrix<SI, LO, GO, NODE>::convert<SO>() const
// whose return type is Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>>.
// SI is the Scalar type of the matrix being converted, SO the Scalar type of
// the result.
9218 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9220 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9221 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
// Expands to a declaration of importAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>, single-Import form. The declared function
// takes the source matrix, one Import, the domain and range Maps and a
// ParameterList, and returns Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// The Import and Map template arguments are spelled via the matrix's own
// local_ordinal_type / global_ordinal_type / node_type typedefs.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing; if it carries a `template` keyword, the expansion
// is an explicit instantiation/specialization rather than a plain
// declaration -- confirm against the full file.
9223 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9225 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9226 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9227 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9228 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9229 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9230 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9231 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9232 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9233 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9234 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9235 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9236 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of importAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>, two-Import form. The declared function
// takes the source matrix, a row Import and a domain Import, the domain and
// range Maps and a ParameterList, and returns
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing; if it carries a `template` keyword, the expansion
// is an explicit instantiation/specialization rather than a plain
// declaration -- confirm against the full file.
9238 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9240 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9241 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9242 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9243 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9244 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9245 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9246 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9247 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9248 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9249 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9250 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9251 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9252 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9253 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9254 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of exportAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>, single-Export form. The declared function
// takes the source matrix, one Export, the domain and range Maps and a
// ParameterList, and returns Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing; if it carries a `template` keyword, the expansion
// is an explicit instantiation/specialization rather than a plain
// declaration -- confirm against the full file.
9257 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9259 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9260 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9261 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9262 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9263 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9264 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9265 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9266 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9267 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9268 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9269 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9270 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of exportAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>, two-Export form. The declared function
// takes the source matrix, a row Export and a domain Export, the domain and
// range Maps and a ParameterList, and returns
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the line between the #define and the return type is not
// visible in this listing; if it carries a `template` keyword, the expansion
// is an explicit instantiation/specialization rather than a plain
// declaration -- confirm against the full file.
9272 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9274 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9275 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9276 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9277 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9278 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9279 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9280 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9281 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9282 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9283 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9284 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9285 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9286 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9287 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9288 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella macro: for one (SCALAR, LO, GO, NODE) combination, expands in
// order to the class instantiation macro followed by the single-transfer
// import and export helper macros and then their two-transfer (_TWO)
// counterparts. It does not invoke TPETRA_CRSMATRIX_CONVERT_INSTANT, which
// takes two Scalar types and so must be expanded separately.
9291 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9292 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9293 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9294 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9295 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9296 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9298 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror num_row_entries_type
Row offsets for "1-D" storage.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrixDevice() that supports local matrix-vect...
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.