Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_packCrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
42 
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
54 #include <memory>
55 #include <sstream>
56 #include <stdexcept>
57 #include <string>
58 
81 
82 namespace Tpetra {
83 
84 //
85 // Users must never rely on anything in the Details namespace.
86 //
87 namespace Details {
88 
89 namespace PackCrsMatrixImpl {
97 template<class OutputOffsetsViewType,
98  class CountsViewType,
99  class InputOffsetsViewType,
100  class InputLocalRowIndicesViewType,
101  class InputLocalRowPidsViewType,
102  const bool debug =
103 #ifdef HAVE_TPETRA_DEBUG
104  true
105 #else
106  false
107 #endif // HAVE_TPETRA_DEBUG
108  >
110 public:
111  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112  typedef typename CountsViewType::non_const_value_type count_type;
113  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
116  // output Views drive where execution happens.
117  typedef typename OutputOffsetsViewType::device_type device_type;
118  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
119  typename device_type::execution_space>::value,
120  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121  static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
122  "OutputOffsetsViewType must be a Kokkos::View.");
123  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125  static_assert (std::is_integral<output_offset_type>::value,
126  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127  static_assert (Kokkos::is_view<CountsViewType>::value,
128  "CountsViewType must be a Kokkos::View.");
129  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130  "CountsViewType must be a nonconst Kokkos::View.");
131  static_assert (std::is_integral<count_type>::value,
132  "The type of each entry of CountsViewType must be a built-in integer type.");
133  static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
134  "InputOffsetsViewType must be a Kokkos::View.");
135  static_assert (std::is_integral<input_offset_type>::value,
136  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137  static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
138  "InputLocalRowIndicesViewType must be a Kokkos::View.");
139  static_assert (std::is_integral<local_row_index_type>::value,
140  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
141 
142  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
143  const CountsViewType& counts,
144  const InputOffsetsViewType& rowOffsets,
145  const InputLocalRowIndicesViewType& lclRowInds,
146  const InputLocalRowPidsViewType& lclRowPids,
147  const count_type sizeOfLclCount,
148  const count_type sizeOfGblColInd,
149  const count_type sizeOfPid,
150  const count_type sizeOfValue) :
151  outputOffsets_ (outputOffsets),
152  counts_ (counts),
153  rowOffsets_ (rowOffsets),
154  lclRowInds_ (lclRowInds),
155  lclRowPids_ (lclRowPids),
156  sizeOfLclCount_ (sizeOfLclCount),
157  sizeOfGblColInd_ (sizeOfGblColInd),
158  sizeOfPid_ (sizeOfPid),
159  sizeOfValue_ (sizeOfValue),
160  error_ ("error") // don't forget this, or you'll get segfaults!
161  {
162  if (debug) {
163  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
164 
165  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
166  std::ostringstream os;
167  os << "lclRowInds.extent(0) = " << numRowsToPack
168  << " != counts.extent(0) = " << counts_.extent (0)
169  << ".";
170  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
171  }
172  if (static_cast<size_t> (numRowsToPack + 1) !=
173  static_cast<size_t> (outputOffsets_.extent (0))) {
174  std::ostringstream os;
175  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
177  << ".";
178  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
179  }
180  }
181  }
182 
183  KOKKOS_INLINE_FUNCTION void
184  operator() (const local_row_index_type& curInd,
185  output_offset_type& update,
186  const bool final) const
187  {
188  if (debug) {
189  if (curInd < static_cast<local_row_index_type> (0)) {
190  error_ () = 1;
191  return;
192  }
193  }
194 
195  if (final) {
196  if (debug) {
197  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
198  error_ () = 2;
199  return;
200  }
201  }
202  outputOffsets_(curInd) = update;
203  }
204 
205  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
206  const auto lclRow = lclRowInds_(curInd);
207  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
208  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
209  error_ () = 3;
210  return;
211  }
212  // count_type could differ from the type of each row offset.
213  // For example, row offsets might each be 64 bits, but if their
214  // difference always fits in 32 bits, we may then safely use a
215  // 32-bit count_type.
216  const count_type count =
217  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
218 
219  // We pack first the number of entries in the row, then that
220  // many global column indices, then that many pids (if any),
221  // then that many values. However, if the number of entries in
222  // the row is zero, we pack nothing.
223  const count_type numBytes = (count == 0) ?
224  static_cast<count_type> (0) :
225  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
227  sizeOfValue_);
228 
229  if (final) {
230  counts_(curInd) = numBytes;
231  }
232  update += numBytes;
233  }
234  }
235 
236  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
237  // have join both with and without volatile! Otherwise intrawarp
238  // joins are really slow on GPUs.
239 
241  int getError () const {
242  typedef typename device_type::execution_space execution_space;
243  auto error_h = Kokkos::create_mirror_view (error_);
244  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
245  Kokkos::deep_copy (execution_space(), error_h, error_);
246  return error_h ();
247  }
248 
249 private:
250  OutputOffsetsViewType outputOffsets_;
251  CountsViewType counts_;
252  typename InputOffsetsViewType::const_type rowOffsets_;
253  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
254  typename InputLocalRowPidsViewType::const_type lclRowPids_;
255  count_type sizeOfLclCount_;
256  count_type sizeOfGblColInd_;
257  count_type sizeOfPid_;
258  count_type sizeOfValue_;
259  Kokkos::View<int, device_type> error_;
260 };
261 
271 template<class OutputOffsetsViewType,
272  class CountsViewType,
273  class InputOffsetsViewType,
274  class InputLocalRowIndicesViewType,
275  class InputLocalRowPidsViewType>
276 typename CountsViewType::non_const_value_type
277 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
278  const CountsViewType& counts,
279  const InputOffsetsViewType& rowOffsets,
280  const InputLocalRowIndicesViewType& lclRowInds,
281  const InputLocalRowPidsViewType& lclRowPids,
282  const typename CountsViewType::non_const_value_type sizeOfLclCount,
283  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
284  const typename CountsViewType::non_const_value_type sizeOfPid,
285  const typename CountsViewType::non_const_value_type sizeOfValue)
286 {
287  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
288  CountsViewType, typename InputOffsetsViewType::const_type,
289  typename InputLocalRowIndicesViewType::const_type,
290  typename InputLocalRowPidsViewType::const_type> functor_type;
291  typedef typename CountsViewType::non_const_value_type count_type;
292  typedef typename OutputOffsetsViewType::size_type size_type;
293  typedef typename OutputOffsetsViewType::execution_space execution_space;
294  typedef typename functor_type::local_row_index_type LO;
295  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
296  const char prefix[] = "computeNumPacketsAndOffsets: ";
297 
298  count_type count = 0;
299  const count_type numRowsToPack = lclRowInds.extent (0);
300 
301  if (numRowsToPack == 0) {
302  return count;
303  }
304  else {
305  TEUCHOS_TEST_FOR_EXCEPTION
306  (rowOffsets.extent (0) <= static_cast<size_type> (1),
307  std::invalid_argument, prefix << "There is at least one row to pack, "
308  "but the matrix has no rows. lclRowInds.extent(0) = " <<
309  numRowsToPack << ", but rowOffsets.extent(0) = " <<
310  rowOffsets.extent (0) << " <= 1.");
311  TEUCHOS_TEST_FOR_EXCEPTION
312  (outputOffsets.extent (0) !=
313  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
314  prefix << "Output dimension does not match number of rows to pack. "
315  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
316  << " != lclRowInds.extent(0) + 1 = "
317  << static_cast<size_type> (numRowsToPack + 1) << ".");
318  TEUCHOS_TEST_FOR_EXCEPTION
319  (counts.extent (0) != numRowsToPack, std::invalid_argument,
320  prefix << "counts.extent(0) = " << counts.extent (0)
321  << " != numRowsToPack = " << numRowsToPack << ".");
322 
323  functor_type f (outputOffsets, counts, rowOffsets,
324  lclRowInds, lclRowPids, sizeOfLclCount,
325  sizeOfGblColInd, sizeOfPid, sizeOfValue);
326  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
327 
328  // At least in debug mode, this functor checks for errors.
329  const int errCode = f.getError ();
330  TEUCHOS_TEST_FOR_EXCEPTION
331  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
332  << errCode << " != 0.");
333 
334 #if 0
335  size_t total = 0;
336  for (LO k = 0; k < numRowsToPack; ++k) {
337  total += counts[k];
338  }
339  if (outputOffsets(numRowsToPack) != total) {
340  if (errStr.get () == NULL) {
341  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
342  }
343  std::ostringstream& os = *errStr;
344  os << prefix
345  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
346  << outputOffsets(numRowsToPack) << " != sum of counts = "
347  << total << "." << std::endl;
348  if (numRowsToPack != 0) {
349  // Only print the array if it's not too long.
350  if (numRowsToPack < static_cast<LO> (10)) {
351  os << "outputOffsets: [";
352  for (LO i = 0; i <= numRowsToPack; ++i) {
353  os << outputOffsets(i);
354  if (static_cast<LO> (i + 1) <= numRowsToPack) {
355  os << ",";
356  }
357  }
358  os << "]" << std::endl;
359  os << "counts: [";
360  for (LO i = 0; i < numRowsToPack; ++i) {
361  os << counts(i);
362  if (static_cast<LO> (i + 1) < numRowsToPack) {
363  os << ",";
364  }
365  }
366  os << "]" << std::endl;
367  }
368  else {
369  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
370  << outputOffsets(numRowsToPack-1) << "." << std::endl;
371  }
372  }
373  count = outputOffsets(numRowsToPack);
374  return {false, errStr};
375  }
376 #endif // HAVE_TPETRA_DEBUG
377 
378  // Get last entry of outputOffsets, which is the sum of the entries
379  // of counts. Don't assume UVM.
380  using Tpetra::Details::getEntryOnHost;
381  return static_cast<count_type> (getEntryOnHost (outputOffsets,
382  numRowsToPack));
383  }
384 }
385 
401 template<class ST, class ColumnMap, class BufferDeviceType>
402 KOKKOS_FUNCTION
403 Kokkos::pair<int, size_t>
404 packCrsMatrixRow (const ColumnMap& col_map,
405  const Kokkos::View<char*, BufferDeviceType>& exports,
406  const typename PackTraits<typename ColumnMap::local_ordinal_type>::input_array_type& lids_in,
407  const typename PackTraits<int>::input_array_type& pids_in,
408  const typename PackTraits<ST>::input_array_type& vals_in,
409  const size_t offset,
410  const size_t num_ent,
411  const size_t num_bytes_per_value,
412  const bool pack_pids)
413 {
414  using Kokkos::subview;
415  using LO = typename ColumnMap::local_ordinal_type;
416  using GO = typename ColumnMap::global_ordinal_type;
417  using return_type = Kokkos::pair<int, size_t>;
418 
419  if (num_ent == 0) {
420  // Empty rows always take zero bytes, to ensure sparsity.
421  return return_type (0, 0);
422  }
423 
424  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
425  const size_t num_ent_beg = offset;
426  const size_t num_ent_len = PackTraits<LO>::packValueCount (num_ent_LO);
427 
428  const size_t gids_beg = num_ent_beg + num_ent_len;
429  const size_t gids_len = num_ent * PackTraits<GO>::packValueCount (GO (0));
430 
431  const size_t pids_beg = gids_beg + gids_len;
432  const size_t pids_len = pack_pids ?
433  num_ent * PackTraits<int>::packValueCount (int (0)) :
434  static_cast<size_t> (0);
435 
436  const size_t vals_beg = gids_beg + gids_len + pids_len;
437  const size_t vals_len = num_ent * num_bytes_per_value;
438 
439  char* const num_ent_out = exports.data () + num_ent_beg;
440  char* const gids_out = exports.data () + gids_beg;
441  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
442  char* const vals_out = exports.data () + vals_beg;
443 
444  size_t num_bytes_out = 0;
445  int error_code = 0;
446  num_bytes_out += PackTraits<LO>::packValue (num_ent_out, num_ent_LO);
447 
448  {
449  // Copy column indices one at a time, so that we don't need
450  // temporary storage.
451  for (size_t k = 0; k < num_ent; ++k) {
452  const LO lid = lids_in[k];
453  const GO gid = col_map.getGlobalElement (lid);
454  num_bytes_out += PackTraits<GO>::packValue (gids_out, k, gid);
455  }
456  // Copy PIDs one at a time, so that we don't need temporary storage.
457  if (pack_pids) {
458  for (size_t k = 0; k < num_ent; ++k) {
459  const LO lid = lids_in[k];
460  const int pid = pids_in[lid];
461  num_bytes_out += PackTraits<int>::packValue (pids_out, k, pid);
462  }
463  }
464  const auto p =
465  PackTraits<ST>::packArray (vals_out, vals_in.data (), num_ent);
466  error_code += p.first;
467  num_bytes_out += p.second;
468  }
469 
470  if (error_code != 0) {
471  return return_type (10, num_bytes_out);
472  }
473 
474  const size_t expected_num_bytes =
475  num_ent_len + gids_len + pids_len + vals_len;
476  if (num_bytes_out != expected_num_bytes) {
477  return return_type (11, num_bytes_out);
478  }
479  return return_type (0, num_bytes_out);
480 }
481 
482 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
483 struct PackCrsMatrixFunctor {
484  typedef LocalMatrix local_matrix_device_type;
485  typedef LocalMap local_map_type;
486  typedef typename local_matrix_device_type::value_type ST;
487  typedef typename local_map_type::local_ordinal_type LO;
488  typedef typename local_map_type::global_ordinal_type GO;
489  typedef typename local_matrix_device_type::device_type DT;
490 
491  typedef Kokkos::View<const size_t*, BufferDeviceType>
492  num_packets_per_lid_view_type;
493  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
494  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
495  using export_lids_view_type = typename PackTraits<LO>::input_array_type;
496  using source_pids_view_type = typename PackTraits<int>::input_array_type;
497 
498  typedef typename num_packets_per_lid_view_type::non_const_value_type
499  count_type;
500  typedef typename offsets_view_type::non_const_value_type
501  offset_type;
502  typedef Kokkos::pair<int, LO> value_type;
503 
504  static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
505  "local_map_type::local_ordinal_type and "
506  "local_matrix_device_type::ordinal_type must be the same.");
507 
508  local_matrix_device_type local_matrix;
509  local_map_type local_col_map;
510  exports_view_type exports;
511  num_packets_per_lid_view_type num_packets_per_lid;
512  export_lids_view_type export_lids;
513  source_pids_view_type source_pids;
514  offsets_view_type offsets;
515  size_t num_bytes_per_value;
516  bool pack_pids;
517 
518  PackCrsMatrixFunctor (const local_matrix_device_type& local_matrix_in,
519  const local_map_type& local_col_map_in,
520  const exports_view_type& exports_in,
521  const num_packets_per_lid_view_type& num_packets_per_lid_in,
522  const export_lids_view_type& export_lids_in,
523  const source_pids_view_type& source_pids_in,
524  const offsets_view_type& offsets_in,
525  const size_t num_bytes_per_value_in,
526  const bool pack_pids_in) :
527  local_matrix (local_matrix_in),
528  local_col_map (local_col_map_in),
529  exports (exports_in),
530  num_packets_per_lid (num_packets_per_lid_in),
531  export_lids (export_lids_in),
532  source_pids (source_pids_in),
533  offsets (offsets_in),
534  num_bytes_per_value (num_bytes_per_value_in),
535  pack_pids (pack_pids_in)
536  {
537  const LO numRows = local_matrix_in.numRows ();
538  const LO rowMapDim =
539  static_cast<LO> (local_matrix.graph.row_map.extent (0));
540  TEUCHOS_TEST_FOR_EXCEPTION
541  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
542  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
543  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
544  }
545 
546  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
547  {
548  using ::Tpetra::Details::OrdinalTraits;
549  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
550  }
551 
552  KOKKOS_INLINE_FUNCTION void
553  join (value_type& dst, const value_type& src) const
554  {
555  // `dst` should reflect the first (least) bad index and all other
556  // associated error codes and data, so prefer keeping it.
557  if (src.first != 0 && dst.first == 0) {
558  dst = src;
559  }
560  }
561 
562  KOKKOS_INLINE_FUNCTION
563  void operator() (const LO i, value_type& dst) const
564  {
565  const size_t offset = offsets[i];
566  const LO export_lid = export_lids[i];
567  const size_t buf_size = exports.size();
568  const size_t num_bytes = num_packets_per_lid(i);
569  const size_t num_ent =
570  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
571  - local_matrix.graph.row_map[export_lid]);
572 
573  // Only pack this row's data if it has a nonzero number of
574  // entries. We can do this because receiving processes get the
575  // number of packets, and will know that zero packets means zero
576  // entries.
577  if (num_ent == 0) {
578  return;
579  }
580 
581  if (export_lid >= local_matrix.numRows ()) {
582  if (dst.first != 0) { // keep only the first error
583  dst = Kokkos::make_pair (1, i); // invalid row
584  }
585  return;
586  }
587  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
588  if (dst.first != 0) { // keep only the first error
589  dst = Kokkos::make_pair (2, i); // out of bounds
590  }
591  return;
592  }
593 
594  // We can now pack this row
595 
596  // Since the matrix is locally indexed on the calling process, we
597  // have to use its column Map (which it _must_ have in this case)
598  // to convert to global indices.
599  const auto row_beg = local_matrix.graph.row_map[export_lid];
600  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
601  auto vals_in = subview (local_matrix.values,
602  Kokkos::make_pair (row_beg, row_end));
603  auto lids_in = subview (local_matrix.graph.entries,
604  Kokkos::make_pair (row_beg, row_end));
605  typedef local_map_type LMT;
606  typedef BufferDeviceType BDT;
607  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
608  source_pids, vals_in, offset,
609  num_ent, num_bytes_per_value,
610  pack_pids);
611  int error_code_this_row = p.first;
612  size_t num_bytes_packed_this_row = p.second;
613  if (error_code_this_row != 0) {
614  if (dst.first != 0) { // keep only the first error
615  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
616  }
617  }
618  else if (num_bytes_packed_this_row != num_bytes) {
619  if (dst.first != 0) { // keep only the first error
620  dst = Kokkos::make_pair (3, i);
621  }
622  }
623  }
624 };
625 
633 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
634 void
635 do_pack (const LocalMatrix& local_matrix,
636  const LocalMap& local_map,
637  const Kokkos::View<char*, BufferDeviceType>& exports,
638  const typename PackTraits<size_t>::input_array_type& num_packets_per_lid,
639  const typename PackTraits<typename LocalMap::local_ordinal_type>::input_array_type& export_lids,
640  const typename PackTraits<int>::input_array_type& source_pids,
641  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
642  const size_t num_bytes_per_value,
643  const bool pack_pids)
644 {
645  using LO = typename LocalMap::local_ordinal_type;
646  using DT = typename LocalMatrix::device_type;
647  using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
648  const char prefix[] = "Tpetra::Details::do_pack: ";
649 
650  if (export_lids.extent (0) != 0) {
651  TEUCHOS_TEST_FOR_EXCEPTION
652  (static_cast<size_t> (offsets.extent (0)) !=
653  static_cast<size_t> (export_lids.extent (0) + 1),
654  std::invalid_argument, prefix << "offsets.extent(0) = "
655  << offsets.extent (0) << " != export_lids.extent(0) (= "
656  << export_lids.extent (0) << ") + 1.");
657  TEUCHOS_TEST_FOR_EXCEPTION
658  (export_lids.extent (0) != num_packets_per_lid.extent (0),
659  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
660  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
661  << num_packets_per_lid.extent (0) << ".");
662  // If exports has nonzero length at this point, then the matrix
663  // has at least one entry to pack. Thus, if packing process
664  // ranks, we had better have at least one process rank to pack.
665  TEUCHOS_TEST_FOR_EXCEPTION
666  (pack_pids && exports.extent (0) != 0 &&
667  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
668  "pack_pids is true, and exports.extent(0) = " <<
669  exports.extent (0) << " != 0, meaning that we need to pack at "
670  "least one matrix entry, but source_pids.extent(0) = 0.");
671  }
672 
673  using pack_functor_type =
674  PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
675  pack_functor_type f (local_matrix, local_map, exports,
676  num_packets_per_lid, export_lids,
677  source_pids, offsets, num_bytes_per_value,
678  pack_pids);
679 
680  typename pack_functor_type::value_type result;
681  range_type range (0, num_packets_per_lid.extent (0));
682  Kokkos::parallel_reduce (range, f, result);
683 
684  if (result.first != 0) {
685  // We can't deep_copy from AnonymousSpace Views, so we can't print
686  // out any information from them in case of error.
687  TEUCHOS_TEST_FOR_EXCEPTION
688  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor "
689  "reported error code " << result.first << " for the first "
690  "bad row " << result.second << ".");
691  }
692 }
693 
723 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
724 void
725 packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
726  Kokkos::DualView<char*, BufferDeviceType>& exports,
727  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
728  const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
729  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
730  size_t& constant_num_packets,
731  const bool pack_pids)
732 {
733  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix(
734  "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
735  "Import/Export"
736  );
737  using Kokkos::View;
738  typedef BufferDeviceType DT;
739  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
740  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
741  constexpr bool debug = false;
742 
743  auto local_matrix = sourceMatrix.getLocalMatrixDevice ();
744  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
745 
746  // Setting this to zero tells the caller to expect a possibly
747  // different ("nonconstant") number of packets per local index
748  // (i.e., a possibly different number of entries per row).
749  constant_num_packets = 0;
750 
751  const size_t num_export_lids =
752  static_cast<size_t> (export_lids.extent (0));
753  TEUCHOS_TEST_FOR_EXCEPTION
754  (num_export_lids !=
755  static_cast<size_t> (num_packets_per_lid.extent (0)),
756  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
757  << num_export_lids << " != num_packets_per_lid.extent(0) = "
758  << num_packets_per_lid.extent (0) << ".");
759  if (num_export_lids != 0) {
760  TEUCHOS_TEST_FOR_EXCEPTION
761  (num_packets_per_lid.data () == NULL, std::invalid_argument,
762  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
763  "num_packets_per_lid.data() = "
764  << num_packets_per_lid.data () << " == NULL.");
765  }
766 
767  const size_t num_bytes_per_lid = PackTraits<LO>::packValueCount (LO (0));
768  const size_t num_bytes_per_gid = PackTraits<GO>::packValueCount (GO (0));
769  const size_t num_bytes_per_pid = PackTraits<int>::packValueCount (int (0));
770 
771  size_t num_bytes_per_value = 0;
772  if (PackTraits<ST>::compileTimeSize) {
773  // Assume ST is default constructible; packValueCount wants an instance.
774  num_bytes_per_value = PackTraits<ST>::packValueCount (ST ());
775  }
776  else {
777  // Since the packed data come from the source matrix, we can use
778  // the source matrix to get the number of bytes per Scalar value
779  // stored in the matrix. This assumes that all Scalar values in
780  // the source matrix require the same number of bytes. If the
781  // source matrix has no entries on the calling process, then we
782  // hope that some process does have some idea how big a Scalar
783  // value is. Of course, if no processes have any entries, then no
784  // values should be packed (though this does assume that in our
785  // packing scheme, rows with zero entries take zero bytes).
786  size_t num_bytes_per_value_l = 0;
787  if (local_matrix.values.extent(0) > 0) {
788  const ST& val = local_matrix.values(0);
789  num_bytes_per_value_l = PackTraits<ST>::packValueCount (val);
790  }
791  using Teuchos::reduceAll;
792  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
793  Teuchos::REDUCE_MAX,
794  num_bytes_per_value_l,
795  Teuchos::outArg (num_bytes_per_value));
796  }
797 
798  if (num_export_lids == 0) {
799  exports = exports_view_type ("exports", 0);
800  return;
801  }
802 
803  // Array of offsets into the pack buffer.
804  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
805 
806  // Compute number of packets per LID (row to send), as well as
807  // corresponding offsets (the prefix sum of the packet counts).
808  const size_t count =
809  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
810  local_matrix.graph.row_map, export_lids,
811  export_pids,
812  num_bytes_per_lid, num_bytes_per_gid,
813  num_bytes_per_pid, num_bytes_per_value);
814 
815  // Resize the output pack buffer if needed.
816  if (count > static_cast<size_t> (exports.extent (0))) {
817  exports = exports_view_type ("exports", count);
818  if (debug) {
819  std::ostringstream os;
820  os << "*** exports resized to " << count << std::endl;
821  std::cerr << os.str ();
822  }
823  }
824  if (debug) {
825  std::ostringstream os;
826  os << "*** count: " << count << ", exports.extent(0): "
827  << exports.extent (0) << std::endl;
828  std::cerr << os.str ();
829  }
830 
831  // If exports has nonzero length at this point, then the matrix has
832  // at least one entry to pack. Thus, if packing process ranks, we
833  // had better have at least one process rank to pack.
834  TEUCHOS_TEST_FOR_EXCEPTION
835  (pack_pids && exports.extent (0) != 0 &&
836  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
837  "pack_pids is true, and exports.extent(0) = " <<
838  exports.extent (0) << " != 0, meaning that we need to pack at least "
839  "one matrix entry, but export_pids.extent(0) = 0.");
840 
841  typedef typename std::decay<decltype (local_matrix)>::type
842  local_matrix_device_type;
843  typedef typename std::decay<decltype (local_col_map)>::type
844  local_map_type;
845 
846  exports.modify_device ();
847  auto exports_d = exports.view_device ();
848  do_pack<local_matrix_device_type, local_map_type, DT>
849  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
850  export_lids, export_pids, offsets, num_bytes_per_value,
851  pack_pids);
852  // If we got this far, we succeeded.
853 }
854 
855 } // namespace PackCrsMatrixImpl
856 
857 template<typename ST, typename LO, typename GO, typename NT>
858 void
860  Teuchos::Array<char>& exports,
861  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
862  const Teuchos::ArrayView<const LO>& exportLIDs,
863  size_t& constantNumPackets)
864 {
865 
867  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
868  using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
869  using device_exec_space = typename device_type::execution_space;
870  using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
871 
872  // Convert all Teuchos::Array to Kokkos::View
873 
874  // This is an output array, so we don't have to copy to device here.
875  // However, we'll have to remember to copy back to host when done.
876  Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
877  create_mirror_view_from_raw_host_array (buffer_device_type (),
878  numPacketsPerLID.getRawPtr (),
879  numPacketsPerLID.size (), false,
880  "num_packets_per_lid");
881  // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
882  // DualView through here, instead of recreating a device View from a
883  // host ArrayView that itself came from a DualView.
884  //
885  // This is an input array, so we have to copy to device here.
886  // However, we never need to copy it back to host.
887  Kokkos::View<const LO*, buffer_device_type> export_lids_d =
888  create_mirror_view_from_raw_host_array (buffer_device_type (),
889  exportLIDs.getRawPtr (),
890  exportLIDs.size (), true,
891  "export_lids");
892 
893  Kokkos::View<int*, device_type> export_pids_d; // output arg
894  Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
895  constexpr bool pack_pids = false;
896  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
897  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
898  export_pids_d, constantNumPackets, pack_pids);
899 
900  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
901  // copy them back to host.
902  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
903  (numPacketsPerLID.getRawPtr (),
904  numPacketsPerLID.size ());
905  // DEEP_COPY REVIEW - DEVICE-TO-HOST
906  Kokkos::deep_copy (device_exec_space(), num_packets_per_lid_h, num_packets_per_lid_d);
907 
908  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
909  // exports_dv above, then we have two host copies for exports_h.
910 
911  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
912  // to copy them back to host.
913  if (static_cast<size_t> (exports.size ()) !=
914  static_cast<size_t> (exports_dv.extent (0))) {
915  exports.resize (exports_dv.extent (0));
916  }
917  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
918  exports.size ());
919  // DEEP_COPY REVIEW - DEVICE-TO-HOST
920  Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.d_view);
921 }
922 
923 template<typename ST, typename LO, typename GO, typename NT>
924 void
926  const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
927  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
928  const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
929  const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
930  size_t& constantNumPackets)
931 {
932  using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
933  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
934 
935  // Create an empty array of PIDs, since the interface needs it.
936  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
937  constexpr bool pack_pids = false;
938 
939  // Write-only device access
940  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
941  numPacketsPerLID_nc.clear_sync_state ();
942  numPacketsPerLID_nc.modify_device ();
943  auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
944 
945  // Read-only device access
946  TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
947  auto exportLIDs_d = exportLIDs.view_device ();
948 
949  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix_new(
950  "Tpetra::Details::packCrsMatrixNew",
951  "Import/Export"
952  );
953  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
954  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
955  exportPIDs_d, constantNumPackets, pack_pids);
956 }
957 
958 template<typename ST, typename LO, typename GO, typename NT>
959 void
961  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
962  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
963  const Teuchos::ArrayView<const LO>& exportLIDs,
964  const Teuchos::ArrayView<const int>& sourcePIDs,
965  size_t& constantNumPackets)
966 {
967  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type local_matrix_device_type;
968  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
969  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
970  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
971 
972  typename local_matrix_device_type::device_type outputDevice;
973  typedef typename NT::execution_space execution_space;
974 
975 
976  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
977  std::unique_ptr<std::string> prefix;
978  if (verbose) {
979  const int myRank = [&] () {
980  auto map = sourceMatrix.getMap ();
981  if (map.get () == nullptr) {
982  return -1;
983  }
984  auto comm = map->getComm ();
985  if (comm.get () == nullptr) {
986  return -2;
987  }
988  return comm->getRank ();
989  } ();
990  std::ostringstream os;
991  os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
992  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
993 
994  std::ostringstream os2;
995  os2 << *prefix << "start" << std::endl;
996  std::cerr << os2.str ();
997  }
998 
999  // Convert all Teuchos::Array to Kokkos::View
1000 
1001  // This is an output array, so we don't have to copy to device here.
1002  // However, we'll have to remember to copy back to host when done.
1003  auto num_packets_per_lid_d =
1004  create_mirror_view_from_raw_host_array (buffer_device_type (),
1005  numPacketsPerLID.getRawPtr (),
1006  numPacketsPerLID.size (), false,
1007  "num_packets_per_lid");
1008 
1009  // This is an input array, so we have to copy to device here.
1010  // However, we never need to copy it back to host.
1011  auto export_lids_d =
1012  create_mirror_view_from_raw_host_array (buffer_device_type (),
1013  exportLIDs.getRawPtr (),
1014  exportLIDs.size (), true,
1015  "export_lids");
1016  // This is an input array, so we have to copy to device here.
1017  // However, we never need to copy it back to host.
1018  auto export_pids_d =
1020  sourcePIDs.getRawPtr (),
1021  sourcePIDs.size (), true,
1022  "export_pids");
1023  constexpr bool pack_pids = true;
1024  try {
1026  (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1027  export_pids_d, constantNumPackets, pack_pids);
1028  }
1029  catch (std::exception& e) {
1030  if (verbose) {
1031  std::ostringstream os;
1032  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1033  << e.what () << std::endl;
1034  std::cerr << os.str ();
1035  }
1036  throw;
1037  }
1038  catch (...) {
1039  if (verbose) {
1040  std::ostringstream os;
1041  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1042  "not a subclass of std::exception" << std::endl;
1043  std::cerr << os.str ();
1044  }
1045  throw;
1046  }
1047 
1048  if (numPacketsPerLID.size () != 0) {
1049  try {
1050  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1051  // so we have to copy them back to host.
1052  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1053  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1054  // DEEP_COPY REVIEW - DEVICE-TO-HOST
1055  Kokkos::deep_copy (execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
1056  }
1057  catch (std::exception& e) {
1058  if (verbose) {
1059  std::ostringstream os;
1060  os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1061  std::cerr << os.str ();
1062  }
1063  throw;
1064  }
1065  catch (...) {
1066  if (verbose) {
1067  std::ostringstream os;
1068  os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1069  "of std::exception" << std::endl;
1070  std::cerr << os.str ();
1071  }
1072  throw;
1073  }
1074  }
1075 
1076  if (verbose) {
1077  std::ostringstream os;
1078  os << *prefix << "done" << std::endl;
1079  std::cerr << os.str ();
1080  }
1081 }
1082 
1083 } // namespace Details
1084 } // namespace Tpetra
1085 
1086 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1087  template void \
1088  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1089  Teuchos::Array<char>&, \
1090  const Teuchos::ArrayView<size_t>&, \
1091  const Teuchos::ArrayView<const LO>&, \
1092  size_t&); \
1093  template void \
1094  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1095  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1096  const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1097  const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1098  size_t&); \
1099  template void \
1100  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1101  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1102  const Teuchos::ArrayView<size_t>&, \
1103  const Teuchos::ArrayView<const LO>&, \
1104  const Teuchos::ArrayView<const int>&, \
1105  size_t&);
1106 
1107 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
GlobalOrdinal global_ordinal_type
The type of global indices.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Import KokkosSparse::OrdinalTraits, a traits class for &quot;invalid&quot; (flag) values of integer types...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Compute the number of packets and offsets for the pack procedure.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for &quot;new&quot; DistObject inter...
static bool verbose()
Whether Tpetra is in verbose mode.
typename Node::device_type device_type
The Kokkos device type.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Base class for distributed Tpetra objects that support data redistribution.
LocalOrdinal local_ordinal_type
The type of local indices.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.