42 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
43 #define TPETRA_DETAILS_COPYOFFSETS_HPP
50 #include "TpetraCore_config.h"
52 #include "Kokkos_Core.hpp"
54 #include <type_traits>
/// Compile-time test on the sizes and signedness of two types.
///
/// CopyOffsetsFunctor uses this as the default value of its
/// outputCanFitInput template parameter, instantiated with the output
/// and input Views' non-const value types.
///
/// \tparam OutputType Type treated as the "output" in the comparison.
/// \tparam InputType Type treated as the "input" in the comparison.
71 template<
class OutputType,
class InputType>
72 struct OutputCanFitInput {
// Signedness of each type, as reported by std::is_signed.
74 static constexpr
bool output_signed = std::is_signed<OutputType>::value;
75 static constexpr
bool input_signed = std::is_signed<InputType>::value;
// True if OutputType is strictly larger than InputType, or if both have
// the same size while OutputType is unsigned and InputType is signed.
// NOTE(review): this is also true for an unsigned OutputType paired with
// a signed InputType, although an unsigned type cannot represent a
// negative input -- confirm inputs are known to be nonnegative or that
// this is intended.
78 static const bool value =
sizeof (OutputType) >
sizeof (InputType) ||
79 (
sizeof (OutputType) ==
sizeof (InputType) &&
80 ! output_signed && input_signed);
/// Helper template for testing whether a value is negative. The second
/// parameter defaults to std::is_signed<InputType>::value and selects
/// one of the two specializations below.
// NOTE(review): the declaration that follows this template header is not
// visible in this listing.
84 template<
class InputType,
85 bool input_signed = std::is_signed<InputType>::value>
/// Specialization for signed InputType: compares src against zero.
88 template<
class InputType>
89 struct Negative<InputType, true> {
/// \param src Value to test.
/// \return true if src is less than InputType(0).
90 static KOKKOS_INLINE_FUNCTION
bool
91 negative (
const InputType src) {
92 return src < InputType (0);
/// Specialization for unsigned InputType. The argument is unnamed, so
/// the result cannot depend on it.
// NOTE(review): the function body is not visible in this listing;
// presumably it returns false -- confirm.
96 template<
class InputType>
97 struct Negative<InputType, false> {
98 static KOKKOS_INLINE_FUNCTION
bool
99 negative (
const InputType ) {
/// Convenience wrapper that forwards to Negative<InputType>::negative,
/// letting the signedness of InputType pick the specialization.
///
/// \param src Value to test.
/// \return Whatever the selected Negative specialization returns for src.
104 template<
class InputType>
105 KOKKOS_INLINE_FUNCTION
bool negative (
const InputType src) {
106 return Negative<InputType>::negative (src);
/// Checks whether an InputType value lies outside the range that is
/// tested for OutputType before a conversion.
///
/// \tparam OutputType Type being converted to.
/// \tparam InputType Type being converted from.
109 template<
class OutputType,
class InputType>
110 struct OverflowChecker {
// Signedness of each type, as reported by std::is_signed.
112 static constexpr
bool output_signed = std::is_signed<OutputType>::value;
113 static constexpr
bool input_signed = std::is_signed<InputType>::value;
// Compile-time screen. True when converting signed to unsigned, when
// OutputType is smaller than InputType, or when the sizes are equal and
// the conversion is unsigned to signed.
119 static constexpr
bool could_overflow =
120 (! output_signed && input_signed) ||
121 (
sizeof (OutputType) <
sizeof (InputType)) ||
122 (
sizeof (OutputType) ==
sizeof (InputType) &&
123 output_signed && ! input_signed);
/// Test a single value.
///
/// \param src Value that is about to be converted to OutputType.
125 KOKKOS_INLINE_FUNCTION
bool
126 overflows (
const InputType src)
const
// Handles type pairs for which could_overflow is false.
// NOTE(review): the body of this branch is not visible in this listing;
// presumably it returns false -- confirm.
128 if (! could_overflow) {
// Signed-to-unsigned conversion: only negativity is tested here.
// NOTE(review): because this branch returns, a positive src larger than
// OutputType's maximum (possible when OutputType is narrower than
// InputType) is not reported -- confirm whether that is intended.
133 if (! output_signed && input_signed) {
134 return negative (src);
// Range test; both bounds are stored as InputType, so the comparison is
// between values of the same type.
138 return src < minDstVal_ || src > maxDstVal_;
// Lower bound: OutputType's minimum when InputType is signed, otherwise
// zero. Upper bound: OutputType's maximum. Both are converted to
// InputType on initialization.
145 InputType minDstVal_ = input_signed ?
146 std::numeric_limits<OutputType>::min () : OutputType (0);
147 InputType maxDstVal_ = std::numeric_limits<OutputType>::max ();
/// Build a diagnostic message and throw std::runtime_error when a copy
/// reported overflowing values.
///
/// \param dst Output View of the copy.
/// \param src Input View of the copy.
/// \param overflowCount Number of overflowing entries that were counted.
///
/// \throws std::runtime_error via TEUCHOS_TEST_FOR_EXCEPTION, with the
///   message assembled below.
// NOTE(review): the return type line is not visible in this listing.
151 template<
class OutputViewType,
class InputViewType>
153 errorIfOverflow (
const OutputViewType& dst,
154 const InputViewType& src,
155 const size_t overflowCount)
// Nothing overflowed.
// NOTE(review): the body of this branch is not visible; presumably it
// returns without throwing -- confirm.
157 if (overflowCount == 0) {
161 std::ostringstream os;
// Use "value" or "values" depending on the count.
162 const bool plural = overflowCount != size_t (1);
163 os <<
"copyOffsets: " << overflowCount <<
" value" <<
164 (plural ?
"s" :
"") <<
" in src were too big (in the "
165 "sense of integer overflow) to fit in dst.";
// Upper limit on src's length for the detailed branch below.
// NOTE(review): the initializer is not visible in this listing; the
// message further down calls it Tpetra's verbose print count threshold.
169 const size_t maxNumToPrint =
171 const size_t srcLen (src.extent (0));
172 if (srcLen <= maxNumToPrint) {
// Mirror views of dst and src.
// NOTE(review): any copy of the data into these mirrors is not visible
// in this listing.
173 auto dst_h = Kokkos::create_mirror_view (dst);
174 auto src_h = Kokkos::create_mirror_view (src);
// Two passes over the entries. The inner test is true for every entry
// except the last.
// NOTE(review): the loop bodies are not visible; presumably they print
// the entries with a separator between them -- confirm.
181 for (
size_t k = 0; k < srcLen; ++k) {
183 if (k +
size_t (1) < srcLen) {
190 for (
size_t k = 0; k < srcLen; ++k) {
192 if (k +
size_t (1) < srcLen) {
// Too many entries: explain how to raise the threshold instead.
199 os <<
" src.extent(0) > " << maxNumToPrint <<
", Tpetra's "
200 "verbose print count threshold. To increase this, set the "
201 "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
202 "to the desired threshold and rerun. You do NOT need to "
// Unconditional throw carrying the assembled message.
206 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str ());
/// Primary template of the copy functor; it is empty. The partial
/// specializations on the final bool parameter provide the behavior.
///
/// \tparam OutputViewType Type of the output View.
/// \tparam outputCanFitInput Defaults to OutputCanFitInput evaluated on
///   the two Views' non-const value types.
// NOTE(review): the template parameter line between OutputViewType and
// outputCanFitInput is not visible in this listing.
218 template<
class OutputViewType,
220 const bool outputCanFitInput =
221 OutputCanFitInput<
typename OutputViewType::non_const_value_type,
222 typename InputViewType::non_const_value_type>::value>
223 class CopyOffsetsFunctor {};
/// Specialization for outputCanFitInput == false. It copies entries with
/// a static_cast and consults an OverflowChecker in the reduction
/// operator.
226 template<
class OutputViewType,
class InputViewType>
227 class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
// Type aliases. value_type is the reduction result type (size_t).
229 using execution_space =
typename OutputViewType::execution_space;
230 using size_type =
typename OutputViewType::size_type;
231 using value_type = size_t;
233 using input_value_type =
typename InputViewType::non_const_value_type;
234 using output_value_type =
typename OutputViewType::non_const_value_type;
/// Store the two Views.
///
/// \param dst Output View.
/// \param src Input View.
236 CopyOffsetsFunctor (
const OutputViewType& dst,
const InputViewType& src) :
237 dst_ (dst), src_ (src)
// Compile-time requirement: the output View's memory space must be able
// to access the input View's memory space.
239 static_assert (Kokkos::SpaceAccessibility<
240 typename OutputViewType::memory_space,
241 typename InputViewType::memory_space>::accessible,
242 "CopyOffsetsFunctor (implements copyOffsets): Output "
243 "View's space must be able to access the input View's "
/// Reduction operator: checks entry i for overflow, then writes the
/// converted value to dst.
// NOTE(review): the body of the overflow branch is not visible in this
// listing; presumably it increments overflowCount -- confirm.
247 KOKKOS_INLINE_FUNCTION
void
248 operator () (
const size_type i, value_type& overflowCount)
const {
249 const input_value_type src_i = src_(i);
250 if (checker_.overflows (src_i)) {
253 dst_(i) =
static_cast<output_value_type
> (src_i);
/// Non-reducing operator: converts and copies entry i with no overflow
/// check.
256 KOKKOS_INLINE_FUNCTION
void
257 operator () (
const size_type i)
const {
258 const input_value_type src_i = src_(i);
259 dst_(i) =
static_cast<output_value_type
> (src_i);
// Reduction hooks; their bodies are not visible in this listing.
262 KOKKOS_INLINE_FUNCTION
void init (value_type& overflowCount)
const {
266 KOKKOS_INLINE_FUNCTION
void
267 join (value_type& result,
268 const value_type& current)
const {
// Range checker for the input-to-output value type conversion.
275 OverflowChecker<output_value_type, input_value_type> checker_;
/// Specialization for outputCanFitInput == true.
// NOTE(review): none of the member function bodies or data members are
// visible in this listing. The reduction argument of the two-argument
// operator is unnamed, which suggests no overflow counting happens here
// -- confirm.
279 template<
class OutputViewType,
class InputViewType>
280 class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
// Type aliases. value_type is the reduction result type (size_t).
282 using execution_space =
typename OutputViewType::execution_space;
283 using size_type =
typename OutputViewType::size_type;
284 using value_type = size_t;
/// \param dst Output View.
/// \param src Input View.
286 CopyOffsetsFunctor (
const OutputViewType& dst,
const InputViewType& src) :
// Compile-time requirement: the output View's memory space must be able
// to access the input View's memory space.
290 static_assert (Kokkos::SpaceAccessibility<
291 typename OutputViewType::memory_space,
292 typename InputViewType::memory_space>::accessible,
293 "CopyOffsetsFunctor (implements copyOffsets): Output "
294 "View's space must be able to access the input View's "
// Reduction form of the call operator; the reduction argument is unused.
298 KOKKOS_INLINE_FUNCTION
void
299 operator () (
const size_type i, value_type& )
const {
// Non-reducing form of the call operator.
304 KOKKOS_INLINE_FUNCTION
void
305 operator () (
const size_type i)
const {
// Reduction hooks; their bodies are not visible in this listing.
309 KOKKOS_INLINE_FUNCTION
void init (value_type& overflowCount)
const {
313 KOKKOS_INLINE_FUNCTION
void
315 const value_type& )
const
/// Primary template for the implementation of copyOffsets. It only
/// declares run(); the partial specializations on the two bool
/// parameters define it.
///
/// \tparam OutputViewType Type of the output View.
/// \tparam sameLayoutsSameOffsetTypes Defaults to true when the two Views
///   have the same array_layout and the same non-const value type.
/// \tparam outputExecSpaceCanAccessInputMemSpace Defaults to whether the
///   output View's memory space can access the input View's memory
///   space, per Kokkos::SpaceAccessibility.
// NOTE(review): the template parameter line between OutputViewType and
// sameLayoutsSameOffsetTypes is not visible in this listing.
339 template<
class OutputViewType,
341 const bool sameLayoutsSameOffsetTypes =
342 std::is_same<
typename OutputViewType::array_layout,
343 typename InputViewType::array_layout>::value &&
344 std::is_same<
typename OutputViewType::non_const_value_type,
345 typename InputViewType::non_const_value_type>::value,
346 const bool outputExecSpaceCanAccessInputMemSpace =
347 Kokkos::SpaceAccessibility<
348 typename OutputViewType::memory_space,
349 typename InputViewType::memory_space>::accessible>
350 struct CopyOffsetsImpl {
/// \param dst Output View.
/// \param src Input View.
351 static void run (
const OutputViewType& dst,
const InputViewType& src);
/// Specialization for sameLayoutsSameOffsetTypes == true, with any value
/// of outputExecSpaceCanAccessInputMemSpace.
// NOTE(review): the statements that perform the copy are not visible in
// this listing.
362 template<
class OutputViewType,
364 const bool outputExecSpaceCanAccessInputMemSpace>
365 struct CopyOffsetsImpl<OutputViewType, InputViewType,
366 true, outputExecSpaceCanAccessInputMemSpace> {
/// \param dst Output View.
/// \param src Input View.
367 static void run (
const OutputViewType& dst,
const InputViewType& src) {
// Compile-time preconditions: same value type, same rank, same layout.
368 static_assert (std::is_same<
typename OutputViewType::non_const_value_type,
369 typename InputViewType::non_const_value_type>::value,
370 "CopyOffsetsImpl (implementation of copyOffsets): In order"
371 " to call this specialization, the input and output must "
372 "use the same offset type.");
373 static_assert (static_cast<int> (OutputViewType::rank) ==
374 static_cast<int> (InputViewType::rank),
375 "CopyOffsetsImpl (implementation of copyOffsets): In order"
376 " to call this specialization, src and dst must have the "
// NOTE(review): the concatenated message below reads "must have the the
// same array_layout." -- the word "the" is duplicated.
378 static_assert (std::is_same<
typename OutputViewType::array_layout,
379 typename InputViewType::array_layout>::value,
380 "CopyOffsetsImpl (implementation of copyOffsets): In order"
381 " to call this specialization, src and dst must have the "
382 "the same array_layout.");
384 using execution_space =
typename OutputViewType::execution_space;
/// Specialization that runs CopyOffsetsFunctor directly on dst and src.
/// Its static_asserts require different layouts or value types, and an
/// output memory space that can access the input memory space.
// NOTE(review): the specialization's template argument line is not
// visible in this listing.
400 template<
class OutputViewType,
402 struct CopyOffsetsImpl<OutputViewType, InputViewType,
/// \param dst Output View; its extent(0) sets the iteration range.
/// \param src Input View.
404 static void run (
const OutputViewType& dst,
const InputViewType& src) {
// Compile-time preconditions for selecting this specialization.
405 static_assert (static_cast<int> (OutputViewType::rank) ==
406 static_cast<int> (InputViewType::rank),
407 "CopyOffsetsImpl (implementation of copyOffsets): "
408 "src and dst must have the same rank.");
409 constexpr
bool sameLayoutsSameOffsetTypes =
410 std::is_same<
typename OutputViewType::array_layout,
411 typename InputViewType::array_layout>::value &&
412 std::is_same<
typename OutputViewType::non_const_value_type,
413 typename InputViewType::non_const_value_type>::value;
414 static_assert (! sameLayoutsSameOffsetTypes,
415 "CopyOffsetsImpl (implements copyOffsets): In order to "
416 "call this specialization, sameLayoutsSameOffsetTypes "
417 "must be false. That is, either the input and output "
418 "must have different array layouts, or their value types "
420 static_assert (Kokkos::SpaceAccessibility<
421 typename OutputViewType::memory_space,
422 typename InputViewType::memory_space>::accessible,
423 "CopyOffsetsImpl (implements copyOffsets): In order to "
424 "call this specialization, the output View's space must "
425 "be able to access the input View's memory space.");
// The functor's third template argument takes its default value.
426 using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
427 using execution_space =
typename OutputViewType::execution_space;
428 using size_type =
typename OutputViewType::size_type;
429 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
// Two launch variants follow: a parallel_reduce whose count is passed to
// errorIfOverflow, and a plain parallel_for.
// NOTE(review): the condition choosing between them, and the final
// argument of parallel_reduce, are not visible in this listing.
433 size_t overflowCount = 0;
434 Kokkos::parallel_reduce (
"Tpetra::Details::copyOffsets",
435 range_type (0, dst.extent (0)),
436 functor_type (dst, src),
438 errorIfOverflow (dst, src, overflowCount);
441 Kokkos::parallel_for (
"Tpetra::Details::copyOffsets",
442 range_type (0, dst.extent (0)),
443 functor_type (dst, src));
/// Specialization that allocates a temporary View of the input value type
/// on the output View's device and runs CopyOffsetsFunctor from that
/// temporary into dst.
// NOTE(review): the specialization's template argument line is not
// visible; presumably this handles an output space that cannot access
// the input memory space -- confirm.
465 template<
class OutputViewType,
class InputViewType>
466 struct CopyOffsetsImpl<OutputViewType, InputViewType,
/// \param dst Output View; its extent(0) sets the iteration range.
/// \param src Input View.
468 static void run (
const OutputViewType& dst,
const InputViewType& src) {
// Compile-time preconditions for selecting this specialization.
469 static_assert (static_cast<int> (OutputViewType::rank) ==
470 static_cast<int> (InputViewType::rank),
471 "CopyOffsetsImpl (implementation of copyOffsets): In order"
472 " to call this specialization, src and dst must have the "
474 constexpr
bool sameLayoutsSameOffsetTypes =
475 std::is_same<
typename OutputViewType::array_layout,
476 typename InputViewType::array_layout>::value &&
477 std::is_same<
typename OutputViewType::non_const_value_type,
478 typename InputViewType::non_const_value_type>::value;
479 static_assert (! sameLayoutsSameOffsetTypes,
480 "CopyOffsetsImpl (implements copyOffsets): In order to "
481 "call this specialization, sameLayoutsSameOffsetTypes "
482 "must be false. That is, either the input and output "
483 "must have different array layouts, or their value types "
// Temporary View type: a 1-D LayoutLeft View of the input value type on
// the output View's device.
485 using output_space_copy_type =
486 Kokkos::View<
typename InputViewType::non_const_value_type*,
487 Kokkos::LayoutLeft,
typename OutputViewType::device_type>;
488 using Kokkos::view_alloc;
489 using Kokkos::WithoutInitializing;
490 using execution_space =
typename OutputViewType::execution_space;
// Allocate the temporary without initializing its entries.
// NOTE(review): the size argument and the step that fills the temporary
// from src are not visible in this listing.
491 output_space_copy_type
492 outputSpaceCopy (view_alloc (
"outputSpace", WithoutInitializing),
500 CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
501 using size_type =
typename OutputViewType::size_type;
502 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
// Two launch variants follow: a parallel_reduce whose count is passed to
// errorIfOverflow, and a plain parallel_for. Both read from the
// temporary, while errorIfOverflow receives the original src.
// NOTE(review): the condition choosing between them, and the final
// argument of parallel_reduce, are not visible in this listing.
506 size_t overflowCount = 0;
507 Kokkos::parallel_reduce (
"Tpetra::Details::copyOffsets",
508 range_type (0, dst.extent (0)),
509 functor_type (dst, outputSpaceCopy),
511 errorIfOverflow (dst, src, overflowCount);
514 Kokkos::parallel_for (
"Tpetra::Details::copyOffsets",
515 range_type (0, dst.extent (0)),
516 functor_type (dst, outputSpaceCopy));
/// Public entry point: validates the two Views at compile time and at run
/// time, then dispatches to CopyOffsetsImpl<OutputViewType,
/// InputViewType>::run.
///
/// \throws std::invalid_argument if dst.extent(0) != src.extent(0).
// NOTE(review): the function signature line is not visible in this
// listing; the body refers to parameters named dst and src.
533 template<
class OutputViewType,
class InputViewType>
// Both arguments must be Kokkos Views.
537 static_assert (Kokkos::is_view<OutputViewType>::value,
538 "OutputViewType (the type of dst) must be a Kokkos::View.");
539 static_assert (Kokkos::is_view<InputViewType>::value,
540 "InputViewType (the type of src) must be a Kokkos::View.");
// dst must be writable: its value_type must not be const.
541 static_assert (std::is_same<
typename OutputViewType::value_type,
542 typename OutputViewType::non_const_value_type>::value,
543 "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
// Both Views must be rank 1.
544 static_assert (static_cast<int> (OutputViewType::rank) == 1,
545 "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
546 static_assert (static_cast<int> (InputViewType::rank) == 1,
547 "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
// Entries of both Views must be integral types.
548 static_assert (std::is_integral<
typename std::decay<decltype (dst(0)) >::type>::value,
549 "The entries of dst must be built-in integers.");
550 static_assert (std::is_integral<
typename std::decay<decltype (src(0)) >::type>::value,
551 "The entries of src must be built-in integers.");
// Run-time check: the two Views must have the same length.
553 TEUCHOS_TEST_FOR_EXCEPTION
554 (dst.extent (0) != src.extent (0), std::invalid_argument,
555 "copyOffsets: dst.extent(0) = " << dst.extent (0)
556 <<
" != src.extent(0) = " << src.extent (0) <<
".");
// The remaining CopyOffsetsImpl template arguments take their defaults,
// which selects the specialization.
558 CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
564 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
static bool debug()
Whether Tpetra is in debug mode.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.