Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Fad_KokkosAtomicTests.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
30 
31 #include "Sacado.hpp"
32 
// Type trait: is_dfad<T>::value tells whether T is a Sacado::Fad::Exp::DFad.
// This primary template is the fallback used for every type that does not
// match a more specialized definition, and reports false.
template <typename T>
struct is_dfad {
  static const bool value = false;
};
37 
38 template <typename T>
39 struct is_dfad< Sacado::Fad::Exp::DFad<T> > {
40  static const bool value = true;
41 };
42 
43 template <typename FadType1, typename FadType2>
44 bool checkFads(const FadType1& x, const FadType2& x2,
45  Teuchos::FancyOStream& out, double tol = 1.0e-15)
46 {
47  bool success = true;
48 
49  // Check sizes match
50  TEUCHOS_TEST_EQUALITY(x.size(), x2.size(), out, success);
51 
52  // Check values match
53  TEUCHOS_TEST_FLOATING_EQUALITY(x.val(), x2.val(), tol, out, success);
54 
55  // Check derivatives match
56  for (int i=0; i<x.size(); ++i)
57  TEUCHOS_TEST_FLOATING_EQUALITY(x.dx(i), x2.dx(i), tol, out, success);
58 
59  return success;
60 }
61 
// Build a deterministic Fad value for the entry at (row, col).
//
// Parameters:
//   num_rows, num_cols - extents used to scale the row/column contributions
//   fad_size           - number of derivative components to fill
//   row, col           - zero-based position of the entry
//
// The value is (100 + num_rows/(row+1)) + (10 + num_cols/(col+1)); derivative
// component i is that value plus 1 + fad_size/(i+1).  All divisions are
// carried out in fadtype::value_type.
template <typename fadtype, typename ordinal>
inline
fadtype generate_fad( const ordinal num_rows,
                      const ordinal num_cols,
                      const ordinal fad_size,
                      const ordinal row,
                      const ordinal col )
{
  typedef typename fadtype::value_type scalar;
  fadtype x(fad_size, scalar(0.0));

  const scalar x_row = 100.0 + scalar(num_rows) / scalar(row+1);
  const scalar x_col = 10.0 + scalar(num_cols) / scalar(col+1);
  x.val() = x_row + x_col;
  for (ordinal i=0; i<fad_size; ++i) {
    const scalar x_fad = 1.0 + scalar(fad_size) / scalar(i+1);
    x.fastAccessDx(i) = x_row + x_col + x_fad;
  }
  return x;
}
82 
// Number of derivative components used by the tests; may be overridden by
// defining GLOBAL_FAD_SIZE before this header is included.
#ifndef GLOBAL_FAD_SIZE
#define GLOBAL_FAD_SIZE 5
#endif
const int global_num_rows = 11;
const int global_num_cols = 7;
// Read by testAtomic() to size the Fad entries.
const int global_fad_size = GLOBAL_FAD_SIZE;
89 
// Tag type selecting the addition test.  It serves both as the dispatch tag
// for AtomicKernel::operator() and as the host-side reference operation.
struct AddTag {
  // Starting value of the accumulator.
  static double init() { return 0.0; }

  // Reference operation: a + b.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a+b)
  {
    return a+b;
  }
};
// Tag type selecting the subtraction test.  It serves both as the dispatch
// tag for AtomicKernel::operator() and as the host-side reference operation.
struct SubTag {
  // Starting value of the accumulator.
  static double init() { return 0.0; }

  // Reference operation: a - b.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a-b)
  {
    return a-b;
  }
};
// Tag type selecting the multiplication test.  It serves both as the
// dispatch tag for AtomicKernel::operator() and as the host-side reference
// operation.
struct MulTag {
  // Starting value of the accumulator.
  static double init() { return 1.0; }

  // Reference operation: a * b.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a*b)
  {
    return a*b;
  }
};
// Tag type selecting the division test.  It serves both as the dispatch tag
// for AtomicKernel::operator() and as the host-side reference operation.
struct DivTag {
  // Starting value of the accumulator.
  static double init() { return 1.0; }

  // Reference operation: a / b (integer division when both are integers).
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a/b)
  {
    return a/b;
  }
};
// Tag type selecting the maximum test.  It serves both as the dispatch tag
// for AtomicKernel::operator() and as the host-side reference operation.
struct MaxTag {
  // Starting value of the accumulator.
  static double init() { return 1.0; }

  // Reference operation: max(a, b).  The call is deliberately unqualified so
  // the overload is found through argument-dependent lookup on the argument
  // types.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(max(a,b))
  {
    return max(a,b);
  }
};
// Tag type selecting the minimum test.  It serves both as the dispatch tag
// for AtomicKernel::operator() and as the host-side reference operation.
struct MinTag {
  // Starting value of the accumulator.
  static double init() { return 1.0; }

  // Reference operation: min(a, b).  The call is deliberately unqualified so
  // the overload is found through argument-dependent lookup on the argument
  // types.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(min(a,b))
  {
    return min(a,b);
  }
};
138 
139 // Kernel to test atomic_add
140 template <typename ViewType, typename ScalarViewType, bool OperFetch>
141 struct AtomicKernel {
142  typedef typename ViewType::execution_space execution_space;
143  typedef typename ViewType::size_type size_type;
144  typedef typename Kokkos::TeamPolicy< execution_space>::member_type team_handle;
145  typedef typename Kokkos::ThreadLocalScalarType<ViewType>::type local_scalar_type;
146  static const size_type stride = Kokkos::ViewScalarStride<ViewType>::stride;
147 
148  const ViewType m_v;
149  const ScalarViewType m_s;
150 
151  AtomicKernel(const ViewType& v, const ScalarViewType& s) :
152  m_v(v), m_s(s) {};
153 
154  KOKKOS_INLINE_FUNCTION
155  void operator() (AddTag tag, const size_type i) const {
156  local_scalar_type x = m_v(i);
157  if (OperFetch)
158  Kokkos::atomic_add_fetch(&(m_s()), x);
159  else
160  Kokkos::atomic_fetch_add(&(m_s()), x);
161  }
162 
163  KOKKOS_INLINE_FUNCTION
164  void operator() (SubTag tag, const size_type i) const {
165  local_scalar_type x = m_v(i);
166  if (OperFetch)
167  Kokkos::atomic_sub_fetch(&(m_s()), x);
168  else
169  Kokkos::atomic_fetch_sub(&(m_s()), x);
170  }
171 
172  KOKKOS_INLINE_FUNCTION
173  void operator() (MulTag tag, const size_type i) const {
174  local_scalar_type x = m_v(i);
175  if (OperFetch)
176  Kokkos::atomic_mul_fetch(&(m_s()), x);
177  else
178  Kokkos::atomic_fetch_mul(&(m_s()), x);
179  }
180 
181  KOKKOS_INLINE_FUNCTION
182  void operator() (DivTag tag, const size_type i) const {
183  local_scalar_type x = m_v(i);
184  if (OperFetch)
185  Kokkos::atomic_div_fetch(&(m_s()), x);
186  else
187  Kokkos::atomic_fetch_div(&(m_s()), x);
188  }
189 
190  KOKKOS_INLINE_FUNCTION
191  void operator() (MaxTag tag, const size_type i) const {
192  local_scalar_type x = m_v(i);
193  if (OperFetch)
194  Kokkos::atomic_max_fetch(&(m_s()), x);
195  else
196  Kokkos::atomic_fetch_max(&(m_s()), x);
197  }
198 
199  KOKKOS_INLINE_FUNCTION
200  void operator() (MinTag tag, const size_type i) const {
201  local_scalar_type x = m_v(i);
202  if (OperFetch)
203  Kokkos::atomic_min_fetch(&(m_s()), x);
204  else
205  Kokkos::atomic_fetch_min(&(m_s()), x);
206  }
207 
208  template <typename Tag>
209  KOKKOS_INLINE_FUNCTION
210  void operator()( Tag tag, const team_handle& team ) const
211  {
212  const size_type i = team.league_rank()*team.team_size() + team.team_rank();
213  if (i < m_v.extent(0))
214  (*this)(tag, i);
215  }
216 
217  // Kernel launch
218  template <typename Tag>
219  static void apply(Tag tag, const ViewType& v, const ScalarViewType& s) {
220  const size_type nrow = v.extent(0);
221 
222 #if defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL)
223  const bool use_team =
226  ( stride > 1 );
227 #elif defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
228  const bool use_team =
232 #elif defined (KOKKOS_ENABLE_HIP) && defined (SACADO_VIEW_CUDA_HIERARCHICAL)
233  const bool use_team =
236  ( stride > 1 );
237 #elif defined (KOKKOS_ENABLE_HIP) && defined (SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
238  const bool use_team =
241  is_dfad<typename ViewType::non_const_value_type>::value;
242 #else
243  const bool use_team = false;
244 #endif
245 
246  if (use_team) {
247  const size_type team_size = 256 / stride;
248  Kokkos::TeamPolicy<execution_space, Tag> policy(
249  (nrow+team_size-1)/team_size, team_size, stride );
250  Kokkos::parallel_for( policy, AtomicKernel(v,s) );
251  }
252  else {
253  Kokkos::RangePolicy<execution_space, Tag> policy( 0, nrow );
254  Kokkos::parallel_for( policy, AtomicKernel(v,s) );
255  }
256  }
257 };
258 
259 template <typename FadType, typename Layout, typename Device, bool OperFetch,
260  typename TagType>
261 bool testAtomic(const TagType& tag, Teuchos::FancyOStream& out)
262 {
263  typedef Kokkos::View<FadType*,Layout,Device> ViewType;
264  typedef Kokkos::View<FadType,Layout,Device> ScalarViewType;
265  typedef typename ViewType::size_type size_type;
266  typedef typename ViewType::HostMirror host_view_type;
267  typedef typename ScalarViewType::HostMirror host_scalar_view_type;
268 
269  const size_type num_rows = global_num_rows;
270  const size_type fad_size = global_fad_size;
271 
272  // Create and fill view
273  ViewType v;
274  ScalarViewType s0;
275 #if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
276  v = ViewType ("view", num_rows);
277  s0 = ScalarViewType ("");
278 #else
279  v = ViewType ("view", num_rows, fad_size+1);
280  s0 = ScalarViewType ("", fad_size+1);
281 #endif
282  host_view_type h_v = Kokkos::create_mirror_view(v);
283  for (size_type i=0; i<num_rows; ++i)
284  h_v(i) =
285  generate_fad<FadType>(num_rows, size_type(1), fad_size, i, size_type(0));
286  Kokkos::deep_copy(v, h_v);
287 
288  Kokkos::deep_copy(s0, tag.init());
289 
290  // Create scalar view
291  ScalarViewType s;
292 #if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
293  s = ScalarViewType ("scalar view");
294 #else
295  s = ScalarViewType ("scalar view", fad_size+1);
296 #endif
297  Kokkos::deep_copy( s, tag.init() );
298 
299  // Call atomic_add kernel, which adds up entries in v
301 
302  // Copy to host
303  host_scalar_view_type hs = Kokkos::create_mirror_view(s);
304  Kokkos::deep_copy(hs, s);
305 
306  // Compute correct result
307  auto b = Kokkos::create_mirror_view(s0);
308  Kokkos::deep_copy(b, s0);
309 
310  for (size_type i=0; i<num_rows; ++i)
311  b() = tag.apply(b(), h_v(i));
312 
313  // Check
314  bool success = checkFads(b(), hs(), out);
315 
316  return success;
317 }
318 
// Tests of the Kokkos::atomic_<oper>_fetch form (OperFetch = true)
320 
322  Kokkos_View_Fad, AtomicAddFetch, FadType, Layout, Device )
323 {
324  success = testAtomic<FadType, Layout, Device, true>(AddTag(), out);
325 }
326 
328  Kokkos_View_Fad, AtomicSubFetch, FadType, Layout, Device )
329 {
330  success = testAtomic<FadType, Layout, Device, true>(SubTag(), out);
331 }
332 
334  Kokkos_View_Fad, AtomicMulFetch, FadType, Layout, Device )
335 {
336  success = testAtomic<FadType, Layout, Device, true>(MulTag(), out);
337 }
338 
340  Kokkos_View_Fad, AtomicDivFetch, FadType, Layout, Device )
341 {
342  success = testAtomic<FadType, Layout, Device, true>(DivTag(), out);
343 }
344 
346  Kokkos_View_Fad, AtomicMaxFetch, FadType, Layout, Device )
347 {
348  success = testAtomic<FadType, Layout, Device, true>(MaxTag(), out);
349 }
350 
352  Kokkos_View_Fad, AtomicMinFetch, FadType, Layout, Device )
353 {
354  success = testAtomic<FadType, Layout, Device, true>(MinTag(), out);
355 }
356 
// Tests of the Kokkos::atomic_fetch_<oper> form (OperFetch = false)
358 
360  Kokkos_View_Fad, AtomicFetchAdd, FadType, Layout, Device )
361 {
362  success = testAtomic<FadType, Layout, Device, false>(AddTag(), out);
363 }
364 
366  Kokkos_View_Fad, AtomicFetchSub, FadType, Layout, Device )
367 {
368  success = testAtomic<FadType, Layout, Device, false>(SubTag(), out);
369 }
370 
372  Kokkos_View_Fad, AtomicFetchMul, FadType, Layout, Device )
373 {
374  success = testAtomic<FadType, Layout, Device, false>(MulTag(), out);
375 }
376 
378  Kokkos_View_Fad, AtomicFetchDiv, FadType, Layout, Device )
379 {
380  success = testAtomic<FadType, Layout, Device, false>(DivTag(), out);
381 }
382 
384  Kokkos_View_Fad, AtomicFetchMax, FadType, Layout, Device )
385 {
386  success = testAtomic<FadType, Layout, Device, false>(MaxTag(), out);
387 }
388 
390  Kokkos_View_Fad, AtomicFetchMin, FadType, Layout, Device )
391 {
392  success = testAtomic<FadType, Layout, Device, false>(MinTag(), out);
393 }
394 
// Instantiate all twelve atomic tests (six atomic_<oper>_fetch, six
// atomic_fetch_<oper>) for one Fad type F, layout L and device D.
#define VIEW_FAD_TESTS_FLD( F, L, D )                                   \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicAddFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicSubFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMulFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicDivFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMaxFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMinFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchAdd, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchSub, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMul, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchDiv, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMax, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMin, F, L, D )
408 
409 using Kokkos::LayoutLeft;
410 using Kokkos::LayoutRight;
413 
// Instantiate the atomic tests for Fad type F on device D once per layout:
// LayoutLeft, LayoutRight, LeftContiguous and RightContiguous.
#define VIEW_FAD_TESTS_FD( F, D )                                       \
  VIEW_FAD_TESTS_FLD( F, LayoutLeft, D )                                \
  VIEW_FAD_TESTS_FLD( F, LayoutRight, D )                               \
  VIEW_FAD_TESTS_FLD( F, LeftContiguous, D )                            \
  VIEW_FAD_TESTS_FLD( F, RightContiguous, D )
419 
// Full set of atomics only implemented for new design.
// VIEW_FAD_TESTS_D( D ) instantiates the tests on device D for every Fad type
// under test; when SACADO_ENABLE_NEW_DESIGN is off it expands to nothing.
#if SACADO_ENABLE_NEW_DESIGN

// NOTE(review): these three typedefs were absent from the recovered listing
// and are reconstructed here -- confirm the namespace and the size argument
// against the repository.
typedef Sacado::Fad::Exp::SFad<double,GLOBAL_FAD_SIZE> SFadType;
typedef Sacado::Fad::Exp::SLFad<double,GLOBAL_FAD_SIZE> SLFadType;
typedef Sacado::Fad::Exp::DFad<double> DFadType;

// DFadType is only included when SACADO_TEST_DFAD is enabled.
#if SACADO_TEST_DFAD
#define VIEW_FAD_TESTS_D( D )                   \
  VIEW_FAD_TESTS_FD( SFadType, D )              \
  VIEW_FAD_TESTS_FD( SLFadType, D )             \
  VIEW_FAD_TESTS_FD( DFadType, D )
#else
#define VIEW_FAD_TESTS_D( D )                   \
  VIEW_FAD_TESTS_FD( SFadType, D )              \
  VIEW_FAD_TESTS_FD( SLFadType, D )
#endif

#else

#define VIEW_FAD_TESTS_D( D ) /* */

#endif
static double init()
KOKKOS_INLINE_FUNCTION void operator()(Tag tag, const team_handle &team) const
static auto apply(const T1 &a, const T2 &b) -> decltype(a/b)
Kokkos::LayoutContiguous< Kokkos::LayoutRight > RightContiguous
Kokkos::LayoutContiguous< Kokkos::LayoutLeft > LeftContiguous
#define TEUCHOS_TEST_FLOATING_EQUALITY(v1, v2, tol, out, success)
Kokkos::ThreadLocalScalarType< ViewType >::type local_scalar_type
static const bool value
const int global_fad_size
Sacado::Fad::DFad< double > FadType
static double init()
ViewType::size_type size_type
bool checkFads(const FadType1 &x, const FadType2 &x2, Teuchos::FancyOStream &out, double tol=1.0e-15)
const int global_num_rows
AtomicKernel(const ViewType &v, const ScalarViewType &s)
scalar generate_fad(const size_t n0, const size_t n1, const size_t n2, const size_t n3, const int fad_size, const size_t i0, const size_t i1, const size_t i2, const size_t i3, const int i_fad)
Sacado::Fad::SFad< double, fad_dim > SFadType
static double init()
static const size_type stride
static auto apply(const T1 &a, const T2 &b) -> decltype(a-b)
TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(Kokkos_View_FadFad, DeepCopy, FadFadType, Layout, Device)
const int global_num_cols
#define T2(r, f)
Definition: Sacado_rad.hpp:578
GeneralFad< DynamicStorage< T > > DFad
SimpleFad< ValueT > min(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Forward-mode AD class templated on the storage for the derivative array.
static auto apply(const T1 &a, const T2 &b) -> decltype(a+b)
static double init()
static double init()
#define T1(r, f)
Definition: Sacado_rad.hpp:603
Sacado::Fad::SLFad< double, fad_dim > SLFadType
Sacado::Fad::DFad< double > DFadType
static double init()
static auto apply(const T1 &a, const T2 &b) -> decltype(min(a, b))
static void apply(Tag tag, const ViewType &v, const ScalarViewType &s)
#define GLOBAL_FAD_SIZE
int value
KOKKOS_INLINE_FUNCTION void operator()(AddTag tag, const size_type i) const
#define TEUCHOS_TEST_EQUALITY(v1, v2, out, success)
Kokkos::TeamPolicy< execution_space >::member_type team_handle
const ScalarViewType m_s
const double tol
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
ViewType::execution_space execution_space
bool testAtomic(const TagType &tag, Teuchos::FancyOStream &out)
static auto apply(const T1 &a, const T2 &b) -> decltype(a *b)
static auto apply(const T1 &a, const T2 &b) -> decltype(max(a, b))