Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_KokkosTeuchosTimerInjection.cpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // ************************************************************************
39 // @HEADER
40 */
#include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp"
#include "TpetraCore_config.h"
#include "Tpetra_Details_Behavior.hpp"
#include "Kokkos_Core.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_Time.hpp"
#include "Teuchos_RCP.hpp"
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
#include "Teuchos_StackedTimer.hpp"
#include <sstream>
#endif
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
53 
55 
56 
57 
58 namespace {
59  // Get a useful label from the deviceId
60  // NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
61  std::string deviceIdToString(const uint32_t deviceId) {
62  using namespace Kokkos::Tools::Experimental;
63  std::string device_label("(");
64  ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
65  if (eid.type == DeviceType::Serial) device_label+="Serial";
66  else if (eid.type == DeviceType::OpenMP) device_label+="OpenMP";
67  else if (eid.type == DeviceType::Cuda) device_label+="Cuda";
68  else if (eid.type == DeviceType::HIP) device_label+="HIP";
69  else if (eid.type == DeviceType::OpenMPTarget) device_label+="OpenMPTarget";
70  else if (eid.type == DeviceType::HPX) device_label+="HPX";
71  else if (eid.type == DeviceType::Threads) device_label+="Threads";
72  else if (eid.type == DeviceType::SYCL) device_label+="SYCL";
73  else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC";
74  else if (eid.type == DeviceType::Unknown) device_label+="Unknown";
75  else device_label+="Unknown to Tpetra";
76 
77  if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
78  device_label += " All Instances)";
79  else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
80  device_label += " DeepCopyResource)";
81  else
82  device_label += " Instance " + std::to_string(eid.instance_id) + ")";
83 
84  return device_label;
85  }
86 
// Print a banner to std::cout explaining that a timer was stopped while one of
// its nested sub-timers was still running. The callers in this file invoke it
// from the handler that catches std::runtime_error around StackedTimer::stop().
//
// The text is streamed straight to std::cout: the intermediate
// std::ostringstream was unnecessary, and <sstream> is only included when
// HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER is defined.
void overlappingWarning() {
  std::cout <<
    "\n*********************************************************************\n"
    "WARNING: Overlapping timers detected!\n"
    "A TimeMonitor timer was stopped before a nested subtimer was\n"
    "stopped. This is not allowed by the StackedTimer. This corner case\n"
    "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
    "assigned to a new timer. To disable this warning, either fix the\n"
    "ordering of timer creation and destruction or disable the StackedTimer\n"
    << std::endl;  // flush so the warning is visible immediately
}
99 
100 }// anonymous space
101 
102 
103 namespace Tpetra {
104 namespace Details {
105 
106  namespace DeepCopyTimerInjection {
107  Teuchos::RCP<Teuchos::Time> timer_;
108  bool initialized_ = false;
109 
110  void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
111  Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
112  uint64_t size) {
113  // In verbose mode, we add the src/dst names as well
114  std::string extra_label;
116  extra_label = std::string(" {") + src_name + "=>" + dst_name + "}";
118  extra_label = std::string(" {") + src_name + "=>" + dst_name + "," + std::to_string(size)+"}";
119  }
120 
121  if(timer_ != Teuchos::null)
122  std::cout << "WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error"<<std::endl;
123 
124  // If the src_name is "Scalar" or "(none)" then we're doing a "Fill" style copy from host to devices, which we want to record separately.
125  if(!strcmp(src_name,"Scalar") || !strcmp(src_name,"(none)"))
126  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy_scalar [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
127  // If the size is under 65 bytes, we're going to flag this as "small" to make it easier to watch the big stuff
128  else if(size <= 64)
129  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy_small [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
130  else
131  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
132  timer_->start();
133  timer_->incrementNumCalls();
134 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
135  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
136  if (nonnull(stackedTimer))
137  stackedTimer->start(timer_->name());
138 #endif
139  }
140 
141  void kokkosp_end_deep_copy() {
142  if (timer_ != Teuchos::null) {
143  timer_->stop();
144 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
145  try {
146  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
147  if (nonnull(stackedTimer))
148  stackedTimer->stop(timer_->name());
149  }
150  catch (std::runtime_error&) {
151  overlappingWarning();
152  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
153  }
154 #endif
155  }
156  timer_ = Teuchos::null;
157  }
158 
159  }// end DeepCopyTimerInjection
160 
161  void AddKokkosDeepCopyToTimeMonitor(bool force) {
162  if (!DeepCopyTimerInjection::initialized_) {
165  Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
166  Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
167  DeepCopyTimerInjection::initialized_=true;
168  }
169  }
170  }
171 
172 
173  namespace FenceTimerInjection {
174  Teuchos::RCP<Teuchos::Time> timer_;
175  bool initialized_ = false;
176  uint64_t active_handle;
177 
178  void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
179  uint64_t* handle) {
180 
181  // Nested fences are not allowed
182  if(timer_ != Teuchos::null)
183  return;
184  active_handle = (active_handle+1) % 1024;
185  *handle = active_handle;
186 
187  std::string device_label = deviceIdToString(deviceId);
188 
189  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::fence ")+name + " " + device_label);
190  timer_->start();
191  timer_->incrementNumCalls();
192 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
193  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
194  if (nonnull(stackedTimer))
195  stackedTimer->start(timer_->name());
196 #endif
197 
198  }
199 
200 
201  void kokkosp_end_fence(const uint64_t handle) {
202  if(handle == active_handle) {
203  if (timer_ != Teuchos::null) {
204  timer_->stop();
205 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
206  try {
207  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
208  if (nonnull(stackedTimer))
209  stackedTimer->stop(timer_->name());
210  }
211  catch (std::runtime_error&) {
212  overlappingWarning();
213  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
214  }
215 #endif
216  }
217  timer_ = Teuchos::null;
218  }
219  // Else: We've nested our fences, and we need to ignore the inner fences
220  }
221 
222 
223  }//end FenceTimerInjection
224 
225  void AddKokkosFenceToTimeMonitor(bool force) {
226  if (!FenceTimerInjection::initialized_) {
228  Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
229  Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
230  FenceTimerInjection::initialized_=true;
231  }
232  }
233  }
234 
235 
236  namespace FunctionsTimerInjection {
237  Teuchos::RCP<Teuchos::Time> timer_;
238  bool initialized_ = false;
239 
240  void kokkosp_begin_kernel(const char* kernelName, const char* kernelPrefix, const uint32_t devID,
241  uint64_t* kernelID) {
242  // Nested fences are not allowed
243  if(timer_ != Teuchos::null)
244  return;
245  std::string device_label = deviceIdToString(devID);
246 
247  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::")+ kernelName + " " +kernelPrefix + " " + device_label);
248  timer_->start();
249  timer_->incrementNumCalls();
250 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
251  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
252  if (nonnull(stackedTimer))
253  stackedTimer->start(timer_->name());
254 #endif
255 
256  }
257 
258  void kokkosp_begin_for(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
259  kokkosp_begin_kernel("parallel_for",kernelPrefix,devID,kernelID);
260  }
261 
262  void kokkosp_begin_scan(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
263  kokkosp_begin_kernel("parallel_scan",kernelPrefix,devID,kernelID);
264  }
265 
266  void kokkosp_begin_reduce(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
267  kokkosp_begin_kernel("parallel_reduce",kernelPrefix,devID,kernelID);
268  }
269 
270  void kokkosp_end_kernel(const uint64_t handle) {
271  if (timer_ != Teuchos::null) {
272  timer_->stop();
273 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
274  try {
275  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
276  if (nonnull(stackedTimer))
277  stackedTimer->stop(timer_->name());
278  }
279  catch (std::runtime_error&) {
280  overlappingWarning();
281  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
282  }
283 #endif
284  }
285 
286  timer_ = Teuchos::null;
287  }
288  }//end FunctionsInjection
289 
290  void AddKokkosFunctionsToTimeMonitor(bool force) {
291  if (!FunctionsTimerInjection::initialized_) {
293  Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
294  Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
295  Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
296 
297  // The end-call is generic, even though the start-call is not.
298  Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
299  Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
300  Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
301  FunctionsTimerInjection::initialized_=true;
302  }
303  }
304  }
305 
306 
307 
308 } // namespace Details
309 } // namespace Tpetra
310 
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.