Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Cholmod_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
53 #ifndef AMESOS2_CHOLMOD_DEF_HPP
54 #define AMESOS2_CHOLMOD_DEF_HPP
55 
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
59 
61 #include "Amesos2_Cholmod_decl.hpp"
62 
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
64 #include "KokkosSparse_sptrsv_cholmod.hpp"
65 #endif
66 
67 namespace Amesos2 {
68 
69 
70 template <class Matrix, class Vector>
72  Teuchos::RCP<const Matrix> A,
73  Teuchos::RCP<Vector> X,
74  Teuchos::RCP<const Vector> B )
75  : SolverCore<Amesos2::Cholmod,Matrix,Vector>(A, X, B)
76  , firstsolve(true)
77  , skip_symfact(false)
78  , map()
79  , is_contiguous_(true) // default is set by params
80  , use_triangular_solves_(false) // default is set by params
81  , use_cholmod_int_type_(false) // if true we use CHOLMOD_INT (no GPU support)
82 {
83  data_.L = NULL;
84  data_.Y = NULL;
85  data_.E = NULL;
86 
87  data_.c.supernodal = CHOLMOD_AUTO;
88  data_.c.quick_return_if_not_posdef = 1;
89 }
90 
91 
92 template <class Matrix, class Vector>
94 {
95  if(use_cholmod_int_type_) {
96  cholmod_free_factor(&(data_.L), &(data_.c));
97  cholmod_free_dense(&(data_.Y), &data_.c);
98  cholmod_free_dense(&(data_.E), &data_.c);
99  cholmod_finish(&(data_.c));
100  }
101  else {
102  // useful to check if GPU is being used, but very verbose
103  // cholmod_l_gpu_stats(&(data_.c));
104  cholmod_l_free_factor(&(data_.L), &(data_.c));
105  cholmod_l_free_dense(&(data_.Y), &data_.c);
106  cholmod_l_free_dense(&(data_.E), &data_.c);
107  cholmod_l_finish(&(data_.c));
108  }
109 }
110 
111 template<class Matrix, class Vector>
112 int
114 {
115 #ifdef HAVE_AMESOS2_TIMERS
116  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
117 #endif
118 
119  int info = 0;
120 
121  if(use_cholmod_int_type_) {
122  data_.L = cholmod_analyze(&data_.A, &(data_.c));
123  }
124  else {
125  data_.L = cholmod_l_analyze(&data_.A, &(data_.c));
126  }
127 
128  info = data_.c.status;
129  skip_symfact = true;
130 
131  /* All processes should have the same error code */
132  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
133 
134  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
135  std::runtime_error,
136  "Amesos2 cholmod_l_analyze failure in Cholmod preOrdering_impl");
137 
138  return(0);
139 }
140 
141 
142 template <class Matrix, class Vector>
143 int
145 {
146  int info = 0;
147 
148  if(!skip_symfact) {
149 #ifdef HAVE_AMESOS2_TIMERS
150  Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
151 #endif
152 
153  if(use_cholmod_int_type_) {
154  cholmod_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
155  }
156  else {
157  cholmod_l_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
158  }
159 
160  info = data_.c.status;
161  } else {
162  /*
163  * Symbolic factorization has already occured in preOrdering_impl,
164  * but if the user calls this routine directly later, we need to
165  * redo the symbolic factorization.
166  */
167  skip_symfact = false;
168  }
169 
170  /* All processes should have the same error code */
171  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
172 
173  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
174  std::runtime_error,
175  "Amesos2 cholmod_l_resymbol failure in Cholmod symbolicFactorization_impl");
176 
177  if(use_triangular_solves_) {
178  triangular_solve_symbolic();
179  }
180 
181  return(0);
182 }
183 
184 
185 template <class Matrix, class Vector>
186 int
188 {
189  int info = 0;
190 
191 #ifdef HAVE_AMESOS2_DEBUG
192  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.ncol != Teuchos::as<size_t>(this->globalNumCols_),
193  std::runtime_error,
194  "Error in converting to cholmod_sparse: wrong number of global columns." );
195  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.nrow != Teuchos::as<size_t>(this->globalNumRows_),
196  std::runtime_error,
197  "Error in converting to cholmod_sparse: wrong number of global rows." );
198 #endif
199 
200 #ifdef HAVE_AMESOS2_TIMERS
201  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
202 #endif
203 
204 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
205  // Add print out of views here - need host conversion first
206 #endif
207 
208  if(use_cholmod_int_type_) {
209  cholmod_factorize(&data_.A, data_.L, &(data_.c));
210  }
211  else {
212  cholmod_l_factorize(&data_.A, data_.L, &(data_.c));
213  }
214 
215  info = data_.c.status;
216 
217  /* All processes should have the same error code */
218  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
219 
220  TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_OUT_OF_MEMORY,
221  std::runtime_error,
222  "Amesos2 cholmod_l_factorize error code: CHOLMOD_OUT_OF_MEMORY");
223 
224  TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_NOT_POSDEF,
225  std::runtime_error,
226  "Amesos2 cholmod_l_factorize error code: CHOLMOD_NOT_POSDEF.");
227 
228  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
229  std::runtime_error,
230  "Amesos2 cholmod_l_factorize error code:" << info);
231 
232  if(use_triangular_solves_) {
233  triangular_solve_numeric();
234  }
235 
236  return(info);
237 }
238 
239 
240 template <class Matrix, class Vector>
241 int
243  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
244 {
245  const global_size_type ld_rhs = X->getGlobalLength();
246  const size_t nrhs = X->getGlobalNumVectors();
247 
248  bool bDidAssignX;
249  { // Get values from RHS B
250 #ifdef HAVE_AMESOS2_TIMERS
251  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
252  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
253 #endif
254 
255  // In general we may want to write directly to the x space without a copy.
256  // So we 'get' x which may be a direct view assignment to the MV.
257  const bool initialize_data = true;
258  const bool do_not_initialize_data = false;
259  if(use_triangular_solves_) { // to device
260 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
261  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
262  device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
263  Teuchos::as<size_t>(ld_rhs),
264  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
265  this->rowIndexBase_);
266  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
267  device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
268  Teuchos::as<size_t>(ld_rhs),
269  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
270  this->rowIndexBase_);
271 #endif
272  }
273  else { // to host
274  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
275  host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
276  Teuchos::as<size_t>(ld_rhs),
277  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
278  this->rowIndexBase_);
279  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
280  host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
281  Teuchos::as<size_t>(ld_rhs),
282  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
283  this->rowIndexBase_);
284  }
285  }
286 
287  int ierr = 0; // returned error code
288 
289 #ifdef HAVE_AMESOS2_TIMERS
290  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
291 #endif
292 
293  if(use_triangular_solves_) {
294  triangular_solve();
295  }
296  else {
297  function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
298  Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_bValues_.data(), &data_.b);
299  function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
300  Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_xValues_.data(), &data_.x);
301 
302  cholmod_dense *xtemp = &(data_.x);
303  if(use_cholmod_int_type_) {
304  cholmod_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
305  &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
306  }
307  else {
308  cholmod_l_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
309  &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
310  }
311  }
312 
313  ierr = data_.c.status;
314 
315  /* All processes should have the same error code */
316  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
317 
318  TEUCHOS_TEST_FOR_EXCEPTION(ierr == -2, std::runtime_error, "Ran out of memory" );
319 
320  /* Update X's global values */
321 
322  // if bDidAssignX, then we solved straight to the adapter's X memory space without
323  // requiring additional memory allocation, so the x data is already in place.
324  if(!bDidAssignX) {
325 #ifdef HAVE_AMESOS2_TIMERS
326  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
327 #endif
328 
329  if(use_triangular_solves_) { // to device
330 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
331  Util::put_1d_data_helper_kokkos_view<
332  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
333  Teuchos::as<size_t>(ld_rhs),
334  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
335  this->rowIndexBase_);
336 #endif
337  }
338  else { // to host
339  Util::put_1d_data_helper_kokkos_view<
340  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
341  Teuchos::as<size_t>(ld_rhs),
342  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
343  this->rowIndexBase_);
344  }
345  }
346 
347  return(ierr);
348 }
349 
350 
351 template <class Matrix, class Vector>
352 bool
354 {
355  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
356 }
357 
358 
359 template <class Matrix, class Vector>
360 void
361 Cholmod<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
362 {
363  // Note that the ordering is very important here. We must do this sequence:
364  // (1) Read parameter for use_cholmod_int_type
365  // (2) Call cholmod_start or cholmod_l_start
366  // (3) Set additional data_.c values
367  use_cholmod_int_type_ = parameterList->get<bool>("CholmodInt", false);
368 
369  if(use_cholmod_int_type_) {
370  data_.c.itype = CHOLMOD_INT;
371  cholmod_start(&data_.c);
372  }
373  else {
374  data_.c.itype = CHOLMOD_LONG;
375  cholmod_l_start(&data_.c); // long form required for CUDA
376  }
377 
378  using Teuchos::RCP;
379  using Teuchos::getIntegralValue;
380  using Teuchos::ParameterEntryValidator;
381 
382  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
383 
384  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
385  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
386 
387  if(use_triangular_solves_) {
388 #if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
389  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
390  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_CHOLMOD not configured." );
391 #endif
392  }
393 
394  data_.c.dbound = parameterList->get<double>("dbound", 0.0);
395  data_.c.prefer_upper = (parameterList->get<bool>("PreferUpper", true)) ? 1 : 0;
396  data_.c.print = parameterList->get<int>("print", 3);
397  data_.c.nmethods = parameterList->get<int>("nmethods", 0); // tries AMD and may try METIS if necessary and available
398 
399  // useGPU = 1 means use GPU if possible - note this must be set after cholmod_l_start
400  // since cholmod_l_start sets it to the default value of -1.
401  // Setting to -1 means GPU is only used if env variable CHOLMOD_USE_GPU is set to 1.
402 #ifdef KOKKOS_ENABLE_CUDA
403  const int default_gpu_setting = 1; // use gpu by default
404 #else
405  // If building Trilinos with Cuda off, Cholmod can stil use GPU and the solver will still work.
406  // But that is likely not what was expected/wanted. If you do still want it, set the param to 1.
407  const int default_gpu_setting = 0; // use gpu by default
408 #endif
409 
410  data_.c.useGPU = parameterList->get<int>("useGPU", default_gpu_setting);
411 
412 #ifdef KOKKOS_ENABLE_CUDA
413  TEUCHOS_TEST_FOR_EXCEPTION(data_.c.useGPU != 0 && use_cholmod_int_type_, std::runtime_error,
414  "Amesos2 Cholmod solver must not use GPU (parameter useGPU = 0) if CholmodInt is turned on because Cholmod only supports GPU with long." );
415 #endif
416 
417  bool bSuperNodal = parameterList->get<bool>("SuperNodal", false);
418  data_.c.supernodal = bSuperNodal ? CHOLMOD_SUPERNODAL : CHOLMOD_AUTO;
419 }
420 
421 
422 template <class Matrix, class Vector>
423 Teuchos::RCP<const Teuchos::ParameterList>
425 {
426  using std::string;
427  using Teuchos::tuple;
428  using Teuchos::ParameterList;
429  using Teuchos::EnhancedNumberValidator;
430  using Teuchos::setStringToIntegralParameter;
431  using Teuchos::stringToIntegralParameterEntryValidator;
432 
433  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
434 
435  if( is_null(valid_params) ){
436  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
437 
438 
439  Teuchos::RCP<EnhancedNumberValidator<int> > print_validator
440  = Teuchos::rcp( new EnhancedNumberValidator<int>(0,5));
441 
442  Teuchos::RCP<EnhancedNumberValidator<int> > nmethods_validator
443  = Teuchos::rcp( new EnhancedNumberValidator<int>(0,9));
444 
445  pl->set("nmethods", 0, "Specifies the number of different ordering methods to try", nmethods_validator);
446 
447  pl->set("print", 3, "Specifies the verbosity of the print statements", print_validator);
448 
449  pl->set("dbound", 0.0,
450  "Specifies the smallest absolute value on the diagonal D for the LDL factorization");
451 
452  pl->set("PreferUpper", true,
453  "Specifies whether the matrix will be "
454  "stored in upper triangular form.");
455 
456  pl->set("useGPU", -1, "1: Use GPU is 1, 0: Do not use GPU, -1: ENV CHOLMOD_USE_GPU set GPU usage.");
457 
458  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
459 
460  pl->set("CholmodInt", false, "Whether to use cholmod in int form");
461 
462  pl->set("IsContiguous", true, "Whether GIDs contiguous");
463 
464  pl->set("SuperNodal", false, "Whether to use super nodal");
465 
466  valid_params = pl;
467  }
468 
469  return valid_params;
470 }
471 
472 
473 template <class Matrix, class Vector>
474 bool
476 {
477 #ifdef HAVE_AMESOS2_TIMERS
478  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
479 #endif
480 
481  // Only the root image needs storage allocated
482  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
483  if(use_cholmod_int_type_) {
484  Kokkos::resize(host_rows_int_view_, this->globalNumNonZeros_);
485  Kokkos::resize(host_col_ptr_int_view_, this->globalNumRows_ + 1);
486  }
487  else {
488  Kokkos::resize(host_rows_long_view_, this->globalNumNonZeros_);
489  Kokkos::resize(host_col_ptr_long_view_, this->globalNumRows_ + 1);
490  }
491 
492  {
493 #ifdef HAVE_AMESOS2_TIMERS
494  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
495 #endif
496 
497  TEUCHOS_TEST_FOR_EXCEPTION(this->rowIndexBase_ != this->columnIndexBase_,
498  std::runtime_error,
499  "Row and column maps have different indexbase ");
500 
501  if(use_cholmod_int_type_) {
502  int nnz_ret = 0;
504  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_int_type_array,
505  host_size_int_type_array>::do_get(this->matrixA_.ptr(),
506  host_nzvals_view_, host_rows_int_view_,
507  host_col_ptr_int_view_, nnz_ret,
508  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
509  ARBITRARY,
510  this->rowIndexBase_);
511 
512  TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
513  std::runtime_error,
514  "Did not get the expected number of non-zero vals");
515  }
516  else {
517  long nnz_ret = 0;
518 
520  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_long_type_array,
521  host_size_long_type_array>::do_get(this->matrixA_.ptr(),
522  host_nzvals_view_, host_rows_long_view_,
523  host_col_ptr_long_view_, nnz_ret,
524  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
525  ARBITRARY,
526  this->rowIndexBase_);
527 
528  TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
529  std::runtime_error,
530  "Did not get the expected number of non-zero vals");
531  }
532  }
533 
534  if(use_cholmod_int_type_) {
535  function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
536  Teuchos::as<size_t>(this->globalNumCols_),
537  Teuchos::as<size_t>(this->globalNumNonZeros_),
538  0,
539  host_col_ptr_int_view_.data(),
540  host_nzvals_view_.data(),
541  host_rows_int_view_.data(),
542  &(data_.A),
543  CHOLMOD_INT);
544  }
545  else {
546  function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
547  Teuchos::as<size_t>(this->globalNumCols_),
548  Teuchos::as<size_t>(this->globalNumNonZeros_),
549  0,
550  host_col_ptr_long_view_.data(),
551  host_nzvals_view_.data(),
552  host_rows_long_view_.data(),
553  &(data_.A),
554  CHOLMOD_LONG);
555  }
556 
557  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.stype == 0, std::runtime_error,
558  "CHOLMOD loadA_impl loaded matrix but it is not symmetric.");
559 
560  return true;
561 }
562 
563 
564 template <class Matrix, class Vector>
565 void
567 {
568 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
569  // Create handles for U and U^T solves
570  if(use_cholmod_int_type_) {
571  device_int_khL_.create_sptrsv_handle(
572  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
573  device_int_khU_.create_sptrsv_handle(
574  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
575  }
576  else {
577  device_long_khL_.create_sptrsv_handle(
578  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
579  device_long_khU_.create_sptrsv_handle(
580  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
581  }
582 
583  // extract etree and iperm from CHOLMOD
584  Kokkos::resize(host_trsv_etree_, data_.L->nsuper);
585  if(use_cholmod_int_type_) {
586  int *int_etree = static_cast<int*>(data_.c.Iwork) + 2 * data_.L->n;
587  for (size_t i = 0 ; i < data_.L->nsuper; ++i) {
588  host_trsv_etree_(i) = int_etree[i];
589  }
590  }
591  else {
592  long *long_etree = static_cast<long*>(data_.c.Iwork) + 2 * data_.L->n;
593  for (size_t i = 0 ; i < data_.L->nsuper; ++i) { // convert long to int array for trsv API
594  host_trsv_etree_(i) = static_cast<int>(long_etree[i]);
595  }
596  }
597 
598  // set etree
599  if(use_cholmod_int_type_) {
600  device_int_khL_.set_sptrsv_etree(host_trsv_etree_.data());
601  device_int_khU_.set_sptrsv_etree(host_trsv_etree_.data());
602  }
603  else {
604  device_long_khL_.set_sptrsv_etree(host_trsv_etree_.data());
605  device_long_khU_.set_sptrsv_etree(host_trsv_etree_.data());
606  }
607 
608  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
609  Kokkos::resize(host_trsv_perm_, ld_rhs);
610  if(use_cholmod_int_type_) {
611  int *int_iperm = static_cast<int*>(data_.L->Perm);
612  for (size_t i = 0; i < ld_rhs; i++) {
613  host_trsv_perm_(int_iperm[i]) = i;
614  }
615  }
616  else {
617  long *long_iperm = static_cast<long*>(data_.L->Perm);
618  for (size_t i = 0; i < ld_rhs; i++) {
619  host_trsv_perm_(long_iperm[i]) = i;
620  }
621  }
622  deep_copy_or_assign_view(device_trsv_perm_, host_trsv_perm_); // will use device to permute
623 
624  // set permutation
625  if(use_cholmod_int_type_) {
626  device_int_khL_.set_sptrsv_perm(host_trsv_perm_.data());
627  device_int_khU_.set_sptrsv_perm(host_trsv_perm_.data());
628  }
629  else {
630  device_long_khL_.set_sptrsv_perm(host_trsv_perm_.data());
631  device_long_khU_.set_sptrsv_perm(host_trsv_perm_.data());
632  }
633 
634  // Do symbolic analysis
635  if(use_cholmod_int_type_) {
636  KokkosSparse::Experimental::sptrsv_symbolic<int, kernel_handle_int_type>
637  (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
638  }
639  else {
640  KokkosSparse::Experimental::sptrsv_symbolic<long, kernel_handle_long_type>
641  (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
642  }
643 #endif
644 }
645 
646 
647 template <class Matrix, class Vector>
648 void
649 Cholmod<Matrix,Vector>::triangular_solve_numeric()
650 {
651 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
652  // Do numerical compute
653  if(use_cholmod_int_type_) {
654  KokkosSparse::Experimental::sptrsv_compute<int, kernel_handle_int_type>
655  (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
656  }
657  else {
658  KokkosSparse::Experimental::sptrsv_compute<long, kernel_handle_long_type>
659  (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
660  }
661 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
662 }
663 
664 
665 template <class Matrix, class Vector>
666 void
667 Cholmod<Matrix,Vector>::triangular_solve() const
668 {
669 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
670  size_t ld_rhs = device_xValues_.extent(0);
671  size_t nrhs = device_xValues_.extent(1);
672 
673  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
674  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
675 
676  // forward pivot
677  auto local_device_bValues = device_bValues_;
678  auto local_device_trsv_perm = device_trsv_perm_;
679  auto local_device_trsv_rhs = device_trsv_rhs_;
680  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
681  KOKKOS_LAMBDA(size_t j) {
682  for(size_t k = 0; k < nrhs; ++k) {
683  local_device_trsv_rhs(local_device_trsv_perm(j),k) = local_device_bValues(j,k);
684  }
685  });
686 
687  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
688  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
689  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
690 
691  // do L solve= - numeric (only rhs is modified) on the default device/host space
692  if(use_cholmod_int_type_) {
693  KokkosSparse::Experimental::sptrsv_solve(&device_int_khL_, sub_sol, sub_rhs);
694  }
695  else {
696  KokkosSparse::Experimental::sptrsv_solve(&device_long_khL_, sub_sol, sub_rhs);
697  }
698 
699  // do L^T solve - numeric (only rhs is modified) on the default device/host space
700  if(use_cholmod_int_type_) {
701  KokkosSparse::Experimental::sptrsv_solve(&device_int_khU_, sub_rhs, sub_sol);
702  }
703  else {
704  KokkosSparse::Experimental::sptrsv_solve(&device_long_khU_, sub_rhs, sub_sol);
705  }
706 
707  } // end loop over rhs vectors
708 
709  // backward pivot
710  auto local_device_xValues = device_xValues_;
711  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
712  KOKKOS_LAMBDA(size_t j) {
713  for(size_t k = 0; k < nrhs; ++k) {
714  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm(j),k);
715  }
716  });
717 #endif
718 }
719 
720 
721 template<class Matrix, class Vector>
722 const char* Cholmod<Matrix,Vector>::name = "Cholmod";
723 
724 
725 } // end namespace Amesos2
726 
727 #endif // AMESOS2_CHOLMOD_DEF_HPP
~Cholmod()
Destructor.
Definition: Amesos2_Cholmod_def.hpp:93
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Cholmod_def.hpp:113
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:648
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Cholmod_def.hpp:424
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Cholmod_def.hpp:353
Cholmod(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Cholmod_def.hpp:71
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
CHOLMOD specific solve.
Definition: Amesos2_Cholmod_def.hpp:242
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Cholmod_def.hpp:361
Amesos2 CHOLMOD declarations.
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Cholmod_def.hpp:475
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using CHOLMOD.
Definition: Amesos2_Cholmod_def.hpp:144
Amesos2 interface to the CHOLMOD package.
Definition: Amesos2_Cholmod_decl.hpp:76
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
int numericFactorization_impl()
CHOLMOD specific numeric factorization.
Definition: Amesos2_Cholmod_def.hpp:187