42#ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
43#define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
45#ifdef HAVE_TPETRA_INST_HIP
54 class LocalOrdinalViewType>
55struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
56 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
58 const LocalOrdinalViewType & Acol2Brow,
59 const LocalOrdinalViewType & Acol2Irow,
60 const LocalOrdinalViewType & Bcol2Ccol,
61 const LocalOrdinalViewType & Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
63 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
71 const LocalOrdinalViewType & Acol2Brow,
72 const LocalOrdinalViewType & Acol2Irow,
73 const LocalOrdinalViewType & Bcol2Ccol,
74 const LocalOrdinalViewType & Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
76 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
85 class GlobalOrdinal,
class LocalOrdinalViewType>
86struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
87 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
88 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> & Dinv,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
90 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
91 const LocalOrdinalViewType & Acol2Brow,
92 const LocalOrdinalViewType & Acol2Irow,
93 const LocalOrdinalViewType & Bcol2Ccol,
94 const LocalOrdinalViewType & Icol2Ccol,
95 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
96 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
97 const std::string& label = std::string(),
98 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
100 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
101 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> & Dinv,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
103 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
104 const LocalOrdinalViewType & Acol2Brow,
105 const LocalOrdinalViewType & Acol2Irow,
106 const LocalOrdinalViewType & Bcol2Ccol,
107 const LocalOrdinalViewType & Icol2Ccol,
108 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
109 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
110 const std::string& label = std::string(),
111 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
113 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
114 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> & Dinv,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
116 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
117 const LocalOrdinalViewType & Acol2Brow,
118 const LocalOrdinalViewType & Acol2Irow,
119 const LocalOrdinalViewType & Bcol2Ccol,
120 const LocalOrdinalViewType & Icol2Ccol,
121 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
122 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
123 const std::string& label = std::string(),
124 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
130template<
class Scalar,
133 class LocalOrdinalViewType>
134void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
135 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
136 const LocalOrdinalViewType & Acol2Brow,
137 const LocalOrdinalViewType & Acol2Irow,
138 const LocalOrdinalViewType & Bcol2Ccol,
139 const LocalOrdinalViewType & Icol2Ccol,
140 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
141 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
142 const std::string& label,
143 const Teuchos::RCP<Teuchos::ParameterList>& params) {
146#ifdef HAVE_TPETRA_MMM_TIMINGS
147 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
148 using Teuchos::TimeMonitor;
149 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPWrapper")))));
152 typedef Kokkos::Compat::KokkosHIPWrapperNode Node;
153 std::string nodename(
"HIP");
158 typedef typename KCRS::device_type device_t;
159 typedef typename KCRS::StaticCrsGraphType graph_t;
160 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
161 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
162 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
163 typedef typename KCRS::values_type::non_const_type scalar_view_t;
167 int team_work_size = 16;
168 std::string myalg(
"SPGEMM_KK_MEMORY");
169 if(!params.is_null()) {
170 if(params->isParameter(
"hip: algorithm"))
171 myalg = params->get(
"hip: algorithm",myalg);
172 if(params->isParameter(
"hip: team work size"))
173 team_work_size = params->get(
"hip: team work size",team_work_size);
177 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
178 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
179 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
182 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
183 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
185 c_lno_view_t Arowptr = Amat.graph.row_map,
186 Browptr = Bmat.graph.row_map;
187 const lno_nnz_view_t Acolind = Amat.graph.entries,
188 Bcolind = Bmat.graph.entries;
189 const scalar_view_t Avals = Amat.values,
192 c_lno_view_t Irowptr;
193 lno_nnz_view_t Icolind;
195 if(!Bview.importMatrix.is_null()) {
196 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
197 Irowptr = lclB.graph.row_map;
198 Icolind = lclB.graph.entries;
204 std::string alg = nodename+std::string(
" algorithm");
206 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
207 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
210 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
212#ifdef HAVE_TPETRA_MMM_TIMINGS
213 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPCore"))));
217 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
218 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
219 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
221 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
222 lno_nnz_view_t entriesC;
223 scalar_view_t valuesC;
225 kh.create_spgemm_handle(alg_enum);
226 kh.set_team_work_size(team_work_size);
228 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
230 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
232 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
233 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
235 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
236 kh.destroy_spgemm_handle();
238#ifdef HAVE_TPETRA_MMM_TIMINGS
239 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPSort"))));
243 if (params.is_null() || params->get(
"sort entries",
true))
244 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
245 C.setAllValues(row_mapC,entriesC,valuesC);
247#ifdef HAVE_TPETRA_MMM_TIMINGS
248 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPESFC"))));
252 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
253 labelList->set(
"Timer Label",label);
254 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
255 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > dummyExport;
256 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
261template<
class Scalar,
264 class LocalOrdinalViewType>
265void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
266 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
267 const LocalOrdinalViewType & targetMapToOrigRow_dev,
268 const LocalOrdinalViewType & targetMapToImportRow_dev,
269 const LocalOrdinalViewType & Bcol2Ccol_dev,
270 const LocalOrdinalViewType & Icol2Ccol_dev,
271 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
272 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
273 const std::string& label,
274 const Teuchos::RCP<Teuchos::ParameterList>& params) {
277 typedef Kokkos::Compat::KokkosHIPWrapperNode Node;
279#ifdef HAVE_TPETRA_MMM_TIMINGS
280 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
281 using Teuchos::TimeMonitor;
282 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
283 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
290 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
291 typedef typename KCRS::StaticCrsGraphType graph_t;
292 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
293 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
294 typedef typename KCRS::values_type::non_const_type scalar_view_t;
297 typedef LocalOrdinal LO;
298 typedef GlobalOrdinal GO;
300 typedef Map<LO,GO,NO> map_type;
301 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
302 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
303 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
308 auto targetMapToOrigRow =
309 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
310 targetMapToOrigRow_dev);
311 auto targetMapToImportRow =
312 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
313 targetMapToImportRow_dev);
315 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
318 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
322 RCP<const map_type> Ccolmap = C.getColMap();
323 size_t m = Aview.origMatrix->getLocalNumRows();
324 size_t n = Ccolmap->getLocalNumElements();
327 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
328 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
329 const KCRS & Cmat = C.getLocalMatrixHost();
331 c_lno_view_t Arowptr = Amat.graph.row_map,
332 Browptr = Bmat.graph.row_map,
333 Crowptr = Cmat.graph.row_map;
334 const lno_nnz_view_t Acolind = Amat.graph.entries,
335 Bcolind = Bmat.graph.entries,
336 Ccolind = Cmat.graph.entries;
337 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
338 scalar_view_t Cvals = Cmat.values;
340 c_lno_view_t Irowptr;
341 lno_nnz_view_t Icolind;
343 if(!Bview.importMatrix.is_null()) {
344 auto lclB = Bview.importMatrix->getLocalMatrixHost();
345 Irowptr = lclB.graph.row_map;
346 Icolind = lclB.graph.entries;
350#ifdef HAVE_TPETRA_MMM_TIMINGS
351 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
363 std::vector<size_t> c_status(n, ST_INVALID);
366 size_t CSR_ip = 0, OLD_ip = 0;
367 for (
size_t i = 0; i < m; i++) {
371 CSR_ip = Crowptr[i+1];
372 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
373 c_status[Ccolind[k]] = k;
379 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
381 const SC Aval = Avals[k];
385 if (targetMapToOrigRow[Aik] != LO_INVALID) {
387 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
389 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
391 LO Cij = Bcol2Ccol[Bkj];
393 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
394 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
395 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
397 Cvals[c_status[Cij]] += Aval * Bvals[j];
402 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
403 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
405 LO Cij = Icol2Ccol[Ikj];
407 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
408 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
409 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
411 Cvals[c_status[Cij]] += Aval * Ivals[j];
417 C.fillComplete(C.getDomainMap(), C.getRangeMap());
421template<
class Scalar,
424 class LocalOrdinalViewType>
425void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
426 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> & Dinv,
427 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
428 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
429 const LocalOrdinalViewType & Acol2Brow,
430 const LocalOrdinalViewType & Acol2Irow,
431 const LocalOrdinalViewType & Bcol2Ccol,
432 const LocalOrdinalViewType & Icol2Ccol,
433 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
434 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
435 const std::string& label,
436 const Teuchos::RCP<Teuchos::ParameterList>& params) {
438#ifdef HAVE_TPETRA_MMM_TIMINGS
439 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
440 using Teuchos::TimeMonitor;
441 Teuchos::RCP<TimeMonitor> MM;
449 std::string myalg(
"KK");
450 if(!params.is_null()) {
451 if(params->isParameter(
"hip: jacobi algorithm"))
452 myalg = params->get(
"hip: jacobi algorithm",myalg);
455 if(myalg ==
"MSAK") {
456 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
458 else if(myalg ==
"KK") {
459 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
462 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
465#ifdef HAVE_TPETRA_MMM_TIMINGS
466 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
470 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
471 labelList->set(
"Timer Label",label);
472 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
475 if(!C.isFillComplete()) {
476 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > dummyExport;
477 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
485template<
class Scalar,
488 class LocalOrdinalViewType>
489void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
490 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> & Dinv,
491 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
492 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
493 const LocalOrdinalViewType & targetMapToOrigRow_dev,
494 const LocalOrdinalViewType & targetMapToImportRow_dev,
495 const LocalOrdinalViewType & Bcol2Ccol_dev,
496 const LocalOrdinalViewType & Icol2Ccol_dev,
497 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
498 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
499 const std::string& label,
500 const Teuchos::RCP<Teuchos::ParameterList>& params) {
503 typedef Kokkos::Compat::KokkosHIPWrapperNode Node;
505#ifdef HAVE_TPETRA_MMM_TIMINGS
506 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
507 using Teuchos::TimeMonitor;
508 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore"))));
509 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
515 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
516 typedef typename KCRS::StaticCrsGraphType graph_t;
517 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
518 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
519 typedef typename KCRS::values_type::non_const_type scalar_view_t;
520 typedef typename scalar_view_t::memory_space scalar_memory_space;
523 typedef LocalOrdinal LO;
524 typedef GlobalOrdinal GO;
526 typedef Map<LO,GO,NO> map_type;
527 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
529 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
534 auto targetMapToOrigRow =
535 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
536 targetMapToOrigRow_dev);
537 auto targetMapToImportRow =
538 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
539 targetMapToImportRow_dev);
541 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
544 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
549 RCP<const map_type> Ccolmap = C.getColMap();
550 size_t m = Aview.origMatrix->getLocalNumRows();
551 size_t n = Ccolmap->getLocalNumElements();
554 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
555 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
556 const KCRS & Cmat = C.getLocalMatrixHost();
558 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
559 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
560 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
561 scalar_view_t Cvals = Cmat.values;
563 c_lno_view_t Irowptr;
564 lno_nnz_view_t Icolind;
566 if(!Bview.importMatrix.is_null()) {
567 auto lclB = Bview.importMatrix->getLocalMatrixHost();
568 Irowptr = lclB.graph.row_map;
569 Icolind = lclB.graph.entries;
575 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
577#ifdef HAVE_TPETRA_MMM_TIMINGS
578 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore - Compare"))));
586 std::vector<size_t> c_status(n, ST_INVALID);
589 size_t CSR_ip = 0, OLD_ip = 0;
590 for (
size_t i = 0; i < m; i++) {
595 CSR_ip = Crowptr[i+1];
596 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
597 c_status[Ccolind[k]] = k;
603 SC minusOmegaDval = -omega*Dvals(i,0);
606 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
607 Scalar Bval = Bvals[j];
611 LO Cij = Bcol2Ccol[Bij];
613 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
614 std::runtime_error,
"Trying to insert a new entry into a static graph");
616 Cvals[c_status[Cij]] = Bvals[j];
620 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
622 const SC Aval = Avals[k];
626 if (targetMapToOrigRow[Aik] != LO_INVALID) {
628 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
630 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
632 LO Cij = Bcol2Ccol[Bkj];
634 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
635 std::runtime_error,
"Trying to insert a new entry into a static graph");
637 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
642 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
643 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
645 LO Cij = Icol2Ccol[Ikj];
647 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
648 std::runtime_error,
"Trying to insert a new entry into a static graph");
650 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
656#ifdef HAVE_TPETRA_MMM_TIMINGS
658 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
661 C.fillComplete(C.getDomainMap(), C.getRangeMap());
666template<
class Scalar,
669 class LocalOrdinalViewType>
670void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
671 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> & Dinv,
672 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Aview,
673 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& Bview,
674 const LocalOrdinalViewType & Acol2Brow,
675 const LocalOrdinalViewType & Acol2Irow,
676 const LocalOrdinalViewType & Bcol2Ccol,
677 const LocalOrdinalViewType & Icol2Ccol,
678 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosHIPWrapperNode>& C,
679 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > Cimport,
680 const std::string& label,
681 const Teuchos::RCP<Teuchos::ParameterList>& params) {
683#ifdef HAVE_TPETRA_MMM_TIMINGS
684 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
685 using Teuchos::TimeMonitor;
686 Teuchos::RCP<TimeMonitor> MM;
693 auto rowMap = Aview.origMatrix->getRowMap();
695 Aview.origMatrix->getLocalDiagCopy(diags);
696 size_t diagLength = rowMap->getLocalNumElements();
697 Teuchos::Array<Scalar> diagonal(diagLength);
698 diags.get1dCopy(diagonal());
700 for(
size_t i = 0; i < diagLength; ++i) {
701 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
703 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
704 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
709 using device_t =
typename Kokkos::Compat::KokkosHIPWrapperNode::device_type;
711 using graph_t =
typename matrix_t::StaticCrsGraphType;
712 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
713 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
714 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
715 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
718 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
719 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
720 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
723 c_lno_view_t Irowptr;
724 lno_nnz_view_t Icolind;
726 if(!Bview.importMatrix.is_null()) {
727 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
728 Irowptr = lclB.graph.row_map;
729 Icolind = lclB.graph.entries;
734 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
737 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
738 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
740 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
741 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
742 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
744 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
745 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
746 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
749 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
750 lno_nnz_view_t entriesC;
751 scalar_view_t valuesC;
754 int team_work_size = 16;
755 std::string myalg(
"SPGEMM_KK_MEMORY");
756 if(!params.is_null()) {
757 if(params->isParameter(
"hip: algorithm"))
758 myalg = params->get(
"hip: algorithm",myalg);
759 if(params->isParameter(
"hip: team work size"))
760 team_work_size = params->get(
"hip: team work size",team_work_size);
764 std::string nodename(
"HIP");
765 std::string alg = nodename + std::string(
" algorithm");
766 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
767 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
772 kh.create_spgemm_handle(alg_enum);
773 kh.set_team_work_size(team_work_size);
775 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
776 Arowptr, Acolind,
false,
777 Browptr, Bcolind,
false,
780 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
782 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
783 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
786 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
787 Arowptr, Acolind, Avals,
false,
788 Browptr, Bcolind, Bvals,
false,
789 row_mapC, entriesC, valuesC,
790 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
791 kh.destroy_spgemm_handle();
793#ifdef HAVE_TPETRA_MMM_TIMINGS
794 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPSort"))));
798 if (params.is_null() || params->get(
"sort entries",
true))
799 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
800 C.setAllValues(row_mapC,entriesC,valuesC);
802#ifdef HAVE_TPETRA_MMM_TIMINGS
803 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
807 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
808 labelList->set(
"Timer Label",label);
809 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
810 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosHIPWrapperNode> > dummyExport;
811 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
Struct that holds views of the contents of a CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.