45#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
46#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
48#ifdef HAVE_TPETRA_INST_SYCL
57 class LocalOrdinalViewType>
58struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
59 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
66 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
72 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
79 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
88 class GlobalOrdinal,
class LocalOrdinalViewType>
89struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
90 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
91 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
92 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
93 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
94 const LocalOrdinalViewType & Acol2Brow,
95 const LocalOrdinalViewType & Acol2Irow,
96 const LocalOrdinalViewType & Bcol2Ccol,
97 const LocalOrdinalViewType & Icol2Ccol,
98 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
99 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
100 const std::string& label = std::string(),
101 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
103 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
104 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
106 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
107 const LocalOrdinalViewType & Acol2Brow,
108 const LocalOrdinalViewType & Acol2Irow,
109 const LocalOrdinalViewType & Bcol2Ccol,
110 const LocalOrdinalViewType & Icol2Ccol,
111 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
112 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
113 const std::string& label = std::string(),
114 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
116 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
117 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
118 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
119 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
120 const LocalOrdinalViewType & Acol2Brow,
121 const LocalOrdinalViewType & Acol2Irow,
122 const LocalOrdinalViewType & Bcol2Ccol,
123 const LocalOrdinalViewType & Icol2Ccol,
124 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
125 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
126 const std::string& label = std::string(),
127 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
133template<
class Scalar,
136 class LocalOrdinalViewType>
137void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
138 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
139 const LocalOrdinalViewType & Acol2Brow,
140 const LocalOrdinalViewType & Acol2Irow,
141 const LocalOrdinalViewType & Bcol2Ccol,
142 const LocalOrdinalViewType & Icol2Ccol,
143 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
144 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
145 const std::string& label,
146 const Teuchos::RCP<Teuchos::ParameterList>& params) {
149#ifdef HAVE_TPETRA_MMM_TIMINGS
150 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
151 using Teuchos::TimeMonitor;
152 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLWrapper")))));
155 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
156 std::string nodename(
"SYCL");
161 typedef typename KCRS::device_type device_t;
162 typedef typename KCRS::StaticCrsGraphType graph_t;
163 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
164 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
165 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
166 typedef typename KCRS::values_type::non_const_type scalar_view_t;
170 int team_work_size = 16;
171 std::string myalg(
"SPGEMM_KK_MEMORY");
172 if(!params.is_null()) {
173 if(params->isParameter(
"sycl: algorithm"))
174 myalg = params->get(
"sycl: algorithm",myalg);
175 if(params->isParameter(
"sycl: team work size"))
176 team_work_size = params->get(
"sycl: team work size",team_work_size);
180 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
181 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
182 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
185 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
186 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
188 c_lno_view_t Arowptr = Amat.graph.row_map,
189 Browptr = Bmat.graph.row_map;
190 const lno_nnz_view_t Acolind = Amat.graph.entries,
191 Bcolind = Bmat.graph.entries;
192 const scalar_view_t Avals = Amat.values,
195 c_lno_view_t Irowptr;
196 lno_nnz_view_t Icolind;
198 if(!Bview.importMatrix.is_null()) {
199 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
200 Irowptr = lclB.graph.row_map;
201 Icolind = lclB.graph.entries;
207 std::string alg = nodename+std::string(
" algorithm");
209 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
210 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
213 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
215#ifdef HAVE_TPETRA_MMM_TIMINGS
216 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLCore"))));
220 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
221 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
222 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
224 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
225 lno_nnz_view_t entriesC;
226 scalar_view_t valuesC;
228 kh.create_spgemm_handle(alg_enum);
229 kh.set_team_work_size(team_work_size);
231 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
233 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
235 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
236 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
238 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
239 kh.destroy_spgemm_handle();
241#ifdef HAVE_TPETRA_MMM_TIMINGS
242 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLSort"))));
246 if (params.is_null() || params->get(
"sort entries",
true))
247 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
248 C.setAllValues(row_mapC,entriesC,valuesC);
250#ifdef HAVE_TPETRA_MMM_TIMINGS
251 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLESFC"))));
255 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
256 labelList->set(
"Timer Label",label);
257 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
258 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
259 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
264template<
class Scalar,
267 class LocalOrdinalViewType>
268void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
269 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
270 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
271 const LocalOrdinalViewType & targetMapToOrigRow_dev,
272 const LocalOrdinalViewType & targetMapToImportRow_dev,
273 const LocalOrdinalViewType & Bcol2Ccol_dev,
274 const LocalOrdinalViewType & Icol2Ccol_dev,
275 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
276 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
277 const std::string& label,
278 const Teuchos::RCP<Teuchos::ParameterList>& params) {
281 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
283#ifdef HAVE_TPETRA_MMM_TIMINGS
284 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
285 using Teuchos::TimeMonitor;
286 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
287 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
294 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
295 typedef typename KCRS::StaticCrsGraphType graph_t;
296 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
297 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
298 typedef typename KCRS::values_type::non_const_type scalar_view_t;
301 typedef LocalOrdinal LO;
302 typedef GlobalOrdinal GO;
304 typedef Map<LO,GO,NO> map_type;
305 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
306 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
307 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
315 auto targetMapToOrigRow =
316 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 targetMapToOrigRow_dev);
318 auto targetMapToImportRow =
319 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
320 targetMapToImportRow_dev);
322 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
325 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
329 RCP<const map_type> Ccolmap = C.getColMap();
330 size_t m = Aview.origMatrix->getLocalNumRows();
331 size_t n = Ccolmap->getLocalNumElements();
334 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
335 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
336 const KCRS & Cmat = C.getLocalMatrixHost();
338 c_lno_view_t Arowptr = Amat.graph.row_map,
339 Browptr = Bmat.graph.row_map,
340 Crowptr = Cmat.graph.row_map;
341 const lno_nnz_view_t Acolind = Amat.graph.entries,
342 Bcolind = Bmat.graph.entries,
343 Ccolind = Cmat.graph.entries;
344 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
345 scalar_view_t Cvals = Cmat.values;
347 c_lno_view_t Irowptr;
348 lno_nnz_view_t Icolind;
350 if(!Bview.importMatrix.is_null()) {
351 auto lclB = Bview.importMatrix->getLocalMatrixHost();
352 Irowptr = lclB.graph.row_map;
353 Icolind = lclB.graph.entries;
357#ifdef HAVE_TPETRA_MMM_TIMINGS
358 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
370 std::vector<size_t> c_status(n, ST_INVALID);
373 size_t CSR_ip = 0, OLD_ip = 0;
374 for (
size_t i = 0; i < m; i++) {
378 CSR_ip = Crowptr[i+1];
379 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
380 c_status[Ccolind[k]] = k;
386 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
388 const SC Aval = Avals[k];
392 if (targetMapToOrigRow[Aik] != LO_INVALID) {
394 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
396 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
398 LO Cij = Bcol2Ccol[Bkj];
400 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
401 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
402 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
404 Cvals[c_status[Cij]] += Aval * Bvals[j];
409 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
410 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
412 LO Cij = Icol2Ccol[Ikj];
414 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
415 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
416 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
418 Cvals[c_status[Cij]] += Aval * Ivals[j];
424 C.fillComplete(C.getDomainMap(), C.getRangeMap());
428template<
class Scalar,
431 class LocalOrdinalViewType>
432void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
433 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
434 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
435 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
436 const LocalOrdinalViewType & Acol2Brow,
437 const LocalOrdinalViewType & Acol2Irow,
438 const LocalOrdinalViewType & Bcol2Ccol,
439 const LocalOrdinalViewType & Icol2Ccol,
440 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
441 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
442 const std::string& label,
443 const Teuchos::RCP<Teuchos::ParameterList>& params) {
445#ifdef HAVE_TPETRA_MMM_TIMINGS
446 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
447 using Teuchos::TimeMonitor;
448 Teuchos::RCP<TimeMonitor> MM;
456 std::string myalg(
"KK");
457 if(!params.is_null()) {
458 if(params->isParameter(
"sycl: jacobi algorithm"))
459 myalg = params->get(
"sycl: jacobi algorithm",myalg);
462 if(myalg ==
"MSAK") {
463 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
465 else if(myalg ==
"KK") {
466 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
469 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
472#ifdef HAVE_TPETRA_MMM_TIMINGS
473 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
477 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
478 labelList->set(
"Timer Label",label);
479 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
482 if(!C.isFillComplete()) {
483 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
484 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
492template<
class Scalar,
495 class LocalOrdinalViewType>
496void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
497 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
498 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
499 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
500 const LocalOrdinalViewType & targetMapToOrigRow_dev,
501 const LocalOrdinalViewType & targetMapToImportRow_dev,
502 const LocalOrdinalViewType & Bcol2Ccol_dev,
503 const LocalOrdinalViewType & Icol2Ccol_dev,
504 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
505 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
506 const std::string& label,
507 const Teuchos::RCP<Teuchos::ParameterList>& params) {
510 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
512#ifdef HAVE_TPETRA_MMM_TIMINGS
513 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
514 using Teuchos::TimeMonitor;
515 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore"))));
516 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
522 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
523 typedef typename KCRS::StaticCrsGraphType graph_t;
524 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
525 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
526 typedef typename KCRS::values_type::non_const_type scalar_view_t;
527 typedef typename scalar_view_t::memory_space scalar_memory_space;
530 typedef LocalOrdinal LO;
531 typedef GlobalOrdinal GO;
533 typedef Map<LO,GO,NO> map_type;
534 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
535 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
536 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
544 auto targetMapToOrigRow =
545 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
546 targetMapToOrigRow_dev);
547 auto targetMapToImportRow =
548 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
549 targetMapToImportRow_dev);
551 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
554 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
559 RCP<const map_type> Ccolmap = C.getColMap();
560 size_t m = Aview.origMatrix->getLocalNumRows();
561 size_t n = Ccolmap->getLocalNumElements();
564 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
565 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
566 const KCRS & Cmat = C.getLocalMatrixHost();
568 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
569 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
570 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
571 scalar_view_t Cvals = Cmat.values;
573 c_lno_view_t Irowptr;
574 lno_nnz_view_t Icolind;
576 if(!Bview.importMatrix.is_null()) {
577 auto lclB = Bview.importMatrix->getLocalMatrixHost();
578 Irowptr = lclB.graph.row_map;
579 Icolind = lclB.graph.entries;
585 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
587#ifdef HAVE_TPETRA_MMM_TIMINGS
588 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore - Compare"))));
596 std::vector<size_t> c_status(n, ST_INVALID);
599 size_t CSR_ip = 0, OLD_ip = 0;
600 for (
size_t i = 0; i < m; i++) {
605 CSR_ip = Crowptr[i+1];
606 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
607 c_status[Ccolind[k]] = k;
613 SC minusOmegaDval = -omega*Dvals(i,0);
616 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
617 Scalar Bval = Bvals[j];
621 LO Cij = Bcol2Ccol[Bij];
623 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
624 std::runtime_error,
"Trying to insert a new entry into a static graph");
626 Cvals[c_status[Cij]] = Bvals[j];
630 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
632 const SC Aval = Avals[k];
636 if (targetMapToOrigRow[Aik] != LO_INVALID) {
638 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
640 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
642 LO Cij = Bcol2Ccol[Bkj];
644 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
645 std::runtime_error,
"Trying to insert a new entry into a static graph");
647 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
652 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
653 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
655 LO Cij = Icol2Ccol[Ikj];
657 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
658 std::runtime_error,
"Trying to insert a new entry into a static graph");
660 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
666#ifdef HAVE_TPETRA_MMM_TIMINGS
668 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
671 C.fillComplete(C.getDomainMap(), C.getRangeMap());
676template<
class Scalar,
679 class LocalOrdinalViewType>
680void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
681 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
682 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
683 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
684 const LocalOrdinalViewType & Acol2Brow,
685 const LocalOrdinalViewType & Acol2Irow,
686 const LocalOrdinalViewType & Bcol2Ccol,
687 const LocalOrdinalViewType & Icol2Ccol,
688 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
689 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
690 const std::string& label,
691 const Teuchos::RCP<Teuchos::ParameterList>& params) {
693#ifdef HAVE_TPETRA_MMM_TIMINGS
694 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
695 using Teuchos::TimeMonitor;
696 Teuchos::RCP<TimeMonitor> MM;
703 auto rowMap = Aview.origMatrix->getRowMap();
705 Aview.origMatrix->getLocalDiagCopy(diags);
706 size_t diagLength = rowMap->getLocalNumElements();
707 Teuchos::Array<Scalar> diagonal(diagLength);
708 diags.get1dCopy(diagonal());
710 for(
size_t i = 0; i < diagLength; ++i) {
711 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
713 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
714 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
719 using device_t =
typename Kokkos::Compat::KokkosSYCLWrapperNode::device_type;
721 using graph_t =
typename matrix_t::StaticCrsGraphType;
722 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
723 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
724 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
725 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
728 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
729 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
730 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
733 c_lno_view_t Irowptr;
734 lno_nnz_view_t Icolind;
736 if(!Bview.importMatrix.is_null()) {
737 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
738 Irowptr = lclB.graph.row_map;
739 Icolind = lclB.graph.entries;
744 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
747 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
748 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
750 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
751 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
752 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
754 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
755 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
756 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
759 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
760 lno_nnz_view_t entriesC;
761 scalar_view_t valuesC;
764 int team_work_size = 16;
765 std::string myalg(
"SPGEMM_KK_MEMORY");
766 if(!params.is_null()) {
767 if(params->isParameter(
"sycl: algorithm"))
768 myalg = params->get(
"sycl: algorithm",myalg);
769 if(params->isParameter(
"sycl: team work size"))
770 team_work_size = params->get(
"sycl: team work size",team_work_size);
774 std::string nodename(
"SYCL");
775 std::string alg = nodename + std::string(
" algorithm");
776 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
777 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
782 kh.create_spgemm_handle(alg_enum);
783 kh.set_team_work_size(team_work_size);
785 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
786 Arowptr, Acolind,
false,
787 Browptr, Bcolind,
false,
790 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
792 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
793 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
796 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
797 Arowptr, Acolind, Avals,
false,
798 Browptr, Bcolind, Bvals,
false,
799 row_mapC, entriesC, valuesC,
800 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
801 kh.destroy_spgemm_handle();
803#ifdef HAVE_TPETRA_MMM_TIMINGS
804 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLSort"))));
808 if (params.is_null() || params->get(
"sort entries",
true))
809 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
810 C.setAllValues(row_mapC,entriesC,valuesC);
812#ifdef HAVE_TPETRA_MMM_TIMINGS
813 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
817 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
818 labelList->set(
"Timer Label",label);
819 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
820 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
821 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
Struct that holds views of the contents of a CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.