Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_OpenMP.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
44
45#ifdef HAVE_TPETRA_INST_OPENMP
46namespace Tpetra {
47namespace MMdetails {
48
49/*********************************************************************************************************/
50// MMM KernelWrappers for Partial Specialization to OpenMP
51template<class Scalar,
52 class LocalOrdinal,
53 class GlobalOrdinal, class LocalOrdinalViewType>
54struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57 const LocalOrdinalViewType & Acol2Brow,
58 const LocalOrdinalViewType & Acol2Irow,
59 const LocalOrdinalViewType & Bcol2Ccol,
60 const LocalOrdinalViewType & Icol2Ccol,
61 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63 const std::string& label = std::string(),
64 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
65
66 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType & Acol2Brow,
69 const LocalOrdinalViewType & Acol2Irow,
70 const LocalOrdinalViewType & Bcol2Ccol,
71 const LocalOrdinalViewType & Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76
77
78};
79
80
81// Jacobi KernelWrappers for Partial Specialization to OpenMP
82template<class Scalar,
83 class LocalOrdinal,
84 class GlobalOrdinal, class LocalOrdinalViewType>
85struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
86 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
88 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90 const LocalOrdinalViewType & Acol2Brow,
91 const LocalOrdinalViewType & Acol2Irow,
92 const LocalOrdinalViewType & Bcol2Ccol,
93 const LocalOrdinalViewType & Icol2Ccol,
94 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96 const std::string& label = std::string(),
97 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
98
99 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
103 const LocalOrdinalViewType & Acol2Brow,
104 const LocalOrdinalViewType & Acol2Irow,
105 const LocalOrdinalViewType & Bcol2Ccol,
106 const LocalOrdinalViewType & Icol2Ccol,
107 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
108 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
109 const std::string& label = std::string(),
110 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
111
112 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
113 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
114 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
116 const LocalOrdinalViewType & Acol2Brow,
117 const LocalOrdinalViewType & Acol2Irow,
118 const LocalOrdinalViewType & Bcol2Ccol,
119 const LocalOrdinalViewType & Icol2Ccol,
120 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
121 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
122 const std::string& label = std::string(),
123 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
124
125};
126
127
128// Triple-Product KernelWrappers for Partial Specialization to OpenMP
129template<class Scalar,
130 class LocalOrdinal,
131 class GlobalOrdinal, class LocalOrdinalViewType>
132struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
133 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
134 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
135 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
136 const LocalOrdinalViewType & Acol2Prow,
137 const LocalOrdinalViewType & Acol2PIrow,
138 const LocalOrdinalViewType & Pcol2Ccol,
139 const LocalOrdinalViewType & PIcol2Ccol,
140 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
141 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
142 const std::string& label = std::string(),
143 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
144
145static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
146 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
147 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
148 const LocalOrdinalViewType & Acol2Prow,
149 const LocalOrdinalViewType & Acol2PIrow,
150 const LocalOrdinalViewType & Pcol2Ccol,
151 const LocalOrdinalViewType & PIcol2Ccol,
152 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
153 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
154 const std::string& label = std::string(),
155 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
156
157 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
158 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
159 const LocalOrdinalViewType & Acol2Prow,
160 const LocalOrdinalViewType & Acol2PIrow,
161 const LocalOrdinalViewType & Pcol2Ccol,
162 const LocalOrdinalViewType & PIcol2Ccol,
163 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
164 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
165 const std::string& label = std::string(),
166 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
167
168 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
169 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
170 const LocalOrdinalViewType & Acol2Prow,
171 const LocalOrdinalViewType & Acol2PIrow,
172 const LocalOrdinalViewType & Pcol2Ccol,
173 const LocalOrdinalViewType & PIcol2Ccol,
174 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
175 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
176 const std::string& label = std::string(),
177 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
178};
179
180
181/*********************************************************************************************************/
182template<class Scalar,
183 class LocalOrdinal,
184 class GlobalOrdinal,
185 class LocalOrdinalViewType>
186void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
187 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
188 const LocalOrdinalViewType & Acol2Brow,
189 const LocalOrdinalViewType & Acol2Irow,
190 const LocalOrdinalViewType & Bcol2Ccol,
191 const LocalOrdinalViewType & Icol2Ccol,
192 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
193 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
194 const std::string& label,
195 const Teuchos::RCP<Teuchos::ParameterList>& params) {
196
197#ifdef HAVE_TPETRA_MMM_TIMINGS
198 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
199 using Teuchos::TimeMonitor;
200 Teuchos::RCP<TimeMonitor> MM;
201#endif
202
203 // Node-specific code
204 std::string nodename("OpenMP");
205
206 // Lots and lots of typedefs
207 using Teuchos::RCP;
209 typedef typename KCRS::device_type device_t;
210 typedef typename KCRS::StaticCrsGraphType graph_t;
211 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
212 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
213 typedef typename KCRS::values_type::non_const_type scalar_view_t;
214
215 // Options
216 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
217 std::string myalg("SPGEMM_KK_MEMORY");
218
219
220 if(!params.is_null()) {
221 if(params->isParameter("openmp: algorithm"))
222 myalg = params->get("openmp: algorithm",myalg);
223 if(params->isParameter("openmp: team work size"))
224 team_work_size = params->get("openmp: team work size",team_work_size);
225 }
226
227 if(myalg == "LTG") {
228 // Use the LTG kernel if requested
229 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
230 }
231 else {
232 // Use the Kokkos-Kernels OpenMP Kernel
233#ifdef HAVE_TPETRA_MMM_TIMINGS
234 MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
235#endif
236 // KokkosKernelsHandle
237 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
238 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
239 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
240
241 // Grab the Kokkos::SparseCrsMatrices
242 const KCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
243 // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
244
245 // Get the algorithm mode
246 std::string alg = nodename+std::string(" algorithm");
247 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
248 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
249 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
250
251 // Merge the B and Bimport matrices
252 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
253
254#ifdef HAVE_TPETRA_MMM_TIMINGS
255 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
256#endif
257
258 // Do the multiply on whatever we've got
259 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
260 // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
261 // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
262 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
263 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
264
265
266 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
267 lno_nnz_view_t entriesC;
268 scalar_view_t valuesC;
269 KernelHandle kh;
270 kh.create_spgemm_handle(alg_enum);
271 kh.set_team_work_size(team_work_size);
272 // KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
273 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
274
275 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
276 if (c_nnz_size){
277 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
278 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
279 }
280 // KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
281 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
282 kh.destroy_spgemm_handle();
283
284#ifdef HAVE_TPETRA_MMM_TIMINGS
285 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
286#endif
287 // Sort & set values
288 if (params.is_null() || params->get("sort entries",true))
289 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
290 C.setAllValues(row_mapC,entriesC,valuesC);
291
292 }// end OMP KokkosKernels loop
293
294#ifdef HAVE_TPETRA_MMM_TIMINGS
295 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
296#endif
297
298 // Final Fillcomplete
299 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
300 labelList->set("Timer Label",label);
301 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
302 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
303 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
304
305#if 0
306 {
307 Teuchos::ArrayRCP< const size_t > Crowptr;
308 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
309 Teuchos::ArrayRCP< const Scalar > Cvalues;
310 C.getAllValues(Crowptr,Ccolind,Cvalues);
311
312 // DEBUG
313 int MyPID = C->getComm()->getRank();
314 printf("[%d] Crowptr = ",MyPID);
315 for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
316 printf("%3d ",(int)Crowptr.getConst()[i]);
317 }
318 printf("\n");
319 printf("[%d] Ccolind = ",MyPID);
320 for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
321 printf("%3d ",(int)Ccolind.getConst()[i]);
322 }
323 printf("\n");
324 fflush(stdout);
325 // END DEBUG
326 }
327#endif
328}
329
330
331/*********************************************************************************************************/
332template<class Scalar,
333 class LocalOrdinal,
334 class GlobalOrdinal,
335 class LocalOrdinalViewType>
336void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
337 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
338 const LocalOrdinalViewType & Acol2Brow,
339 const LocalOrdinalViewType & Acol2Irow,
340 const LocalOrdinalViewType & Bcol2Ccol,
341 const LocalOrdinalViewType & Icol2Ccol,
342 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
343 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
344 const std::string& label,
345 const Teuchos::RCP<Teuchos::ParameterList>& params) {
346#ifdef HAVE_TPETRA_MMM_TIMINGS
347 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
348 using Teuchos::TimeMonitor;
349 Teuchos::RCP<TimeMonitor> MM;
350#endif
351
352 // Lots and lots of typedefs
353 using Teuchos::RCP;
354
355 // Options
356 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
357 std::string myalg("LTG");
358 if(!params.is_null()) {
359 if(params->isParameter("openmp: algorithm"))
360 myalg = params->get("openmp: algorithm",myalg);
361 if(params->isParameter("openmp: team work size"))
362 team_work_size = params->get("openmp: team work size",team_work_size);
363 }
364
365 if(myalg == "LTG") {
366 // Use the LTG kernel if requested
367 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
368 }
369 else {
370 throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
371 }
372
373#ifdef HAVE_TPETRA_MMM_TIMINGS
374 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
375#endif
376 C.fillComplete(C.getDomainMap(), C.getRangeMap());
377}
378
379
380/*********************************************************************************************************/
381template<class Scalar,
382 class LocalOrdinal,
383 class GlobalOrdinal,
384 class LocalOrdinalViewType>
385void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
386 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
387 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
388 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
389 const LocalOrdinalViewType & Acol2Brow,
390 const LocalOrdinalViewType & Acol2Irow,
391 const LocalOrdinalViewType & Bcol2Ccol,
392 const LocalOrdinalViewType & Icol2Ccol,
393 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
394 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
395 const std::string& label,
396 const Teuchos::RCP<Teuchos::ParameterList>& params) {
397
398#ifdef HAVE_TPETRA_MMM_TIMINGS
399 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
400 using Teuchos::TimeMonitor;
401 Teuchos::RCP<TimeMonitor> MM;
402#endif
403
404 // Node-specific code
405 using Teuchos::RCP;
406
407 // Options
408 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
409 std::string myalg("LTG");
410 if(!params.is_null()) {
411 if(params->isParameter("openmp: jacobi algorithm"))
412 myalg = params->get("openmp: jacobi algorithm",myalg);
413 if(params->isParameter("openmp: team work size"))
414 team_work_size = params->get("openmp: team work size",team_work_size);
415 }
416
417 if(myalg == "LTG") {
418 // Use the LTG kernel if requested
419 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
420 }
421 else if(myalg == "MSAK") {
422 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
423 }
424 else if(myalg == "KK") {
425 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
426 }
427 else {
428 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
429 }
430
431#ifdef HAVE_TPETRA_MMM_TIMINGS
432 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
433#endif
434
435 // Final Fillcomplete
436 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
437 labelList->set("Timer Label",label);
438 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
439
440 // NOTE: MSAK already fillCompletes, so we have to check here
441 if(!C.isFillComplete()) {
442 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
443 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
444 }
445
446}
447
448
449
450/*********************************************************************************************************/
451template<class Scalar,
452 class LocalOrdinal,
453 class GlobalOrdinal,
454 class LocalOrdinalViewType>
455void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
456 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
457 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
458 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
459 const LocalOrdinalViewType & Acol2Brow,
460 const LocalOrdinalViewType & Acol2Irow,
461 const LocalOrdinalViewType & Bcol2Ccol,
462 const LocalOrdinalViewType & Icol2Ccol,
463 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
464 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
465 const std::string& label,
466 const Teuchos::RCP<Teuchos::ParameterList>& params) {
467
468#ifdef HAVE_TPETRA_MMM_TIMINGS
469 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
470 using Teuchos::TimeMonitor;
471 Teuchos::RCP<TimeMonitor> MM;
472#endif
473
474 // Lots and lots of typedefs
475 using Teuchos::RCP;
476
477 // Options
478 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
479 std::string myalg("LTG");
480 if(!params.is_null()) {
481 if(params->isParameter("openmp: jacobi algorithm"))
482 myalg = params->get("openmp: jacobi algorithm",myalg);
483 if(params->isParameter("openmp: team work size"))
484 team_work_size = params->get("openmp: team work size",team_work_size);
485 }
486
487 if(myalg == "LTG") {
488 // Use the LTG kernel if requested
489 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
490 }
491 else {
492 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
493 }
494
495#ifdef HAVE_TPETRA_MMM_TIMINGS
496 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
497#endif
498 C.fillComplete(C.getDomainMap(), C.getRangeMap());
499
500}
501
502/*********************************************************************************************************/
503template<class Scalar,
504 class LocalOrdinal,
505 class GlobalOrdinal,
506 class LocalOrdinalViewType>
507void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
508 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
509 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
510 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
511 const LocalOrdinalViewType & Acol2Brow,
512 const LocalOrdinalViewType & Acol2Irow,
513 const LocalOrdinalViewType & Bcol2Ccol,
514 const LocalOrdinalViewType & Icol2Ccol,
515 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
516 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
517 const std::string& label,
518 const Teuchos::RCP<Teuchos::ParameterList>& params) {
519
520#ifdef HAVE_TPETRA_MMM_TIMINGS
521 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
522 using Teuchos::TimeMonitor;
523 Teuchos::RCP<TimeMonitor> MM;
524#endif
525
526 // Check if the diagonal entries exist in debug mode
527 const bool debug = Tpetra::Details::Behavior::debug();
528 if(debug) {
529
530 auto rowMap = Aview.origMatrix->getRowMap();
532 Aview.origMatrix->getLocalDiagCopy(diags);
533 size_t diagLength = rowMap->getLocalNumElements();
534 Teuchos::Array<Scalar> diagonal(diagLength);
535 diags.get1dCopy(diagonal());
536
537 for(size_t i = 0; i < diagLength; ++i) {
538 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
539 std::runtime_error,
540 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
541 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
542 }
543 }
544
545 // Usings
546 using device_t = typename Kokkos::Compat::KokkosOpenMPWrapperNode::device_type;
548 using graph_t = typename matrix_t::StaticCrsGraphType;
549 using lno_view_t = typename graph_t::row_map_type::non_const_type;
550 using c_lno_view_t = typename graph_t::row_map_type::const_type;
551 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
552 using scalar_view_t = typename matrix_t::values_type::non_const_type;
553
554 // KokkosKernels handle
555 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
556 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
557 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
558
559 // Get the rowPtr, colInd and vals of importMatrix
560 c_lno_view_t Irowptr;
561 lno_nnz_view_t Icolind;
562 scalar_view_t Ivals;
563 if(!Bview.importMatrix.is_null()) {
564 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
565 Irowptr = lclB.graph.row_map;
566 Icolind = lclB.graph.entries;
567 Ivals = lclB.values;
568 }
569
570 // Merge the B and Bimport matrices
571 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
572
573 // Get the properties and arrays of input matrices
574 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
575 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
576
577 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
578 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
579 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
580
581 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
582 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
583 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
584
585 // Arrays of the output matrix
586 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
587 lno_nnz_view_t entriesC;
588 scalar_view_t valuesC;
589
590 // Options
591 int team_work_size = 16;
592 std::string myalg("SPGEMM_KK_MEMORY");
593 if(!params.is_null()) {
594 if(params->isParameter("cuda: algorithm"))
595 myalg = params->get("cuda: algorithm",myalg);
596 if(params->isParameter("cuda: team work size"))
597 team_work_size = params->get("cuda: team work size",team_work_size);
598 }
599
600 // Get the algorithm mode
601 std::string nodename("OpenMP");
602 std::string alg = nodename + std::string(" algorithm");
603 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
604 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
605
606
607 // KokkosKernels call
608 handle_t kh;
609 kh.create_spgemm_handle(alg_enum);
610 kh.set_team_work_size(team_work_size);
611
612 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
613 Arowptr, Acolind, false,
614 Browptr, Bcolind, false,
615 row_mapC);
616
617 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
618 if (c_nnz_size){
619 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
620 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
621 }
622
623 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
624 Arowptr, Acolind, Avals, false,
625 Browptr, Bcolind, Bvals, false,
626 row_mapC, entriesC, valuesC,
627 omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
628 kh.destroy_spgemm_handle();
629
630#ifdef HAVE_TPETRA_MMM_TIMINGS
631 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
632#endif
633
634 // Sort & set values
635 if (params.is_null() || params->get("sort entries",true))
636 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
637 C.setAllValues(row_mapC,entriesC,valuesC);
638
639#ifdef HAVE_TPETRA_MMM_TIMINGS
640 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
641#endif
642
643 // Final Fillcomplete
644 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
645 labelList->set("Timer Label",label);
646 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
647 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
648 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
649}
650
651
652/*********************************************************************************************************/
653template<class Scalar,
654 class LocalOrdinal,
655 class GlobalOrdinal,
656 class LocalOrdinalViewType>
657void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
658 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
659 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
660 const LocalOrdinalViewType & Acol2Prow,
661 const LocalOrdinalViewType & Acol2PIrow,
662 const LocalOrdinalViewType & Pcol2Accol,
663 const LocalOrdinalViewType & PIcol2Accol,
664 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
665 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
666 const std::string& label,
667 const Teuchos::RCP<Teuchos::ParameterList>& params) {
668
669
670
671#ifdef HAVE_TPETRA_MMM_TIMINGS
672 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
673 using Teuchos::TimeMonitor;
674 Teuchos::RCP<TimeMonitor> MM;
675#endif
676
677 // Node-specific code
678 std::string nodename("OpenMP");
679
680 // Options
681 std::string myalg("LTG");
682
683 if(!params.is_null()) {
684 if(params->isParameter("openmp: rap algorithm"))
685 myalg = params->get("openmp: rap algorithm",myalg);
686 }
687
688 if(myalg == "LTG") {
689 // Use the LTG kernel if requested
690 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
691 }
692 else {
693 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
694 }
695}
696
697/*********************************************************************************************************/
698template<class Scalar,
699 class LocalOrdinal,
700 class GlobalOrdinal,
701 class LocalOrdinalViewType>
702void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
703 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
704 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
705 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
706
707 const LocalOrdinalViewType & Acol2Prow,
708 const LocalOrdinalViewType & Acol2Irow,
709 const LocalOrdinalViewType & Pcol2Ccol,
710 const LocalOrdinalViewType & Icol2Ccol,
711 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
712 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
713 const std::string& label,
714 const Teuchos::RCP<Teuchos::ParameterList>& params) {
715
716#ifdef HAVE_TPETRA_MMM_TIMINGS
717 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
718 using Teuchos::TimeMonitor;
719 Teuchos::RCP<TimeMonitor> MM;
720#endif
721
722 // Lots and lots of typedefs
723 using Teuchos::RCP;
724
725 // Options
726 std::string myalg("LTG");
727 if(!params.is_null()) {
728 if(params->isParameter("openmp: rap algorithm"))
729 myalg = params->get("openmp: rap algorithm",myalg);
730 }
731
732 if(myalg == "LTG") {
733 // Use the LTG kernel if requested
734 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
735 }
736 else {
737 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
738 }
739
740#ifdef HAVE_TPETRA_MMM_TIMINGS
741 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
742#endif
743 C.fillComplete(C.getDomainMap(), C.getRangeMap());
744
745}
746
747
748
749/*********************************************************************************************************/
750template<class Scalar,
751 class LocalOrdinal,
752 class GlobalOrdinal,
753 class LocalOrdinalViewType>
754void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
755
756 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
757 const LocalOrdinalViewType & Acol2Prow,
758 const LocalOrdinalViewType & Acol2PIrow,
759 const LocalOrdinalViewType & Pcol2Accol,
760 const LocalOrdinalViewType & PIcol2Accol,
761 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
762 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
763 const std::string& label,
764 const Teuchos::RCP<Teuchos::ParameterList>& params) {
765
766
767#ifdef HAVE_TPETRA_MMM_TIMINGS
768 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
769 using Teuchos::TimeMonitor;
770 Teuchos::RCP<TimeMonitor> MM;
771#endif
772
773 // Node-specific code
774 std::string nodename("OpenMP");
775
776 // Options
777 std::string myalg("LTG");
778
779 if(!params.is_null()) {
780 if(params->isParameter("openmp: ptap algorithm"))
781 myalg = params->get("openmp: ptap algorithm",myalg);
782 }
783
784 if(myalg == "LTG") {
785#ifdef HAVE_TPETRA_MMM_TIMINGS
786 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
787#endif
788
789 using Teuchos::ParameterList;
790 using Teuchos::RCP;
791 using LO = LocalOrdinal;
792 using GO = GlobalOrdinal;
793 using SC = Scalar;
794
795 // We don't need a kernel-level PTAP, we just transpose here
796 using transposer_type =
797 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
798 transposer_type transposer (Pview.origMatrix, label + "XP: ");
799 RCP<ParameterList> transposeParams (new ParameterList);
800 if (! params.is_null ()) {
801 transposeParams->set ("compute global constants",
802 params->get ("compute global constants: temporaries",
803 false));
804 }
805 transposeParams->set ("sort", false);
806 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
807 transposer.createTransposeLocal (transposeParams);
808 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
809 Rview.origMatrix = Ptrans;
810
811 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
812 mult_R_A_P_newmatrix_LowThreadGustavsonKernel
813 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
814 PIcol2Accol, Ac, Acimport, label, params);
815 }
816 else {
817 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
818 }
819}
820
821/*********************************************************************************************************/
822template<class Scalar,
823 class LocalOrdinal,
824 class GlobalOrdinal,
825 class LocalOrdinalViewType>
826void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
827
828 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
829 const LocalOrdinalViewType & Acol2Prow,
830 const LocalOrdinalViewType & Acol2PIrow,
831 const LocalOrdinalViewType & Pcol2Accol,
832 const LocalOrdinalViewType & PIcol2Accol,
833 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
834 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
835 const std::string& label,
836 const Teuchos::RCP<Teuchos::ParameterList>& params) {
837
838
839#ifdef HAVE_TPETRA_MMM_TIMINGS
840 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
841 using Teuchos::TimeMonitor;
842 Teuchos::RCP<TimeMonitor> MM;
843#endif
844
845 // Node-specific code
846 std::string nodename("OpenMP");
847
848 // Options
849 std::string myalg("LTG");
850
851 if(!params.is_null()) {
852 if(params->isParameter("openmp: ptap algorithm"))
853 myalg = params->get("openmp: ptap algorithm",myalg);
854 }
855
856 if(myalg == "LTG") {
857#ifdef HAVE_TPETRA_MMM_TIMINGS
858 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
859#endif
860
861 using Teuchos::ParameterList;
862 using Teuchos::RCP;
863 using LO = LocalOrdinal;
864 using GO = GlobalOrdinal;
865 using SC = Scalar;
866
867 // We don't need a kernel-level PTAP, we just transpose here
868 using transposer_type =
869 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
870 transposer_type transposer (Pview.origMatrix, label + "XP: ");
871 RCP<ParameterList> transposeParams (new ParameterList);
872 if (! params.is_null ()) {
873 transposeParams->set ("compute global constants",
874 params->get ("compute global constants: temporaries",
875 false));
876 }
877 transposeParams->set ("sort", false);
878 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
879 transposer.createTransposeLocal (transposeParams);
880 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
881 Rview.origMatrix = Ptrans;
882
883 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
884 mult_R_A_P_reuse_LowThreadGustavsonKernel
885 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
886 PIcol2Accol, Ac, Acimport, label, params);
887 }
888 else {
889 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
890 }
891 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
892}
893
894
895}//MMdetails
896}//Tpetra
897
898#endif//OpenMP
899
900#endif
Struct that holds views of the contents of a CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.