Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_ExtraKernels_decl.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
43#define TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
45
46
47namespace Tpetra {
48
49namespace MatrixMatrix {
50
51 // Alias template: an unmanaged Kokkos::View type that keeps the data_type, array_layout and device_type of the given View type and only swaps the memory traits to Kokkos::Unmanaged.
52 template <typename View>
53 using UnmanagedView = Kokkos::View< typename View::data_type
54 , typename View::array_layout
55 , typename View::device_type
56 , typename Kokkos::MemoryTraits< Kokkos::Unmanaged>
57 >;
58
59 namespace ExtraKernels {
60
// Declaration only. Takes two matrices of the same CrsMatrixType by non-const
// reference and returns a size_t.
// NOTE(review): the name suggests an estimate of the nonzeros per row of the
// product C = A*B -- confirm against the definition, which is not visible here.
61 template<class CrsMatrixType>
62 size_t C_estimate_nnz_per_row(CrsMatrixType & A, CrsMatrixType &B);
63
64 // 2019 Apr 10 JJE:
65 // Copies data from thread-local chunks into a unified CSR structure.
66 // The 'const' on the Incolind and Invals arrays is a lie: the routine will
67 // deallocate the thread-local storage. Maybe they shouldn't be const; or mark
68 // them non-const and have a helper function for the actual copies that takes
69 // them as const. The point of const is that we want the loops to optimize
70 // assuming the RHS is unchanging.
// Note: thread_total_nnz has the same type (OutColindType) as the Outcolind
// output array. Outrowptr, Outcolind and Outvals are taken by non-const
// reference; m and thread_chunk are passed by value.
// NOTE(review): m is presumably the number of rows and thread_chunk the
// rows-per-thread partition size -- confirm against the definition.
71 template<class InColindArrayType,
72 class InValsArrayType,
73 class OutRowptrType,
74 class OutColindType,
75 class OutValsType>
76 void copy_out_from_thread_memory(const OutColindType& thread_total_nnz,
77 const InColindArrayType& Incolind,
78 const InValsArrayType& Invals,
79 const size_t m,
80 const double thread_chunk,
81 OutRowptrType& Outrowptr,
82 OutColindType& Outcolind,
83 OutValsType& Outvals);
84
85 /***************************** Matrix-Matrix OpenMP Only Kernels *****************************/
86#ifdef HAVE_TPETRA_INST_OPENMP
// Declaration of an OpenMP-specific kernel: every Tpetra type in the signature
// is fixed to Kokkos::Compat::KokkosOpenMPWrapperNode.
// NOTE(review): the name suggests a Gustavson-style A*B product that builds a
// new result matrix -- confirm against the definition.
//   Aview, Bview      : operand structs, taken by non-const reference.
//   Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol
//                     : index views of type LocalOrdinalViewType (presumably
//                       column/row lookup tables between A, B, imported rows
//                       and C -- TODO confirm).
//   C                 : matrix taken by non-const reference (presumably the output).
//   Cimport           : RCP to a const Import, passed by value.
//   label, params     : string label and parameter list; no defaults declared here.
87 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
88 static inline void mult_A_B_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90 const LocalOrdinalViewType & Acol2Brow,
91 const LocalOrdinalViewType & Acol2Irow,
92 const LocalOrdinalViewType & Bcol2Ccol,
93 const LocalOrdinalViewType & Icol2Ccol,
94 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96 const std::string& label,
97 const Teuchos::RCP<Teuchos::ParameterList>& params);
98
// Declaration of an OpenMP-specific kernel with the same parameter list as
// mult_A_B_newmatrix_LowThreadGustavsonKernel (all Tpetra types fixed to
// Kokkos::Compat::KokkosOpenMPWrapperNode).
// NOTE(review): "reuse" presumably means C's existing structure is reused
// rather than rebuilt -- confirm against the definition.
99 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
100 static inline void mult_A_B_reuse_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
102 const LocalOrdinalViewType & Acol2Brow,
103 const LocalOrdinalViewType & Acol2Irow,
104 const LocalOrdinalViewType & Bcol2Ccol,
105 const LocalOrdinalViewType & Icol2Ccol,
106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
107 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
108 const std::string& label,
109 const Teuchos::RCP<Teuchos::ParameterList>& params);
110
// Declaration of an OpenMP-specific Jacobi kernel (all Tpetra types fixed to
// Kokkos::Compat::KokkosOpenMPWrapperNode). Compared with the mult_A_B kernels
// it takes two leading extra arguments:
//   omega : Scalar, passed by value (presumably the damping factor -- TODO confirm).
//   Dinv  : const Vector reference (presumably the inverse diagonal -- TODO confirm).
// The remaining parameters match the mult_A_B kernels; no defaults are declared.
111 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
112 static inline void jacobi_A_B_newmatrix_LowThreadGustavsonKernel(Scalar omega,
113 const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
114 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
116 const LocalOrdinalViewType & Acol2Brow,
117 const LocalOrdinalViewType & Acol2Irow,
118 const LocalOrdinalViewType & Bcol2Ccol,
119 const LocalOrdinalViewType & Icol2Ccol,
120 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
121 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
122 const std::string& label,
123 const Teuchos::RCP<Teuchos::ParameterList>& params);
124
// Declaration of the "reuse" counterpart of
// jacobi_A_B_newmatrix_LowThreadGustavsonKernel; the parameter list is
// identical (omega by value, Dinv by const reference, then the operand views,
// index views, C, Cimport, label and params), with all Tpetra types fixed to
// Kokkos::Compat::KokkosOpenMPWrapperNode.
// NOTE(review): "reuse" presumably means C's existing structure is kept --
// confirm against the definition.
125 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
126 static inline void jacobi_A_B_reuse_LowThreadGustavsonKernel(Scalar omega,
127 const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
128 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
129 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
130 const LocalOrdinalViewType & Acol2Brow,
131 const LocalOrdinalViewType & Acol2Irow,
132 const LocalOrdinalViewType & Bcol2Ccol,
133 const LocalOrdinalViewType & Icol2Ccol,
134 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
135 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
136 const std::string& label,
137 const Teuchos::RCP<Teuchos::ParameterList>& params);
138#endif
139
140 /***************************** Matrix-Matrix Generic Kernels *****************************/
// Declaration of the node-generic Jacobi kernel: unlike the OpenMP-only
// kernels in this header, Node is a template parameter here.
//   omega : Scalar, passed by value (presumably the damping factor -- TODO confirm).
//   Dinv  : const Vector reference (presumably the inverse diagonal -- TODO confirm).
//   Aview, Bview : operand structs, taken by non-const reference.
//   Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol : index views of type
//           LocalOrdinalViewType, named as in the sibling kernel declarations.
//   C       : matrix taken by non-const reference (presumably the output).
//   Cimport : RCP to a const Import, passed by value.
//   label, params : string label and parameter list; no defaults declared here.
141 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalOrdinalViewType>
142 static inline void jacobi_A_B_newmatrix_MultiplyScaleAddKernel(Scalar omega,
143 const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Node> & Dinv,
144 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
145 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
146 const LocalOrdinalViewType & Acol2Brow,
147 const LocalOrdinalViewType & Acol2Irow,
148 const LocalOrdinalViewType & Bcol2Ccol,
149 const LocalOrdinalViewType & Icol2Ccol,
150 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
151 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
152 const std::string& label,
153 const Teuchos::RCP<Teuchos::ParameterList>& params);
154
155
156 /***************************** Triple Product OpenMP Only Kernels *****************************/
157#ifdef HAVE_TPETRA_INST_OPENMP
// Declaration of an OpenMP-specific triple-product kernel (all Tpetra types
// fixed to Kokkos::Compat::KokkosOpenMPWrapperNode).
// NOTE(review): the name suggests Ac = R*A*P built as a new matrix -- confirm
// against the definition.
//   Rview, Aview, Pview : operand structs, taken by non-const reference.
//   Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol
//                       : index views of type LocalOrdinalViewType (presumably
//                         lookup tables between A, P, imported P rows and Ac
//                         -- TODO confirm).
//   Ac                  : matrix taken by non-const reference (presumably the output).
//   Acimport            : RCP to a const Import, passed by value.
//   label, params       : unlike the other kernels declared in this header,
//                         these default to std::string() and Teuchos::null.
158 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
159 static inline void mult_R_A_P_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
160 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
161 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
162 const LocalOrdinalViewType & Acol2Prow,
163 const LocalOrdinalViewType & Acol2PIrow,
164 const LocalOrdinalViewType & Pcol2Accol,
165 const LocalOrdinalViewType & PIcol2Accol,
166 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
167 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
168 const std::string& label = std::string(),
169 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
170#endif
171
172
173 }// ExtraKernels
174}//MatrixMatrix
175}//Tpetra
176
177
178
179#endif
Namespace Tpetra contains the class and methods constituting the Tpetra library.