Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_crsMatrixAssembleElement.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
43#define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
44
45#include "KokkosSparse_CrsMatrix.hpp"
47#include <type_traits>
48
49namespace Tpetra {
50namespace Details {
51
89template<class SparseMatrixType,
90 class ValsViewType>
91KOKKOS_FUNCTION
92typename SparseMatrixType::ordinal_type
94 const typename SparseMatrixType::ordinal_type lclRow,
95 const typename SparseMatrixType::ordinal_type lclColInds[],
96 const typename SparseMatrixType::ordinal_type sortPerm[],
97 const ValsViewType& vals,
98 const typename SparseMatrixType::ordinal_type numEntInInput,
99 const bool forceAtomic =
101 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
102#else // NOT KOKKOS_ENABLE_SERIAL
103 false,
104#endif // KOKKOS_ENABLE_SERIAL
105 const bool checkInputIndices = true)
106{
107 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
108 matrix_scalar_type;
109 static_assert (std::is_same<matrix_scalar_type,
110 typename SparseMatrixType::value_type>::value,
111 "The matrix's entries must have a nonconst type.");
112 // static_assert (std::is_assignable<matrix_scalar_type,
113 // typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
114 // "The result of adding a matrix entry and an entry of vals "
115 // "MUST be assignable to a matrix entry.");
116 typedef typename SparseMatrixType::ordinal_type LO;
117 static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
118 "must be a built-in integer type.");
119
120 // If lclRow is NOT a valid row index, this will return a view of
121 // zero entries. If checkInputIndices is true, thus, then none of
122 // the input indices will be valid in that case.
123 auto row_view = A.row (lclRow);
124 const LO numEntInRow = static_cast<LO> (row_view.length);
125 // Number of valid local column indices found, that is, the number
126 // of input indices that are valid column indices found in row
127 // lclRow of the matrix. If not checking, we just return the number
128 // of input indices.
129 LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
130
131 // Since both the matrix row and the input (after permutation) are
132 // sorted, we only need to pass once over the matrix row. 'offset'
133 // tells us the current search position in the matrix row.
134 LO offset = 0;
135 for (LO j = 0; j < numEntInInput; ++j) {
136 const LO perm_index = sortPerm[j];
137 const LO lclColInd = lclColInds[perm_index];
138 // Search linearly in the matrix row for the current index.
139 // If we ever want binary search, this would be the place.
140 while (row_view.colidx(offset) != lclColInd) {
141 ++offset;
142 }
143
144 // If we could make checkInputIndices a compile-time constant,
145 // then the compiler might not need to insert a branch here. This
146 // should help vectorization, if vectorization is possible.
147 if (checkInputIndices) {
148 if (offset != numEntInRow) {
149 // If we could make forceAtomic a compile-time constant, then
150 // the compiler might not need to insert a branch here. This
151 // should help vectorization, if vectorization is possible.
152 if (forceAtomic) {
153 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
154 }
155 else {
156 row_view.value(offset) += vals[perm_index];
157 }
158 ++numValid;
159 }
160 }
161 else { // don't check input indices; assume they are in the row
162 // See above note on forceAtomic.
163 if (forceAtomic) {
164 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
165 }
166 else {
167 row_view.value(offset) += vals[perm_index];
168 }
169 }
170 }
171
172 return numValid;
173}
174
213template<class SparseMatrixType,
214 class ValsViewType>
216typename SparseMatrixType::ordinal_type
218 const typename SparseMatrixType::ordinal_type lclRow,
219 const typename SparseMatrixType::ordinal_type lclColInds[],
220 const typename SparseMatrixType::ordinal_type sortPerm[],
221 const ValsViewType& vals,
222 const typename SparseMatrixType::ordinal_type numEntInInput,
223 const bool forceAtomic =
225 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
226#else // NOT KOKKOS_ENABLE_SERIAL
227 false,
228#endif // KOKKOS_ENABLE_SERIAL
229 const bool checkInputIndices = true)
230{
231 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
232 matrix_scalar_type;
233 static_assert (std::is_same<matrix_scalar_type,
234 typename SparseMatrixType::value_type>::value,
235 "The matrix's entries must have a nonconst type.");
236 static_assert (std::is_assignable<matrix_scalar_type,
237 typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
238 "The result of adding a matrix entry and an entry of vals "
239 "MUST be assignable to a matrix entry.");
240 typedef typename SparseMatrixType::ordinal_type LO;
241 static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
242 "must be a built-in integer type.");
243
244 // If lclRow is NOT a valid row index, this will return a view of
245 // zero entries. If checkInputIndices is true, thus, then none of
246 // the input indices will be valid in that case.
247 auto row_view = A.row (lclRow);
248 const LO numEntInRow = static_cast<LO> (row_view.length);
249 // Number of valid local column indices found, that is, the number
250 // of input indices that are valid column indices found in row
251 // lclRow of the matrix. If not checking, we just return the number
252 // of input indices.
253 LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
254
255 // Since both the matrix row and the input (after permutation) are
256 // sorted, we only need to pass once over the matrix row. 'offset'
257 // tells us the current search position in the matrix row.
258 LO offset = 0;
259 for (LO j = 0; j < numEntInInput; ++j) {
260 const LO perm_index = sortPerm[j];
261 const LO lclColInd = lclColInds[perm_index];
262 // Search linearly in the matrix row for the current index.
263 // If we ever want binary search, this would be the place.
264 while (row_view.colidx(offset) != lclColInd) {
265 ++offset;
266 }
267
268 // If checkInputIndices were a compile-time constant, then the
269 // compiler might not need to insert a branch here. This should
270 // help vectorization, if vectorization is possible at all.
271 if (checkInputIndices) {
272 if (offset != numEntInRow) {
273 // If forceAtomic were a compile-time constant, then the
274 // compiler might not need to insert a branch here. This
275 // could help vectorization, if vectorization is possible.
276 if (forceAtomic) {
277 Kokkos::atomic_assign (&(row_view.value(offset)), vals[perm_index]);
278 }
279 else {
280 row_view.value(offset) += vals[perm_index];
281 }
282 ++numValid;
283 }
284 }
285 else { // don't check input indices; assume they are in the row
286 // See above note on forceAtomic.
287 if (forceAtomic) {
288 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
289 }
290 else {
291 row_view.value(offset) += vals[perm_index];
292 }
293 }
294 }
295
296 return numValid;
297}
298
350template<class SparseMatrixType,
351 class VectorViewType,
352 class RhsViewType,
353 class LhsViewType>
355typename SparseMatrixType::ordinal_type
357 const VectorViewType& x,
358 typename SparseMatrixType::ordinal_type lids[],
359 typename SparseMatrixType::ordinal_type sortPerm[],
360 const RhsViewType& rhs,
361 const LhsViewType& lhs,
362 const bool forceAtomic =
364 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
365#else // NOT KOKKOS_ENABLE_SERIAL
366 false,
367#endif // KOKKOS_ENABLE_SERIAL
368 const bool checkInputIndices = true)
369{
370 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
371 matrix_scalar_type;
372 typedef typename std::remove_const<typename VectorViewType::value_type>::type
374 static_assert (std::is_same<matrix_scalar_type,
375 typename SparseMatrixType::value_type>::value,
376 "The sparse output matrix A's entries must have a nonconst type.");
377 static_assert (std::is_same<vector_scalar_type,
378 typename VectorViewType::value_type>::value,
379 "The dense output vector x's entries must have a nonconst type.");
380 // static_assert (std::is_assignable<matrix_scalar_type,
381 // typename std::decay< decltype (A.values[0] + lhs(0,0)) >::type>::value,
382 // "The result of adding a sparse matrix entry and an entry of "
383 // "lhs (the dense element matrix) "
384 // "MUST be assignable to a matrix entry.");
385 // static_assert (std::is_assignable<vector_scalar_type,
386 // typename std::decay< decltype (x[0] + rhs[0]) >::type>::value,
387 // "The result of adding a vector entry and an entry of "
388 // "rhs (the dense element vector) "
389 // "MUST be assignable to a vector entry.");
390 typedef typename SparseMatrixType::ordinal_type LO;
391 static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
392 "must be a built-in integer type.");
393
394 const LO eltDim = rhs.extent (0);
395
396 // Generate sort permutation
397 for (LO i = 0; i < eltDim; ++i) {
398 sortPerm[i] = i;
399 }
401
402 LO totalNumValid = 0;
403 for (LO r = 0; r < eltDim; ++r) {
404 const LO lid = lids[r];
405 //auto lhs_r = Kokkos::subview (lhs, sortPerm[r], Kokkos::ALL ());
406 auto lhs_r = Kokkos::subview (lhs, r, Kokkos::ALL ());
407
408 // This assumes that lid is always a valid row in the sparse
409 // matrix, and that the local indices in each row of the matrix
410 // are always sorted.
411 const LO curNumValid =
415 if (forceAtomic) {
416 Kokkos::atomic_add (&x(lid), rhs(sortPerm[r]));
417 }
418 else {
419 x(lid) += rhs(sortPerm[r]);
420 }
422 }
423 return totalNumValid;
424}
425
426} // namespace Details
427} // namespace Tpetra
428
429#endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
Namespace Tpetra contains the class and methods constituting the Tpetra library.