Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_radixSort.hpp
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_DETAILS_RADIXSORT_HPP
45#define TPETRA_DETAILS_RADIXSORT_HPP
46
47#include "TpetraCore_config.h"
48#include "Kokkos_Macros.hpp"
49
50namespace Tpetra
51{
52namespace Details
53{
54
66template<typename KeyType, typename ValueType, typename IndexType>
67KOKKOS_INLINE_FUNCTION void
69{
70 if(n <= 1)
71 return;
72 KeyType mask = 0xF;
73 bool inAux = false;
74 //maskPos counts the low bit index of mask (0, 4, 8, ...)
76 //Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
78 while(upperBound)
79 {
80 upperBound >>= 1;
81 sortBits++;
82 }
83 for(IndexType s = 0; s < (sortBits + 3) / 4; s++)
84 {
85 //Count the number of elements in each bucket
86 IndexType count[16] = {0};
87 IndexType offset[17] = {0};
88 if(!inAux)
89 {
90 for(IndexType i = 0; i < n; i++)
91 {
92 count[(keys[i] & mask) >> maskPos]++;
93 }
94 }
95 else
96 {
97 for(IndexType i = 0; i < n; i++)
98 {
99 count[(keysAux[i] & mask) >> maskPos]++;
100 }
101 }
102 //get offset as the prefix sum for count
103 for(IndexType i = 0; i < 16; i++)
104 {
105 offset[i + 1] = offset[i] + count[i];
106 }
107 //now for each element in [lo, hi), move it to its offset in the other buffer
108 //this branch should be ok because whichBuf is the same on all threads
109 if(!inAux)
110 {
111 //copy from *Over to *Aux
112 for(IndexType i = 0; i < n; i++)
113 {
114 IndexType bucket = (keys[i] & mask) >> maskPos;
115 keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
116 valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
117 count[bucket]--;
118 }
119 }
120 else
121 {
122 //copy from *Aux to *Over
123 for(IndexType i = 0; i < n; i++)
124 {
126 keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
127 values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
128 count[bucket]--;
129 }
130 }
131 inAux = !inAux;
132 mask = mask << 4;
133 maskPos += 4;
134 }
135 if(inAux)
136 {
137 //need to deep copy from aux arrays to main
138 for(IndexType i = 0; i < n; i++)
139 {
140 keys[i] = keysAux[i];
141 values[i] = valuesAux[i];
142 }
143 }
144}
145
146} //namespace Details
147} //namespace Tpetra
148
149#endif //include guard
150
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Namespace Tpetra contains the class and methods constituting the Tpetra library.