Tpetra parallel linear algebra
Version of the Day
Loading...
Searching...
No Matches
core
src
Tpetra_Details_radixSort.hpp
1
/*
2
// @HEADER
3
// ***********************************************************************
4
//
5
// Tpetra: Templated Linear Algebra Services Package
6
// Copyright (2008) Sandia Corporation
7
//
8
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9
// the U.S. Government retains certain rights in this software.
10
//
11
// Redistribution and use in source and binary forms, with or without
12
// modification, are permitted provided that the following conditions are
13
// met:
14
//
15
// 1. Redistributions of source code must retain the above copyright
16
// notice, this list of conditions and the following disclaimer.
17
//
18
// 2. Redistributions in binary form must reproduce the above copyright
19
// notice, this list of conditions and the following disclaimer in the
20
// documentation and/or other materials provided with the distribution.
21
//
22
// 3. Neither the name of the Corporation nor the names of the
23
// contributors may be used to endorse or promote products derived from
24
// this software without specific prior written permission.
25
//
26
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
//
38
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39
//
40
// ************************************************************************
41
// @HEADER
42
*/
43
44
#ifndef TPETRA_DETAILS_RADIXSORT_HPP
45
#define TPETRA_DETAILS_RADIXSORT_HPP
46
47
#include "TpetraCore_config.h"
48
#include "Kokkos_Macros.hpp"
49
50
namespace
Tpetra
51
{
52
namespace
Details
53
{
54
66
template
<
typename
KeyType,
typename
ValueType,
typename
IndexType>
67
KOKKOS_INLINE_FUNCTION
void
68
radixSortKeysAndValues
(
KeyType
*
keys
,
KeyType
*
keysAux
,
ValueType
*
values
,
ValueType
*
valuesAux
,
IndexType
n
,
IndexType
upperBound
)
69
{
70
if
(
n
<= 1)
71
return
;
72
KeyType
mask
= 0xF;
73
bool
inAux
=
false
;
74
//maskPos counts the low bit index of mask (0, 4, 8, ...)
75
IndexType
maskPos
= 0;
76
//Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
77
IndexType
sortBits
= 0;
78
while
(
upperBound
)
79
{
80
upperBound
>>= 1;
81
sortBits
++;
82
}
83
for
(
IndexType
s
= 0;
s
< (
sortBits
+ 3) / 4;
s
++)
84
{
85
//Count the number of elements in each bucket
86
IndexType
count[16] = {0};
87
IndexType
offset
[17] = {0};
88
if
(!
inAux
)
89
{
90
for
(
IndexType
i
= 0;
i
<
n
;
i
++)
91
{
92
count[(
keys
[
i
] &
mask
) >>
maskPos
]++;
93
}
94
}
95
else
96
{
97
for
(
IndexType
i
= 0;
i
<
n
;
i
++)
98
{
99
count[(
keysAux
[
i
] &
mask
) >>
maskPos
]++;
100
}
101
}
102
//get offset as the prefix sum for count
103
for
(
IndexType
i
= 0;
i
< 16;
i
++)
104
{
105
offset
[
i
+ 1] =
offset
[
i
] + count[
i
];
106
}
107
//now for each element in [lo, hi), move it to its offset in the other buffer
108
//this branch should be ok because whichBuf is the same on all threads
109
if
(!
inAux
)
110
{
111
//copy from *Over to *Aux
112
for
(
IndexType
i
= 0;
i
<
n
;
i
++)
113
{
114
IndexType
bucket
= (
keys
[
i
] &
mask
) >>
maskPos
;
115
keysAux
[
offset
[
bucket
+ 1] - count[
bucket
]] =
keys
[
i
];
116
valuesAux
[
offset
[
bucket
+ 1] - count[
bucket
]] =
values
[
i
];
117
count[
bucket
]--;
118
}
119
}
120
else
121
{
122
//copy from *Aux to *Over
123
for
(
IndexType
i
= 0;
i
<
n
;
i
++)
124
{
125
IndexType
bucket
= (
keysAux
[
i
] &
mask
) >>
maskPos
;
126
keys
[
offset
[
bucket
+ 1] - count[
bucket
]] =
keysAux
[
i
];
127
values
[
offset
[
bucket
+ 1] - count[
bucket
]] =
valuesAux
[
i
];
128
count[
bucket
]--;
129
}
130
}
131
inAux
= !
inAux
;
132
mask
=
mask
<< 4;
133
maskPos
+= 4;
134
}
135
if
(
inAux
)
136
{
137
//need to deep copy from aux arrays to main
138
for
(
IndexType
i
= 0;
i
<
n
;
i
++)
139
{
140
keys
[
i
] =
keysAux
[
i
];
141
values
[
i
] =
valuesAux
[
i
];
142
}
143
}
144
}
145
146
}
//namespace Details
147
}
//namespace Tpetra
148
149
#endif
//include guard
150
Tpetra::CrsMatrixStruct
Struct that holds views of the contents of a CrsMatrix.
Definition
TpetraExt_MMHelpers_decl.hpp:69
Details
Implementation details of Tpetra.
Tpetra::Details::radixSortKeysAndValues
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Definition
Tpetra_Details_radixSort.hpp:68
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Generated by
1.10.0