Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Kokkos_Parallel_MP_Vector.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef KOKKOS_PARALLEL_MP_VECTOR_HPP
43#define KOKKOS_PARALLEL_MP_VECTOR_HPP
44
45#include "Sacado_MP_Vector.hpp"
46#include "Kokkos_Core.hpp"
47
48//----------------------------------------------------------------------------
49// Kokkos execution policies useful for Sacado::MP::Vector scalar type
50//----------------------------------------------------------------------------
51
52namespace Kokkos {
53
57 template< class ExecSpace, class Tag = void >
59
61 typedef ExecSpace execution_space ;
62 typedef Tag work_tag ;
63
64 size_t range;
65 size_t team;
66 size_t shared;
67
68 MPVectorWorkConfig( const size_t range_,
69 const size_t team_,
70 const size_t shared_ = 0 ) :
71 range(range_), team(team_), shared(shared_) {}
72
73 ExecSpace space() const { return ExecSpace(); }
74};
75
76namespace Impl {
77
78#if defined( KOKKOS_ENABLE_THREADS )
79// Specialization of ParallelFor<> for MPVectorWorkConfig and Threads
80// The default implementation ignores the team size and uses the standard
81// work-range implementation. In the future maybe we should try and use
82// hyperthreads in a useful way. That would require:
83// -- interpreting the team-size differently, rather as the sacado size
84// -- determining the vector size of the architecture
85// -- laying out the threads differently to use hyperthreads across the
86// the sacado dimension
87template< class FunctorType, class Tag >
88class ParallelFor< FunctorType , MPVectorWorkConfig< Threads, Tag > > :
89 public ParallelFor< FunctorType , Kokkos::RangePolicy< Tag, Threads > > {
90 typedef Kokkos::RangePolicy< Tag, Threads > Policy ;
91public:
92 ParallelFor( const FunctorType & functor ,
93 const MPVectorWorkConfig< Threads, Tag > & work_config ) :
94 ParallelFor< FunctorType , Policy >( functor ,
95 Policy( 0, work_config.range ) ) {}
96};
97#endif
98
99#if defined( KOKKOS_ENABLE_OPENMP )
100// Specialization of ParallelFor<> for MPVectorWorkConfig and OpenMP
101// The default implementation ignores the team size and uses the standard
102// work-range implementation. In the future maybe we should try and use
103// hyperthreads in a useful way. That would require:
104// -- interpreting the team-size differently, rather as the sacado size
105// -- determining the vector size of the architecture
106// -- laying out the threads differently to use hyperthreads across the
107// the sacado dimension
108template< class FunctorType, class Tag >
109class ParallelFor< FunctorType , MPVectorWorkConfig< OpenMP, Tag > > :
110 public ParallelFor< FunctorType , Kokkos::RangePolicy< Tag, OpenMP > > {
111 typedef Kokkos::RangePolicy< Tag, OpenMP > Policy ;
112public:
113 ParallelFor( const FunctorType & functor ,
114 const MPVectorWorkConfig< OpenMP, Tag > & work_config ) :
115 ParallelFor< FunctorType , Policy >( functor ,
116 Policy( 0, work_config.range ) ) {}
117};
118#endif
119
120#if defined(KOKKOS_ENABLE_SERIAL)
121// Specialization of ParallelFor<> for MPVectorWorkConfig and Serial
122// The default implementation ignores the team size and uses the standard
123// work-range implementation. In the future maybe we should try and use
124// hyperthreads in a useful way. That would require:
125// -- interpreting the team-size differently, rather as the sacado size
126// -- determining the vector size of the architecture
127// -- laying out the threads differently to use hyperthreads across the
128// the sacado dimension
129template< class FunctorType, class Tag >
130class ParallelFor< FunctorType , MPVectorWorkConfig< Serial, Tag > > :
131 public ParallelFor< FunctorType , Kokkos::RangePolicy< Tag, Serial > > {
132 typedef Kokkos::RangePolicy< Tag, Serial > Policy ;
133public:
134 ParallelFor( const FunctorType & functor ,
135 const MPVectorWorkConfig< Serial, Tag > & work_config ) :
136 ParallelFor< FunctorType , Policy >( functor ,
137 Policy( 0, work_config.range ) ) {}
138};
139#endif // defined(KOKKOS_ENABLE_SERIAL)
140
141#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
142
143// Specialization of ParallelFor<> for MPVectorWorkConfig on Cuda
144// Here we use threadIdx.x for each entry in the specified team-size
145template< class FunctorType, class Tag >
146class ParallelFor< FunctorType , MPVectorWorkConfig< Cuda, Tag > > {
147public:
148
149 typedef Kokkos::RangePolicy< Tag, Cuda > Policy;
150
151 const FunctorType m_functor ;
152 const MPVectorWorkConfig< Cuda, Tag > m_config;
153 const Cuda::size_type m_work ;
154 const Policy m_policy;
155
156 template <class TagType>
157 inline __device__
158 typename std::enable_if<std::is_same<TagType, void>::value>::type
159 exec_range(const Cuda::size_type i, Cuda::size_type j) const {
160 m_functor(i, j);
161 }
162
163 template <class TagType>
164 inline __device__
165 typename std::enable_if<!std::is_same<TagType, void>::value>::type
166 exec_range(const Cuda::size_type i, Cuda::size_type j) const {
167 m_functor(TagType(), i, j);
168 }
169
170 Policy const& get_policy() const { return m_policy; }
171
172 inline
173 __device__
174 void operator()(void) const
175 {
176 const Cuda::size_type work_stride = blockDim.y * gridDim.x ;
177
178 for ( Cuda::size_type iwork = threadIdx.y + blockDim.y * blockIdx.x ;
179 iwork < m_work ;
180 iwork += work_stride ) {
181 this->template exec_range<Tag>(iwork, threadIdx.x);
182 }
183 }
184
185 ParallelFor( const FunctorType & functor ,
186 const MPVectorWorkConfig< Cuda, Tag > & work_config )
187 : m_functor( functor ) ,
188 m_config( work_config ) ,
189 m_work( work_config.range ),
190 m_policy()
191 {
192 }
193
194 inline
195 void execute() const
196 {
197 // To do: query number of registers used by functor and adjust
198 // nwarp accordingly to get maximum occupancy
199
200 Cuda::size_type nwarp = 0;
201 if (m_config.team > CudaTraits::WarpSize) {
202 const Cuda::size_type warps_per_team =
203 ( m_config.team + CudaTraits::WarpSize-1 ) / CudaTraits::WarpSize;
204 nwarp = cuda_internal_maximum_warp_count() / warps_per_team;
205 }
206 else {
207 const Cuda::size_type teams_per_warp =
208 CudaTraits::WarpSize / m_config.team ;
209 nwarp = cuda_internal_maximum_warp_count() * teams_per_warp;
210 }
211 const dim3 block( m_config.team , nwarp , 1 );
212
213 Cuda::size_type nblock =
214 std::min( (m_work + block.y - 1 ) / block.y ,
215 cuda_internal_maximum_grid_count()[0] );
216 const dim3 grid( nblock , 1 , 1 );
217
218 const Cuda::size_type shared = m_config.shared;
219 CudaParallelLaunch< ParallelFor >( *this , grid , block , shared , m_policy.space().impl_internal_space_instance() );
220 }
221};
222
223#endif
224
225} // namespace Impl
226
227} // namespace Kokkos
228
229//----------------------------------------------------------------------------
230//----------------------------------------------------------------------------
231
232#endif /* #ifndef KOKKOS_ATOMIC_MP_VECTOR_HPP */
Team-based parallel work configuration for Sacado::MP::Vector.
MPVectorWorkConfig(const size_t range_, const size_t team_, const size_t shared_=0)