#if defined( __CUDA_ARCH__ )

namespace Stokhos {

  //! Dynamically allocated storage class, specialized for Kokkos::Cuda.
  //! Coefficients are interleaved across the threads of a CUDA block.
  template <typename ordinal_t, typename value_t>
  class DynamicThreadedStorage<ordinal_t, value_t, Kokkos::Cuda> {
  public:

    static const bool is_static = false;
    static const int static_size = 0;
    static const bool supports_reset = true;

    typedef ordinal_t ordinal_type;
    typedef value_t value_type;
    typedef Kokkos::Cuda execution_space;
    typedef value_type& reference;
    typedef volatile value_type& volatile_reference;
    typedef const value_type& const_reference;
    typedef const volatile value_type& const_volatile_reference;
    typedef value_type* pointer;
    typedef volatile value_type* volatile_pointer;
    typedef const value_type* const_pointer;
    typedef const volatile value_type* const_volatile_pointer;
    typedef Stokhos::DynArrayTraits<value_type,execution_space> ds;
    //! Turn DynamicThreadedStorage into a meta-function class usable with mpl::apply
    template <typename ord_t, typename val_t = value_t,
              typename dev_t = Kokkos::Cuda>
    struct apply {
      typedef DynamicThreadedStorage<ord_t, val_t, dev_t> type;
    };
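    // Illustration (not part of the original header): apply rebinds the
    // storage type to new template parameters, e.g.
    //
    //   typedef DynamicThreadedStorage<int, double, Kokkos::Cuda> storage_d;
    //   typedef storage_d::apply<int, float>::type storage_f;
    //
    // where storage_f is DynamicThreadedStorage<int, float, Kokkos::Cuda>.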
    //! Constructor
    __device__
    DynamicThreadedStorage(const ordinal_type& sz = 1,
                           const value_type& x = value_type(0.0)) :
      sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
    }
    //! Constructor from array of values
    __device__
    DynamicThreadedStorage(const ordinal_type& sz, const value_type* x) :
      sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_, x);
    }
    //! Constructor for creating a view of externally owned data
    __device__
    DynamicThreadedStorage(const ordinal_type& sz, pointer v, bool owned) :
      coeff_(v), sz_(sz), stride_(num_threads()), total_sz_(sz_*stride_),
      is_owned_(owned) {}
    //! Copy constructor
    __device__
    DynamicThreadedStorage(const DynamicThreadedStorage& s) :
      sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_);
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = s.coeff_[i];
    }
    //! Copy constructor from a volatile source
    __device__
    DynamicThreadedStorage(const volatile DynamicThreadedStorage& s) :
      sz_(s.sz_), stride_(s.stride_), total_sz_(s.total_sz_) {
      allocate_coeff_array(coeff_, is_owned_, total_sz_);
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = s.coeff_[i];
    }
    //! Destructor
    __device__
    ~DynamicThreadedStorage() {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
    }
    //! Assignment operator
    __device__
    DynamicThreadedStorage& operator=(const DynamicThreadedStorage& s) {
      if (&s != this) {
        if (s.sz_ != sz_) {
          // Sizes differ:  reallocate before copying
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }
    //! Assignment operator from a volatile source
    __device__
    DynamicThreadedStorage&
    operator=(const volatile DynamicThreadedStorage& s) {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }
    //! Assignment operator for a volatile destination
    __device__
    volatile DynamicThreadedStorage&
    operator=(const DynamicThreadedStorage& s) volatile {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }
    //! Assignment operator, volatile source and destination
    __device__
    volatile DynamicThreadedStorage&
    operator=(const volatile DynamicThreadedStorage& s) volatile {
      if (&s != this) {
        if (s.sz_ != sz_) {
          destroy_coeff_array(coeff_, is_owned_, total_sz_);
          sz_ = s.sz_;
          stride_ = s.stride_;
          total_sz_ = sz_*stride_;
          allocate_coeff_array(coeff_, is_owned_, total_sz_);
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
        else {
          for (ordinal_type i=0; i<total_sz_; i+=stride_)
            coeff_[i] = s.coeff_[i];
        }
      }
      return *this;
    }
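    // Note added for exposition: the volatile overloads above mirror the
    // non-volatile ones because Kokkos has historically passed reduction
    // arguments through volatile references.  Every variant copies one entry
    // per stride, so each thread touches only its own interleaved
    // coefficients.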
    //! Initialize values to a constant value
    __device__
    void init(const_reference v) {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v;
    }

    //! Initialize values to a constant value
    __device__
    void init(const_reference v) volatile {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        coeff_[i] = v;
    }

    //! Initialize values to an array of values
    __device__
    void init(const_pointer v, const ordinal_type& sz = 0) {
      ordinal_type my_sz = stride_*sz;
      if (sz == 0)
        my_sz = total_sz_;
      for (ordinal_type i=0; i<my_sz; i+=stride_)
        coeff_[i] = v[i];
    }

    //! Initialize values to an array of values
    __device__
    void init(const_pointer v, const ordinal_type& sz = 0) volatile {
      ordinal_type my_sz = stride_*sz;
      if (sz == 0)
        my_sz = total_sz_;
      for (ordinal_type i=0; i<my_sz; i+=stride_)
        coeff_[i] = v[i];
    }
    //! Load coefficients into an array of values
    __device__
    void load(pointer v) {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        v[i] = coeff_[i];
    }

    //! Load coefficients into an array of values
    __device__
    void load(pointer v) volatile {
      for (ordinal_type i=0; i<total_sz_; i+=stride_)
        v[i] = coeff_[i];
    }
    //! Resize to new size (values are preserved)
    __device__
    void resize(const ordinal_type& sz) {
      if (sz != sz_) {
        value_type* coeff_new;
        bool owned_new;
        ordinal_type total_sz_new = sz*stride_;
        allocate_coeff_array(coeff_new, owned_new, total_sz_new);
        // Copy the overlapping prefix into the new array
        ordinal_type my_tsz = total_sz_;
        if (total_sz_ > total_sz_new)
          my_tsz = total_sz_new;
        for (ordinal_type i=0; i<my_tsz; i+=stride_)
          coeff_new[i] = coeff_[i];
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        coeff_ = coeff_new;
        sz_ = sz;
        total_sz_ = total_sz_new;
        is_owned_ = owned_new;
      }
    }
    //! Resize to new size (values are preserved)
    __device__
    void resize(const ordinal_type& sz) volatile {
      if (sz != sz_) {
        value_type* coeff_new;
        bool owned_new;
        ordinal_type total_sz_new = sz*stride_;
        allocate_coeff_array(coeff_new, owned_new, total_sz_new);
        ordinal_type my_tsz = total_sz_;
        if (total_sz_ > total_sz_new)
          my_tsz = total_sz_new;
        for (ordinal_type i=0; i<my_tsz; i+=stride_)
          coeff_new[i] = coeff_[i];
        destroy_coeff_array(coeff_, is_owned_, total_sz_);
        coeff_ = coeff_new;
        sz_ = sz;
        total_sz_ = total_sz_new;
        is_owned_ = owned_new;
      }
    }
    //! Reset storage to the given array, size, and stride
    __device__
    void shallowReset(pointer v, const ordinal_type& sz,
                      const ordinal_type& stride, bool owned) {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = v;
      sz_ = sz;
      stride_ = stride;
      total_sz_ = sz_*stride_;
      is_owned_ = owned;
    }
    //! Reset storage to the given array, size, and stride
    __device__
    void shallowReset(pointer v, const ordinal_type& sz,
                      const ordinal_type& stride, bool owned) volatile {
      destroy_coeff_array(coeff_, is_owned_, total_sz_);
      coeff_ = v;
      sz_ = sz;
      stride_ = stride;
      total_sz_ = sz_*stride_;
      is_owned_ = owned;
    }
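    // Note added for exposition: shallowReset is the capability advertised by
    // the supports_reset trait above; it adopts an existing array without
    // copying, leaving the caller in control of the interleaving stride.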
    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    const_reference operator[] (const ordinal_type& i) const {
      return coeff_[i*stride_];
    }

    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    const_volatile_reference operator[] (const ordinal_type& i) const volatile {
      return coeff_[i*stride_];
    }

    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    reference operator[] (const ordinal_type& i) {
      return coeff_[i*stride_];
    }

    //! Coefficient access (avoid if possible)
    KOKKOS_INLINE_FUNCTION
    volatile_reference operator[] (const ordinal_type& i) volatile {
      return coeff_[i*stride_];
    }
    //! Get first (degree-zero) coefficient owned by this thread
    KOKKOS_INLINE_FUNCTION
    reference getCoeff() { return coeff_[0]; }

    //! Get first (degree-zero) coefficient owned by this thread
    KOKKOS_INLINE_FUNCTION
    volatile_reference getCoeff() volatile { return coeff_[0]; }

    //! Get first (degree-zero) coefficient owned by this thread
    KOKKOS_INLINE_FUNCTION
    const_reference getCoeff() const { return coeff_[0]; }

    //! Get first (degree-zero) coefficient owned by this thread
    KOKKOS_INLINE_FUNCTION
    const_volatile_reference getCoeff() const volatile { return coeff_[0]; }
    //! Get coefficient array
    KOKKOS_INLINE_FUNCTION
    const_volatile_pointer coeff() const volatile { return coeff_; }

    //! Get coefficient array
    KOKKOS_INLINE_FUNCTION
    const_pointer coeff() const { return coeff_; }

    //! Get coefficient array
    KOKKOS_INLINE_FUNCTION
    volatile_pointer coeff() volatile { return coeff_; }

    //! Get coefficient array
    KOKKOS_INLINE_FUNCTION
    pointer coeff() { return coeff_; }
  protected:

    //! Compute the total number of threads in the CUDA block
    __device__
    ordinal_type num_threads() const {
      return blockDim.x*blockDim.y*blockDim.z;
    }

    //! Compute the total number of threads in the CUDA block
    __device__
    ordinal_type num_threads() const volatile {
      return blockDim.x*blockDim.y*blockDim.z;
    }

    //! Compute the flattened index of this thread within the block
    __device__
    ordinal_type thread_index() const {
      return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
    }

    //! Compute the flattened index of this thread within the block
    __device__
    ordinal_type thread_index() const volatile {
      return threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x;
    }
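    // Worked example (added for exposition): for blockDim = (32, 4, 1),
    // num_threads() = 32*4*1 = 128, and the thread at threadIdx = (5, 2, 0)
    // gets flattened index 5 + (2 + 0*4)*32 = 69.  After allocation, that
    // thread owns the interleaved entries ptr[69], ptr[69+128],
    // ptr[69+2*128], ...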
    //! Allocate coefficient array, filling with a constant value
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type& x = value_type(0.0)) {
      // Thread 0 allocates one shared array for the whole block
      __shared__ pointer ptr;
      const ordinal_type tidx = thread_index();
      if (tidx == 0)
        ptr = ds::get_and_fill(total_size, x);
      __syncthreads();

      // Each thread's view starts at its own offset; only thread 0 owns
      // (and later frees) the allocation
      c = ptr + tidx;
      owned = (tidx == 0);
    }
    //! Allocate coefficient array, filling with a constant value
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type& x = value_type(0.0)) volatile {
      __shared__ pointer ptr;
      const ordinal_type tidx = thread_index();
      if (tidx == 0)
        ptr = ds::get_and_fill(total_size, x);
      __syncthreads();
      c = ptr + tidx;
      owned = (tidx == 0);
    }
    //! Allocate coefficient array, copying from an array of values
    __device__
    void allocate_coeff_array(pointer& c, bool& owned,
                              ordinal_type total_size,
                              const value_type* x) {
      __shared__ pointer ptr;
      const ordinal_type tidx = thread_index();
      if (tidx == 0)
        ptr = ds::get_and_fill(x, total_size);
      __syncthreads();
      c = ptr + tidx;
      owned = (tidx == 0);
    }
    //! Destroy coefficient array
    __device__
    void destroy_coeff_array(pointer c, bool owned, ordinal_type total_size) {
      // Wait until every thread is done with the array before freeing it
      __syncthreads();
      if (owned)
        ds::destroy_and_release(c, total_size);
    }

    //! Destroy coefficient array
    __device__
    void destroy_coeff_array(pointer c, bool owned,
                             ordinal_type total_size) volatile {
      __syncthreads();
      if (owned)
        ds::destroy_and_release(c, total_size);
    }

    //! Coefficient values (offset to this thread's first entry)
    pointer coeff_;

    //! Size of the coefficient array used by this thread
    ordinal_type sz_;

    //! Stride between successive coefficients (the number of threads)
    ordinal_type stride_;

    //! Total size of the shared coefficient array
    ordinal_type total_sz_;

    //! Does this thread own (and hence free) the allocation?
    bool is_owned_;

  };

}

#endif // defined( __CUDA_ARCH__ )
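// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): how a kernel might
// use this storage class.  The kernel name and launch shape are hypothetical;
// the sketch assumes int ordinals and double values.
//
//   __global__ void scale_coeffs(int n) {
//     // Each block cooperatively allocates one interleaved array with n
//     // coefficients per thread, initialized to 1.0.
//     Stokhos::DynamicThreadedStorage<int, double, Kokkos::Cuda> s(n, 1.0);
//     for (int i=0; i<n; ++i)
//       s[i] *= 2.0;   // reads/writes coeff_[i*stride_] for this thread
//   }                  // thread 0 frees the array when s goes out of scope
// ---------------------------------------------------------------------------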