17 #include <cusp/array1d.h> 19 #include <cusp/detail/format_utils.h> 31 template <
typename IndexType,
35 const IndexType xnum_row,
39 const ValueType * Aval,
43 const IndexType
thread_id = blockDim.x * blockIdx.x + threadIdx.x;
44 const IndexType grid_size = gridDim.x * blockDim.x;
47 for(IndexType row =
thread_id; row < Anum_rows; row += grid_size)
49 const IndexType row_start = Ar[row];
50 const IndexType row_end = Ar[row+1];
51 const IndexType r = row_end - row_start;
56 for (IndexType jj = row_start; jj < row_end; jj++)
67 template <
typename IndexType,
75 const ValueType * Aval,
79 const IndexType
thread_id = blockDim.x * blockIdx.x + threadIdx.x;
80 const IndexType grid_size = gridDim.x * blockDim.x;
81 for(IndexType row =
thread_id; row < Anum_rows; row += grid_size){
82 const IndexType row_start = Ar[row];
83 const IndexType row_end = Ar[row+1];
88 for (IndexType jj = row_start; jj < row_end; jj++)
90 y[
j*Anum_rows+row]=
sum;
106 template <
typename Matrix1,
111 Vector3&
y, cusp::row_major)
113 CUSP_PROFILE_SCOPED();
114 typedef typename Vector3::index_type IndexType;
116 typedef typename Vector3::memory_space MemorySpace;
117 const size_t BLOCK_SIZE = 256;
118 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
119 const size_t NUM_BLOCKS =
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
123 row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
124 (A.num_rows,
x.num_rows,
x.num_cols,
125 thrust::raw_pointer_cast(&A.row_offsets[0]),
126 thrust::raw_pointer_cast(&A.column_indices[0]),
127 thrust::raw_pointer_cast(&A.values[0]),
128 thrust::raw_pointer_cast(&(
x.values)[0]),
129 thrust::raw_pointer_cast(&(
y.values)[0]));
133 template <
typename Matrix1,
138 Vector3&
y, cusp::column_major)
140 CUSP_PROFILE_SCOPED();
141 typedef typename Vector3::index_type IndexType;
143 typedef typename Vector3::memory_space MemorySpace;
144 const size_t BLOCK_SIZE = 256;
145 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
146 const size_t NUM_BLOCKS =
std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
147 column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
148 (A.num_rows,
x.num_rows,
x.num_cols,
149 thrust::raw_pointer_cast(&A.row_offsets[0]),
150 thrust::raw_pointer_cast(&A.column_indices[0]),
151 thrust::raw_pointer_cast(&A.values[0]),
152 thrust::raw_pointer_cast(&(
x.values)[0]),
153 thrust::raw_pointer_cast(&(
y.values)[0]));
158 template <
typename Matrix1,
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
const IndexType xnum_rows
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
const IndexType const IndexType xnum_cols
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
const IndexType thread_id
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)