Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
csr.h
Go to the documentation of this file.
1/*
2 * Copyright 2008-2009 NVIDIA Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <cusp/array1d.h>
18
19#include <cusp/detail/format_utils.h>
20
21//#include <iostream>
22//#include <cusp/print.h>
23namespace cusp
24{
25namespace detail
26{
27namespace device
28{
29
30//Row-wise AX where X is a 2d array with one thread per row (based on scalar model)
31template <typename IndexType,
32 typename ValueType>
33__global__ void
34row_spmm_csr_scalar_kernel(const IndexType Anum_rows,
35 const IndexType xnum_row,
36 const IndexType xnum_cols,
37 const IndexType * Ar,
38 const IndexType * Ac,
39 const ValueType * Aval,
40 const ValueType * x,
41 ValueType * y)
42{
43 const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
44 const IndexType grid_size = gridDim.x * blockDim.x;
45
46
47 for(IndexType row = thread_id; row < Anum_rows; row += grid_size)
48 {
49 const IndexType row_start = Ar[row];
50 const IndexType row_end = Ar[row+1];
51 const IndexType r = row_end - row_start;
52
53 for (IndexType j = 0; j < xnum_cols; j++){
54
55 ValueType sum = 0.0;
56 for (IndexType jj = row_start; jj < row_end; jj++)
57 sum += Aval[jj] * x[j+xnum_cols*Ac[jj]];
58 y[j+xnum_cols*row]=sum;
59 }
60
61 }
62}
63
64
65
66//Col-wise with one thread per row
67template <typename IndexType,
68 typename ValueType>
69__global__ void
70column_spmm_csr_scalar_kernel(const IndexType Anum_rows,
71 const IndexType xnum_rows,
72 const IndexType xnum_cols,
73 const IndexType * Ar,
74 const IndexType * Ac,
75 const ValueType * Aval,
76 const ValueType * x,
77 ValueType * y)
78{
79 const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
80 const IndexType grid_size = gridDim.x * blockDim.x;
81 for(IndexType row = thread_id; row < Anum_rows; row += grid_size){
82 const IndexType row_start = Ar[row];
83 const IndexType row_end = Ar[row+1];
84
85 for (IndexType j = 0; j < xnum_cols; j++){
86
87 ValueType sum = 0;
88 for (IndexType jj = row_start; jj < row_end; jj++)
89 sum += Aval[jj] * x[Ac[jj]+xnum_rows*j];//x(Ac[jj], j)
90 y[j*Anum_rows+row]=sum;
91 }
92 }
93}
94
95
104
105
106template <typename Matrix1,
107 typename Vector2,
108 typename Vector3>
109void __spmm_csr_scalar(const Matrix1& A,
110 const Vector2& x,
111 Vector3& y, cusp::row_major)
112{
113 CUSP_PROFILE_SCOPED();
114 typedef typename Vector3::index_type IndexType;
115 typedef typename Vector3::value_type ValueType;
116 typedef typename Vector3::memory_space MemorySpace;
117 const size_t BLOCK_SIZE = 256;
118 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
119 const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
120
121
122
123 row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
124 (A.num_rows, x.num_rows, x.num_cols,
125 thrust::raw_pointer_cast(&A.row_offsets[0]),
126 thrust::raw_pointer_cast(&A.column_indices[0]),
127 thrust::raw_pointer_cast(&A.values[0]),
128 thrust::raw_pointer_cast(&(x.values)[0]),
129 thrust::raw_pointer_cast(&(y.values)[0]));
130
131}
132
133template <typename Matrix1,
134 typename Vector2,
135 typename Vector3>
136void __spmm_csr_scalar(const Matrix1& A,
137 const Vector2& x,
138 Vector3& y, cusp::column_major)
139{
140 CUSP_PROFILE_SCOPED();
141 typedef typename Vector3::index_type IndexType;
142 typedef typename Vector3::value_type ValueType;
143 typedef typename Vector3::memory_space MemorySpace;
144 const size_t BLOCK_SIZE = 256;
145 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
146 const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
147 column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
148 (A.num_rows, x.num_rows, x.num_cols,
149 thrust::raw_pointer_cast(&A.row_offsets[0]),
150 thrust::raw_pointer_cast(&A.column_indices[0]),
151 thrust::raw_pointer_cast(&A.values[0]),
152 thrust::raw_pointer_cast(&(x.values)[0]),
153 thrust::raw_pointer_cast(&(y.values)[0]));
154
155}
156
157
158template <typename Matrix1,
159 typename Vector2,
160 typename Vector3>
161void spmm_csr_scalar(const Matrix1& A,
162 const Vector2& x,
163 Vector3& y)
164{
165//Determine if row-wise or col-wise then call appropriate multiply
166 __spmm_csr_scalar(A, x, y, typename Vector2::orientation());
167}
168} // end namespace device
169} // end namespace detail
170} // end namespace cusp
171
Kokkos::DefaultExecutionSpace device
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition csr_vector.h:265
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
Definition csr.h:161
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition csr.h:34
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)
Definition csr.h:109
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition csr_vector.h:267
const IndexType xnum_rows
Definition csr_vector.h:260
const IndexType const IndexType xnum_cols
Definition csr_vector.h:261
const IndexType thread_id
Definition csr_vector.h:273
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition csr.h:70