#include "../../../include/monolish_blas.hpp"
#include "../../internal/monolish_internal.hpp"

namespace monolish {
namespace matrix {
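// Extraction utilities for CRS matrices: diag(), row(), and col() copy the
// diagonal, a single row, or a single column into a 1D destination
// (monolish::vector or a view1D). Every kernel below follows the same
// zero-fill-then-scatter/search pattern, with an OpenMP-parallel CPU path and
// an OpenMP target-offload GPU path selected at runtime by gpu_status.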
template <typename T> void CRS<T>::diag(vector<T> &vec) const {
  size_t n = get_row() < get_col() ? rowN : colN;
  T *vecd = vec.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  const T *vald = val.data();
  const int *rowd = row_ptr.data();
  const int *cold = col_ind.data();

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
    // zero-fill the destination, then pick up the stored diagonal entries
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)i == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)i == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
  }
}
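// Note: for a rectangular matrix the diagonal has min(get_row(), get_col())
// entries, which is why n is taken as the smaller of rowN and colN above.
// A 2x3 CRS matrix, for example, yields a length-2 diagonal vector.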
template <typename T> void CRS<T>::diag(view1D<vector<T>, T> &vec) const {
  size_t n = get_row() < get_col() ? rowN : colN;
  T *vecd = vec.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  const T *vald = val.data();
  const int *rowd = row_ptr.data();
  const int *cold = col_ind.data();

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)i == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)i == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
  }
}
template <typename T>
void CRS<T>::diag(view1D<matrix::Dense<T>, T> &vec) const {
  size_t n = get_row() < get_col() ? rowN : colN;
  T *vecd = vec.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  const T *vald = val.data();
  const int *rowd = row_ptr.data();
  const int *cold = col_ind.data();

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)i == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)i == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
  }
}
template void CRS<double>::diag(vector<double> &vec) const;
template void CRS<float>::diag(vector<float> &vec) const;
template void CRS<double>::diag(view1D<vector<double>, double> &vec) const;
template void CRS<float>::diag(view1D<vector<float>, float> &vec) const;
template void CRS<double>::diag(view1D<matrix::Dense<double>, double> &vec) const;
template void CRS<float>::diag(view1D<matrix::Dense<float>, float> &vec) const;
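/* Usage sketch (illustrative, not part of this file): extract the diagonal of
   a small CRS matrix, assuming the usual monolish COO build-up.

     monolish::matrix::COO<double> coo(3, 3);
     coo.insert(0, 0, 1.0);
     coo.insert(1, 1, 2.0);
     coo.insert(2, 2, 3.0);
     monolish::matrix::CRS<double> A(coo);
     monolish::vector<double> d(3); // length min(get_row(), get_col())
     A.diag(d);                     // d = {1.0, 2.0, 3.0}
*/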
template <typename T> void CRS<T>::row(const size_t r, vector<T> &vec) const {
  size_t n = get_row();
  T *vecd = vec.data();

  const T *vald = val.data();
  const int *rowd = row_ptr.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
    const int *indexd = col_ind.data();
    // zero-fill the destination, then scatter the stored entries of row r
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (int j = rowd[r]; j < rowd[r + 1]; j++) {
      vecd[indexd[j]] = vald[j];
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (int j = rowd[r]; j < rowd[r + 1]; j++) {
      vecd[col_ind[j]] = vald[j];
    }
  }
}
template void CRS<double>::row(const size_t r, vector<double> &vec) const;
template void CRS<float>::row(const size_t r, vector<float> &vec) const;
template <typename T>
void CRS<T>::row(const size_t r, view1D<vector<T>, T> &vec) const {
  size_t n = get_row();
  T *vecd = vec.data();

  const T *vald = val.data();
  const int *rowd = row_ptr.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
    const int *indexd = col_ind.data();
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (int j = rowd[r]; j < rowd[r + 1]; j++) {
      vecd[indexd[j]] = vald[j];
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (int j = rowd[r]; j < rowd[r + 1]; j++) {
      vecd[col_ind[j]] = vald[j];
    }
  }
}
template void CRS<double>::row(const size_t r, view1D<vector<double>, double> &vec) const;
template void CRS<float>::row(const size_t r, view1D<vector<float>, float> &vec) const;
template <typename T>
void CRS<T>::row(const size_t r, view1D<matrix::Dense<T>, T> &vec) const {
  size_t n = get_row();
  T *vecd = vec.data();

  const T *vald = val.data();
  const int *rowd = row_ptr.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
    const int *indexd = col_ind.data();
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (int j = rowd[r]; j < rowd[r + 1]; j++) {
      vecd[indexd[j]] = vald[j];
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (int j = rowd[r]; j < rowd[r + 1]; j++) {
      vecd[col_ind[j]] = vald[j];
    }
  }
}
template void CRS<double>::row(const size_t r, view1D<matrix::Dense<double>, double> &vec) const;
template void CRS<float>::row(const size_t r, view1D<matrix::Dense<float>, float> &vec) const;
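/* Usage sketch (illustrative): copy row 1 of a square CRS matrix A into a
   dense vector. Columns with no stored entry in that row come back as zero,
   because the kernel zero-fills the destination before scattering.

     monolish::vector<double> rvec(A.get_row()); // size checked by the assert
     A.row(1, rvec);
*/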
template <typename T> void CRS<T>::col(const size_t c, vector<T> &vec) const {
  size_t n = get_col();
  T *vecd = vec.data();

  const T *vald = val.data();
  const int *rowd = row_ptr.data();
  const int *cold = col_ind.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
    // zero-fill the destination, then search each row for an entry in column c
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)c == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)c == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
  }
}
template void CRS<double>::col(const size_t c, vector<double> &vec) const;
template void CRS<float>::col(const size_t c, vector<float> &vec) const;
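/* Usage sketch (illustrative): extract column 0 of a square CRS matrix A.
   Unlike row(), which touches only the stored entries of one row, col() must
   scan every row for an entry in column c, so it visits all nnz stored
   values.

     monolish::vector<double> cvec(A.get_col());
     A.col(0, cvec);
*/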
template <typename T>
void CRS<T>::col(const size_t c, view1D<vector<T>, T> &vec) const {
  size_t n = get_col();
  T *vecd = vec.data();

  const T *vald = val.data();
  const int *rowd = row_ptr.data();
  const int *cold = col_ind.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)c == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)c == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
  }
}
template void CRS<double>::col(const size_t c, view1D<vector<double>, double> &vec) const;
template void CRS<float>::col(const size_t c, view1D<vector<float>, float> &vec) const;
template <typename T>
void CRS<T>::col(const size_t c, view1D<matrix::Dense<T>, T> &vec) const {
  size_t n = get_col();
  T *vecd = vec.data();

  const T *vald = val.data();
  const int *rowd = row_ptr.data();
  const int *cold = col_ind.data();

  assert(n == vec.size());
  assert(get_device_mem_stat() == vec.get_device_mem_stat());

  if (gpu_status == true) {
#if MONOLISH_USE_GPU // gpu
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)c == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      vecd[i] = 0;
    }

#pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
      for (int j = rowd[i]; j < rowd[i + 1]; j++) {
        if ((int)c == cold[j]) {
          vecd[i] = vald[j];
        }
      }
    }
  }
}
template void CRS<double>::col(const size_t c, view1D<matrix::Dense<double>, double> &vec) const;
template void CRS<float>::col(const size_t c, view1D<matrix::Dense<float>, float> &vec) const;

} // namespace matrix
} // namespace monolish