monolish  0.14.0
MONOlithic LInear equation Solvers for Highly-parallel architecture
dense_matvec.hpp
#pragma once

namespace monolish {

namespace {
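// Layout note (applies to both kernels below): matrix::Dense stores A
// row-major as an m x n array. Read column-major, the same buffer is the
// n x m matrix A^T with leading dimension n, so column-major GEMV with
// CUBLAS_OP_T still computes y = A * x; the CPU path simply requests
// CblasRowMajor.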
// double ///////////////////
template <typename VEC1, typename VEC2>
void Dmatvec_core(const matrix::Dense<double> &A, const VEC1 &x, VEC2 &y) {
  Logger &logger = Logger::get_instance();
  logger.func_in(monolish_func);

  // dimension check: y(m) = A(m x n) * x(n)
  assert(A.get_row() == y.size());
  assert(A.get_col() == x.size());
  assert(util::is_same_device_mem_stat(A, x, y));

  const double *xd = x.data();
  double *yd = y.data();
  const double *vald = A.val.data();
  const size_t m = A.get_row();
  const size_t n = A.get_col();
  const double alpha = 1.0;
  const double beta = 0.0;
  const size_t xoffset = x.get_offset();
  const size_t yoffset = y.get_offset();

  if (A.get_device_mem_stat() == true) {
#if MONOLISH_USE_GPU
    cublasHandle_t h;
    internal::check_CUDA(cublasCreate(&h));
#pragma omp target data use_device_ptr(xd, yd, vald)
    {
      // cuBLAS is column-major: pass the row-major buffer as A^T (n x m,
      // lda = n) and transpose it back with CUBLAS_OP_T.
      internal::check_CUDA(cublasDgemv(h, CUBLAS_OP_T, n, m, &alpha, vald, n,
                                       xd + xoffset, 1, &beta, yd + yoffset,
                                       1));
    }
    cublasDestroy(h);
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
    cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, alpha, vald, n, xd + xoffset,
                1, beta, yd + yoffset, 1);
  }

  logger.func_out();
}

// float ///////////////////
template <typename VEC1, typename VEC2>
void Smatvec_core(const matrix::Dense<float> &A, const VEC1 &x, VEC2 &y) {
  Logger &logger = Logger::get_instance();
  logger.func_in(monolish_func);

  // dimension check: y(m) = A(m x n) * x(n)
  assert(A.get_row() == y.size());
  assert(A.get_col() == x.size());
  assert(util::is_same_device_mem_stat(A, x, y));

  const float *xd = x.data();
  float *yd = y.data();
  const float *vald = A.val.data();
  const size_t m = A.get_row();
  const size_t n = A.get_col();
  const float alpha = 1.0f;
  const float beta = 0.0f;
  const size_t xoffset = x.get_offset();
  const size_t yoffset = y.get_offset();

  if (A.get_device_mem_stat() == true) {
#if MONOLISH_USE_GPU
    cublasHandle_t h;
    internal::check_CUDA(cublasCreate(&h));
#pragma omp target data use_device_ptr(xd, yd, vald)
    {
      // cuBLAS is column-major: same transpose trick as Dmatvec_core().
      internal::check_CUDA(cublasSgemv(h, CUBLAS_OP_T, n, m, &alpha, vald, n,
                                       xd + xoffset, 1, &beta, yd + yoffset,
                                       1));
    }
    cublasDestroy(h);
#else
    throw std::runtime_error("error USE_GPU is false, but gpu_status == true");
#endif
  } else {
    cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n, alpha, vald, n, xd + xoffset,
                1, beta, yd + yoffset, 1);
  }

  logger.func_out();
}
} // namespace

} // namespace monolish
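For context, a minimal caller sketch (not part of this header): it assumes the public monolish interface declared in monolish_blas.hpp, i.e. matrix::Dense(M, N, std::vector), vector(size, value), util::send()/util::recv(), blas::matvec(), and vector::print_all(); a Dense double matvec call is routed to Dmatvec_core() above.

#include <monolish_blas.hpp>
#include <cstddef>
#include <vector>

int main() {
  const size_t M = 3, N = 2;

  // Row-major M x N matrix, length-N input vector, length-M output vector.
  std::vector<double> val = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
  monolish::matrix::Dense<double> A(M, N, val);
  monolish::vector<double> x(N, 1.0);
  monolish::vector<double> y(M, 0.0);

  // Transfer data to the device when built with MONOLISH_USE_GPU.
  monolish::util::send(A, x, y);

  monolish::blas::matvec(A, x, y); // y = A * x

  monolish::util::recv(y);
  y.print_all(); // expected: 3 7 11
  return 0;
}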