// Dense matrix-vector product in double precision: y = A * x.
//
// A is handed to BLAS as a row-major (rows x cols) buffer with leading
// dimension cols; alpha = 1.0 and beta = 0.0, so any previous contents of y
// are overwritten rather than accumulated into.
//
// VEC1 / VEC2 only need the members used below: size(), data() and
// get_offset().
//
// NOTE(review): the lines visible here do not show the declaration of `h`,
// any preprocessor guard around the GPU / CPU paths, or the closing braces;
// the branch structure described in the comments below is therefore partly
// inferred -- confirm against the complete file.
7 template <
typename VEC1,
typename VEC2>
8 void Dmatvec_core(
const matrix::Dense<double> &A,
const VEC1 &x, VEC2 &y) {
// Shape checks: y needs one entry per row of A, x one entry per column.
13 assert(A.get_row() == y.size());
14 assert(A.get_col() == x.size());
// Raw pointers for the BLAS calls.
17 const double *xd = x.data();
18 double *yd = y.data();
19 const double *vald = A.val.data();
// m = number of rows, n = number of columns.
20 const size_t m = A.get_row();
21 const size_t n = A.get_col();
// gemv computes y = alpha * op(A) * x + beta * y; with these values it is a
// plain product.
22 const double alpha = 1.0;
23 const double beta = 0.0;
// Element offsets added to the raw pointers below (presumably non-zero when
// x / y are views into a larger buffer -- TODO confirm).
24 const size_t xoffset = x.get_offset();
25 const size_t yoffset = y.get_offset();
// Path taken when A reports device memory status == true.
27 if (A.get_device_mem_stat() ==
true) {
// NOTE(review): a cuBLAS handle is created on every call; whether it is
// destroyed afterwards is not visible in these lines.
30 internal::check_CUDA(cublasCreate(&h));
// use_device_ptr: the cuBLAS call below is meant to see the device addresses
// of xd, yd and vald.
31 #pragma omp target data use_device_ptr(xd, yd, vald)
// cuBLAS assumes column-major storage, so the row-major (m x n) buffer is
// described as an (n x m) matrix with leading dimension n and CUBLAS_OP_T is
// requested, which yields A * x.
34 internal::check_CUDA(cublasDgemv(h, CUBLAS_OP_T, n, m, &alpha, vald, n,
35 xd + xoffset, 1, &beta, yd + yoffset,
// Presumably reached only in a build without GPU support while A still claims
// device memory (the guarding condition is not visible here).
40 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
// Host path: row-major, non-transposed gemv with unit strides for x and y.
43 cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, alpha, vald, n, xd + xoffset,
44 1, beta, yd + yoffset, 1);
// Dense matrix-vector product in single precision: y = A * x.
//
// Mirrors Dmatvec_core with float data and the sgemv routines; alpha = 1.0 and
// beta = 0.0, so y is overwritten rather than accumulated into.
//
// Naming caveat: in this function n is the number of ROWS and m the number of
// COLUMNS -- the opposite of Dmatvec_core. The arguments passed to BLAS are
// nevertheless the same quantities (rows, cols, leading dimension = cols).
//
// NOTE(review): the lines visible here do not show the declarations of `yd`
// and `h`, any preprocessor guard around the GPU / CPU paths, or the closing
// braces; confirm the branch structure against the complete file.
51 template <
typename VEC1,
typename VEC2>
52 void Smatvec_core(
const matrix::Dense<float> &A,
const VEC1 &x, VEC2 &y) {
// Shape checks: y needs one entry per row of A, x one entry per column.
57 assert(A.get_row() == y.size());
58 assert(A.get_col() == x.size());
// Raw pointers for the BLAS calls.
61 const float *xd = x.data();
63 const float *vald = A.val.data();
// n = number of rows, m = number of columns (see naming caveat above).
64 const size_t n = A.get_row();
65 const size_t m = A.get_col();
// gemv computes y = alpha * op(A) * x + beta * y; with these values it is a
// plain product.
66 const float alpha = 1.0;
67 const float beta = 0.0;
// Element offsets added to the raw pointers below (presumably non-zero when
// x / y are views into a larger buffer -- TODO confirm).
68 const size_t xoffset = x.get_offset();
69 const size_t yoffset = y.get_offset();
// Path taken when A reports device memory status == true.
71 if (A.get_device_mem_stat() ==
true) {
// NOTE(review): a cuBLAS handle is created on every call; whether it is
// destroyed afterwards is not visible in these lines.
74 internal::check_CUDA(cublasCreate(&h));
// use_device_ptr: the cuBLAS call below is meant to see the device addresses
// of xd, yd and vald.
75 #pragma omp target data use_device_ptr(xd, yd, vald)
// cuBLAS assumes column-major storage, so the row-major (rows x cols) buffer
// is described as a (cols x rows) matrix with leading dimension cols and
// CUBLAS_OP_T is requested, which yields A * x.
78 internal::check_CUDA(cublasSgemv(h, CUBLAS_OP_T, m, n, &alpha, vald, m,
79 xd + xoffset, 1, &beta, yd + yoffset,
// Presumably reached only in a build without GPU support while A still claims
// device memory (the guarding condition is not visible here).
84 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
// Host path: row-major, non-transposed gemv with unit strides for x and y.
87 cblas_sgemv(CblasRowMajor, CblasNoTrans, n, m, alpha, vald, m, xd + xoffset,
88 1, beta, yd + yoffset, 1);