// Ddot_core: double-precision dot product of x and y.
//
// F1 and F2 are container-like types; the visible code requires them to
// provide data(), get_offset() and (for x) size() and get_device_mem_stat().
// The product is taken over x.size() elements with unit stride, starting
// at each operand's own offset.
//
// Throws std::runtime_error when x reports device-resident data but the
// GPU code path is not compiled in.
//
// NOTE(review): the length is taken from x only; no check of y's size is
// visible here -- presumably the caller validates it, confirm.
6 template <
typename F1,
typename F2>
double Ddot_core(
const F1 &x,
const F2 &y) {
// Raw element pointers, element count and per-operand start offsets.
15 const double *xd = x.data();
16 const double *yd = y.data();
17 const size_t size = x.size();
18 const size_t xoffset = x.get_offset();
19 const size_t yoffset = y.get_offset();
// Backend selection looks only at x's device-memory flag; y's flag is not
// consulted in the visible code (assumed to match x -- TODO confirm).
21 if (x.get_device_mem_stat() ==
true) {
22 #if MONOLISH_USE_NVIDIA_GPU
// GPU path: a cuBLAS handle is created on this call and its status checked.
// NOTE(review): no matching cublasDestroy is visible in these lines --
// confirm the handle is released.
24 internal::check_CUDA(cublasCreate(&h));
// use_device_ptr makes xd/yd refer to their device addresses inside the
// target data region, so cuBLAS receives device pointers. The result is
// written to `ans`, whose declaration is outside the visible lines.
// NOTE(review): `size` is a size_t while the cuBLAS length parameter is an
// int -- very large vectors would be narrowed.
25 #pragma omp target data use_device_ptr(xd, yd)
28 cublasDdot(h, size, xd + xoffset, 1, yd + yoffset, 1, &ans));
// Reached when x claims device memory but GPU support is not built in
// (presumably the #else side of the guard above -- the directive itself is
// not visible here).
32 throw std::runtime_error(
33 "error USE_GPU is false, but get_device_mem_stat() == true");
// Host path: CBLAS dot product over the same offset ranges, unit stride.
36 ans = cblas_ddot(size, xd + xoffset, 1, yd + yoffset, 1);
// The local result is passed through comm.Allreduce. `comm` is defined
// outside the visible lines; presumably this combines partial results
// across processes -- confirm.
41 ans = comm.Allreduce(ans);
// Sdot_core: single-precision dot product of x and y.
//
// F1 and F2 are container-like types; the visible code requires them to
// provide data(), get_offset() and (for x) size() and get_device_mem_stat().
// The product is taken over x.size() elements with unit stride, starting
// at each operand's own offset.
//
// Throws std::runtime_error when x reports device-resident data but the
// GPU code path is not compiled in.
//
// NOTE(review): the length is taken from x only; no check of y's size is
// visible here -- presumably the caller validates it, confirm.
48 template <
typename F1,
typename F2>
float Sdot_core(
const F1 &x,
const F2 &y) {
// Raw element pointers, element count and per-operand start offsets.
57 const float *xd = x.data();
58 const float *yd = y.data();
59 const size_t size = x.size();
60 const size_t xoffset = x.get_offset();
61 const size_t yoffset = y.get_offset();
// Backend selection looks only at x's device-memory flag; y's flag is not
// consulted in the visible code (assumed to match x -- TODO confirm).
63 if (x.get_device_mem_stat() ==
true) {
64 #if MONOLISH_USE_NVIDIA_GPU
// GPU path: a cuBLAS handle is created on this call and its status checked.
// NOTE(review): no matching cublasDestroy is visible in these lines --
// confirm the handle is released.
66 internal::check_CUDA(cublasCreate(&h));
// use_device_ptr makes xd/yd refer to their device addresses inside the
// target data region, so cuBLAS receives device pointers. The result is
// written to `ans`, whose declaration is outside the visible lines.
// NOTE(review): `size` is a size_t while the cuBLAS length parameter is an
// int -- very large vectors would be narrowed.
67 #pragma omp target data use_device_ptr(xd, yd)
70 cublasSdot(h, size, xd + xoffset, 1, yd + yoffset, 1, &ans));
// Reached when x claims device memory but GPU support is not built in
// (presumably the #else side of the guard above -- the directive itself is
// not visible here).
74 throw std::runtime_error(
75 "error USE_GPU is false, but get_device_mem_stat() == true");
// Host path: CBLAS dot product over the same offset ranges, unit stride.
78 ans = cblas_sdot(size, xd + xoffset, 1, yd + yoffset, 1);
// The local result is passed through comm.Allreduce. `comm` is defined
// outside the visible lines; presumably this combines partial results
// across processes -- confirm.
83 ans = comm.Allreduce(ans);