// Dnrm2_core (double precision): accumulates the dot product of x with itself
// into `ans`, using cublasDdot when x reports device-resident data and
// cblas_ddot otherwise, then passes `ans` through comm.Allreduce.
// NOTE(review): this listing has gaps -- the declarations of `ans`, `h` and
// `comm`, the #else/#endif, the return statement and the closing braces are
// not visible here. Presumably the square root of `ans` is taken before
// returning (the name suggests a 2-norm); confirm against the complete file.
6 template <
typename F1>
double Dnrm2_core(
const F1 &x) {
// Raw element pointer, element count and starting offset of x; the offset is
// added to the pointer in both dot-product calls below.
11 const double *xd = x.data();
12 size_t size = x.size();
13 const size_t xoffset = x.get_offset();
// Device path: taken when x reports that its data is in device memory.
15 if (x.get_device_mem_stat() ==
true) {
16 #if MONOLISH_USE_NVIDIA_GPU
// A cuBLAS handle is created here on every call; its declaration and any
// matching destroy call are not visible in this listing.
18 internal::check_CUDA(cublasCreate(&h));
// use_device_ptr(xd): inside the target data region, xd refers to the device
// address, which is what the cuBLAS call receives.
19 #pragma omp target data use_device_ptr(xd)
// Dot product of x with itself (same pointer, unit stride, for both operands).
// The trailing "))" indicates an enclosing call whose opening is not visible
// (presumably internal::check_CUDA -- TODO confirm).
22 cublasDdot(h, size, xd + xoffset, 1, xd + xoffset, 1, &ans));
// Presumably the non-GPU build branch (the #else is not visible): device-
// resident data cannot be handled, so the call fails loudly.
26 throw std::runtime_error(
27 "error USE_GPU is false, but get_device_mem_stat() == true");
// Host path: same self dot product through CBLAS.
30 ans = cblas_ddot(size, xd + xoffset, 1, xd + xoffset, 1);
// Combines `ans` through comm.Allreduce; the definition of `comm` and the
// reduction it performs are not visible here.
35 ans = comm.Allreduce(ans);
// Snrm2_core (single precision): accumulates the dot product of x with itself
// into `ans`, using cublasSdot when x reports device-resident data and
// cblas_sdot otherwise, then passes `ans` through comm.Allreduce.
// NOTE(review): this listing has gaps -- the declarations of `ans`, `h` and
// `comm`, the #else/#endif, the return statement and the closing braces are
// not visible here. Presumably the square root of `ans` is taken before
// returning (the name suggests a 2-norm); confirm against the complete file.
42 template <
typename F1>
float Snrm2_core(
const F1 &x) {
// Raw element pointer, element count and starting offset of x; the offset is
// added to the pointer in both dot-product calls below.
47 const float *xd = x.data();
48 size_t size = x.size();
49 const size_t xoffset = x.get_offset();
// Device path: taken when x reports that its data is in device memory.
51 if (x.get_device_mem_stat() ==
true) {
52 #if MONOLISH_USE_NVIDIA_GPU
// A cuBLAS handle is created here on every call; its declaration and any
// matching destroy call are not visible in this listing.
54 internal::check_CUDA(cublasCreate(&h));
// use_device_ptr(xd): inside the target data region, xd refers to the device
// address, which is what the cuBLAS call receives.
55 #pragma omp target data use_device_ptr(xd)
// Dot product of x with itself (same pointer, unit stride, for both operands).
// The trailing "))" indicates an enclosing call whose opening is not visible
// (presumably internal::check_CUDA -- TODO confirm).
58 cublasSdot(h, size, xd + xoffset, 1, xd + xoffset, 1, &ans));
// Presumably the non-GPU build branch (the #else is not visible): device-
// resident data cannot be handled, so the call fails loudly.
62 throw std::runtime_error(
63 "error USE_GPU is false, but get_device_mem_stat() == true");
// Host path: same self dot product through CBLAS.
66 ans = cblas_sdot(size, xd + xoffset, 1, xd + xoffset, 1);
// Combines `ans` through comm.Allreduce; the definition of `comm` and the
// reduction it performs are not visible here.
71 ans = comm.Allreduce(ans);