monolish  0.14.2
MONOlithic LInear equation Solvers for Highly-parallel architecture
dot.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 namespace monolish {
4 
5 namespace {
6 template <typename F1, typename F2> double Ddot_core(const F1 &x, const F2 &y) {
7  Logger &logger = Logger::get_instance();
8  logger.func_in(monolish_func);
9 
10  // err
11  assert(util::is_same_size(x, y));
12  assert(util::is_same_device_mem_stat(x, y));
13 
14  double ans = 0;
15  const double *xd = x.data();
16  const double *yd = y.data();
17  const size_t size = x.size();
18  const size_t xoffset = x.get_offset();
19  const size_t yoffset = y.get_offset();
20 
21  if (x.get_device_mem_stat() == true) {
22 #if MONOLISH_USE_NVIDIA_GPU
23  cublasHandle_t h;
24  internal::check_CUDA(cublasCreate(&h));
25 #pragma omp target data use_device_ptr(xd, yd)
26  {
27  internal::check_CUDA(
28  cublasDdot(h, size, xd + xoffset, 1, yd + yoffset, 1, &ans));
29  }
30  cublasDestroy(h);
31 #else
32  throw std::runtime_error(
33  "error USE_GPU is false, but get_device_mem_stat() == true");
34 #endif
35  } else {
36  ans = cblas_ddot(size, xd + xoffset, 1, yd + yoffset, 1);
37  }
38 
39 #if MONOLISH_USE_MPI
40  mpi::comm &comm = mpi::comm::get_instance();
41  ans = comm.Allreduce(ans);
42 #endif
43 
44  logger.func_out();
45  return ans;
46 }
47 
48 template <typename F1, typename F2> float Sdot_core(const F1 &x, const F2 &y) {
49  Logger &logger = Logger::get_instance();
50  logger.func_in(monolish_func);
51 
52  // err
53  assert(util::is_same_size(x, y));
54  assert(util::is_same_device_mem_stat(x, y));
55 
56  float ans = 0;
57  const float *xd = x.data();
58  const float *yd = y.data();
59  const size_t size = x.size();
60  const size_t xoffset = x.get_offset();
61  const size_t yoffset = y.get_offset();
62 
63  if (x.get_device_mem_stat() == true) {
64 #if MONOLISH_USE_NVIDIA_GPU
65  cublasHandle_t h;
66  internal::check_CUDA(cublasCreate(&h));
67 #pragma omp target data use_device_ptr(xd, yd)
68  {
69  internal::check_CUDA(
70  cublasSdot(h, size, xd + xoffset, 1, yd + yoffset, 1, &ans));
71  }
72  cublasDestroy(h);
73 #else
74  throw std::runtime_error(
75  "error USE_GPU is false, but get_device_mem_stat() == true");
76 #endif
77  } else {
78  ans = cblas_sdot(size, xd + xoffset, 1, yd + yoffset, 1);
79  }
80 
81 #if MONOLISH_USE_MPI
82  mpi::comm &comm = mpi::comm::get_instance();
83  ans = comm.Allreduce(ans);
84 #endif
85 
86  logger.func_out();
87  return ans;
88 }
89 
90 } // namespace
91 } // namespace monolish
monolish::util::is_same_size
bool is_same_size(const T &x, const U &y)
Compares the size of two vectors or 1D views (same as is_same_structure()).
Definition: monolish_common.hpp:377
monolish_func
#define monolish_func
Definition: monolish_logger.hpp:9
monolish::util::is_same_device_mem_stat
bool is_same_device_mem_stat(const T &arg1, const U &arg2)
Checks whether both arguments have the same device memory status.
Definition: monolish_common.hpp:454
monolish
Definition: monolish_matrix_blas.hpp:10
monolish::mpi::comm::get_instance
static comm & get_instance()
Definition: monolish_mpi_core.hpp:33
monolish::Logger::get_instance
static Logger & get_instance()
Definition: monolish_logger.hpp:42