monolish  0.14.0
MONOlithic LInear equation Solvers for Highly-parallel architecture
axpyz.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 namespace monolish {
4 
5 namespace {
6 template <typename F1, typename F2, typename F3, typename F4>
7 void Daxpyz_core(const F1 alpha, const F2 &x, const F3 &y, F4 &z) {
8  Logger &logger = Logger::get_instance();
9  logger.func_in(monolish_func);
10 
11  // err
12  assert(util::is_same_size(x, y, z));
13  assert(util::is_same_device_mem_stat(x, y, z));
14 
15  const double *xd = x.data();
16  const double *yd = y.data();
17  double *zd = z.data();
18  size_t size = x.size();
19  const size_t xoffset = x.get_offset();
20  const size_t yoffset = y.get_offset();
21  const size_t zoffset = z.get_offset();
22 
23  if (x.get_device_mem_stat() == true) {
24 #if MONOLISH_USE_GPU
25 #pragma omp target teams distribute parallel for
26  for (size_t i = 0; i < size; i++) {
27  zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
28  }
29 #else
30  throw std::runtime_error(
31  "error USE_GPU is false, but get_device_mem_stat() == true");
32 #endif
33  } else {
34 #pragma omp parallel for
35  for (size_t i = 0; i < size; i++) {
36  zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
37  }
38  }
39  logger.func_out();
40 }
41 
42 template <typename F1, typename F2, typename F3, typename F4>
43 void Saxpyz_core(const F1 alpha, const F2 &x, const F3 &y, F4 &z) {
44  Logger &logger = Logger::get_instance();
45  logger.func_in(monolish_func);
46 
47  // err
48  assert(util::is_same_size(x, y, z));
49  assert(util::is_same_device_mem_stat(x, y, z));
50 
51  const float *xd = x.data();
52  const float *yd = y.data();
53  float *zd = z.data();
54  size_t size = x.size();
55  const size_t xoffset = x.get_offset();
56  const size_t yoffset = y.get_offset();
57  const size_t zoffset = z.get_offset();
58 
59  if (x.get_device_mem_stat() == true) {
60 #if MONOLISH_USE_GPU
61 #pragma omp target teams distribute parallel for
62  for (size_t i = 0; i < size; i++) {
63  zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
64  }
65 #else
66  throw std::runtime_error(
67  "error USE_GPU is false, but get_device_mem_stat() == true");
68 #endif
69  } else {
70 #pragma omp parallel for
71  for (size_t i = 0; i < size; i++) {
72  zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
73  }
74  }
75  logger.func_out();
76 }
77 
78 } // namespace
79 
80 } // namespace monolish
monolish::util::is_same_size
bool is_same_size(const T &x, const U &y)
Compare the sizes of vectors or 1D views (same as is_same_structure())
Definition: monolish_common.hpp:358
monolish_func
#define monolish_func
Definition: monolish_logger.hpp:9
monolish::util::is_same_device_mem_stat
bool is_same_device_mem_stat(const T &arg1, const U &arg2)
Check whether the arguments have the same device memory status
Definition: monolish_common.hpp:431
monolish
Definition: monolish_matrix_blas.hpp:9
monolish::Logger::get_instance
static Logger & get_instance()
Definition: monolish_logger.hpp:42