monolish  0.14.0
MONOlithic LInear equation Solvers for Highly-parallel architecture
sum.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 namespace monolish {
4 
5 namespace {
6 template <typename F1> double Dsum_core(const F1 &x) {
7  Logger &logger = Logger::get_instance();
8  logger.func_in(monolish_func);
9 
10  double ans = 0;
11  const double *xd = x.data();
12  size_t size = x.size();
13  const size_t xoffset = x.get_offset();
14 
15  if (x.get_device_mem_stat() == true) {
16 #if MONOLISH_USE_GPU
17 #pragma omp target teams distribute parallel for reduction(+ : ans) map (tofrom: ans)
18  for (size_t i = 0; i < size; i++) {
19  ans += xd[i + xoffset];
20  }
21 #else
22  throw std::runtime_error(
23  "error USE_GPU is false, but get_device_mem_stat() == true");
24 #endif
25  } else {
26 #pragma omp parallel for reduction(+ : ans)
27  for (size_t i = 0; i < size; i++) {
28  ans += xd[i + xoffset];
29  }
30  }
31 
32  logger.func_out();
33  return ans;
34 }
35 
36 template <typename F1> float Ssum_core(const F1 &x) {
37  Logger &logger = Logger::get_instance();
38  logger.func_in(monolish_func);
39 
40  float ans = 0;
41  const float *xd = x.data();
42  size_t size = x.size();
43  const size_t xoffset = x.get_offset();
44 
45  if (x.get_device_mem_stat() == true) {
46 #if MONOLISH_USE_GPU
47 #pragma omp target teams distribute parallel for reduction(+ : ans) map (tofrom: ans)
48  for (size_t i = 0; i < size; i++) {
49  ans += xd[i + xoffset];
50  }
51 #else
52  throw std::runtime_error(
53  "error USE_GPU is false, but get_device_mem_stat() == true");
54 #endif
55  } else {
56 #pragma omp parallel for reduction(+ : ans)
57  for (size_t i = 0; i < size; i++) {
58  ans += xd[i + xoffset];
59  }
60  }
61 
62  logger.func_out();
63  return ans;
64 }
65 
66 } // namespace
67 
68 } // namespace monolish
monolish_func
#define monolish_func
Definition: monolish_logger.hpp:9
monolish
Definition: monolish_matrix_blas.hpp:9
monolish::Logger::get_instance
static Logger & get_instance()
Definition: monolish_logger.hpp:42