/**
 * @brief Double-precision axpyz core: z[i] = alpha * x[i] + y[i].
 *
 * @param alpha scalar multiplier applied to x
 * @param x     input vector-like object (read via data()/size()/get_offset())
 * @param y     input vector-like object (read via data()/get_offset())
 * @param z     output vector-like object (written via data()/get_offset())
 * @throws std::runtime_error if the data resides on the GPU but the library
 *         was built without GPU support.
 *
 * Requires x, y, z to expose data(), get_offset(), and get_device_mem_stat();
 * x additionally provides size(). All three are assumed to hold at least
 * size() elements past their offsets — TODO(review): confirm callers
 * guarantee matching sizes.
 */
template <typename F1, typename F2, typename F3, typename F4>
void Daxpyz_core(const F1 alpha, const F2 &x, const F3 &y, F4 &z) {
  const double *xd = x.data();
  const double *yd = y.data();
  double *zd = z.data();
  const size_t size = x.size();
  const size_t xoffset = x.get_offset();
  const size_t yoffset = y.get_offset();
  const size_t zoffset = z.get_offset();

  if (x.get_device_mem_stat()) {
    // Data lives on the device: offload the loop, or fail loudly when the
    // build has no GPU support (the runtime flag and the compile-time switch
    // disagree). NOTE(review): macro name inferred from the error message;
    // confirm against the project's build configuration.
#if MONOLISH_USE_GPU
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < size; i++) {
      zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
    }
#else
    throw std::runtime_error(
        "error USE_GPU is false, but get_device_mem_stat() == true");
#endif
  } else {
    // Host path: plain OpenMP parallel loop.
#pragma omp parallel for
    for (size_t i = 0; i < size; i++) {
      zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
    }
  }
}
/**
 * @brief Single-precision axpyz core: z[i] = alpha * x[i] + y[i].
 *
 * @param alpha scalar multiplier applied to x
 * @param x     input vector-like object (read via data()/size()/get_offset())
 * @param y     input vector-like object (read via data()/get_offset())
 * @param z     output vector-like object (written via data()/get_offset())
 * @throws std::runtime_error if the data resides on the GPU but the library
 *         was built without GPU support.
 *
 * Mirrors Daxpyz_core for float. Requires x, y, z to expose data(),
 * get_offset(), and get_device_mem_stat(); x additionally provides size().
 */
template <typename F1, typename F2, typename F3, typename F4>
void Saxpyz_core(const F1 alpha, const F2 &x, const F3 &y, F4 &z) {
  const float *xd = x.data();
  const float *yd = y.data();
  // Bug fix: zd was used below but its declaration was missing.
  float *zd = z.data();
  const size_t size = x.size();
  const size_t xoffset = x.get_offset();
  const size_t yoffset = y.get_offset();
  const size_t zoffset = z.get_offset();

  if (x.get_device_mem_stat()) {
    // Device path, only available in GPU builds. NOTE(review): macro name
    // inferred from the error message; confirm against the build system.
#if MONOLISH_USE_GPU
#pragma omp target teams distribute parallel for
    for (size_t i = 0; i < size; i++) {
      zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
    }
#else
    throw std::runtime_error(
        "error USE_GPU is false, but get_device_mem_stat() == true");
#endif
  } else {
    // Host path: plain OpenMP parallel loop.
#pragma omp parallel for
    for (size_t i = 0; i < size; i++) {
      zd[i + zoffset] = alpha * xd[i + xoffset] + yd[i + yoffset];
    }
  }
}