#include "../../../include/monolish_blas.hpp"
#include "../../internal/monolish_internal.hpp"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <stdexcept>
14 const T *vald = val.data();
15 const size_t N = get_col();
16 const size_t Len =
std::min(get_row(), get_col());
18 assert(Len == vec.size());
19 assert(get_device_mem_stat() == vec.get_device_mem_stat());
21 if (gpu_status ==
true) {
22 #if MONOLISH_USE_GPU // gpu
23 #pragma omp target teams distribute parallel for
24 for (
size_t i = 0; i < Len; i++) {
25 vecd[i] = vald[N * i + i];
28 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
31 #pragma omp parallel for
32 for (
size_t i = 0; i < Len; i++) {
33 vecd[i] = vald[N * i + i];
42 template <
typename T>
void Dense<T>::diag(view1D<vector<T>, T> &vec)
const {
48 const T *vald = val.data();
49 const size_t N = get_col();
50 const size_t Len =
std::min(get_row(), get_col());
52 assert(Len == vec.size());
53 assert(get_device_mem_stat() == vec.get_device_mem_stat());
55 if (gpu_status ==
true) {
56 #if MONOLISH_USE_GPU // gpu
57 #pragma omp target teams distribute parallel for
58 for (
size_t i = 0; i < Len; i++) {
59 vecd[i] = vald[N * i + i];
62 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
65 #pragma omp parallel for
66 for (
size_t i = 0; i < Len; i++) {
67 vecd[i] = vald[N * i + i];
74 view1D<vector<double>,
double> &vec)
const;
85 const T *vald = val.data();
86 const size_t N = get_col();
87 const size_t Len =
std::min(get_row(), get_col());
89 assert(Len == vec.size());
90 assert(get_device_mem_stat() == vec.get_device_mem_stat());
92 if (gpu_status ==
true) {
93 #if MONOLISH_USE_GPU // gpu
94 #pragma omp target teams distribute parallel for
95 for (
size_t i = 0; i < Len; i++) {
96 vecd[i] = vald[N * i + i];
99 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
102 #pragma omp parallel for
103 for (
size_t i = 0; i < Len; i++) {
104 vecd[i] = vald[N * i + i];
111 view1D<matrix::Dense<double>,
double> &vec)
const;
113 view1D<matrix::Dense<float>,
float> &vec)
const;
116 template <
typename T>
void Dense<T>::row(
const size_t r, vector<T> &vec)
const {
120 T *vecd = vec.data();
122 const T *vald = val.data();
123 const size_t N = get_col();
125 assert(N == vec.size());
126 assert(get_device_mem_stat() == vec.get_device_mem_stat());
128 if (gpu_status ==
true) {
129 #if MONOLISH_USE_GPU // gpu
130 #pragma omp target teams distribute parallel for
131 for (
size_t i = 0; i < N; i++) {
132 vecd[i] = vald[r * N + i];
135 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
138 #pragma omp parallel for
139 for (
size_t i = 0; i < N; i++) {
140 vecd[i] = vald[r * N + i];
147 vector<double> &vec)
const;
149 vector<float> &vec)
const;
151 template <
typename T>
152 void Dense<T>::row(
const size_t r, view1D<vector<T>, T> &vec)
const {
156 T *vecd = vec.data();
158 const T *vald = val.data();
159 const size_t N = get_col();
161 assert(N == vec.size());
162 assert(get_device_mem_stat() == vec.get_device_mem_stat());
164 if (gpu_status ==
true) {
165 #if MONOLISH_USE_GPU // gpu
166 #pragma omp target teams distribute parallel for
167 for (
size_t i = 0; i < N; i++) {
168 vecd[i] = vald[r * N + i];
171 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
174 #pragma omp parallel for
175 for (
size_t i = 0; i < N; i++) {
176 vecd[i] = vald[r * N + i];
184 view1D<vector<double>,
double> &vec)
const;
187 view1D<vector<float>,
float> &vec)
const;
189 template <
typename T>
190 void Dense<T>::row(
const size_t r, view1D<matrix::Dense<T>, T> &vec)
const {
194 T *vecd = vec.data();
196 const T *vald = val.data();
197 const size_t N = get_col();
199 assert(N == vec.size());
200 assert(get_device_mem_stat() == vec.get_device_mem_stat());
202 if (gpu_status ==
true) {
203 #if MONOLISH_USE_GPU // gpu
204 #pragma omp target teams distribute parallel for
205 for (
size_t i = 0; i < N; i++) {
206 vecd[i] = vald[r * N + i];
209 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
212 #pragma omp parallel for
213 for (
size_t i = 0; i < N; i++) {
214 vecd[i] = vald[r * N + i];
221 const size_t r, view1D<matrix::Dense<double>,
double> &vec)
const;
223 const size_t r, view1D<matrix::Dense<float>,
float> &vec)
const;
226 template <
typename T>
void Dense<T>::col(
const size_t c, vector<T> &vec)
const {
230 T *vecd = vec.data();
232 const T *vald = val.data();
233 const size_t M = get_row();
234 const size_t N = get_col();
236 assert(M == vec.size());
237 assert(get_device_mem_stat() == vec.get_device_mem_stat());
239 if (gpu_status ==
true) {
240 #if MONOLISH_USE_GPU // gpu
241 #pragma omp target teams distribute parallel for
242 for (
size_t i = 0; i < M; i++) {
243 vecd[i] = vald[i * N + c];
246 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
249 #pragma omp parallel for
250 for (
size_t i = 0; i < M; i++) {
251 vecd[i] = vald[i * N + c];
258 vector<double> &vec)
const;
260 vector<float> &vec)
const;
262 template <
typename T>
263 void Dense<T>::col(
const size_t c, view1D<vector<T>, T> &vec)
const {
267 T *vecd = vec.data();
269 const T *vald = val.data();
270 const size_t M = get_row();
271 const size_t N = get_col();
273 assert(M == vec.size());
274 assert(get_device_mem_stat() == vec.get_device_mem_stat());
276 if (gpu_status ==
true) {
277 #if MONOLISH_USE_GPU // gpu
278 #pragma omp target teams distribute parallel for
279 for (
size_t i = 0; i < M; i++) {
280 vecd[i] = vald[i * N + c];
283 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
286 #pragma omp parallel for
287 for (
size_t i = 0; i < M; i++) {
288 vecd[i] = vald[i * N + c];
296 view1D<vector<double>,
double> &vec)
const;
299 view1D<vector<float>,
float> &vec)
const;
301 template <
typename T>
302 void Dense<T>::col(
const size_t c, view1D<matrix::Dense<T>, T> &vec)
const {
306 T *vecd = vec.data();
308 const T *vald = val.data();
309 const size_t M = get_row();
310 const size_t N = get_col();
312 assert(M == vec.size());
313 assert(get_device_mem_stat() == vec.get_device_mem_stat());
315 if (gpu_status ==
true) {
316 #if MONOLISH_USE_GPU // gpu
317 #pragma omp target teams distribute parallel for
318 for (
size_t i = 0; i < M; i++) {
319 vecd[i] = vald[i * N + c];
322 throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
325 #pragma omp parallel for
326 for (
size_t i = 0; i < M; i++) {
327 vecd[i] = vald[i * N + c];
334 const size_t c, view1D<matrix::Dense<double>,
double> &vec)
const;
336 const size_t c, view1D<matrix::Dense<float>,
float> &vec)
const;