1 #include "../../../include/monolish_blas.hpp" 
    2 #include "../../internal/monolish_internal.hpp" 
   14   const T *vald = val.data();
 
   15   const size_t N = get_col();
 
   16   const size_t Len = 
std::min(get_row(), get_col());
 
   18   assert(Len == vec.size());
 
   19   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
   21   if (gpu_status == 
true) {
 
   22 #if MONOLISH_USE_GPU // gpu 
   23 #pragma omp target teams distribute parallel for 
   24     for (
size_t i = 0; i < Len; i++) {
 
   25       vecd[i] = vald[N * i + i];
 
   28     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
   31 #pragma omp parallel for 
   32     for (
size_t i = 0; i < Len; i++) {
 
   33       vecd[i] = vald[N * i + i];
 
   42 template <
typename T> 
void Dense<T>::diag(view1D<vector<T>, T> &vec)
 const {
 
   48   const T *vald = val.data();
 
   49   const size_t N = get_col();
 
   50   const size_t Len = 
std::min(get_row(), get_col());
 
   52   assert(Len == vec.size());
 
   53   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
   55   if (gpu_status == 
true) {
 
   56 #if MONOLISH_USE_GPU // gpu 
   57 #pragma omp target teams distribute parallel for 
   58     for (
size_t i = 0; i < Len; i++) {
 
   59       vecd[i] = vald[N * i + i];
 
   62     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
   65 #pragma omp parallel for 
   66     for (
size_t i = 0; i < Len; i++) {
 
   67       vecd[i] = vald[N * i + i];
 
   74     view1D<vector<double>, 
double> &vec) 
const;
 
   85   const T *vald = val.data();
 
   86   const size_t N = get_col();
 
   87   const size_t Len = 
std::min(get_row(), get_col());
 
   89   assert(Len == vec.size());
 
   90   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
   92   if (gpu_status == 
true) {
 
   93 #if MONOLISH_USE_GPU // gpu 
   94 #pragma omp target teams distribute parallel for 
   95     for (
size_t i = 0; i < Len; i++) {
 
   96       vecd[i] = vald[N * i + i];
 
   99     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
  102 #pragma omp parallel for 
  103     for (
size_t i = 0; i < Len; i++) {
 
  104       vecd[i] = vald[N * i + i];
 
  111     view1D<matrix::Dense<double>, 
double> &vec) 
const;
 
  113     view1D<matrix::Dense<float>, 
float> &vec) 
const;
 
  116 template <
typename T> 
void Dense<T>::row(
const size_t r, vector<T> &vec)
 const {
 
  120   T *vecd = vec.data();
 
  122   const T *vald = val.data();
 
  123   const size_t N = get_col();
 
  125   assert(N == vec.size());
 
  126   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
  128   if (gpu_status == 
true) {
 
  129 #if MONOLISH_USE_GPU // gpu 
  130 #pragma omp target teams distribute parallel for 
  131     for (
size_t i = 0; i < N; i++) {
 
  132       vecd[i] = vald[r * N + i];
 
  135     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
  138 #pragma omp parallel for 
  139     for (
size_t i = 0; i < N; i++) {
 
  140       vecd[i] = vald[r * N + i];
 
  147                                                    vector<double> &vec) 
const;
 
  149                                                   vector<float> &vec) 
const;
 
  151 template <
typename T>
 
  152 void Dense<T>::row(
const size_t r, view1D<vector<T>, T> &vec)
 const {
 
  156   T *vecd = vec.data();
 
  158   const T *vald = val.data();
 
  159   const size_t N = get_col();
 
  161   assert(N == vec.size());
 
  162   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
  164   if (gpu_status == 
true) {
 
  165 #if MONOLISH_USE_GPU // gpu 
  166 #pragma omp target teams distribute parallel for 
  167     for (
size_t i = 0; i < N; i++) {
 
  168       vecd[i] = vald[r * N + i];
 
  171     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
  174 #pragma omp parallel for 
  175     for (
size_t i = 0; i < N; i++) {
 
  176       vecd[i] = vald[r * N + i];
 
  184                                      view1D<vector<double>, 
double> &vec) 
const;
 
  187                                     view1D<vector<float>, 
float> &vec) 
const;
 
  189 template <
typename T>
 
  190 void Dense<T>::row(
const size_t r, view1D<matrix::Dense<T>, T> &vec)
 const {
 
  194   T *vecd = vec.data();
 
  196   const T *vald = val.data();
 
  197   const size_t N = get_col();
 
  199   assert(N == vec.size());
 
  200   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
  202   if (gpu_status == 
true) {
 
  203 #if MONOLISH_USE_GPU // gpu 
  204 #pragma omp target teams distribute parallel for 
  205     for (
size_t i = 0; i < N; i++) {
 
  206       vecd[i] = vald[r * N + i];
 
  209     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
  212 #pragma omp parallel for 
  213     for (
size_t i = 0; i < N; i++) {
 
  214       vecd[i] = vald[r * N + i];
 
  221     const size_t r, view1D<matrix::Dense<double>, 
double> &vec) 
const;
 
  223     const size_t r, view1D<matrix::Dense<float>, 
float> &vec) 
const;
 
  226 template <
typename T> 
void Dense<T>::col(
const size_t c, vector<T> &vec)
 const {
 
  230   T *vecd = vec.data();
 
  232   const T *vald = val.data();
 
  233   const size_t M = get_row();
 
  234   const size_t N = get_col();
 
  236   assert(M == vec.size());
 
  237   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
  239   if (gpu_status == 
true) {
 
  240 #if MONOLISH_USE_GPU // gpu 
  241 #pragma omp target teams distribute parallel for 
  242     for (
size_t i = 0; i < M; i++) {
 
  243       vecd[i] = vald[i * N + c];
 
  246     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
  249 #pragma omp parallel for 
  250     for (
size_t i = 0; i < M; i++) {
 
  251       vecd[i] = vald[i * N + c];
 
  258                                                    vector<double> &vec) 
const;
 
  260                                                   vector<float> &vec) 
const;
 
  262 template <
typename T>
 
  263 void Dense<T>::col(
const size_t c, view1D<vector<T>, T> &vec)
 const {
 
  267   T *vecd = vec.data();
 
  269   const T *vald = val.data();
 
  270   const size_t M = get_row();
 
  271   const size_t N = get_col();
 
  273   assert(M == vec.size());
 
  274   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
  276   if (gpu_status == 
true) {
 
  277 #if MONOLISH_USE_GPU // gpu 
  278 #pragma omp target teams distribute parallel for 
  279     for (
size_t i = 0; i < M; i++) {
 
  280       vecd[i] = vald[i * N + c];
 
  283     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
  286 #pragma omp parallel for 
  287     for (
size_t i = 0; i < M; i++) {
 
  288       vecd[i] = vald[i * N + c];
 
  296                                      view1D<vector<double>, 
double> &vec) 
const;
 
  299                                     view1D<vector<float>, 
float> &vec) 
const;
 
  301 template <
typename T>
 
  302 void Dense<T>::col(
const size_t c, view1D<matrix::Dense<T>, T> &vec)
 const {
 
  306   T *vecd = vec.data();
 
  308   const T *vald = val.data();
 
  309   const size_t M = get_row();
 
  310   const size_t N = get_col();
 
  312   assert(M == vec.size());
 
  313   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
  315   if (gpu_status == 
true) {
 
  316 #if MONOLISH_USE_GPU // gpu 
  317 #pragma omp target teams distribute parallel for 
  318     for (
size_t i = 0; i < M; i++) {
 
  319       vecd[i] = vald[i * N + c];
 
  322     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
  325 #pragma omp parallel for 
  326     for (
size_t i = 0; i < M; i++) {
 
  327       vecd[i] = vald[i * N + c];
 
  334     const size_t c, view1D<matrix::Dense<double>, 
double> &vec) 
const;
 
  336     const size_t c, view1D<matrix::Dense<float>, 
float> &vec) 
const;