1 #include "../../../include/monolish_blas.hpp" 
    2 #include "../../internal/monolish_internal.hpp" 
    // Diagonal extraction from this CRS matrix into `vec` (vector<T> overload).
    // NOTE(review): this listing is a lossy extraction -- each line carries a
    // stray leading line-number token and the loop bodies, the #else/#endif
    // arms and the closing braces are not visible. The comments describe only
    // the visible statements; anything about hidden lines is marked as a guess.
    8 template <
typename T> 
void CRS<T>::diag(vector<T> &vec)
 const {
 
        // n is the shorter of the two dimensions (assumes rowN/colN are the
        // values behind get_row()/get_col() -- TODO confirm).
   12   size_t n = get_row() < get_col() ? rowN : colN;
 
        // Precondition: vec holds exactly n elements.
   15   assert(n == vec.size());
 
        // Precondition: matrix and vec report the same device-memory status.
   16   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Raw pointers to the stored values, the per-row offsets and the
        // column indices, taken once for use inside the OpenMP loops.
   18   const T *vald = val.data();
 
   19   const int *rowd = row_ptr.data();
 
   20   const int *cold = col_ind.data();
 
        // Offload path is selected at run time by gpu_status.
   22   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
   23 #if MONOLISH_USE_GPU // gpu 
        // First pass over all n entries; body not visible here (presumably it
        // clears vec before the scan -- TODO confirm).
   24 #pragma omp target teams distribute parallel for 
   25     for (
size_t i = 0; i < n; i++) {
 
        // Second pass: one iteration per row i.
   28 #pragma omp target teams distribute parallel for 
   29     for (
size_t i = 0; i < n; i++) {
 
        // Walk the stored entries of row i: j in [rowd[i], rowd[i + 1]).
   30       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Diagonal hit when the entry's column index equals the row index
        // (i is cast to int to match cold's element type). The action taken
        // is not visible here (presumably vec[i] = vald[j] -- TODO confirm).
   31         if ((
int)i == cold[j]) {
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
   37     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
   40 #pragma omp parallel for 
   41     for (
size_t i = 0; i < n; i++) {
 
   44 #pragma omp parallel for 
   45     for (
size_t i = 0; i < n; i++) {
 
        // Entries of row i, as in the offload loop.
   46       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Column index equal to row index marks the diagonal entry.
   47         if ((
int)i == cold[j]) {
 
   // Diagonal extraction from this CRS matrix into `vec`
   // (view1D<vector<T>, T> overload).
   // NOTE(review): this listing is a lossy extraction -- each line carries a
   // stray leading line-number token and the loop bodies, the #else/#endif
   // arms and the closing braces are not visible. The comments describe only
   // the visible statements; anything about hidden lines is marked as a guess.
   59 template <
typename T> 
void CRS<T>::diag(view1D<vector<T>, T> &vec)
 const {
 
        // n is the shorter of the two dimensions (assumes rowN/colN are the
        // values behind get_row()/get_col() -- TODO confirm).
   63   size_t n = get_row() < get_col() ? rowN : colN;
 
        // Precondition: the view holds exactly n elements.
   66   assert(n == vec.size());
 
        // Precondition: matrix and view report the same device-memory status.
   67   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Raw pointers to the stored values, the per-row offsets and the
        // column indices, taken once for use inside the OpenMP loops.
   69   const T *vald = val.data();
 
   70   const int *rowd = row_ptr.data();
 
   71   const int *cold = col_ind.data();
 
        // Offload path is selected at run time by gpu_status.
   73   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
   74 #if MONOLISH_USE_GPU // gpu 
        // First pass over all n entries; body not visible here (presumably it
        // clears the view before the scan -- TODO confirm).
   75 #pragma omp target teams distribute parallel for 
   76     for (
size_t i = 0; i < n; i++) {
 
        // Second pass: one iteration per row i.
   79 #pragma omp target teams distribute parallel for 
   80     for (
size_t i = 0; i < n; i++) {
 
        // Walk the stored entries of row i: j in [rowd[i], rowd[i + 1]).
   81       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Diagonal hit when the entry's column index equals the row index.
        // The action taken is not visible here (presumably the view's i-th
        // element receives vald[j]; any view offset handling is also hidden
        // -- TODO confirm).
   82         if ((
int)i == cold[j]) {
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
   88     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
   91 #pragma omp parallel for 
   92     for (
size_t i = 0; i < n; i++) {
 
   95 #pragma omp parallel for 
   96     for (
size_t i = 0; i < n; i++) {
 
        // Entries of row i, as in the offload loop.
   97       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Column index equal to row index marks the diagonal entry.
   98         if ((
int)i == cold[j]) {
 
  // Diagonal extraction from this CRS matrix into `vec`
  // (view1D<matrix::Dense<T>, T> overload).
  // NOTE(review): this listing is a lossy extraction -- each line carries a
  // stray leading line-number token and the loop bodies, the #else/#endif
  // arms and the closing braces are not visible. The comments describe only
  // the visible statements; anything about hidden lines is marked as a guess.
  112 template <
typename T>
 
  113 void CRS<T>::diag(view1D<matrix::Dense<T>, T> &vec)
 const {
 
        // n is the shorter of the two dimensions (assumes rowN/colN are the
        // values behind get_row()/get_col() -- TODO confirm).
  117   size_t n = get_row() < get_col() ? rowN : colN;
 
        // Writable pointer to the destination storage.
        // NOTE(review): whether data() already accounts for the view's offset
        // cannot be told from this listing -- confirm.
  118   T *vecd = vec.data();
 
        // Precondition: the view holds exactly n elements.
  120   assert(n == vec.size());
 
        // Precondition: matrix and view report the same device-memory status.
  121   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Raw pointers to the stored values, the per-row offsets and the
        // column indices, taken once for use inside the OpenMP loops.
  123   const T *vald = val.data();
 
  124   const int *rowd = row_ptr.data();
 
  125   const int *cold = col_ind.data();
 
        // Offload path is selected at run time by gpu_status.
  127   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
  128 #if MONOLISH_USE_GPU // gpu 
        // First pass over all n entries; body not visible here (presumably it
        // clears the destination before the scan -- TODO confirm).
  129 #pragma omp target teams distribute parallel for 
  130     for (
size_t i = 0; i < n; i++) {
 
        // Second pass: one iteration per row i.
  133 #pragma omp target teams distribute parallel for 
  134     for (
size_t i = 0; i < n; i++) {
 
        // Walk the stored entries of row i: j in [rowd[i], rowd[i + 1]).
  135       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Diagonal hit when the entry's column index equals the row index.
        // The action taken is not visible here (presumably a store of
        // vald[j] through vecd -- TODO confirm).
  136         if ((
int)i == cold[j]) {
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
  142     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
  145 #pragma omp parallel for 
  146     for (
size_t i = 0; i < n; i++) {
 
  149 #pragma omp parallel for 
  150     for (
size_t i = 0; i < n; i++) {
 
        // Entries of row i, as in the offload loop.
  151       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Column index equal to row index marks the diagonal entry.
  152         if ((
int)i == cold[j]) {
 
  162     view1D<matrix::Dense<double>, 
double> &vec) 
const;
 
  164     view1D<matrix::Dense<float>, 
float> &vec) 
const;
 
  // Scatters the stored entries of row `r` of this CRS matrix into `vec`
  // (vector<T> overload): each stored value lands at its column index.
  // NOTE(review): this listing is a lossy extraction -- each line carries a
  // stray leading line-number token and some loop bodies, the #else/#endif
  // arms and the closing braces are not visible. The comments describe only
  // the visible statements; anything about hidden lines is marked as a guess.
  167 template <
typename T> 
void CRS<T>::row(
const size_t r, vector<T> &vec)
 const {
 
        // NOTE(review): n is the ROW count, but vec is written at COLUMN
        // indices below (vecd[indexd[j]] / vecd[col_ind[j]]). For a matrix
        // with more columns than rows those writes can land past vec's
        // asserted length -- confirm whether get_col() was intended.
  171   size_t n = get_row();
 
        // Writable pointer to the destination storage.
  172   T *vecd = vec.data();
 
        // Raw pointers to the stored values and the per-row offsets.
  174   const T *vald = val.data();
 
  175   const int *rowd = row_ptr.data();
 
        // Precondition: vec holds exactly n elements (see the note on n).
  177   assert(n == vec.size());
 
        // Precondition: matrix and vec report the same device-memory status.
  178   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Offload path is selected at run time by gpu_status.
  180   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
  181 #if MONOLISH_USE_GPU // gpu 
        // Raw pointer to the column indices for use in the target region.
  182     const int *indexd = col_ind.data();
 
        // First pass over all n entries; body not visible here (presumably it
        // clears vec before the scatter -- TODO confirm).
  184 #pragma omp target teams distribute parallel for 
  185     for (
size_t i = 0; i < n; i++) {
 
        // Stored entries of row r: j in [rowd[r], rowd[r + 1]).
        // NOTE(review): no bounds check on r is visible in this listing.
  188 #pragma omp target teams distribute parallel for 
  189     for (
int j = rowd[r]; j < rowd[r + 1]; j++) {
 
        // Place entry j at the position given by its column index.
  190       vecd[indexd[j]] = vald[j];
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
  193     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
  196 #pragma omp parallel for 
  197     for (
size_t i = 0; i < n; i++) {
 
  200 #pragma omp parallel for 
  201     for (
int j = rowd[r]; j < rowd[r + 1]; j++) {
 
        // Host path indexes the col_ind container directly rather than
        // through a raw pointer.
  202       vecd[col_ind[j]] = vald[j];
 
  209                                                  vector<double> &vec) 
const;
 
  211                                                 vector<float> &vec) 
const;
 
  // Scatters the stored entries of row `r` of this CRS matrix into `vec`
  // (view1D<vector<T>, T> overload): each stored value lands at its column
  // index.
  // NOTE(review): this listing is a lossy extraction -- each line carries a
  // stray leading line-number token and some loop bodies, the #else/#endif
  // arms and the closing braces are not visible. The comments describe only
  // the visible statements; anything about hidden lines is marked as a guess.
  213 template <
typename T>
 
  214 void CRS<T>::row(
const size_t r, view1D<vector<T>, T> &vec)
 const {
 
        // NOTE(review): n is the ROW count, but vec is written at COLUMN
        // indices below (vecd[indexd[j]] / vecd[col_ind[j]]). For a matrix
        // with more columns than rows those writes can land past the view's
        // asserted length -- confirm whether get_col() was intended.
  218   size_t n = get_row();
 
        // Writable pointer to the destination storage.
        // NOTE(review): whether data() already accounts for the view's offset
        // cannot be told from this listing -- confirm.
  219   T *vecd = vec.data();
 
        // Raw pointers to the stored values and the per-row offsets.
  221   const T *vald = val.data();
 
  222   const int *rowd = row_ptr.data();
 
        // Precondition: the view holds exactly n elements (see the note on n).
  224   assert(n == vec.size());
 
        // Precondition: matrix and view report the same device-memory status.
  225   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Offload path is selected at run time by gpu_status.
  227   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
  228 #if MONOLISH_USE_GPU // gpu 
        // Raw pointer to the column indices for use in the target region.
  229     const int *indexd = col_ind.data();
 
        // First pass over all n entries; body not visible here (presumably it
        // clears the view before the scatter -- TODO confirm).
  231 #pragma omp target teams distribute parallel for 
  232     for (
size_t i = 0; i < n; i++) {
 
        // Stored entries of row r: j in [rowd[r], rowd[r + 1]).
        // NOTE(review): no bounds check on r is visible in this listing.
  235 #pragma omp target teams distribute parallel for 
  236     for (
int j = rowd[r]; j < rowd[r + 1]; j++) {
 
        // Place entry j at the position given by its column index.
  237       vecd[indexd[j]] = vald[j];
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
  240     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
  243 #pragma omp parallel for 
  244     for (
size_t i = 0; i < n; i++) {
 
  247 #pragma omp parallel for 
  248     for (
int j = rowd[r]; j < rowd[r + 1]; j++) {
 
        // Host path indexes the col_ind container directly rather than
        // through a raw pointer.
  249       vecd[col_ind[j]] = vald[j];
 
  257                                    view1D<vector<double>, 
double> &vec) 
const;
 
  260                                   view1D<vector<float>, 
float> &vec) 
const;
 
  // Scatters the stored entries of row `r` of this CRS matrix into `vec`
  // (view1D<matrix::Dense<T>, T> overload): each stored value lands at its
  // column index.
  // NOTE(review): this listing is a lossy extraction -- each line carries a
  // stray leading line-number token and some loop bodies, the #else/#endif
  // arms and the closing braces are not visible. The comments describe only
  // the visible statements; anything about hidden lines is marked as a guess.
  262 template <
typename T>
 
  263 void CRS<T>::row(
const size_t r, view1D<matrix::Dense<T>, T> &vec)
 const {
 
        // NOTE(review): n is the ROW count, but vec is written at COLUMN
        // indices below (vecd[indexd[j]] / vecd[col_ind[j]]). For a matrix
        // with more columns than rows those writes can land past the view's
        // asserted length -- confirm whether get_col() was intended.
  267   size_t n = get_row();
 
        // Writable pointer to the destination storage.
        // NOTE(review): whether data() already accounts for the view's offset
        // cannot be told from this listing -- confirm.
  268   T *vecd = vec.data();
 
        // Raw pointers to the stored values and the per-row offsets.
  270   const T *vald = val.data();
 
  271   const int *rowd = row_ptr.data();
 
        // Precondition: the view holds exactly n elements (see the note on n).
  273   assert(n == vec.size());
 
        // Precondition: matrix and view report the same device-memory status.
  274   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Offload path is selected at run time by gpu_status.
  276   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
  277 #if MONOLISH_USE_GPU // gpu 
        // Raw pointer to the column indices for use in the target region.
  278     const int *indexd = col_ind.data();
 
        // First pass over all n entries; body not visible here (presumably it
        // clears the destination before the scatter -- TODO confirm).
  280 #pragma omp target teams distribute parallel for 
  281     for (
size_t i = 0; i < n; i++) {
 
        // Stored entries of row r: j in [rowd[r], rowd[r + 1]).
        // NOTE(review): no bounds check on r is visible in this listing.
  284 #pragma omp target teams distribute parallel for 
  285     for (
int j = rowd[r]; j < rowd[r + 1]; j++) {
 
        // Place entry j at the position given by its column index.
  286       vecd[indexd[j]] = vald[j];
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
  289     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
  292 #pragma omp parallel for 
  293     for (
size_t i = 0; i < n; i++) {
 
  296 #pragma omp parallel for 
  297     for (
int j = rowd[r]; j < rowd[r + 1]; j++) {
 
        // Host path indexes the col_ind container directly rather than
        // through a raw pointer.
  298       vecd[col_ind[j]] = vald[j];
 
  305     const size_t r, view1D<matrix::Dense<double>, 
double> &vec) 
const;
 
  307     const size_t r, view1D<matrix::Dense<float>, 
float> &vec) 
const;
 
  // Column extraction: scans the stored entries of this CRS matrix for those
  // whose column index equals `c` (vector<T> overload).
  // NOTE(review): this listing is a lossy extraction -- each line carries a
  // stray leading line-number token and the loop bodies, the #else/#endif
  // arms and the closing braces are not visible. The comments describe only
  // the visible statements; anything about hidden lines is marked as a guess.
  310 template <
typename T> 
void CRS<T>::col(
const size_t c, vector<T> &vec)
 const {
 
        // NOTE(review): n is the COLUMN count, yet the loops below use
        // i < n as an index into the row offsets (rowd[i], rowd[i + 1]).
        // For a matrix with more columns than rows that appears to read past
        // the end of row_ptr, and with fewer columns than rows the trailing
        // rows are never scanned -- confirm whether get_row() was intended.
  314   size_t n = get_col();
 
        // Writable pointer to the destination storage.
  315   T *vecd = vec.data();
 
        // Raw pointers to the stored values, the per-row offsets and the
        // column indices, taken once for use inside the OpenMP loops.
  317   const T *vald = val.data();
 
  318   const int *rowd = row_ptr.data();
 
  319   const int *cold = col_ind.data();
 
        // Precondition: vec holds exactly n elements (see the note on n).
  321   assert(n == vec.size());
 
        // Precondition: matrix and vec report the same device-memory status.
  322   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Offload path is selected at run time by gpu_status.
  324   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
  325 #if MONOLISH_USE_GPU // gpu 
        // First pass over all n entries; body not visible here (presumably it
        // clears vec before the scan -- TODO confirm).
  326 #pragma omp target teams distribute parallel for 
  327     for (
size_t i = 0; i < n; i++) {
 
        // Second pass: one iteration per i, used as a row index below.
  330 #pragma omp target teams distribute parallel for 
  331     for (
size_t i = 0; i < n; i++) {
 
        // Walk the stored entries of row i: j in [rowd[i], rowd[i + 1]).
  332       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Entry j belongs to column c when its column index matches (c is
        // cast to int to match cold's element type). The action taken is not
        // visible here (presumably vecd[i] = vald[j] -- TODO confirm).
  333         if ((
int)c == cold[j]) {
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
  339     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
  342 #pragma omp parallel for 
  343     for (
size_t i = 0; i < n; i++) {
 
  346 #pragma omp parallel for 
  347     for (
size_t i = 0; i < n; i++) {
 
        // Entries of row i, as in the offload loop.
  348       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Column index equal to c marks an entry of the requested column.
  349         if ((
int)c == cold[j]) {
 
  359                                                  vector<double> &vec) 
const;
 
  361                                                 vector<float> &vec) 
const;
 
  // Column extraction: scans the stored entries of this CRS matrix for those
  // whose column index equals `c` (view1D<vector<T>, T> overload).
  // NOTE(review): this listing is a lossy extraction -- each line carries a
  // stray leading line-number token and the loop bodies, the #else/#endif
  // arms and the closing braces are not visible. The comments describe only
  // the visible statements; anything about hidden lines is marked as a guess.
  363 template <
typename T>
 
  364 void CRS<T>::col(
const size_t c, view1D<vector<T>, T> &vec)
 const {
 
        // NOTE(review): n is the COLUMN count, yet the loops below use
        // i < n as an index into the row offsets (rowd[i], rowd[i + 1]).
        // For a matrix with more columns than rows that appears to read past
        // the end of row_ptr, and with fewer columns than rows the trailing
        // rows are never scanned -- confirm whether get_row() was intended.
  368   size_t n = get_col();
 
        // Writable pointer to the destination storage.
        // NOTE(review): whether data() already accounts for the view's offset
        // cannot be told from this listing -- confirm.
  369   T *vecd = vec.data();
 
        // Raw pointers to the stored values, the per-row offsets and the
        // column indices, taken once for use inside the OpenMP loops.
  371   const T *vald = val.data();
 
  372   const int *rowd = row_ptr.data();
 
  373   const int *cold = col_ind.data();
 
        // Precondition: the view holds exactly n elements (see the note on n).
  375   assert(n == vec.size());
 
        // Precondition: matrix and view report the same device-memory status.
  376   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Offload path is selected at run time by gpu_status.
  378   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
  379 #if MONOLISH_USE_GPU // gpu 
        // First pass over all n entries; body not visible here (presumably it
        // clears the view before the scan -- TODO confirm).
  380 #pragma omp target teams distribute parallel for 
  381     for (
size_t i = 0; i < n; i++) {
 
        // Second pass: one iteration per i, used as a row index below.
  384 #pragma omp target teams distribute parallel for 
  385     for (
size_t i = 0; i < n; i++) {
 
        // Walk the stored entries of row i: j in [rowd[i], rowd[i + 1]).
  386       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Entry j belongs to column c when its column index matches. The
        // action taken is not visible here (presumably a store of vald[j]
        // through vecd -- TODO confirm).
  387         if ((
int)c == cold[j]) {
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
  393     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
  396 #pragma omp parallel for 
  397     for (
size_t i = 0; i < n; i++) {
 
  400 #pragma omp parallel for 
  401     for (
size_t i = 0; i < n; i++) {
 
        // Entries of row i, as in the offload loop.
  402       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Column index equal to c marks an entry of the requested column.
  403         if ((
int)c == cold[j]) {
 
  414                                    view1D<vector<double>, 
double> &vec) 
const;
 
  417                                   view1D<vector<float>, 
float> &vec) 
const;
 
  // Column extraction: scans the stored entries of this CRS matrix for those
  // whose column index equals `c` (view1D<matrix::Dense<T>, T> overload).
  // NOTE(review): this listing is a lossy extraction -- each line carries a
  // stray leading line-number token and the loop bodies, the #else/#endif
  // arms and the closing braces are not visible. The comments describe only
  // the visible statements; anything about hidden lines is marked as a guess.
  419 template <
typename T>
 
  420 void CRS<T>::col(
const size_t c, view1D<matrix::Dense<T>, T> &vec)
 const {
 
        // NOTE(review): n is the COLUMN count, yet the loops below use
        // i < n as an index into the row offsets (rowd[i], rowd[i + 1]).
        // For a matrix with more columns than rows that appears to read past
        // the end of row_ptr, and with fewer columns than rows the trailing
        // rows are never scanned -- confirm whether get_row() was intended.
  424   size_t n = get_col();
 
        // Writable pointer to the destination storage.
        // NOTE(review): whether data() already accounts for the view's offset
        // cannot be told from this listing -- confirm.
  425   T *vecd = vec.data();
 
        // Raw pointers to the stored values, the per-row offsets and the
        // column indices, taken once for use inside the OpenMP loops.
  427   const T *vald = val.data();
 
  428   const int *rowd = row_ptr.data();
 
  429   const int *cold = col_ind.data();
 
        // Precondition: the view holds exactly n elements (see the note on n).
  431   assert(n == vec.size());
 
        // Precondition: matrix and view report the same device-memory status.
  432   assert(get_device_mem_stat() == vec.get_device_mem_stat());
 
        // Offload path is selected at run time by gpu_status.
  434   if (gpu_status == 
true) {
 
        // Only compiled when the build enables MONOLISH_USE_GPU.
  435 #if MONOLISH_USE_GPU // gpu 
        // First pass over all n entries; body not visible here (presumably it
        // clears the destination before the scan -- TODO confirm).
  436 #pragma omp target teams distribute parallel for 
  437     for (
size_t i = 0; i < n; i++) {
 
        // Second pass: one iteration per i, used as a row index below.
  440 #pragma omp target teams distribute parallel for 
  441     for (
size_t i = 0; i < n; i++) {
 
        // Walk the stored entries of row i: j in [rowd[i], rowd[i + 1]).
  442       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Entry j belongs to column c when its column index matches. The
        // action taken is not visible here (presumably a store of vald[j]
        // through vecd -- TODO confirm).
  443         if ((
int)c == cold[j]) {
 
        // Per the message: gpu_status is true but the build has no GPU
        // support (the enclosing #else is not visible in this listing).
  449     throw std::runtime_error(
"error USE_GPU is false, but gpu_status == true");
 
        // Same two passes with host-side `omp parallel for` (the enclosing
        // else branch is not visible in this listing).
  452 #pragma omp parallel for 
  453     for (
size_t i = 0; i < n; i++) {
 
  456 #pragma omp parallel for 
  457     for (
size_t i = 0; i < n; i++) {
 
        // Entries of row i, as in the offload loop.
  458       for (
int j = rowd[i]; j < rowd[i + 1]; j++) {
 
        // Column index equal to c marks an entry of the requested column.
  459         if ((
int)c == cold[j]) {
 
  469     const size_t c, view1D<matrix::Dense<double>, 
double> &vec) 
const;
 
  471     const size_t c, view1D<matrix::Dense<float>, 
float> &vec) 
const;