monolish  0.17.3-dev.23
MONOlithic LInear equation Solvers for Highly-parallel architecture
monolish_tensor_crs.hpp
#pragma once
#include "monolish_matrix.hpp"
#include "monolish_tensor.hpp"
#include "monolish_vector.hpp"

namespace monolish {
template <typename Float> class vector;
template <typename TYPE, typename Float> class view1D;
namespace tensor {
template <typename Float> class tensor_Dense;
template <typename Float> class tensor_COO;
template <typename Float> class tensor_CRS {
private:
  /// shape of the tensor
  std::vector<size_t> shape;

  /// true: data has been sent to the GPU, false: it has not
  mutable std::shared_ptr<bool> gpu_status = std::make_shared<bool>(false);

  /// hash of the index arrays (used to compare tensor structures)
  size_t structure_hash = 0;

  /// first position of the data array
  size_t first = 0;

public:
  /// value array of the non-zero elements
  std::shared_ptr<Float> val;

  /// # of non-zero elements
  size_t val_nnz = 0;

  /// # of allocated non-zero elements
  size_t alloc_nnz = 0;

  /// true if this instance allocated val (it owns the memory)
  bool val_create_flag = false;

  /// CRS row pointer arrays
  std::vector<std::vector<int>> row_ptrs;

  /// CRS column index arrays
  std::vector<std::vector<int>> col_inds;

  tensor_CRS() {
    val_create_flag = true;
  }

  tensor_CRS(const std::vector<size_t> &shape_)
      : shape(shape_), row_ptrs(), col_inds(), val_nnz(0) {
    val_create_flag = true;
  }

  tensor_CRS(const std::initializer_list<size_t> &shape_)
      : shape(shape_), row_ptrs(), col_inds(), val_nnz(0) {
    val_create_flag = true;
  }

  /// convert tensor_COO to tensor_CRS
  void convert(const tensor::tensor_COO<Float> &coo);

  tensor_CRS(const tensor::tensor_COO<Float> &coo) {
    val_create_flag = true;
    convert(coo);
  }

  /// convert (deep copy) tensor_CRS to tensor_CRS
  void convert(const tensor::tensor_CRS<Float> &crs);

  tensor_CRS(const tensor::tensor_CRS<Float> &crs) {
    val_create_flag = true;
    convert(crs);
  }

  /// convert matrix::CRS to tensor_CRS
  void convert(const matrix::CRS<Float> &crs);

  tensor_CRS(const matrix::CRS<Float> &crs) {
    val_create_flag = true;
    convert(crs);
  }

  /// create tensor_CRS from shape, CRS index arrays, and a raw value array
  tensor_CRS(const std::vector<size_t> &shape_,
             const std::vector<std::vector<int>> &row_ptrs_,
             const std::vector<std::vector<int>> &col_inds_,
             const Float *value);

  /// create tensor_CRS with the same structure as crs, all values set to value
  tensor_CRS(const tensor_CRS<Float> &crs, Float value);

  /// print all elements to standard output
  void print_all(bool force_cpu = false) const;

  // communication
  // ///////////////////////////////////////////////////////////////////////////

  /// send data to the GPU
  void send() const;

  /// receive data from the GPU, then free the GPU copy
  void recv();

  /// receive data from the GPU (without freeing the GPU copy)
  void nonfree_recv();

  /// free data on the GPU
  void device_free() const;

  // TODO
  /// memory space required by the value array [GB]
  [[nodiscard]] double get_data_size() const {
    return get_nnz() * sizeof(Float) / 1.0e+9;
  }

  /// set shape, CRS index arrays, and values from a std::vector
  void set_ptr(const std::vector<size_t> &shape,
               const std::vector<std::vector<int>> &rowptrs,
               const std::vector<std::vector<int>> &colinds,
               const std::vector<Float> &value);

  /// set shape, CRS index arrays, and vsize values from a raw pointer
  void set_ptr(const std::vector<size_t> &shape,
               const std::vector<std::vector<int>> &rowptrs,
               const std::vector<std::vector<int>> &colinds, const size_t vsize,
               const Float *value);

  /// set shape, CRS index arrays, and vsize copies of a scalar value
  void set_ptr(const std::vector<size_t> &shape,
               const std::vector<std::vector<int>> &rowptrs,
               const std::vector<std::vector<int>> &colinds, const size_t vsize,
               const Float value);

  /// get shape
  [[nodiscard]] std::vector<size_t> get_shape() const { return shape; }

  /// get shared_ptr of val
  [[nodiscard]] std::shared_ptr<Float> get_val() { return val; }

  /// get shared_ptr of val (const)
  [[nodiscard]] const std::shared_ptr<Float> get_val() const { return val; }

  /// get # of non-zeros
  [[nodiscard]] size_t get_nnz() const { return val_nnz; }

  /// get # of allocated non-zeros
  [[nodiscard]] size_t get_alloc_nnz() const { return alloc_nnz; }

  /// get first position
  [[nodiscard]] size_t get_first() const { return first; }

  /// get first position (same as get_first())
  [[nodiscard]] size_t get_offset() const { return get_first(); }

  /// change first position
  void set_first(size_t i) { first = i; }

  /// set shape
  void set_shape(const std::vector<size_t> &shape) { this->shape = shape; }

  /// true: data has been sent to the GPU, false: it has not
  [[nodiscard]] bool get_device_mem_stat() const { return *gpu_status; }

  /// get the GPU status shared pointer
  [[nodiscard]] std::shared_ptr<bool> get_gpu_status() const {
    return gpu_status;
  }

  ~tensor_CRS() {
    if (val_create_flag) {
      if (get_device_mem_stat()) {
        device_free();
      }
    }
  }

  /// returns a direct pointer to the value array (const)
  [[nodiscard]] const Float *data() const { return val.get(); }

  /// returns a direct pointer to the value array
  [[nodiscard]] Float *data() { return val.get(); }

  /// resize the value array to N elements (new elements are set to Val)
  void resize(const size_t N, Float Val = 0) {
    if (first + N < alloc_nnz) {
      // enough space is already allocated: extend in place
      for (size_t i = val_nnz; i < N; ++i) {
        begin()[i] = Val;
      }
      val_nnz = N;
      return;
    }
    if (get_device_mem_stat()) {
      throw std::runtime_error("Error, GPU tensor cannot use resize");
    }
    if (val_create_flag) {
      // reallocate, copy the old values, and fill the new tail with Val
      std::shared_ptr<Float> tmp(new Float[N], std::default_delete<Float[]>());
      size_t copy_size = std::min(val_nnz, N);
      for (size_t i = 0; i < copy_size; ++i) {
        tmp.get()[i] = data()[i];
      }
      for (size_t i = copy_size; i < N; ++i) {
        tmp.get()[i] = Val;
      }
      val = tmp;
      alloc_nnz = N;
      val_nnz = N;
      first = 0;
    } else {
      throw std::runtime_error(
          "Error, a tensor that does not own its data cannot use resize");
    }
  }

  /// get format name "tensor_CRS"
  [[nodiscard]] std::string type() const { return "tensor_CRS"; }

  /// compute index array hash (to compare tensor structures)
  void compute_hash();

  /// get index array hash (to compare tensor structures)
  [[nodiscard]] size_t get_hash() const { return structure_hash; }

  /// returns a begin iterator (const)
  [[nodiscard]] const Float *begin() const { return data() + get_offset(); }

  /// returns a begin iterator
  [[nodiscard]] Float *begin() { return data() + get_offset(); }

  /// returns an end iterator (const)
  [[nodiscard]] const Float *end() const {
    return data() + get_offset() + get_nnz();
  }

  /// returns an end iterator
  [[nodiscard]] Float *end() { return data() + get_offset() + get_nnz(); }

  /// fill tensor elements with a scalar value
  void fill(Float value);

  /// tensor copy
  void operator=(const tensor_CRS<Float> &mat);

  /// reference to the element at position i (v[i])
  [[nodiscard]] Float &operator[](size_t i) {
    if (get_device_mem_stat()) {
      throw std::runtime_error("Error, GPU tensor cannot use operator[]");
    }
    return data()[first + i];
  }

  /// compare tensors (A == mat)
  [[nodiscard]] bool equal(const tensor_CRS<Float> &mat,
                           bool compare_cpu_and_device = false) const;

  /// compare tensors (A == mat)
  [[nodiscard]] bool operator==(const tensor_CRS<Float> &mat) const;

  /// compare tensors (A != mat)
  [[nodiscard]] bool operator!=(const tensor_CRS<Float> &mat) const;
};

} // namespace tensor

} // namespace monolish
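
Below is a minimal usage sketch of the class above: it builds a small sparse tensor in COO format, converts it to tensor_CRS through the converting constructor, and queries the structure. It is illustrative only; the shape and values are invented, and the tensor_COO shape constructor, its insert(index, value) call, and the <monolish_blas.hpp> umbrella include are assumptions about the rest of the library rather than guarantees made by this header.

// tensor_crs_usage.cpp -- illustrative sketch, not part of monolish.
// Assumes monolish is installed; tensor_COO's shape constructor and
// insert(index, value) are assumed here and should be checked against
// the tensor_COO header.
#include <monolish_blas.hpp>

#include <iostream>

int main() {
  // Build a 2x3x3 sparse tensor in COO format (made-up entries).
  monolish::tensor::tensor_COO<double> coo({2, 3, 3});
  coo.insert({0, 0, 0}, 1.0);
  coo.insert({0, 2, 1}, 2.0);
  coo.insert({1, 1, 2}, 3.0);

  // Converting constructor: calls convert(coo) internally (see the header above).
  monolish::tensor::tensor_CRS<double> A(coo);

  std::cout << A.type() << ", nnz = " << A.get_nnz() << std::endl;
  for (const auto s : A.get_shape()) {
    std::cout << s << " ";
  }
  std::cout << std::endl;

  // The structure hash lets the index arrays of two tensors be compared cheaply.
  A.compute_hash();
  std::cout << "structure hash = " << A.get_hash() << std::endl;

  A.fill(1.0);   // overwrite every stored non-zero with 1.0
  A.print_all(); // print all elements

  // Only meaningful when monolish is built with GPU support:
  // send the value array to the device, then bring it back and free it.
  A.send();
  A.recv();

  return 0;
}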