1 #ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
46 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
51 ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
52 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(mat.
handle());
53 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle1());
54 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle2());
59 unsigned int row_end = row_buffer[
row+1];
61 switch (info_selector)
64 for (
unsigned int i = row_buffer[
row]; i < row_end; ++i)
65 value = std::max<ScalarType>(value, std::fabs(elements[i]));
69 for (
unsigned int i = row_buffer[
row]; i < row_end; ++i)
70 value += std::fabs(elements[i]);
74 for (
unsigned int i = row_buffer[
row]; i < row_end; ++i)
75 value += elements[i] * elements[i];
76 value = std::sqrt(value);
80 for (
unsigned int i = row_buffer[
row]; i < row_end; ++i)
82 if (col_buffer[i] ==
row)
93 result_buf[
row] = value;
107 template<
class ScalarType,
unsigned int ALIGNMENT>
112 ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.
handle());
113 ScalarType
const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
114 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(mat.
handle());
115 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle1());
116 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle2());
118 #ifdef VIENNACL_WITH_OPENMP
119 #pragma omp parallel for
121 for (
long row = 0; row < static_cast<long>(mat.
size1()); ++
row)
126 dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.
stride() + vec.
start()];
140 template<
class ScalarType,
typename NumericT,
unsigned int ALIGNMENT,
typename F1,
typename F2>
145 ScalarType
const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.
handle());
146 unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle1());
147 unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle2());
149 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
150 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
167 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
169 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
172 #ifdef VIENNACL_WITH_OPENMP
173 #pragma omp parallel for
175 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row) {
180 for (
vcl_size_t k = row_start; k < row_end; ++k) {
181 temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
183 result_wrapper(
row, col) = temp;
188 #ifdef VIENNACL_WITH_OPENMP
189 #pragma omp parallel for
191 for (
long col = 0; col < static_cast<long>(d_mat.
size2()); ++col) {
192 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row) {
196 for (
vcl_size_t k = row_start; k < row_end; ++k) {
197 temp += sp_mat_elements[k] * d_mat_wrapper(sp_mat_col_buffer[k], col);
199 result_wrapper(
row, col) = temp;
215 template<
class ScalarType,
typename NumericT,
unsigned int ALIGNMENT,
typename F1,
typename F2>
222 ScalarType
const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.
handle());
223 unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle1());
224 unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle2());
226 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
227 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
244 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
246 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
249 #ifdef VIENNACL_WITH_OPENMP
250 #pragma omp parallel for
252 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row) {
255 for (
vcl_size_t col = 0; col < d_mat.size2(); ++col) {
257 for (
vcl_size_t k = row_start; k < row_end; ++k) {
258 temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
260 result_wrapper(
row, col) = temp;
265 #ifdef VIENNACL_WITH_OPENMP
266 #pragma omp parallel for
268 for (
long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
273 for (
vcl_size_t k = row_start; k < row_end; ++k) {
274 temp += sp_mat_elements[k] * d_mat_wrapper(col, sp_mat_col_buffer[k]);
276 result_wrapper(
row, col) = temp;
289 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
291 SizeTypeArray
const & col_buffer,
292 ConstScalarTypeArray
const & element_buffer,
293 ScalarTypeArray & vec_buffer,
300 NumericT vec_entry = vec_buffer[
row];
302 for (
vcl_size_t i = row_begin; i < row_end; ++i)
306 vec_entry -= vec_buffer[col_index] * element_buffer[i];
308 vec_buffer[
row] = vec_entry;
313 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
315 SizeTypeArray
const & col_buffer,
316 ConstScalarTypeArray
const & element_buffer,
317 ScalarTypeArray & vec_buffer,
324 NumericT vec_entry = vec_buffer[
row];
328 NumericT diagonal_entry = 0;
329 for (
vcl_size_t i = row_begin; i < row_end; ++i)
333 vec_entry -= vec_buffer[col_index] * element_buffer[i];
334 else if (col_index ==
row)
335 diagonal_entry = element_buffer[i];
338 vec_buffer[
row] = vec_entry / diagonal_entry;
344 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
346 SizeTypeArray
const & col_buffer,
347 ConstScalarTypeArray
const & element_buffer,
348 ScalarTypeArray & vec_buffer,
352 for (
vcl_size_t row2 = 1; row2 < num_cols; ++row2)
355 NumericT vec_entry = vec_buffer[
row];
358 for (
vcl_size_t i = row_begin; i < row_end; ++i)
362 vec_entry -= vec_buffer[col_index] * element_buffer[i];
364 vec_buffer[
row] = vec_entry;
368 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
370 SizeTypeArray
const & col_buffer,
371 ConstScalarTypeArray
const & element_buffer,
372 ScalarTypeArray & vec_buffer,
376 for (
vcl_size_t row2 = 0; row2 < num_cols; ++row2)
379 NumericT vec_entry = vec_buffer[
row];
384 NumericT diagonal_entry = 0;
385 for (
vcl_size_t i = row_begin; i < row_end; ++i)
389 vec_entry -= vec_buffer[col_index] * element_buffer[i];
390 else if (col_index == row)
391 diagonal_entry = element_buffer[i];
394 vec_buffer[
row] = vec_entry / diagonal_entry;
408 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
413 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
414 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(L.
handle());
415 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
416 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
418 detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.
size2(), tag);
427 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
432 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
433 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(L.
handle());
434 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle1());
435 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.
handle2());
437 detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, L.
size2(), tag);
447 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
452 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
453 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(U.
handle());
454 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle1());
455 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle2());
457 detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.
size2(), tag);
466 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
471 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
472 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(U.
handle());
473 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle1());
474 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.
handle2());
476 detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, U.
size2(), tag);
491 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
493 SizeTypeArray
const & col_buffer,
494 ConstScalarTypeArray
const & element_buffer,
495 ScalarTypeArray & vec_buffer,
500 for (
vcl_size_t col = 0; col < num_cols; ++col)
502 NumericT vec_entry = vec_buffer[col];
504 for (
vcl_size_t i = col_begin; i < col_end; ++i)
506 unsigned int row_index = col_buffer[i];
508 vec_buffer[row_index] -= vec_entry * element_buffer[i];
514 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
516 SizeTypeArray
const & col_buffer,
517 ConstScalarTypeArray
const & element_buffer,
518 ScalarTypeArray & vec_buffer,
523 for (
vcl_size_t col = 0; col < num_cols; ++col)
528 NumericT diagonal_entry = 0;
529 for (
vcl_size_t i = col_begin; i < col_end; ++i)
532 if (row_index == col)
534 diagonal_entry = element_buffer[i];
540 NumericT vec_entry = vec_buffer[col] / diagonal_entry;
541 vec_buffer[col] = vec_entry;
542 for (
vcl_size_t i = col_begin; i < col_end; ++i)
546 vec_buffer[row_index] -= vec_entry * element_buffer[i];
552 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
554 SizeTypeArray
const & col_buffer,
555 ConstScalarTypeArray
const & element_buffer,
556 ScalarTypeArray & vec_buffer,
560 for (
vcl_size_t col2 = 0; col2 < num_cols; ++col2)
564 NumericT vec_entry = vec_buffer[col];
567 for (
vcl_size_t i = col_begin; i < col_end; ++i)
571 vec_buffer[row_index] -= vec_entry * element_buffer[i];
577 template <
typename NumericT,
typename ConstScalarTypeArray,
typename ScalarTypeArray,
typename SizeTypeArray>
579 SizeTypeArray
const & col_buffer,
580 ConstScalarTypeArray
const & element_buffer,
581 ScalarTypeArray & vec_buffer,
585 for (
vcl_size_t col2 = 0; col2 < num_cols; ++col2)
592 NumericT diagonal_entry = 0;
593 for (
vcl_size_t i = col_begin; i < col_end; ++i)
596 if (row_index == col)
598 diagonal_entry = element_buffer[i];
604 NumericT vec_entry = vec_buffer[col] / diagonal_entry;
605 vec_buffer[col] = vec_entry;
606 for (
vcl_size_t i = col_begin; i < col_end; ++i)
610 vec_buffer[row_index] -= vec_entry * element_buffer[i];
619 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
630 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
631 unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
632 ScalarType
const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
633 ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.
handle());
636 for (
vcl_size_t col = 0; col < L.lhs().size1(); ++col)
638 ScalarType vec_entry = vec_buffer[col];
640 for (
vcl_size_t i = col_begin; i < col_end; ++i)
642 unsigned int row_index = col_buffer[i];
644 vec_buffer[row_index] -= vec_entry * elements[i];
650 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
661 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
662 unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
663 ScalarType
const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L.lhs().handle());
664 ScalarType
const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(L_diagonal.
handle());
665 ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.
handle());
668 for (
vcl_size_t col = 0; col < L.lhs().size1(); ++col)
672 ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
673 vec_buffer[col] = vec_entry;
674 for (
vcl_size_t i = col_begin; i < col_end; ++i)
678 vec_buffer[row_index] -= vec_entry * elements[i];
686 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
697 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
698 unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
699 ScalarType
const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
700 ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.
handle());
702 for (
vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
704 vcl_size_t col = (U.lhs().size1() - col2) - 1;
706 ScalarType vec_entry = vec_buffer[col];
709 for (
vcl_size_t i = col_begin; i < col_end; ++i)
713 vec_buffer[row_index] -= vec_entry * elements[i];
719 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
730 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
731 unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
732 ScalarType
const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U.lhs().handle());
733 ScalarType
const * diagonal_buffer = detail::extract_raw_pointer<ScalarType>(U_diagonal.
handle());
734 ScalarType * vec_buffer = detail::extract_raw_pointer<ScalarType>(vec.
handle());
736 for (
vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)
738 vcl_size_t col = (U.lhs().size1() - col2) - 1;
743 ScalarType vec_entry = vec_buffer[col] / diagonal_buffer[col];
744 vec_buffer[col] = vec_entry;
745 for (
vcl_size_t i = col_begin; i < col_end; ++i)
749 vec_buffer[row_index] -= vec_entry * elements[i];
763 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
770 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
771 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
772 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
773 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());
775 detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
784 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
791 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
792 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
793 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
794 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());
796 detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
806 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
813 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
814 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
815 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
816 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());
818 detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
828 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
835 ScalarType * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
836 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(proxy.lhs().handle());
837 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
838 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());
840 detail::csr_trans_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
857 template<
class ScalarType>
862 ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.
handle());
863 ScalarType
const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
864 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(mat.
handle());
865 unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle1());
866 unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.
handle3());
867 unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle2());
871 #ifdef VIENNACL_WITH_OPENMP
872 #pragma omp parallel for
874 for (
long i = 0; i < static_cast<long>(mat.
nnz1()); ++i)
878 for (
vcl_size_t j = row_buffer[i]; j < row_end; ++j)
879 dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.
stride() + vec.
start()];
893 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT>
898 ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
899 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(mat.
handle());
900 unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle12());
902 ScalarType value = 0;
903 unsigned int last_row = 0;
907 unsigned int current_row = coord_buffer[2*i];
909 if (current_row != last_row)
912 value = std::sqrt(value);
914 result_buf[last_row] = value;
916 last_row = current_row;
919 switch (info_selector)
922 value = std::max<ScalarType>(value, std::fabs(elements[i]));
926 value += std::fabs(elements[i]);
930 value += elements[i] * elements[i];
934 if (coord_buffer[2*i+1] == current_row)
944 value = std::sqrt(value);
946 result_buf[last_row] = value;
958 template<
class ScalarType,
unsigned int ALIGNMENT>
963 ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.
handle());
964 ScalarType
const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
965 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(mat.
handle());
966 unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle12());
969 result_buf[i * result.
stride() + result.
start()] = 0;
972 result_buf[coord_buffer[2*i] * result.
stride() + result.
start()]
973 += elements[i] * vec_buf[coord_buffer[2*i+1] * vec.
stride() + vec.
start()];
984 template<
class ScalarType,
unsigned int ALIGNMENT,
class NumericT,
typename F1,
typename F2>
989 ScalarType
const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.
handle());
990 unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle12());
992 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
993 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1010 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1012 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1016 #ifdef VIENNACL_WITH_OPENMP
1017 #pragma omp parallel for
1019 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row)
1021 result_wrapper(
row, col) = (NumericT)0;
1023 #ifdef VIENNACL_WITH_OPENMP
1024 #pragma omp parallel for
1026 for (
long i = 0; i < static_cast<long>(sp_mat.
nnz()); ++i) {
1027 NumericT x =
static_cast<NumericT
>(sp_mat_elements[i]);
1028 unsigned int r = sp_mat_coords[2*i];
1029 unsigned int c = sp_mat_coords[2*i+1];
1031 NumericT y = d_mat_wrapper( c, col);
1032 result_wrapper(r, col) += x * y;
1039 #ifdef VIENNACL_WITH_OPENMP
1040 #pragma omp parallel for
1042 for (
long col = 0; col < static_cast<long>(d_mat.
size2()); ++col)
1044 result_wrapper(
row, col) = (NumericT)0;
1046 #ifdef VIENNACL_WITH_OPENMP
1047 #pragma omp parallel for
1049 for (
long col = 0; col < static_cast<long>(d_mat.
size2()); ++col) {
1053 NumericT x =
static_cast<NumericT
>(sp_mat_elements[i]);
1054 unsigned int r = sp_mat_coords[2*i];
1055 unsigned int c = sp_mat_coords[2*i+1];
1056 NumericT y = d_mat_wrapper( c, col);
1058 result_wrapper( r, col) += x*y;
1075 template<
class ScalarType,
unsigned int ALIGNMENT,
class NumericT,
typename F1,
typename F2>
1082 ScalarType
const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.
handle());
1083 unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle12());
1085 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
1086 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1103 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1105 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1109 #ifdef VIENNACL_WITH_OPENMP
1110 #pragma omp parallel for
1112 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row)
1113 for (
vcl_size_t col = 0; col < d_mat.size2(); ++col)
1114 result_wrapper(
row, col) = (NumericT)0;
1118 #ifdef VIENNACL_WITH_OPENMP
1119 #pragma omp parallel for
1121 for (
long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
1123 result_wrapper(
row, col) = (NumericT)0;
1126 #ifdef VIENNACL_WITH_OPENMP
1127 #pragma omp parallel for
1129 for (
long i = 0; i < static_cast<long>(sp_mat.
nnz()); ++i) {
1130 NumericT x =
static_cast<NumericT
>(sp_mat_elements[i]);
1131 unsigned int r = sp_mat_coords[2*i];
1132 unsigned int c = sp_mat_coords[2*i+1];
1133 for (
vcl_size_t col = 0; col < d_mat.size2(); ++col) {
1134 NumericT y = d_mat_wrapper( col, c);
1135 result_wrapper(r, col) += x * y;
1151 template<
class ScalarType,
unsigned int ALIGNMENT>
1156 ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.
handle());
1157 ScalarType
const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
1158 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(mat.
handle());
1159 unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.
handle2());
1165 for(
unsigned int item_id = 0; item_id < mat.
internal_maxnnz(); ++item_id)
1168 ScalarType val = elements[offset];
1172 unsigned int col = coords[offset];
1173 sum += (vec_buf[col * vec.
stride() + vec.
start()] * val);
1189 template<
class ScalarType,
typename NumericT,
unsigned int ALIGNMENT,
typename F1,
typename F2>
1194 ScalarType
const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.
handle());
1195 unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle2());
1197 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
1198 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1215 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1217 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1220 #ifdef VIENNACL_WITH_OPENMP
1221 #pragma omp parallel for
1223 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row)
1225 result_wrapper(
row, col) = (NumericT)0;
1227 #ifdef VIENNACL_WITH_OPENMP
1228 #pragma omp parallel for
1230 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row)
1232 for (
long item_id = 0; item_id < static_cast<long>(sp_mat.
maxnnz()); ++item_id)
1235 NumericT sp_mat_val =
static_cast<NumericT
>(sp_mat_elements[offset]);
1236 unsigned int sp_mat_col = sp_mat_coords[offset];
1238 if( sp_mat_val != 0)
1241 result_wrapper(static_cast<vcl_size_t>(
row), col) += sp_mat_val * d_mat_wrapper( sp_mat_col, col);
1247 #ifdef VIENNACL_WITH_OPENMP
1248 #pragma omp parallel for
1250 for (
long col = 0; col < static_cast<long>(d_mat.
size2()); ++col)
1251 for (
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row)
1252 result_wrapper(
row, col) = (NumericT)0;
1254 #ifdef VIENNACL_WITH_OPENMP
1255 #pragma omp parallel for
1257 for (
long col = 0; col < static_cast<long>(d_mat.
size2()); ++col) {
1259 for(
unsigned int item_id = 0; item_id < sp_mat.
maxnnz(); ++item_id) {
1264 NumericT sp_mat_val =
static_cast<NumericT
>(sp_mat_elements[offset]);
1265 unsigned int sp_mat_col = sp_mat_coords[offset];
1267 if( sp_mat_val != 0) {
1269 result_wrapper(
row, col) += sp_mat_val * d_mat_wrapper( sp_mat_col, col);
1287 template<
class ScalarType,
typename NumericT,
unsigned int ALIGNMENT,
typename F1,
typename F2>
1294 ScalarType
const * sp_mat_elements = detail::extract_raw_pointer<ScalarType>(sp_mat.
handle());
1295 unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.
handle2());
1297 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
1298 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1315 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1317 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1320 #ifdef VIENNACL_WITH_OPENMP
1321 #pragma omp parallel for
1323 for(
long row = 0; row < static_cast<long>(sp_mat.
size1()); ++
row)
1324 for (
vcl_size_t col = 0; col < d_mat.size2(); ++col)
1325 result_wrapper(
row, col) = (NumericT)0;
1327 for (
vcl_size_t col = 0; col < d_mat.size2(); ++col) {
1329 for(
unsigned int item_id = 0; item_id < sp_mat.
maxnnz(); ++item_id) {
1334 NumericT sp_mat_val =
static_cast<NumericT
>(sp_mat_elements[offset]);
1335 unsigned int sp_mat_col = sp_mat_coords[offset];
1337 if( sp_mat_val != 0) {
1339 result_wrapper(
row, col) += sp_mat_val * d_mat_wrapper( col, sp_mat_col);
1346 #ifdef VIENNACL_WITH_OPENMP
1347 #pragma omp parallel for
1349 for (
long col = 0; col < static_cast<long>(d_mat.size2()); ++col)
1351 result_wrapper(
row, col) = (NumericT)0;
1353 #ifdef VIENNACL_WITH_OPENMP
1354 #pragma omp parallel for
1356 for(
long item_id = 0; item_id < static_cast<long>(sp_mat.
maxnnz()); ++item_id) {
1361 NumericT sp_mat_val =
static_cast<NumericT
>(sp_mat_elements[offset]);
1362 unsigned int sp_mat_col = sp_mat_coords[offset];
1364 if( sp_mat_val != 0) {
1366 for (
vcl_size_t col = 0; col < d_mat.size2(); ++col) {
1368 result_wrapper(
row, col) += sp_mat_val * d_mat_wrapper( col, sp_mat_col);
1388 template<
class ScalarType,
unsigned int ALIGNMENT>
1393 ScalarType * result_buf = detail::extract_raw_pointer<ScalarType>(result.
handle());
1394 ScalarType
const * vec_buf = detail::extract_raw_pointer<ScalarType>(vec.
handle());
1395 ScalarType
const * elements = detail::extract_raw_pointer<ScalarType>(mat.
handle());
1396 unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.
handle2());
1397 ScalarType
const * csr_elements = detail::extract_raw_pointer<ScalarType>(mat.
handle5());
1398 unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle3());
1399 unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle4());
1409 for(
unsigned int item_id = 0; item_id < mat.
internal_ellnnz(); ++item_id)
1412 ScalarType val = elements[offset];
1416 unsigned int col = coords[offset];
1417 sum += (vec_buf[col * vec.
stride() + vec.
start()] * val);
1427 for(
vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
1429 sum += (vec_buf[csr_col_buffer[item_id] * vec.
stride() + vec.
start()] * csr_elements[item_id]);
1448 template<
typename NumericT,
unsigned int ALIGNMENT,
typename F1,
typename F2>
1453 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
1454 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1471 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1473 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1475 NumericT
const * elements = detail::extract_raw_pointer<NumericT>(mat.
handle());
1476 unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.
handle2());
1477 NumericT
const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.
handle5());
1478 unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle3());
1479 unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle4());
1482 for (
vcl_size_t result_col = 0; result_col < result.
size2(); ++result_col)
1491 for(
unsigned int item_id = 0; item_id < mat.
internal_ellnnz(); ++item_id)
1494 NumericT val = elements[offset];
1498 unsigned int col = coords[offset];
1499 sum += d_mat_wrapper(col, result_col) * val;
1509 for(
vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
1510 sum += d_mat_wrapper(csr_col_buffer[item_id], result_col) * csr_elements[item_id];
1512 result_wrapper(
row, result_col) = sum;
1526 template<
typename NumericT,
unsigned int ALIGNMENT,
typename F1,
typename F2>
1533 NumericT
const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
1534 NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);
1551 d_mat_wrapper(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);
1553 result_wrapper(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
1555 NumericT
const * elements = detail::extract_raw_pointer<NumericT>(mat.
handle());
1556 unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.
handle2());
1557 NumericT
const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.
handle5());
1558 unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle3());
1559 unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.
handle4());
1562 for (
vcl_size_t result_col = 0; result_col < result.
size2(); ++result_col)
1571 for(
unsigned int item_id = 0; item_id < mat.
internal_ellnnz(); ++item_id)
1574 NumericT val = elements[offset];
1578 unsigned int col = coords[offset];
1579 sum += d_mat_wrapper(result_col, col) * val;
1589 for(
vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
1590 sum += d_mat_wrapper(result_col, csr_col_buffer[item_id]) * csr_elements[item_id];
1592 result_wrapper(
row, result_col) = sum;
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:321
void vector_assign(vector_base< T > &vec1, const T &alpha, bool up_to_internal_size=false)
Assign a constant value to a vector (-range/-slice)
Definition: vector_operations.hpp:253
vcl_size_t size1() const
Definition: hyb_matrix.hpp:74
bool is_row_major(viennacl::row_major_tag)
Definition: common.hpp:73
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Definition: compressed_compressed_matrix.hpp:452
std::size_t vcl_size_t
Definition: forwards.h:58
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
Definition: compressed_compressed_matrix.hpp:456
const handle_type & handle3() const
Definition: hyb_matrix.hpp:83
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector.hpp:859
result_of::size_type< matrix_base< NumericT, F > >::type stride2(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:68
const handle_type & handle4() const
Definition: hyb_matrix.hpp:84
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Definition: compressed_compressed_matrix.hpp:450
A tag class representing a lower triangular matrix.
Definition: forwards.h:703
vcl_size_t size1() const
Definition: ell_matrix.hpp:80
vcl_size_t internal_ellnnz() const
Definition: hyb_matrix.hpp:77
A dense matrix class.
Definition: forwards.h:290
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:283
vcl_size_t size1() const
Returns the number of rows.
Definition: coordinate_matrix.hpp:343
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:64
vcl_size_t internal_maxnnz() const
Definition: ell_matrix.hpp:83
size_type start() const
Returns the offset within the buffer.
Definition: vector.hpp:867
void prod_impl(const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:737
const handle_type & handle2() const
Definition: hyb_matrix.hpp:82
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Definition: compressed_matrix.hpp:701
vcl_size_t internal_size1() const
Definition: hyb_matrix.hpp:71
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
const vcl_size_t & size2() const
Returns the number of columns.
Definition: compressed_matrix.hpp:694
void inplace_solve(const matrix_base< NumericT, F1 > &A, matrix_base< NumericT, F2 > &B, SOLVERTAG)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
Definition: direct_solve.hpp:130
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
Definition: coordinate_matrix.hpp:354
const handle_type & handle() const
Definition: hyb_matrix.hpp:81
result_of::size_type< matrix_base< NumericT, F > >::type stride1(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:57
const vcl_size_t & nnz1() const
Returns the number of nonzero entries.
Definition: compressed_compressed_matrix.hpp:445
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:83
Helper array for accessing a strided submatrix embedded in a larger matrix.
Definition: common.hpp:100
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
A tag class representing an upper triangular matrix.
Definition: forwards.h:708
Definition: forwards.h:693
handle_type & handle()
Definition: ell_matrix.hpp:89
A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows c...
Definition: compressed_compressed_matrix.hpp:263
const handle_type & handle3() const
Returns the OpenCL handle to the row index array.
Definition: compressed_compressed_matrix.hpp:454
Definition: forwards.h:694
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
Definition: coordinate_matrix.hpp:352
const vcl_size_t & size1() const
Returns the number of rows.
Definition: compressed_matrix.hpp:692
vcl_size_t internal_size2(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:910
void block_inplace_solve(const matrix_expression< const compressed_matrix< ScalarType, MAT_ALIGNMENT >, const compressed_matrix< ScalarType, MAT_ALIGNMENT >, op_trans > &L, viennacl::backend::mem_handle const &, vcl_size_t, vector_base< ScalarType > const &, vector_base< ScalarType > &vec, viennacl::linalg::unit_lower_tag)
Definition: sparse_matrix_operations.hpp:620
void dot_prod(const MatrixType &A, unsigned int beg_ind, ScalarType &res)
Dot prod of particular column of martix A with it's self starting at a certain index beg_ind...
Definition: qr.hpp:154
row_info_types
Definition: forwards.h:691
vcl_size_t maxnnz() const
Definition: ell_matrix.hpp:84
Common routines for single-threaded or OpenMP-enabled execution on CPU.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
handle_type & handle2()
Definition: ell_matrix.hpp:92
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:713
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:62
A tag class representing transposed matrices.
Definition: forwards.h:165
void csr_trans_inplace_solve(SizeTypeArray const &row_buffer, SizeTypeArray const &col_buffer, ConstScalarTypeArray const &element_buffer, ScalarTypeArray &vec_buffer, vcl_size_t num_cols, viennacl::linalg::unit_lower_tag)
Definition: sparse_matrix_operations.hpp:492
vcl_size_t internal_size1(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
const handle_type & handle() const
Returns the memory handle.
Definition: vector.hpp:878
void csr_inplace_solve(SizeTypeArray const &row_buffer, SizeTypeArray const &col_buffer, ConstScalarTypeArray const &element_buffer, ScalarTypeArray &vec_buffer, vcl_size_t num_cols, viennacl::linalg::unit_lower_tag)
Definition: sparse_matrix_operations.hpp:290
const handle_type & handle5() const
Definition: hyb_matrix.hpp:85
Implementation of the ViennaCL scalar class.
vcl_size_t internal_size1() const
Definition: ell_matrix.hpp:77
size_type stride() const
Returns the stride within the buffer (in multiples of sizeof(SCALARTYPE))
Definition: vector.hpp:871
void row_info(compressed_matrix< ScalarType, MAT_ALIGNMENT > const &mat, vector_base< ScalarType > &vec, viennacl::linalg::detail::row_info_types info_selector)
Definition: sparse_matrix_operations.hpp:47
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:718
Definition: forwards.h:695
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
Definition: coordinate_matrix.hpp:186
vcl_size_t nnz() const
Returns the number of nonzero entries.
Definition: coordinate_matrix.hpp:347
Definition: forwards.h:696
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
Definition: compressed_matrix.hpp:703
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Definition: compressed_matrix.hpp:699
Implementations of vector operations using a plain single-threaded or OpenMP-enabled execution on CPU...