42 #ifndef TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP 43 #define TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP 54 #include "Tpetra_ConfigDefs.hpp" 55 #include "Kokkos_ArithTraits.hpp" 56 #include "Kokkos_Complex.hpp" 71 namespace Experimental {
81 template<
class ViewType1,
83 const int rank1 = ViewType1::rank>
85 static void run (
const ViewType2& Y,
const ViewType1& X);
92 template<
class ViewType1,
94 struct AbsMax<ViewType1, ViewType2, 2> {
97 static void run (
const ViewType2& Y,
const ViewType1& X)
99 static_assert (ViewType1::rank == ViewType2::rank,
100 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
101 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
102 static_assert(! std::is_const<STY>::value,
103 "AbsMax: The type of each entry of Y must be nonconst.");
104 typedef typename std::decay<decltype (X(0,0)) >::type STX;
105 static_assert( std::is_same<STX, STY>::value,
106 "AbsMax: The type of each entry of X and Y must be the same.");
107 typedef Kokkos::Details::ArithTraits<STY> KAT;
109 for (
int j = 0; j < Y.dimension_1 (); ++j) {
110 for (
int i = 0; i < Y.dimension_0 (); ++i) {
112 const STX X_ij = X(i,j);
115 const auto Y_ij_abs = KAT::abs (Y_ij);
116 const auto X_ij_abs = KAT::abs (X_ij);
117 Y_ij = (Y_ij_abs >= X_ij_abs) ?
118 static_cast<STY> (Y_ij_abs) :
119 static_cast<STY
> (X_ij_abs);
129 template<
class ViewType1,
134 static void run (
const ViewType2& Y,
const ViewType1& X)
136 static_assert (ViewType1::rank == ViewType2::rank,
137 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
139 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
140 static_assert(! std::is_const<STY>::value,
141 "AbsMax: The type of each entry of Y must be nonconst.");
142 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
143 static_assert( std::is_same<STX, STY>::value,
144 "AbsMax: The type of each entry of X and Y must be the same.");
145 typedef Kokkos::Details::ArithTraits<STY> KAT;
147 for (
int i = 0; i < Y.dimension_0 (); ++i) {
149 const STX X_i = X(i);
152 const auto Y_i_abs = KAT::abs (Y_i);
153 const auto X_i_abs = KAT::abs (X_i);
154 Y_i = (Y_i_abs >= X_i_abs) ?
155 static_cast<STY> (Y_i_abs) :
156 static_cast<STY
> (X_i_abs);
167 template<
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
168 void absMax (
const ViewType2& Y,
const ViewType1& X) {
169 static_assert (ViewType1::rank == ViewType2::rank,
170 "absMax: ViewType1 and ViewType2 must have the same rank.");
178 template<
class ViewType,
179 class CoefficientType,
180 class LayoutType =
typename ViewType::array_layout,
181 class IndexType = int,
182 const int rank = ViewType::rank>
184 static void run (
const CoefficientType& alpha,
const ViewType& x);
189 template<
class ViewType,
190 class CoefficientType,
193 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
195 static void run (
const CoefficientType& alpha,
const ViewType& x)
197 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
199 for (IndexType i = 0; i < numRows; ++i) {
207 template<
class ViewType,
208 class CoefficientType,
211 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
213 static void run (
const CoefficientType& alpha,
const ViewType& A)
215 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
216 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
219 for (IndexType i = 0; i < numRows; ++i) {
220 for (IndexType j = 0; j < numCols; ++j) {
221 A(i,j) = alpha * A(i,j);
232 template<
class ViewType,
233 class CoefficientType,
235 struct SCAL<ViewType, CoefficientType,
Kokkos::LayoutRight, IndexType, 2> {
237 static void run (
const CoefficientType& alpha,
const ViewType& A)
239 const IndexType N = A.size ();
240 typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
241 scalar_type*
const A_raw = A.ptr_on_device ();
243 for (IndexType i = 0; i < N; ++i) {
244 A_raw[i] = alpha * A_raw[i];
253 template<
class CoefficientType,
256 class LayoutType1 =
typename ViewType1::array_layout,
257 class LayoutType2 =
typename ViewType2::array_layout,
258 class IndexType = int,
259 const int rank = ViewType1::rank>
262 run (
const CoefficientType& alpha,
269 template<
class CoefficientType,
275 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
278 run (
const CoefficientType& alpha,
282 static_assert (ViewType1::rank == ViewType2::rank,
283 "AXPY: x and y must have the same rank.");
284 const IndexType numRows =
static_cast<IndexType
> (y.dimension_0 ());
286 for (IndexType i = 0; i < numRows; ++i) {
287 y(i) += alpha * x(i);
295 template<
class CoefficientType,
301 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
304 run (
const CoefficientType& alpha,
308 static_assert (ViewType1::rank == ViewType2::rank,
309 "AXPY: X and Y must have the same rank.");
310 const IndexType numRows =
static_cast<IndexType
> (Y.dimension_0 ());
311 const IndexType numCols =
static_cast<IndexType
> (Y.dimension_1 ());
313 throw std::runtime_error (
"OH HAI");
316 for (IndexType i = 0; i < numRows; ++i) {
317 for (IndexType j = 0; j < numCols; ++j) {
318 Y(i,j) += alpha * X(i,j);
328 template<
class CoefficientType,
332 struct AXPY<CoefficientType, ViewType1, ViewType2,
Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
335 run (
const CoefficientType& alpha,
339 static_assert (ViewType1::rank == ViewType2::rank,
340 "AXPY: X and Y must have the same rank.");
341 typedef typename std::decay<decltype (X(0,0)) >::type SX;
342 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
344 const IndexType N =
static_cast<IndexType
> (Y.size ());
345 const SX*
const X_raw = X.ptr_on_device ();
346 SY*
const Y_raw = Y.ptr_on_device ();
349 for (IndexType i = 0; i < N; ++i) {
350 Y_raw[i] += alpha * X_raw[i];
360 template<
class ViewType1,
362 class LayoutType1 =
typename ViewType1::array_layout,
363 class LayoutType2 =
typename ViewType2::array_layout,
364 class IndexType = int,
365 const int rank = ViewType1::rank>
367 static void run (
const ViewType1& x,
const ViewType2& y);
372 template<
class ViewType1,
377 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
379 static void run (
const ViewType1& x,
const ViewType2& y)
381 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
382 for (IndexType i = 0; i < numRows; ++i) {
390 template<
class ViewType1,
395 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
397 static void run (
const ViewType1& X,
const ViewType2& Y)
399 const IndexType numRows =
static_cast<IndexType
> (Y.dimension_0 ());
400 const IndexType numCols =
static_cast<IndexType
> (Y.dimension_1 ());
402 throw std::runtime_error (
"OH HAI");
405 for (IndexType i = 0; i < numRows; ++i) {
406 for (IndexType j = 0; j < numCols; ++j) {
417 template<
class ViewType1,
420 struct COPY<ViewType1, ViewType2,
Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
422 static void run (
const ViewType1& X,
const ViewType2& Y)
424 typedef typename std::decay<decltype (X(0,0)) >::type SX;
425 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
427 const IndexType N =
static_cast<IndexType
> (Y.size ());
428 const SX*
const X_raw = X.ptr_on_device ();
429 SY*
const Y_raw = Y.ptr_on_device ();
432 for (IndexType i = 0; i < N; ++i) {
442 template<
class ViewType,
443 class CoefficientType,
444 class LayoutType =
typename ViewType::array_layout,
445 class IndexType = int,
446 const int rank = ViewType::rank>
447 void SCAL (
const CoefficientType& alpha,
const ViewType& x) {
456 template<
class CoefficientType,
459 class LayoutType1 =
typename ViewType1::array_layout,
460 class LayoutType2 =
typename ViewType2::array_layout,
461 class IndexType = int,
462 const int rank = ViewType1::rank>
464 AXPY (
const CoefficientType& alpha,
468 static_assert (ViewType1::rank == ViewType2::rank,
469 "AXPY: x and y must have the same rank.");
481 template<
class ViewType1,
483 class LayoutType1 =
typename ViewType1::array_layout,
484 class LayoutType2 =
typename ViewType2::array_layout,
485 class IndexType = int,
486 const int rank = ViewType1::rank>
487 void COPY (
const ViewType1& x,
const ViewType2& y) {
488 static_assert (ViewType1::rank == ViewType2::rank,
489 "COPY: x and y must have the same rank.");
505 template<
class LittleVectorType1,
506 class LittleBlockType,
507 class LittleVectorType2,
508 class CoefficientType,
509 class IndexType =
int>
511 GEMV (
const CoefficientType& alpha,
512 const LittleBlockType& A,
513 const LittleVectorType1& x,
514 const LittleVectorType2& y)
516 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
517 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
520 for (IndexType i = 0; i < numRows; ++i) {
521 typename std::remove_reference<decltype (y(i)) >::type y_i = y(i);
522 for (IndexType j = 0; j < numCols; ++j) {
523 y_i += alpha * A(i,j) * x(j);
538 template<
class ViewType1,
541 class CoefficientType,
542 class IndexType =
int>
546 const CoefficientType& alpha,
549 const CoefficientType& beta,
553 static_assert (ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
554 static_assert (ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
555 static_assert (ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
557 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
558 typedef Kokkos::Details::ArithTraits<Scalar> STS;
559 const Scalar
ZERO = STS::zero();
560 const Scalar ONE = STS::one();
564 if(transA[0] ==
'N' || transA[0] ==
'n') {
565 m =
static_cast<IndexType
> (A.dimension_0 ());
566 n =
static_cast<IndexType
> (A.dimension_1 ());
569 m =
static_cast<IndexType
> (A.dimension_1 ());
570 n =
static_cast<IndexType
> (A.dimension_0 ());
572 k =
static_cast<IndexType
> (C.dimension_1 ());
575 if(alpha == ZERO && beta == ONE)
581 for(IndexType i=0; i<m; i++) {
582 for(IndexType j=0; j<k; j++) {
588 for(IndexType i=0; i<m; i++) {
589 for(IndexType j=0; j<k; j++) {
590 C(i,j) = beta*C(i,j);
597 if(transB[0] ==
'n' || transB[0] ==
'N') {
598 if(transA[0] ==
'n' || transA[0] ==
'N') {
600 for(IndexType j=0; j<n; j++) {
602 for(IndexType i=0; i<m; i++) {
606 else if(beta != ONE) {
607 for(IndexType i=0; i<m; i++) {
608 C(i,j) = beta*C(i,j);
611 for(IndexType l=0; l<k; l++) {
612 Scalar temp = alpha*B(l,j);
613 for(IndexType i=0; i<m; i++) {
614 C(i,j) = C(i,j) + temp*A(i,l);
621 for(IndexType j=0; j<n; j++) {
622 for(IndexType i=0; i<m; i++) {
624 for(IndexType l=0; l<k; l++) {
625 temp = temp + A(l,i)*B(l,j);
631 C(i,j) = alpha*temp + beta*C(i,j);
638 if(transA[0] ==
'n' || transA[0] ==
'N') {
640 for(IndexType j=0; j<n; j++) {
642 for(IndexType i=0; i<m; i++) {
646 else if(beta != ONE) {
647 for(IndexType i=0; i<m; i++) {
648 C(i,j) = beta*C(i,j);
651 for(IndexType l=0; l<k; l++) {
652 Scalar temp = alpha*B(j,l);
653 for(IndexType i=0; i<m; i++) {
654 C(i,j) = C(i,j) + temp*A(i,l);
661 for(IndexType j=0; j<n; j++) {
662 for(IndexType i=0; i<m; i++) {
664 for(IndexType l=0; l<k; l++) {
665 temp = temp + A(l,i)*B(j,l);
671 C(i,j) = alpha*temp + beta*C(i,j);
680 template<
class LittleBlockType,
681 class LittleVectorType>
683 GETF2 (
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info)
686 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
687 static_assert (std::is_integral<IndexType>::value,
688 "GETF2: The type of each entry of ipiv must be an integer type.");
689 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
690 static_assert (! std::is_const<Scalar>::value,
691 "GETF2: A must not be a const View (or LittleBlock).");
692 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
693 "GETF2: ipiv must not be a const View (or LittleBlock).");
694 static_assert (LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
695 typedef Kokkos::Details::ArithTraits<Scalar> STS;
696 const Scalar
ZERO = STS::zero();
698 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
699 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
700 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
703 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
704 if (pivDim < minPivDim) {
712 for(IndexType j=0; j < pivDim; j++)
716 for(IndexType i=j+1; i<numRows; i++)
718 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
729 for(IndexType i=0; i < numCols; i++)
731 Scalar temp = A(jp,i);
738 for(IndexType i=j+1; i<numRows; i++) {
739 A(i,j) = A(i,j) / A(j,j);
747 for(IndexType r=j+1; r < numRows; r++)
749 for(IndexType c=j+1; c < numCols; c++) {
750 A(r,c) = A(r,c) - A(r,j) * A(j,c);
761 template<
class LittleBlockType,
762 class LittleIntVectorType,
763 class LittleScalarVectorType,
764 const int rank = LittleScalarVectorType::rank>
767 run (
const char mode[],
768 const LittleBlockType& A,
769 const LittleIntVectorType& ipiv,
770 const LittleScalarVectorType& B,
775 template<
class LittleBlockType,
776 class LittleIntVectorType,
777 class LittleScalarVectorType>
778 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
780 run (
const char mode[],
781 const LittleBlockType& A,
782 const LittleIntVectorType& ipiv,
783 const LittleScalarVectorType& B,
787 typedef typename std::remove_const<typename std::remove_reference<decltype (ipiv(0))>::type>::type IndexType;
790 static_assert (std::is_integral<IndexType>::value &&
791 std::is_signed<IndexType>::value,
792 "GETRS: The type of each entry of ipiv must be a signed integer.");
793 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
794 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
795 "GETRS: B must not be a const View (or LittleBlock).");
796 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
797 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
798 static_assert (LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
800 typedef Kokkos::Details::ArithTraits<Scalar> STS;
801 const Scalar
ZERO = STS::zero();
802 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
803 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
804 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
809 if (numRows != numCols) {
815 if (pivDim < numRows) {
821 if(mode[0] ==
'n' || mode[0] ==
'N') {
823 for(IndexType i=0; i<numRows; i++) {
832 for(IndexType r=1; r < numRows; r++) {
833 for(IndexType c=0; c < r; c++) {
834 B(r) = B(r) - A(r,c)*B(c);
839 for(IndexType r=numRows-1; r >= 0; r--) {
846 for(IndexType c=r+1; c < numCols; c++) {
847 B(r) = B(r) - A(r,c)*B(c);
849 B(r) = B(r) / A(r,r);
853 else if(mode[0] ==
't' || mode[0] ==
'T') {
858 else if (mode[0] ==
'c' || mode[0] ==
'C') {
871 template<
class LittleBlockType,
872 class LittleIntVectorType,
873 class LittleScalarVectorType>
874 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
876 run (
const char mode[],
877 const LittleBlockType& A,
878 const LittleIntVectorType& ipiv,
879 const LittleScalarVectorType& B,
883 typedef typename std::remove_const<typename std::remove_reference<decltype (ipiv(0)) >::type>::type IndexType;
884 static_assert (std::is_integral<IndexType>::value,
885 "GETRS: The type of each entry of ipiv must be an integer type.");
886 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
887 "GETRS: B must not be a const View (or LittleBlock).");
888 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
889 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
890 static_assert (LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
895 const IndexType numRhs = B.dimension_1 ();
898 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
899 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
913 template<
class LittleBlockType,
914 class LittleIntVectorType,
915 class LittleScalarVectorType>
917 GETRS (
const char mode[],
const LittleBlockType& A,
const LittleIntVectorType& ipiv,
const LittleScalarVectorType& B,
int& info)
937 template<
class LittleBlockType,
938 class LittleIntVectorType,
939 class LittleScalarVectorType>
942 const LittleIntVectorType& ipiv,
943 const LittleScalarVectorType& work,
947 typedef typename std::remove_const<typename std::remove_reference<decltype (ipiv(0))>::type>::type IndexType;
950 static_assert (std::is_integral<IndexType>::value &&
951 std::is_signed<IndexType>::value,
952 "GETRI: The type of each entry of ipiv must be a signed integer.");
953 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
954 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
955 "GETRI: A must not be a const View (or LittleBlock).");
956 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
957 "GETRI: work must not be a const View (or LittleBlock).");
958 static_assert (LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
959 typedef Kokkos::Details::ArithTraits<Scalar> STS;
960 const Scalar
ZERO = STS::zero();
961 const Scalar ONE = STS::one();
963 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
964 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
965 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
966 const IndexType workDim =
static_cast<IndexType
> (work.dimension_0 ());
971 if (numRows != numCols) {
977 if (pivDim < numRows) {
983 if (workDim < numRows) {
989 for(IndexType j=0; j < numRows; j++) {
995 A(j,j) = ONE / A(j,j);
998 for(IndexType r=0; r < j; r++) {
999 A(r,j) = A(r,r)*A(r,j);
1000 for(IndexType c=r+1; c < j; c++) {
1001 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1004 for(IndexType r=0; r < j; r++) {
1005 A(r,j) = -A(j,j)*A(r,j);
1010 for(IndexType j = numCols-2; j >= 0; j--) {
1012 for(IndexType r=j+1; r < numRows; r++) {
1017 for(IndexType r=0; r < numRows; r++) {
1018 for(IndexType i=j+1; i < numRows; i++) {
1019 A(r,j) = A(r,j) - work(i)*A(r,i);
1025 for(IndexType j=numRows-1; j >= 0; j--) {
1026 IndexType jp = ipiv(j)-1;
1028 for(IndexType r=0; r < numRows; r++) {
1029 Scalar temp = A(r,j);
1042 template<
class LittleBlockType,
1043 class LittleVectorTypeX,
1044 class LittleVectorTypeY,
1045 class CoefficientType,
1046 class IndexType =
int>
1048 GEMV (
const char trans,
1049 const CoefficientType& alpha,
1050 const LittleBlockType& A,
1051 const LittleVectorTypeX& x,
1052 const CoefficientType& beta,
1053 const LittleVectorTypeY& y)
1059 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1060 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
1061 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
1065 for (IndexType i = 0; i < numRows; ++i) {
1070 for (IndexType i = 0; i < numRows; ++i) {
1071 y_value_type y_i = 0.0;
1072 for (IndexType j = 0; j < numCols; ++j) {
1073 y_i += A(i,j) * x(j);
1082 for (IndexType i = 0; i < numRows; ++i) {
1087 for (IndexType i = 0; i < numRows; ++i) {
1093 for (IndexType i = 0; i < numRows; ++i) {
1094 y_value_type y_i = beta * y(i);
1095 for (IndexType j = 0; j < numCols; ++j) {
1096 y_i += alpha * A(i,j) * x(j);
1120 template<
class Scalar,
class LO =
int>
1123 typedef Scalar scalar_type;
1124 typedef typename Kokkos::Details::ArithTraits<Scalar>::val_type impl_scalar_type;
1127 typedef Kokkos::Details::ArithTraits<impl_scalar_type> STS;
1131 static const int rank = 2;
1145 A_ (reinterpret_cast<impl_scalar_type*> (A)),
1146 blockSize_ (blockSize),
1173 typename std::enable_if<
1174 ! std::is_same<Scalar, T>::value &&
1175 std::is_convertible<Scalar, T>::value &&
1176 sizeof (Scalar) ==
sizeof (T),
1177 int*>::type ignoreMe = NULL) :
1178 A_ (reinterpret_cast<impl_scalar_type*> (A)),
1179 blockSize_ (blockSize),
1201 return blockSize_ * blockSize_;
1204 template<
class IntegerType>
1205 void stride (IntegerType*
const s)
const {
1212 return reinterpret_cast<Scalar*
> (A_);
1217 return reinterpret_cast<Scalar*
> (A_);
1230 impl_scalar_type& operator() (
const LO i,
const LO j)
const {
1231 return A_[i * strideX_ + j * strideY_];
1235 impl_scalar_type*
const A_;
1236 const LO blockSize_;
1258 template<
class Scalar,
class LO =
int>
1261 typedef Scalar scalar_type;
1262 typedef typename Kokkos::Details::ArithTraits<Scalar>::val_type impl_scalar_type;
1265 typedef Kokkos::Details::ArithTraits<impl_scalar_type> STS;
1269 static const int rank = 1;
1279 A_ (reinterpret_cast<impl_scalar_type*> (A)),
1280 blockSize_ (blockSize),
1304 typename std::enable_if<
1305 ! std::is_same<Scalar, T>::value &&
1306 std::is_convertible<Scalar, T>::value &&
1307 sizeof (Scalar) ==
sizeof (T),
1308 int*>::type ignoreMe = NULL) :
1309 A_ (reinterpret_cast<impl_scalar_type*> (A)),
1310 blockSize_ (blockSize),
1316 return reinterpret_cast<Scalar*
> (A_);
1321 return reinterpret_cast<Scalar*
> (A_);
1347 template<
class IntegerType>
1362 impl_scalar_type& operator() (
const LO i)
const {
1363 return A_[i * strideX_];
1367 impl_scalar_type*
const A_;
1368 const LO blockSize_;
1385 template<
class ST1,
class ST2,
class LO>
1389 typename std::enable_if<std::is_convertible<ST1, ST2>::value && ! std::is_const<ST2>::value,
int>::type* = NULL)
1407 template<
class ST1,
class ST2,
class LO>
1411 typename std::enable_if<std::is_convertible<ST1, ST2>::value && ! std::is_const<ST2>::value,
int>::type* = NULL)
1429 template<
class ST1,
class ST2,
class LO>
1433 typename std::enable_if<std::is_convertible<ST1, ST2>::value && ! std::is_const<ST2>::value,
int>::type* = NULL)
1438 const ST2 theVal =
static_cast<ST2
> (val);
1439 for (LO j = 0; j < numCols; ++j) {
1440 for (LO i = 0; i < numRows; ++i) {
1459 template<
class ST1,
class ST2,
class LO>
1463 typename std::enable_if<std::is_convertible<ST1, ST2>::value && ! std::is_const<ST2>::value,
int>::type* = NULL)
1466 const ST2 theVal =
static_cast<ST2
> (val);
1467 for (LO i = 0; i < N; ++i) {
1475 #endif // TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
LO size() const
Number of rows times number of columns.
LO dimension_0() const
Number of rows in the block.
void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
Scalar * getRawPtr() const
Pointer to the block's entries, as Scalar*.
LO dimension_1() const
Number of columns in the block.
LO size() const
Number of entries in the vector.
void COPY(const ViewType1 &x, const ViewType2 &y)
y := x, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dimension(s)...
Kokkos::LayoutRight array_layout
Data layout (in the same sense as Kokkos::View).
static void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
Nonowning view of a square dense block in a block matrix.
static void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
static void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
static void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
void deep_copy(const LittleBlock< ST2, LO > &dst, const LittleBlock< ST1, LO > &src, typename std::enable_if< std::is_convertible< ST1, ST2 >::value &&!std::is_const< ST2 >::value, int >::type *=NULL)
Copy the LittleBlock src into the LittleBlock dst.
Kokkos::LayoutRight array_layout
Data layout (in the same sense as Kokkos::View).
static void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
LO getStride() const
Stride between consecutive entries.
void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
Computes the solution to Ax=b.
Implementation of Tpetra::Experimental::SCAL function.
void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
LittleBlock(Scalar *const A, const LO blockSize, const LO strideX, const LO strideY)
Constructor.
void GEMV(const CoefficientType &alpha, const LittleBlockType &A, const LittleVectorType1 &x, const LittleVectorType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
LittleVector(T *const A, const LO blockSize, const LO strideX, typename std::enable_if< !std::is_same< Scalar, T >::value &&std::is_convertible< Scalar, T >::value &&sizeof(Scalar)==sizeof(T), int * >::type ignoreMe=NULL)
Constructor that takes an impl_scalar_type pointer.
void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
static void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Scalar * getRawPtr() const
Pointer to the vector's entries.
static void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Nonowning view of a set of degrees of freedom corresponding to a mesh point in a block vector or mult...
static void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Scalar * ptr_on_device() const
Pointer to the vector's entries.
Replace old values with zero.
LO getBlockSize() const
The block size (number of rows, and number of columns).
Scalar * ptr_on_device() const
Pointer to the block's entries, as Scalar*.
LO getBlockSize() const
The block size (number of degrees of freedom per mesh point).
static void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
LittleBlock(T *const A, const LO blockSize, const LO strideX, const LO strideY, typename std::enable_if< !std::is_same< Scalar, T >::value &&std::is_convertible< Scalar, T >::value &&sizeof(Scalar)==sizeof(T), int * >::type ignoreMe=NULL)
Constructor that takes an impl_scalar_type pointer.
void stride(IntegerType *const s) const
Stride between consecutive entries.
Implementation of Tpetra::Experimental::COPY function.
void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
Implementation of Tpetra::Experimental::AXPY function.
LittleVector(Scalar *const A, const LO blockSize, const LO strideX)
Constructor.
LO dimension_0() const
Number of entries in the vector.