59 #ifndef _TEUCHOS_BLAS_HPP_ 60 #define _TEUCHOS_BLAS_HPP_ 73 #include "Teuchos_Assert.hpp" 109 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char ESideChar[];
110 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char ETranspChar[];
111 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char EUploChar[];
112 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char EDiagChar[];
113 extern TEUCHOSNUMERICS_LIB_DLL_EXPORT
const char ETypeChar[];
117 template<
typename ScalarType,
128 template<
typename OrdinalType,
typename ScalarType>
154 typedef typename details::GivensRotator<ScalarType>::c_type
rotg_c_type;
157 void ROTG(ScalarType* da, ScalarType* db, rotg_c_type* c, ScalarType* s)
const;
160 void ROT(
const OrdinalType n, ScalarType* dx,
const OrdinalType incx, ScalarType* dy,
const OrdinalType incy, MagnitudeType* c, ScalarType* s)
const;
163 void SCAL(
const OrdinalType n,
const ScalarType alpha, ScalarType* x,
const OrdinalType incx)
const;
166 void COPY(
const OrdinalType n,
const ScalarType* x,
const OrdinalType incx, ScalarType* y,
const OrdinalType incy)
const;
169 template <
typename alpha_type,
typename x_type>
170 void AXPY(
const OrdinalType n,
const alpha_type alpha,
const x_type* x,
const OrdinalType incx, ScalarType* y,
const OrdinalType incy)
const;
176 template <
typename x_type,
typename y_type>
177 ScalarType DOT(
const OrdinalType n,
const x_type* x,
const OrdinalType incx,
const y_type* y,
const OrdinalType incy)
const;
183 OrdinalType IAMAX(
const OrdinalType n,
const ScalarType* x,
const OrdinalType incx)
const;
190 template <
typename alpha_type,
typename A_type,
typename x_type,
typename beta_type>
191 void GEMV(
ETransp trans,
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const A_type* A,
192 const OrdinalType lda,
const x_type* x,
const OrdinalType incx,
const beta_type beta, ScalarType* y,
const OrdinalType incy)
const;
195 template <
typename A_type>
196 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const OrdinalType n,
const A_type* A,
197 const OrdinalType lda, ScalarType* x,
const OrdinalType incx)
const;
201 template <
typename alpha_type,
typename x_type,
typename y_type>
202 void GER(
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const x_type* x,
const OrdinalType incx,
203 const y_type* y,
const OrdinalType incy, ScalarType* A,
const OrdinalType lda)
const;
215 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
216 void GEMM(
ETransp transa,
ETransp transb,
const OrdinalType m,
const OrdinalType n,
const OrdinalType k,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda,
const B_type* B,
const OrdinalType ldb,
const beta_type beta, ScalarType* C,
const OrdinalType ldc)
const;
220 SWAP (
const OrdinalType n, ScalarType*
const x,
const OrdinalType incx,
221 ScalarType*
const y,
const OrdinalType incy)
const;
224 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
225 void SYMM(
ESide side,
EUplo uplo,
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda,
const B_type* B,
const OrdinalType ldb,
const beta_type beta, ScalarType* C,
const OrdinalType ldc)
const;
228 template <
typename alpha_type,
typename A_type,
typename beta_type>
229 void SYRK(
EUplo uplo,
ETransp trans,
const OrdinalType n,
const OrdinalType k,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda,
const beta_type beta, ScalarType* C,
const OrdinalType ldc)
const;
232 template <
typename alpha_type,
typename A_type>
234 const alpha_type alpha,
const A_type* A,
const OrdinalType lda, ScalarType* B,
const OrdinalType ldb)
const;
237 template <
typename alpha_type,
typename A_type>
239 const alpha_type alpha,
const A_type* A,
const OrdinalType lda, ScalarType* B,
const OrdinalType ldb)
const;
243 template<
typename OrdinalType,
typename ScalarType>
278 template<
typename ScalarType,
bool isComplex>
286 template<
typename ScalarType>
287 class MagValue<ScalarType, true> {
294 template<
typename ScalarType>
295 class MagValue<ScalarType, false> {
298 blas_dabs1(
const ScalarType* a, ScalarType* ret)
const;
301 template<
typename ScalarType,
bool isComplex>
302 class GivensRotator {};
305 template<
typename ScalarType>
306 class GivensRotator<ScalarType, true> {
310 ROTG (ScalarType* ca,
313 ScalarType* s)
const;
317 template<
typename ScalarType>
318 class GivensRotator<ScalarType, false> {
320 typedef ScalarType c_type;
322 ROTG (ScalarType* da,
325 ScalarType* s)
const;
339 ScalarType SIGN (ScalarType x, ScalarType y)
const {
342 if (y > STS::zero()) {
343 return STS::magnitude (x);
344 }
else if (y < STS::zero()) {
345 return -STS::magnitude (x);
358 ScalarType signedInfinity = STS::one() / y;
359 if (signedInfinity > STS::zero()) {
360 return STS::magnitude (x);
365 return -STS::magnitude (x);
372 template<
typename ScalarType>
374 GivensRotator<ScalarType, true>::
375 ROTG (ScalarType* ca,
381 typedef typename STS::magnitudeType MagnitudeType;
401 MagnitudeType norm, scale;
403 if (STS::magnitude (*ca) == STM::zero()) {
408 scale = STS::magnitude (*ca) + STS::magnitude (*cb);
412 const MagnitudeType ca_scaled =
413 STS::magnitude (*ca / ScalarType(scale, STM::zero()));
414 const MagnitudeType cb_scaled =
415 STS::magnitude (*cb / ScalarType(scale, STM::zero()));
417 STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
419 alpha = *ca / STS::magnitude (*ca);
420 *c = STS::magnitude (*ca) / norm;
421 *s = alpha * STS::conjugate (*cb) / norm;
427 template<
typename ScalarType>
429 GivensRotator<ScalarType, false>::
430 ROTG (ScalarType* da,
458 ScalarType r, roe, scale, z;
461 if (STS::magnitude (*da) > STS::magnitude (*db)) {
464 scale = STS::magnitude (*da) + STS::magnitude (*db);
465 if (scale == STS::zero()) {
474 const ScalarType da_scaled = *da / scale;
475 const ScalarType db_scaled = *db / scale;
476 r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
477 r = SIGN (STS::one(), roe) * r;
481 if (STS::magnitude (*da) > STS::magnitude (*db)) {
484 if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
494 template<
typename ScalarType>
496 MagValue<ScalarType, false>::
497 blas_dabs1(
const ScalarType* a, ScalarType* ret)
const 503 template<
typename ScalarType>
505 MagValue<ScalarType, true>::
514 template<
typename OrdinalType,
typename ScalarType>
522 details::GivensRotator<ScalarType> rotator;
523 rotator.ROTG (da, db, c, s);
526 template<
typename OrdinalType,
typename ScalarType>
532 if (incx==1 && incy==1) {
533 for(OrdinalType i=0; i<n; ++i) {
534 ScalarType temp = *c*dx[i] + sconj*dy[i];
535 dy[i] = *c*dy[i] - sconj*dx[i];
540 OrdinalType ix = 0, iy = 0;
541 if (incx < izero) ix = (-n+1)*incx;
542 if (incy < izero) iy = (-n+1)*incy;
543 for(OrdinalType i=0; i<n; ++i) {
544 ScalarType temp = *c*dx[ix] + sconj*dy[iy];
545 dy[iy] = *c*dy[iy] - sconj*dx[ix];
553 template<
typename OrdinalType,
typename ScalarType>
558 OrdinalType i, ix = izero;
560 if ( n < ione || incx < ione )
564 for(i = izero; i < n; i++)
571 template<
typename OrdinalType,
typename ScalarType>
576 OrdinalType i, ix = izero, iy = izero;
579 if (incx < izero) { ix = (-n+ione)*incx; }
580 if (incy < izero) { iy = (-n+ione)*incy; }
582 for(i = izero; i < n; i++)
591 template<
typename OrdinalType,
typename ScalarType>
592 template <
typename alpha_type,
typename x_type>
597 OrdinalType i, ix = izero, iy = izero;
601 if (incx < izero) { ix = (-n+ione)*incx; }
602 if (incy < izero) { iy = (-n+ione)*incy; }
604 for(i = izero; i < n; i++)
606 y[iy] += alpha * x[ix];
613 template<
typename OrdinalType,
typename ScalarType>
620 OrdinalType i, ix = izero;
622 if ( n < ione || incx < ione )
625 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
626 for (i = izero; i < n; i++)
628 mval.blas_dabs1( &x[ix], &temp );
636 template<
typename OrdinalType,
typename ScalarType>
637 template <
typename x_type,
typename y_type>
643 OrdinalType i, ix = izero, iy = izero;
647 if (incx < izero) { ix = (-n+ione)*incx; }
648 if (incy < izero) { iy = (-n+ione)*incy; }
650 for(i = izero; i < n; i++)
660 template<
typename OrdinalType,
typename ScalarType>
667 OrdinalType i, ix = izero;
669 if ( n < ione || incx < ione )
672 for(i = izero; i < n; i++)
681 template<
typename OrdinalType,
typename ScalarType>
686 OrdinalType result = izero, ix = izero, i;
692 if ( n < ione || incx < ione )
695 details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
697 mval.blas_dabs1( &x[ix], &maxval );
699 for(i = ione; i < n; i++)
701 mval.blas_dabs1( &x[ix], &curval );
716 template<
typename OrdinalType,
typename ScalarType>
717 template <
typename alpha_type,
typename A_type,
typename x_type,
typename beta_type>
718 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(
ETransp trans,
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda,
const x_type* x,
const OrdinalType incx,
const beta_type beta, ScalarType* y,
const OrdinalType incy)
const 728 bool BadArgument =
false;
731 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){
return; }
735 std::cout <<
"BLAS::GEMV Error: M == " << m << std::endl;
739 std::cout <<
"BLAS::GEMV Error: N == " << n << std::endl;
743 std::cout <<
"BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
746 if( incx == izero ) {
747 std::cout <<
"BLAS::GEMV Error: INCX == 0"<< std::endl;
750 if( incy == izero ) {
751 std::cout <<
"BLAS::GEMV Error: INCY == 0"<< std::endl;
756 OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
757 OrdinalType kx = izero, ky = izero;
761 if(ETranspChar[trans] ==
'N') {
770 noConj = (ETranspChar[trans] ==
'T');
773 if (incx < izero ) { kx = (ione - lenx)*incx; }
774 if (incy < izero ) { ky = (ione - leny)*incy; }
778 if(beta != beta_one) {
780 if (beta == beta_zero) {
781 for(i = izero; i < leny; i++) { y[i] = y_zero; }
783 for(i = izero; i < leny; i++) { y[i] *= beta; }
786 if (beta == beta_zero) {
787 for(i = izero; i < leny; i++) {
792 for(i = izero; i < leny; i++) {
801 if(alpha == alpha_zero) {
return; }
803 if( ETranspChar[trans] ==
'N' ) {
807 for(j = izero; j < n; j++) {
808 if (x[jx] != x_zero) {
810 for(i = izero; i < m; i++) {
811 y[i] += temp*A[j*lda + i];
817 for(j = izero; j < n; j++) {
818 if (x[jx] != x_zero) {
821 for(i = izero; i < m; i++) {
822 y[iy] += temp*A[j*lda + i];
832 for(j = izero; j < n; j++) {
835 for(i = izero; i < m; i++) {
836 temp += A[j*lda + i]*x[i];
839 for(i = izero; i < m; i++) {
847 for(j = izero; j < n; j++) {
851 for (i = izero; i < m; i++) {
852 temp += A[j*lda + i]*x[ix];
856 for (i = izero; i < m; i++) {
869 template<
typename OrdinalType,
typename ScalarType>
870 template <
typename A_type>
876 bool BadArgument =
false;
880 if( n == izero ){
return; }
884 std::cout <<
"BLAS::TRMV Error: N == " << n << std::endl;
888 std::cout <<
"BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
891 if( incx == izero ) {
892 std::cout <<
"BLAS::TRMV Error: INCX == 0"<< std::endl;
897 OrdinalType i, j, ix, jx, kx = izero;
899 bool noUnit = (EDiagChar[diag] ==
'N');
902 noConj = (ETranspChar[trans] ==
'T');
905 if (incx < izero) { kx = (-n+ione)*incx; }
908 if (ETranspChar[trans] ==
'N') {
910 if (EUploChar[uplo] ==
'U') {
913 for (j=izero; j<n; j++) {
916 for (i=izero; i<j; i++) {
917 x[i] += temp*A[j*lda + i];
920 x[j] *= A[j*lda + j];
925 for (j=izero; j<n; j++) {
929 for (i=izero; i<j; i++) {
930 x[ix] += temp*A[j*lda + i];
934 x[jx] *= A[j*lda + j];
941 for (j=n-ione; j>-ione; j--) {
944 for (i=n-ione; i>j; i--) {
945 x[i] += temp*A[j*lda + i];
948 x[j] *= A[j*lda + j];
954 for (j=n-ione; j>-ione; j--) {
958 for (i=n-ione; i>j; i--) {
959 x[ix] += temp*A[j*lda + i];
963 x[jx] *= A[j*lda + j];
971 if (EUploChar[uplo]==
'U') {
974 for (j=n-ione; j>-ione; j--) {
978 temp *= A[j*lda + j];
979 for (i=j-ione; i>-ione; i--) {
980 temp += A[j*lda + i]*x[i];
985 for (i=j-ione; i>-ione; i--) {
992 jx = kx + (n-ione)*incx;
993 for (j=n-ione; j>-ione; j--) {
998 temp *= A[j*lda + j];
999 for (i=j-ione; i>-ione; i--) {
1001 temp += A[j*lda + i]*x[ix];
1006 for (i=j-ione; i>-ione; i--) {
1018 for (j=izero; j<n; j++) {
1022 temp *= A[j*lda + j];
1023 for (i=j+ione; i<n; i++) {
1024 temp += A[j*lda + i]*x[i];
1029 for (i=j+ione; i<n; i++) {
1037 for (j=izero; j<n; j++) {
1042 temp *= A[j*lda + j];
1043 for (i=j+ione; i<n; i++) {
1045 temp += A[j*lda + i]*x[ix];
1050 for (i=j+ione; i<n; i++) {
1064 template<
typename OrdinalType,
typename ScalarType>
1065 template <
typename alpha_type,
typename x_type,
typename y_type>
1066 void DefaultBLASImpl<OrdinalType, ScalarType>::GER(
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const x_type* x,
const OrdinalType incx,
const y_type* y,
const OrdinalType incy, ScalarType* A,
const OrdinalType lda)
const 1072 bool BadArgument =
false;
1075 if( m == izero || n == izero || alpha == alpha_zero ){
return; }
1079 std::cout <<
"BLAS::GER Error: M == " << m << std::endl;
1083 std::cout <<
"BLAS::GER Error: N == " << n << std::endl;
1087 std::cout <<
"BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
1091 std::cout <<
"BLAS::GER Error: INCX == 0"<< std::endl;
1095 std::cout <<
"BLAS::GER Error: INCY == 0"<< std::endl;
1100 OrdinalType i, j, ix, jy = izero, kx = izero;
1104 if (incx < izero) { kx = (-m+ione)*incx; }
1105 if (incy < izero) { jy = (-n+ione)*incy; }
1108 if( incx == ione ) {
1109 for( j=izero; j<n; j++ ) {
1110 if ( y[jy] != y_zero ) {
1112 for ( i=izero; i<m; i++ ) {
1113 A[j*lda + i] += x[i]*temp;
1120 for( j=izero; j<n; j++ ) {
1121 if ( y[jy] != y_zero ) {
1124 for( i=izero; i<m; i++ ) {
1125 A[j*lda + i] += x[ix]*temp;
1139 template<
typename OrdinalType,
typename ScalarType>
1140 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
1141 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(
ETransp transa,
ETransp transb,
const OrdinalType m,
const OrdinalType n,
const OrdinalType k,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda,
const B_type* B,
const OrdinalType ldb,
const beta_type beta, ScalarType* C,
const OrdinalType ldc)
const 1150 OrdinalType i, j, p;
1151 OrdinalType NRowA = m, NRowB = k;
1153 bool BadArgument =
false;
1154 bool conjA =
false, conjB =
false;
1157 if( !(ETranspChar[transa]==
'N') ) {
1160 if( !(ETranspChar[transb]==
'N') ) {
1165 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){
return; }
1167 std::cout <<
"BLAS::GEMM Error: M == " << m << std::endl;
1171 std::cout <<
"BLAS::GEMM Error: N == " << n << std::endl;
1175 std::cout <<
"BLAS::GEMM Error: K == " << k << std::endl;
1179 std::cout <<
"BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
1183 std::cout <<
"BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
1187 std::cout <<
"BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
1194 conjA = (ETranspChar[transa] ==
'C');
1195 conjB = (ETranspChar[transb] ==
'C');
1198 if( alpha == alpha_zero ) {
1199 if( beta == beta_zero ) {
1200 for (j=izero; j<n; j++) {
1201 for (i=izero; i<m; i++) {
1202 C[j*ldc + i] = C_zero;
1206 for (j=izero; j<n; j++) {
1207 for (i=izero; i<m; i++) {
1208 C[j*ldc + i] *= beta;
1217 if ( ETranspChar[transb]==
'N' ) {
1218 if ( ETranspChar[transa]==
'N' ) {
1220 for (j=izero; j<n; j++) {
1221 if( beta == beta_zero ) {
1222 for (i=izero; i<m; i++){
1223 C[j*ldc + i] = C_zero;
1225 }
else if( beta != beta_one ) {
1226 for (i=izero; i<m; i++){
1227 C[j*ldc + i] *= beta;
1230 for (p=izero; p<k; p++){
1231 if (B[j*ldb + p] != B_zero ){
1232 temp = alpha*B[j*ldb + p];
1233 for (i=izero; i<m; i++) {
1234 C[j*ldc + i] += temp*A[p*lda + i];
1239 }
else if ( conjA ) {
1241 for (j=izero; j<n; j++) {
1242 for (i=izero; i<m; i++) {
1244 for (p=izero; p<k; p++) {
1247 if (beta == beta_zero) {
1248 C[j*ldc + i] = alpha*temp;
1250 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1256 for (j=izero; j<n; j++) {
1257 for (i=izero; i<m; i++) {
1259 for (p=izero; p<k; p++) {
1260 temp += A[i*lda + p]*B[j*ldb + p];
1262 if (beta == beta_zero) {
1263 C[j*ldc + i] = alpha*temp;
1265 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1270 }
else if ( ETranspChar[transa]==
'N' ) {
1273 for (j=izero; j<n; j++) {
1274 if (beta == beta_zero) {
1275 for (i=izero; i<m; i++) {
1276 C[j*ldc + i] = C_zero;
1278 }
else if ( beta != beta_one ) {
1279 for (i=izero; i<m; i++) {
1280 C[j*ldc + i] *= beta;
1283 for (p=izero; p<k; p++) {
1284 if (B[p*ldb + j] != B_zero) {
1286 for (i=izero; i<m; i++) {
1287 C[j*ldc + i] += temp*A[p*lda + i];
1294 for (j=izero; j<n; j++) {
1295 if (beta == beta_zero) {
1296 for (i=izero; i<m; i++) {
1297 C[j*ldc + i] = C_zero;
1299 }
else if ( beta != beta_one ) {
1300 for (i=izero; i<m; i++) {
1301 C[j*ldc + i] *= beta;
1304 for (p=izero; p<k; p++) {
1305 if (B[p*ldb + j] != B_zero) {
1306 temp = alpha*B[p*ldb + j];
1307 for (i=izero; i<m; i++) {
1308 C[j*ldc + i] += temp*A[p*lda + i];
1314 }
else if ( conjA ) {
1317 for (j=izero; j<n; j++) {
1318 for (i=izero; i<m; i++) {
1320 for (p=izero; p<k; p++) {
1324 if (beta == beta_zero) {
1325 C[j*ldc + i] = alpha*temp;
1327 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1333 for (j=izero; j<n; j++) {
1334 for (i=izero; i<m; i++) {
1336 for (p=izero; p<k; p++) {
1340 if (beta == beta_zero) {
1341 C[j*ldc + i] = alpha*temp;
1343 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1351 for (j=izero; j<n; j++) {
1352 for (i=izero; i<m; i++) {
1354 for (p=izero; p<k; p++) {
1355 temp += A[i*lda + p]
1358 if (beta == beta_zero) {
1359 C[j*ldc + i] = alpha*temp;
1361 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1367 for (j=izero; j<n; j++) {
1368 for (i=izero; i<m; i++) {
1370 for (p=izero; p<k; p++) {
1371 temp += A[i*lda + p]*B[p*ldb + j];
1373 if (beta == beta_zero) {
1374 C[j*ldc + i] = alpha*temp;
1376 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1386 template<
typename OrdinalType,
typename ScalarType>
1388 SWAP (
const OrdinalType n, ScalarType*
const x,
const OrdinalType incx,
1389 ScalarType*
const y,
const OrdinalType incy)
const 1395 if (incx == 1 && incy == 1) {
1396 for (
int i = 0; i < n; ++i) {
1397 ScalarType tmp = x[i];
1407 ix = (1-n) * incx + 1;
1410 iy = (1-n) * incy + 1;
1413 for (
int i = 1; i <= n; ++i) {
1414 ScalarType tmp = x[ix - 1];
1415 x[ix - 1] = y[iy - 1];
1423 template<
typename OrdinalType,
typename ScalarType>
1424 template <
typename alpha_type,
typename A_type,
typename B_type,
typename beta_type>
1425 void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(
ESide side,
EUplo uplo,
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda,
const B_type* B,
const OrdinalType ldb,
const beta_type beta, ScalarType* C,
const OrdinalType ldc)
const 1433 OrdinalType i, j, k, NRowA = m;
1434 ScalarType temp1, temp2;
1435 bool BadArgument =
false;
1436 bool Upper = (EUploChar[uplo] ==
'U');
1437 if (ESideChar[side] ==
'R') { NRowA = n; }
1440 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) {
return; }
1442 std::cout <<
"BLAS::SYMM Error: M == "<< m << std::endl;
1443 BadArgument =
true; }
1445 std::cout <<
"BLAS::SYMM Error: N == "<< n << std::endl;
1446 BadArgument =
true; }
1448 std::cout <<
"BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
1449 BadArgument =
true; }
1451 std::cout <<
"BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
1452 BadArgument =
true; }
1454 std::cout <<
"BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
1455 BadArgument =
true; }
1460 if (alpha == alpha_zero) {
1461 if (beta == beta_zero ) {
1462 for (j=izero; j<n; j++) {
1463 for (i=izero; i<m; i++) {
1464 C[j*ldc + i] = C_zero;
1468 for (j=izero; j<n; j++) {
1469 for (i=izero; i<m; i++) {
1470 C[j*ldc + i] *= beta;
1477 if ( ESideChar[side] ==
'L') {
1482 for (j=izero; j<n; j++) {
1483 for (i=izero; i<m; i++) {
1484 temp1 = alpha*B[j*ldb + i];
1486 for (k=izero; k<i; k++) {
1487 C[j*ldc + k] += temp1*A[i*lda + k];
1488 temp2 += B[j*ldb + k]*A[i*lda + k];
1490 if (beta == beta_zero) {
1491 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1493 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1499 for (j=izero; j<n; j++) {
1500 for (i=m-ione; i>-ione; i--) {
1501 temp1 = alpha*B[j*ldb + i];
1503 for (k=i+ione; k<m; k++) {
1504 C[j*ldc + k] += temp1*A[i*lda + k];
1505 temp2 += B[j*ldb + k]*A[i*lda + k];
1507 if (beta == beta_zero) {
1508 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1510 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1517 for (j=izero; j<n; j++) {
1518 temp1 = alpha*A[j*lda + j];
1519 if (beta == beta_zero) {
1520 for (i=izero; i<m; i++) {
1521 C[j*ldc + i] = temp1*B[j*ldb + i];
1524 for (i=izero; i<m; i++) {
1525 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
1528 for (k=izero; k<j; k++) {
1530 temp1 = alpha*A[j*lda + k];
1532 temp1 = alpha*A[k*lda + j];
1534 for (i=izero; i<m; i++) {
1535 C[j*ldc + i] += temp1*B[k*ldb + i];
1538 for (k=j+ione; k<n; k++) {
1540 temp1 = alpha*A[k*lda + j];
1542 temp1 = alpha*A[j*lda + k];
1544 for (i=izero; i<m; i++) {
1545 C[j*ldc + i] += temp1*B[k*ldb + i];
1553 template<
typename OrdinalType,
typename ScalarType>
1554 template <
typename alpha_type,
typename A_type,
typename beta_type>
1555 void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(
EUplo uplo,
ETransp trans,
const OrdinalType n,
const OrdinalType k,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda,
const beta_type beta, ScalarType* C,
const OrdinalType ldc)
const 1566 OrdinalType i, j, l, NRowA = n;
1567 bool BadArgument =
false;
1568 bool Upper = (EUploChar[uplo] ==
'U');
1574 "Teuchos::BLAS<"<<OTNT::name()<<
","<<STNT::name()<<
">::SYRK()" 1575 " does not support CONJ_TRANS for complex data types." 1579 if( !(ETranspChar[trans]==
'N') ) {
1584 if ( n==izero ) {
return; }
1585 if ( ( (alpha==alpha_zero) || (k==izero) ) && (beta==beta_one) ) {
return; }
1587 std::cout <<
"BLAS::SYRK Error: N == "<< n <<std::endl;
1588 BadArgument =
true; }
1590 std::cout <<
"BLAS::SYRK Error: K == "<< k <<std::endl;
1591 BadArgument =
true; }
1593 std::cout <<
"BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
1594 BadArgument =
true; }
1596 std::cout <<
"BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
1597 BadArgument =
true; }
1602 if (alpha == alpha_zero) {
1604 if (beta==beta_zero) {
1605 for (j=izero; j<n; j++) {
1606 for (i=izero; i<=j; i++) {
1607 C[j*ldc + i] = C_zero;
1612 for (j=izero; j<n; j++) {
1613 for (i=izero; i<=j; i++) {
1614 C[j*ldc + i] *= beta;
1620 if (beta==beta_zero) {
1621 for (j=izero; j<n; j++) {
1622 for (i=j; i<n; i++) {
1623 C[j*ldc + i] = C_zero;
1628 for (j=izero; j<n; j++) {
1629 for (i=j; i<n; i++) {
1630 C[j*ldc + i] *= beta;
1640 if ( ETranspChar[trans]==
'N' ) {
1644 for (j=izero; j<n; j++) {
1645 if (beta==beta_zero) {
1646 for (i=izero; i<=j; i++) {
1647 C[j*ldc + i] = C_zero;
1650 else if (beta!=beta_one) {
1651 for (i=izero; i<=j; i++) {
1652 C[j*ldc + i] *= beta;
1655 for (l=izero; l<k; l++) {
1656 if (A[l*lda + j] != A_zero) {
1657 temp = alpha*A[l*lda + j];
1658 for (i = izero; i <=j; i++) {
1659 C[j*ldc + i] += temp*A[l*lda + i];
1666 for (j=izero; j<n; j++) {
1667 if (beta==beta_zero) {
1668 for (i=j; i<n; i++) {
1669 C[j*ldc + i] = C_zero;
1672 else if (beta!=beta_one) {
1673 for (i=j; i<n; i++) {
1674 C[j*ldc + i] *= beta;
1677 for (l=izero; l<k; l++) {
1678 if (A[l*lda + j] != A_zero) {
1679 temp = alpha*A[l*lda + j];
1680 for (i=j; i<n; i++) {
1681 C[j*ldc + i] += temp*A[l*lda + i];
1692 for (j=izero; j<n; j++) {
1693 for (i=izero; i<=j; i++) {
1695 for (l=izero; l<k; l++) {
1696 temp += A[i*lda + l]*A[j*lda + l];
1698 if (beta==beta_zero) {
1699 C[j*ldc + i] = alpha*temp;
1702 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1708 for (j=izero; j<n; j++) {
1709 for (i=j; i<n; i++) {
1711 for (l=izero; l<k; ++l) {
1712 temp += A[i*lda + l]*A[j*lda + l];
1714 if (beta==beta_zero) {
1715 C[j*ldc + i] = alpha*temp;
1718 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1727 template<
typename OrdinalType,
typename ScalarType>
1728 template <
typename alpha_type,
typename A_type>
1729 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda, ScalarType* B,
const OrdinalType ldb)
const 1737 OrdinalType i, j, k, NRowA = m;
1739 bool BadArgument =
false;
1740 bool LSide = (ESideChar[side] ==
'L');
1741 bool noUnit = (EDiagChar[diag] ==
'N');
1742 bool Upper = (EUploChar[uplo] ==
'U');
1743 bool noConj = (ETranspChar[transa] ==
'T');
1745 if(!LSide) { NRowA = n; }
1748 if (n==izero || m==izero) {
return; }
1750 std::cout <<
"BLAS::TRMM Error: M == "<< m <<std::endl;
1751 BadArgument =
true; }
1753 std::cout <<
"BLAS::TRMM Error: N == "<< n <<std::endl;
1754 BadArgument =
true; }
1756 std::cout <<
"BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
1757 BadArgument =
true; }
1759 std::cout <<
"BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
1760 BadArgument =
true; }
1765 if( alpha == alpha_zero ) {
1766 for( j=izero; j<n; j++ ) {
1767 for( i=izero; i<m; i++ ) {
1768 B[j*ldb + i] = B_zero;
1778 if ( ETranspChar[transa]==
'N' ) {
1783 for( j=izero; j<n; j++ ) {
1784 for( k=izero; k<m; k++) {
1785 if ( B[j*ldb + k] != B_zero ) {
1786 temp = alpha*B[j*ldb + k];
1787 for( i=izero; i<k; i++ ) {
1788 B[j*ldb + i] += temp*A[k*lda + i];
1791 temp *=A[k*lda + k];
1792 B[j*ldb + k] = temp;
1798 for( j=izero; j<n; j++ ) {
1799 for( k=m-ione; k>-ione; k-- ) {
1800 if( B[j*ldb + k] != B_zero ) {
1801 temp = alpha*B[j*ldb + k];
1802 B[j*ldb + k] = temp;
1804 B[j*ldb + k] *= A[k*lda + k];
1805 for( i=k+ione; i<m; i++ ) {
1806 B[j*ldb + i] += temp*A[k*lda + i];
1815 for( j=izero; j<n; j++ ) {
1816 for( i=m-ione; i>-ione; i-- ) {
1817 temp = B[j*ldb + i];
1820 temp *= A[i*lda + i];
1821 for( k=izero; k<i; k++ ) {
1822 temp += A[i*lda + k]*B[j*ldb + k];
1827 for( k=izero; k<i; k++ ) {
1831 B[j*ldb + i] = alpha*temp;
1835 for( j=izero; j<n; j++ ) {
1836 for( i=izero; i<m; i++ ) {
1837 temp = B[j*ldb + i];
1840 temp *= A[i*lda + i];
1841 for( k=i+ione; k<m; k++ ) {
1842 temp += A[i*lda + k]*B[j*ldb + k];
1847 for( k=i+ione; k<m; k++ ) {
1851 B[j*ldb + i] = alpha*temp;
1859 if( ETranspChar[transa] ==
'N' ) {
1864 for( j=n-ione; j>-ione; j-- ) {
1867 temp *= A[j*lda + j];
1868 for( i=izero; i<m; i++ ) {
1869 B[j*ldb + i] *= temp;
1871 for( k=izero; k<j; k++ ) {
1872 if( A[j*lda + k] != A_zero ) {
1873 temp = alpha*A[j*lda + k];
1874 for( i=izero; i<m; i++ ) {
1875 B[j*ldb + i] += temp*B[k*ldb + i];
1882 for( j=izero; j<n; j++ ) {
1885 temp *= A[j*lda + j];
1886 for( i=izero; i<m; i++ ) {
1887 B[j*ldb + i] *= temp;
1889 for( k=j+ione; k<n; k++ ) {
1890 if( A[j*lda + k] != A_zero ) {
1891 temp = alpha*A[j*lda + k];
1892 for( i=izero; i<m; i++ ) {
1893 B[j*ldb + i] += temp*B[k*ldb + i];
1903 for( k=izero; k<n; k++ ) {
1904 for( j=izero; j<k; j++ ) {
1905 if( A[k*lda + j] != A_zero ) {
1907 temp = alpha*A[k*lda + j];
1910 for( i=izero; i<m; i++ ) {
1911 B[j*ldb + i] += temp*B[k*ldb + i];
1918 temp *= A[k*lda + k];
1923 for( i=izero; i<m; i++ ) {
1924 B[k*ldb + i] *= temp;
1929 for( k=n-ione; k>-ione; k-- ) {
1930 for( j=k+ione; j<n; j++ ) {
1931 if( A[k*lda + j] != A_zero ) {
1933 temp = alpha*A[k*lda + j];
1936 for( i=izero; i<m; i++ ) {
1937 B[j*ldb + i] += temp*B[k*ldb + i];
1944 temp *= A[k*lda + k];
1949 for( i=izero; i<m; i++ ) {
1950 B[k*ldb + i] *= temp;
1960 template<
typename OrdinalType,
typename ScalarType>
1961 template <
typename alpha_type,
typename A_type>
1962 void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const OrdinalType m,
const OrdinalType n,
const alpha_type alpha,
const A_type* A,
const OrdinalType lda, ScalarType* B,
const OrdinalType ldb)
const 1972 OrdinalType NRowA = m;
1973 bool BadArgument =
false;
1974 bool noUnit = (EDiagChar[diag]==
'N');
1975 bool noConj = (ETranspChar[transa] ==
'T');
1977 if (!(ESideChar[side] ==
'L')) { NRowA = n; }
1980 if (n == izero || m == izero) {
return; }
1982 std::cout <<
"BLAS::TRSM Error: M == "<<m<<std::endl;
1983 BadArgument =
true; }
1985 std::cout <<
"BLAS::TRSM Error: N == "<<n<<std::endl;
1986 BadArgument =
true; }
1988 std::cout <<
"BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
1989 BadArgument =
true; }
1991 std::cout <<
"BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
1992 BadArgument =
true; }
1998 if(alpha == alpha_zero) {
1999 for(j = izero; j < n; j++) {
2000 for( i = izero; i < m; i++) {
2001 B[j*ldb+i] = B_zero;
2007 if(ESideChar[side] ==
'L') {
2011 if(ETranspChar[transa] ==
'N') {
2015 if(EUploChar[uplo] ==
'U') {
2017 for(j = izero; j < n; j++) {
2019 if(alpha != alpha_one) {
2020 for( i = izero; i < m; i++) {
2021 B[j*ldb+i] *= alpha;
2025 for(k = (m - ione); k > -ione; k--) {
2027 if (B[j*ldb + k] != B_zero) {
2029 B[j*ldb + k] /= A[k*lda + k];
2031 for(i = izero; i < k; i++) {
2032 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2040 for(j = izero; j < n; j++) {
2042 if(alpha != alpha_one) {
2043 for( i = izero; i < m; i++) {
2044 B[j*ldb+i] *= alpha;
2048 for(k = izero; k < m; k++) {
2050 if (B[j*ldb + k] != B_zero) {
2052 B[j*ldb + k] /= A[k*lda + k];
2054 for(i = k+ione; i < m; i++) {
2055 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2067 if(EUploChar[uplo] ==
'U') {
2069 for(j = izero; j < n; j++) {
2070 for( i = izero; i < m; i++) {
2071 temp = alpha*B[j*ldb+i];
2073 for(k = izero; k < i; k++) {
2074 temp -= A[i*lda + k] * B[j*ldb + k];
2077 temp /= A[i*lda + i];
2080 for(k = izero; k < i; k++) {
2088 B[j*ldb + i] = temp;
2094 for(j = izero; j < n; j++) {
2095 for(i = (m - ione) ; i > -ione; i--) {
2096 temp = alpha*B[j*ldb+i];
2098 for(k = i+ione; k < m; k++) {
2099 temp -= A[i*lda + k] * B[j*ldb + k];
2102 temp /= A[i*lda + i];
2105 for(k = i+ione; k < m; k++) {
2113 B[j*ldb + i] = temp;
2124 if (ETranspChar[transa] ==
'N') {
2128 if(EUploChar[uplo] ==
'U') {
2131 for(j = izero; j < n; j++) {
2133 if(alpha != alpha_one) {
2134 for( i = izero; i < m; i++) {
2135 B[j*ldb+i] *= alpha;
2138 for(k = izero; k < j; k++) {
2140 if (A[j*lda + k] != A_zero) {
2141 for(i = izero; i < m; i++) {
2142 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2147 temp = B_one/A[j*lda + j];
2148 for(i = izero; i < m; i++) {
2149 B[j*ldb + i] *= temp;
2156 for(j = (n - ione); j > -ione; j--) {
2158 if(alpha != alpha_one) {
2159 for( i = izero; i < m; i++) {
2160 B[j*ldb+i] *= alpha;
2164 for(k = j+ione; k < n; k++) {
2166 if (A[j*lda + k] != A_zero) {
2167 for(i = izero; i < m; i++) {
2168 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2173 temp = B_one/A[j*lda + j];
2174 for(i = izero; i < m; i++) {
2175 B[j*ldb + i] *= temp;
2186 if(EUploChar[uplo] ==
'U') {
2188 for(k = (n - ione); k > -ione; k--) {
2191 temp = B_one/A[k*lda + k];
2194 for(i = izero; i < m; i++) {
2195 B[k*ldb + i] *= temp;
2198 for(j = izero; j < k; j++) {
2199 if (A[k*lda + j] != A_zero) {
2201 temp = A[k*lda + j];
2204 for(i = izero; i < m; i++) {
2205 B[j*ldb + i] -= temp*B[k*ldb + i];
2209 if (alpha != alpha_one) {
2210 for (i = izero; i < m; i++) {
2211 B[k*ldb + i] *= alpha;
2218 for(k = izero; k < n; k++) {
2221 temp = B_one/A[k*lda + k];
2224 for (i = izero; i < m; i++) {
2225 B[k*ldb + i] *= temp;
2228 for(j = k+ione; j < n; j++) {
2229 if(A[k*lda + j] != A_zero) {
2231 temp = A[k*lda + j];
2234 for(i = izero; i < m; i++) {
2235 B[j*ldb + i] -= temp*B[k*ldb + i];
2239 if (alpha != alpha_one) {
2240 for (i = izero; i < m; i++) {
2241 B[k*ldb + i] *= alpha;
2255 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, float>
2258 inline BLAS(
void) {}
2260 inline virtual ~BLAS(
void) {}
2261 void ROTG(
float* da,
float* db,
float* c,
float* s)
const;
2262 void ROT(
const int n,
float* dx,
const int incx,
float* dy,
const int incy,
float* c,
float* s)
const;
2263 float ASUM(
const int n,
const float* x,
const int incx)
const;
2264 void AXPY(
const int n,
const float alpha,
const float* x,
const int incx,
float* y,
const int incy)
const;
2265 void COPY(
const int n,
const float* x,
const int incx,
float* y,
const int incy)
const;
2266 float DOT(
const int n,
const float* x,
const int incx,
const float* y,
const int incy)
const;
2267 float NRM2(
const int n,
const float* x,
const int incx)
const;
2268 void SCAL(
const int n,
const float alpha,
float* x,
const int incx)
const;
2269 int IAMAX(
const int n,
const float* x,
const int incx)
const;
2270 void GEMV(
ETransp trans,
const int m,
const int n,
const float alpha,
const float* A,
const int lda,
const float* x,
const int incx,
const float beta,
float* y,
const int incy)
const;
2271 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int n,
const float* A,
const int lda,
float* x,
const int incx)
const;
2272 void GER(
const int m,
const int n,
const float alpha,
const float* x,
const int incx,
const float* y,
const int incy,
float* A,
const int lda)
const;
2273 void GEMM(
ETransp transa,
ETransp transb,
const int m,
const int n,
const int k,
const float alpha,
const float* A,
const int lda,
const float* B,
const int ldb,
const float beta,
float* C,
const int ldc)
const;
2274 void SWAP(
const int n,
float*
const x,
const int incx,
float*
const y,
const int incy)
const;
2275 void SYMM(
ESide side,
EUplo uplo,
const int m,
const int n,
const float alpha,
const float* A,
const int lda,
const float *B,
const int ldb,
const float beta,
float *C,
const int ldc)
const;
2276 void SYRK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const float alpha,
const float* A,
const int lda,
const float beta,
float* C,
const int ldc)
const;
2277 void HERK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const float alpha,
const float* A,
const int lda,
const float beta,
float* C,
const int ldc)
const;
2278 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const float alpha,
const float* A,
const int lda,
float* B,
const int ldb)
const;
2279 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const float alpha,
const float* A,
const int lda,
float* B,
const int ldb)
const;
2285 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, double>
2288 inline BLAS(
void) {}
2290 inline virtual ~BLAS(
void) {}
2291 void ROTG(
double* da,
double* db,
double* c,
double* s)
const;
2292 void ROT(
const int n,
double* dx,
const int incx,
double* dy,
const int incy,
double* c,
double* s)
const;
2293 double ASUM(
const int n,
const double* x,
const int incx)
const;
2294 void AXPY(
const int n,
const double alpha,
const double* x,
const int incx,
double* y,
const int incy)
const;
2295 void COPY(
const int n,
const double* x,
const int incx,
double* y,
const int incy)
const;
2296 double DOT(
const int n,
const double* x,
const int incx,
const double* y,
const int incy)
const;
2297 double NRM2(
const int n,
const double* x,
const int incx)
const;
2298 void SCAL(
const int n,
const double alpha,
double* x,
const int incx)
const;
2299 int IAMAX(
const int n,
const double* x,
const int incx)
const;
2300 void GEMV(
ETransp trans,
const int m,
const int n,
const double alpha,
const double* A,
const int lda,
const double* x,
const int incx,
const double beta,
double* y,
const int incy)
const;
2301 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int n,
const double* A,
const int lda,
double* x,
const int incx)
const;
2302 void GER(
const int m,
const int n,
const double alpha,
const double* x,
const int incx,
const double* y,
const int incy,
double* A,
const int lda)
const;
2303 void GEMM(
ETransp transa,
ETransp transb,
const int m,
const int n,
const int k,
const double alpha,
const double* A,
const int lda,
const double* B,
const int ldb,
const double beta,
double* C,
const int ldc)
const;
2304 void SWAP(
const int n,
double*
const x,
const int incx,
double*
const y,
const int incy)
const;
2305 void SYMM(
ESide side,
EUplo uplo,
const int m,
const int n,
const double alpha,
const double* A,
const int lda,
const double *B,
const int ldb,
const double beta,
double *C,
const int ldc)
const;
2306 void SYRK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const double alpha,
const double* A,
const int lda,
const double beta,
double* C,
const int ldc)
const;
2307 void HERK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const double alpha,
const double* A,
const int lda,
const double beta,
double* C,
const int ldc)
const;
2308 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const double alpha,
const double* A,
const int lda,
double* B,
const int ldb)
const;
2309 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const double alpha,
const double* A,
const int lda,
double* B,
const int ldb)
const;
2315 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
2318 inline BLAS(
void) {}
2319 inline BLAS(
const BLAS<
int, std::complex<float> >& ) {}
2320 inline virtual ~BLAS(
void) {}
2321 void ROTG(std::complex<float>* da, std::complex<float>* db,
float* c, std::complex<float>* s)
const;
2322 void ROT(
const int n, std::complex<float>* dx,
const int incx, std::complex<float>* dy,
const int incy,
float* c, std::complex<float>* s)
const;
2323 float ASUM(
const int n,
const std::complex<float>* x,
const int incx)
const;
2324 void AXPY(
const int n,
const std::complex<float> alpha,
const std::complex<float>* x,
const int incx, std::complex<float>* y,
const int incy)
const;
2325 void COPY(
const int n,
const std::complex<float>* x,
const int incx, std::complex<float>* y,
const int incy)
const;
2326 std::complex<float> DOT(
const int n,
const std::complex<float>* x,
const int incx,
const std::complex<float>* y,
const int incy)
const;
2327 float NRM2(
const int n,
const std::complex<float>* x,
const int incx)
const;
2328 void SCAL(
const int n,
const std::complex<float> alpha, std::complex<float>* x,
const int incx)
const;
2329 int IAMAX(
const int n,
const std::complex<float>* x,
const int incx)
const;
2330 void GEMV(
ETransp trans,
const int m,
const int n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int lda,
const std::complex<float>* x,
const int incx,
const std::complex<float> beta, std::complex<float>* y,
const int incy)
const;
2331 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int n,
const std::complex<float>* A,
const int lda, std::complex<float>* x,
const int incx)
const;
2332 void GER(
const int m,
const int n,
const std::complex<float> alpha,
const std::complex<float>* x,
const int incx,
const std::complex<float>* y,
const int incy, std::complex<float>* A,
const int lda)
const;
2333 void GEMM(
ETransp transa,
ETransp transb,
const int m,
const int n,
const int k,
const std::complex<float> alpha,
const std::complex<float>* A,
const int lda,
const std::complex<float>* B,
const int ldb,
const std::complex<float> beta, std::complex<float>* C,
const int ldc)
const;
2334 void SWAP(
const int n, std::complex<float>*
const x,
const int incx, std::complex<float>*
const y,
const int incy)
const;
2335 void SYMM(
ESide side,
EUplo uplo,
const int m,
const int n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int lda,
const std::complex<float> *B,
const int ldb,
const std::complex<float> beta, std::complex<float> *C,
const int ldc)
const;
2336 void SYRK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const std::complex<float> alpha,
const std::complex<float>* A,
const int lda,
const std::complex<float> beta, std::complex<float>* C,
const int ldc)
const;
2337 void HERK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const std::complex<float> alpha,
const std::complex<float>* A,
const int lda,
const std::complex<float> beta, std::complex<float>* C,
const int ldc)
const;
2338 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int lda, std::complex<float>* B,
const int ldb)
const;
2339 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const std::complex<float> alpha,
const std::complex<float>* A,
const int lda, std::complex<float>* B,
const int ldb)
const;
2345 class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
2348 inline BLAS(
void) {}
2349 inline BLAS(
const BLAS<
int, std::complex<double> >& ) {}
2350 inline virtual ~BLAS(
void) {}
2351 void ROTG(std::complex<double>* da, std::complex<double>* db,
double* c, std::complex<double>* s)
const;
2352 void ROT(
const int n, std::complex<double>* dx,
const int incx, std::complex<double>* dy,
const int incy,
double* c, std::complex<double>* s)
const;
2353 double ASUM(
const int n,
const std::complex<double>* x,
const int incx)
const;
2354 void AXPY(
const int n,
const std::complex<double> alpha,
const std::complex<double>* x,
const int incx, std::complex<double>* y,
const int incy)
const;
2355 void COPY(
const int n,
const std::complex<double>* x,
const int incx, std::complex<double>* y,
const int incy)
const;
2356 std::complex<double> DOT(
const int n,
const std::complex<double>* x,
const int incx,
const std::complex<double>* y,
const int incy)
const;
2357 double NRM2(
const int n,
const std::complex<double>* x,
const int incx)
const;
2358 void SCAL(
const int n,
const std::complex<double> alpha, std::complex<double>* x,
const int incx)
const;
2359 int IAMAX(
const int n,
const std::complex<double>* x,
const int incx)
const;
2360 void GEMV(
ETransp trans,
const int m,
const int n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int lda,
const std::complex<double>* x,
const int incx,
const std::complex<double> beta, std::complex<double>* y,
const int incy)
const;
2361 void TRMV(
EUplo uplo,
ETransp trans,
EDiag diag,
const int n,
const std::complex<double>* A,
const int lda, std::complex<double>* x,
const int incx)
const;
2362 void GER(
const int m,
const int n,
const std::complex<double> alpha,
const std::complex<double>* x,
const int incx,
const std::complex<double>* y,
const int incy, std::complex<double>* A,
const int lda)
const;
2363 void GEMM(
ETransp transa,
ETransp transb,
const int m,
const int n,
const int k,
const std::complex<double> alpha,
const std::complex<double>* A,
const int lda,
const std::complex<double>* B,
const int ldb,
const std::complex<double> beta, std::complex<double>* C,
const int ldc)
const;
2364 void SWAP(
const int n, std::complex<double>*
const x,
const int incx, std::complex<double>*
const y,
const int incy)
const;
2365 void SYMM(
ESide side,
EUplo uplo,
const int m,
const int n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int lda,
const std::complex<double> *B,
const int ldb,
const std::complex<double> beta, std::complex<double> *C,
const int ldc)
const;
2366 void SYRK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const std::complex<double> alpha,
const std::complex<double>* A,
const int lda,
const std::complex<double> beta, std::complex<double>* C,
const int ldc)
const;
2367 void HERK(
EUplo uplo,
ETransp trans,
const int n,
const int k,
const std::complex<double> alpha,
const std::complex<double>* A,
const int lda,
const std::complex<double> beta, std::complex<double>* C,
const int ldc)
const;
2368 void TRMM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int lda, std::complex<double>* B,
const int ldb)
const;
2369 void TRSM(
ESide side,
EUplo uplo,
ETransp transa,
EDiag diag,
const int m,
const int n,
const std::complex<double> alpha,
const std::complex<double>* A,
const int lda, std::complex<double>* B,
const int ldb)
const;
2374 #endif // _TEUCHOS_BLAS_HPP_ OrdinalType IAMAX(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Return the index of the element of x with the maximum magnitude.
static T one()
Returns representation of one for this ordinal type.
BLAS(const BLAS< OrdinalType, ScalarType > &)
Copy constructor.
Enumerated types for BLAS input characters.
virtual ~DefaultBLASImpl(void)
Destructor.
T magnitudeType
Mandatory typedef for result of magnitude.
virtual ~BLAS(void)
Destructor.
static T zero()
Returns representation of zero for this ordinal type.
void COPY(const OrdinalType n, const ScalarType *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
Copy the vector x to the vector y.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
BLAS(void)
Default constructor.
DefaultBLASImpl(void)
Default constructor.
Teuchos header file which uses auto-configuration information to include necessary C++ headers...
details::GivensRotator< ScalarType >::c_type rotg_c_type
The type used for c in ROTG.
void SWAP(const OrdinalType n, ScalarType *const x, const OrdinalType incx, ScalarType *const y, const OrdinalType incy) const
Swap the entries of x and y.
This structure defines some basic traits for a scalar field type.
void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType *x, const OrdinalType incx) const
Scale the vector x by the constant alpha.
DefaultBLASImpl(const DefaultBLASImpl< OrdinalType, ScalarType > &)
Copy constructor.
static T conjugate(T a)
Returns the conjugate of the scalar type a.
void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const x_type *x, const OrdinalType incx, const beta_type beta, ScalarType *y, const OrdinalType incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a ge...
Teuchos implementation details.
void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type *A, const OrdinalType lda, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A'+beta*C or C <- alpha*A'*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
General matrix-matrix multiply.
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
void AXPY(const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
Perform the operation: y <- y+alpha*x.
void ROT(const OrdinalType n, ScalarType *dx, const OrdinalType incx, ScalarType *dy, const OrdinalType incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
static magnitudeType magnitude(T a)
Returns the magnitudeType of the scalar type a.
Defines basic traits for the ordinal field type.
Default traits class that just returns typeid(T).name().
Default implementation for BLAS routines.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type *A, const OrdinalType lda, ScalarType *x, const OrdinalType incx) const
Performs the matrix-vector operation: x <- A*x or x <- A'*x where A is a unit/non-unit n by n upper/l...
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy, ScalarType *A, const OrdinalType lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
Defines basic traits for the scalar field type.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Sum the absolute values of the entries of x.
static T zero()
Returns representation of zero for this scalar type.
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
ScalarType DOT(const OrdinalType n, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy) const
Form the dot product of the vectors x and y.
static T one()
Returns representation of one for this scalar type.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Compute the 2-norm of the vector x.