Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_BLAS.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 // Kris
43 // 06.16.03 -- Start over from scratch
44 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
45 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
46 // -- Added warning messages for generic calls
47 // 07.08.03 -- Move into Teuchos package/namespace
48 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
49 // * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
50 // * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
51 // * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
52 // * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
53 // * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
54 // -- Removed warning messages for generic calls
55 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
56 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
57 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
58 
59 #ifndef _TEUCHOS_BLAS_HPP_
60 #define _TEUCHOS_BLAS_HPP_
61 
69 #include "Teuchos_ConfigDefs.hpp"
70 #include "Teuchos_ScalarTraits.hpp"
72 #include "Teuchos_BLAS_types.hpp"
73 #include "Teuchos_Assert.hpp"
74 
107 namespace Teuchos
108 {
109  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[];
110  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[];
111  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[];
112  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[];
113  extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETypeChar[];
114 
115  // Forward declaration
116  namespace details {
117  template<typename ScalarType,
119  class GivensRotator;
120  }
121 
123 
128  template<typename OrdinalType, typename ScalarType>
130  {
131 
132  typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
133 
134  public:
136 
137 
139  inline DefaultBLASImpl(void) {}
140 
143 
145  inline virtual ~DefaultBLASImpl(void) {}
147 
149 
150 
152 
154  typedef typename details::GivensRotator<ScalarType>::c_type rotg_c_type;
155 
157  void ROTG(ScalarType* da, ScalarType* db, rotg_c_type* c, ScalarType* s) const;
158 
160  void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const;
161 
163  void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
164 
166  void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
167 
169  template <typename alpha_type, typename x_type>
170  void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
171 
173  typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
174 
176  template <typename x_type, typename y_type>
177  ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const;
178 
180  typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
181 
183  OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
185 
187 
188 
190  template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
191  void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A,
192  const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const;
193 
195  template <typename A_type>
196  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A,
197  const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
198 
201  template <typename alpha_type, typename x_type, typename y_type>
202  void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx,
203  const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
205 
207 
208 
215  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
216  void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
217 
219  void
220  SWAP (const OrdinalType n, ScalarType* const x, const OrdinalType incx,
221  ScalarType* const y, const OrdinalType incy) const;
222 
224  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
225  void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
226 
228  template <typename alpha_type, typename A_type, typename beta_type>
229  void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
230 
232  template <typename alpha_type, typename A_type>
233  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
234  const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
235 
237  template <typename alpha_type, typename A_type>
238  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
239  const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
241  };
242 
243  template<typename OrdinalType, typename ScalarType>
244  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
245  {
246 
247  typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
248 
249  public:
251 
252 
254  inline BLAS(void) {}
255 
257  inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {}
258 
260  inline virtual ~BLAS(void) {}
262  };
263 
264 //------------------------------------------------------------------------------------------
265 // LEVEL 1 BLAS ROUTINES
266 //------------------------------------------------------------------------------------------
267 
275  namespace details {
276 
277  // Compute magnitude.
278  template<typename ScalarType, bool isComplex>
279  class MagValue {
280  public:
281  void
282  blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const;
283  };
284 
285  // Complex-arithmetic specialization.
286  template<typename ScalarType>
287  class MagValue<ScalarType, true> {
288  public:
289  void
290  blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const;
291  };
292 
293  // Real-arithmetic specialization.
294  template<typename ScalarType>
295  class MagValue<ScalarType, false> {
296  public:
297  void
298  blas_dabs1(const ScalarType* a, ScalarType* ret) const;
299  };
300 
301  template<typename ScalarType, bool isComplex>
302  class GivensRotator {};
303 
304  // Complex-arithmetic specialization.
305  template<typename ScalarType>
306  class GivensRotator<ScalarType, true> {
307  public:
308  typedef typename ScalarTraits<ScalarType>::magnitudeType c_type;
309  void
310  ROTG (ScalarType* ca,
311  ScalarType* cb,
313  ScalarType* s) const;
314  };
315 
316  // Real-arithmetic specialization.
317  template<typename ScalarType>
318  class GivensRotator<ScalarType, false> {
319  public:
320  typedef ScalarType c_type;
321  void
322  ROTG (ScalarType* da,
323  ScalarType* db,
324  ScalarType* c,
325  ScalarType* s) const;
326 
339  ScalarType SIGN (ScalarType x, ScalarType y) const {
340  typedef ScalarTraits<ScalarType> STS;
341 
342  if (y > STS::zero()) {
343  return STS::magnitude (x);
344  } else if (y < STS::zero()) {
345  return -STS::magnitude (x);
346  } else { // y == STS::zero()
347  // Suppose that ScalarType implements signed zero, as IEEE
348  // 754 - compliant floating-point numbers should. You can't
349  // use == to test for signed zero, since +0 == -0. However,
350  // 1/0 = Inf > 0 and 1/-0 = -Inf < 0. Let's hope ScalarType
351  // supports Inf... we don't need to test for Inf, just see
352  // if it's greater than or less than zero.
353  //
354  // NOTE: This ONLY works if ScalarType is real. Complex
355  // infinity doesn't have a sign, so we can't compare it with
356  // zero. That's OK, because finite complex numbers don't
357  // have a sign either; they have an angle.
358  ScalarType signedInfinity = STS::one() / y;
359  if (signedInfinity > STS::zero()) {
360  return STS::magnitude (x);
361  } else {
362  // Even if ScalarType doesn't implement signed zero,
363  // Fortran's SIGN intrinsic returns -ABS(X) if the second
364  // argument Y is zero. We imitate this behavior here.
365  return -STS::magnitude (x);
366  }
367  }
368  }
369  };
370 
371  // Implementation of complex-arithmetic specialization.
372  template<typename ScalarType>
373  void
374  GivensRotator<ScalarType, true>::
375  ROTG (ScalarType* ca,
376  ScalarType* cb,
378  ScalarType* s) const
379  {
380  typedef ScalarTraits<ScalarType> STS;
381  typedef typename STS::magnitudeType MagnitudeType;
382  typedef ScalarTraits<MagnitudeType> STM;
383 
384  // This is a straightforward translation into C++ of the
385  // reference BLAS' implementation of ZROTG. You can get
386  // the Fortran 77 source code of ZROTG here:
387  //
388  // http://www.netlib.org/blas/zrotg.f
389  //
390  // I used the following rules to translate Fortran types and
391  // intrinsic functions into C++:
392  //
393  // DOUBLE PRECISION -> MagnitudeType
394  // DOUBLE COMPLEX -> ScalarType
395  // CDABS -> STS::magnitude
396  // DCMPLX -> ScalarType constructor (assuming that ScalarType
397  // is std::complex<MagnitudeType>)
398  // DCONJG -> STS::conjugate
399  // DSQRT -> STM::squareroot
400  ScalarType alpha;
401  MagnitudeType norm, scale;
402 
403  if (STS::magnitude (*ca) == STM::zero()) {
404  *c = STM::zero();
405  *s = STS::one();
406  *ca = *cb;
407  } else {
408  scale = STS::magnitude (*ca) + STS::magnitude (*cb);
409  { // I introduced temporaries into the translated BLAS code in
410  // order to make the expression easier to read and also save a
411  // few floating-point operations.
412  const MagnitudeType ca_scaled =
413  STS::magnitude (*ca / ScalarType(scale, STM::zero()));
414  const MagnitudeType cb_scaled =
415  STS::magnitude (*cb / ScalarType(scale, STM::zero()));
416  norm = scale *
417  STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
418  }
419  alpha = *ca / STS::magnitude (*ca);
420  *c = STS::magnitude (*ca) / norm;
421  *s = alpha * STS::conjugate (*cb) / norm;
422  *ca = alpha * norm;
423  }
424  }
425 
426  // Implementation of real-arithmetic specialization.
427  template<typename ScalarType>
428  void
429  GivensRotator<ScalarType, false>::
430  ROTG (ScalarType* da,
431  ScalarType* db,
432  ScalarType* c,
433  ScalarType* s) const
434  {
435  typedef ScalarTraits<ScalarType> STS;
436 
437  // This is a straightforward translation into C++ of the
438  // reference BLAS' implementation of DROTG. You can get
439  // the Fortran 77 source code of DROTG here:
440  //
441  // http://www.netlib.org/blas/drotg.f
442  //
443  // I used the following rules to translate Fortran types and
444  // intrinsic functions into C++:
445  //
446  // DOUBLE PRECISION -> ScalarType
447  // DABS -> STS::magnitude
448  // DSQRT -> STM::squareroot
449  // DSIGN -> SIGN (see below)
450  //
451  // DSIGN(x,y) (the old DOUBLE PRECISION type-specific form of
452  // the Fortran type-generic SIGN intrinsic) required special
453  // translation, which we did in a separate utility function in
454  // the specializaton of GivensRotator for real arithmetic.
455  // (ROTG for complex arithmetic doesn't require this function.)
456  // C99 provides a copysign() math library function, but we are
457  // not able to rely on the existence of C99 functions here.
458  ScalarType r, roe, scale, z;
459 
460  roe = *db;
461  if (STS::magnitude (*da) > STS::magnitude (*db)) {
462  roe = *da;
463  }
464  scale = STS::magnitude (*da) + STS::magnitude (*db);
465  if (scale == STS::zero()) {
466  *c = STS::one();
467  *s = STS::zero();
468  r = STS::zero();
469  z = STS::zero();
470  } else {
471  // I introduced temporaries into the translated BLAS code in
472  // order to make the expression easier to read and also save
473  // a few floating-point operations.
474  const ScalarType da_scaled = *da / scale;
475  const ScalarType db_scaled = *db / scale;
476  r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
477  r = SIGN (STS::one(), roe) * r;
478  *c = *da / r;
479  *s = *db / r;
480  z = STS::one();
481  if (STS::magnitude (*da) > STS::magnitude (*db)) {
482  z = *s;
483  }
484  if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
485  z = STS::one() / *c;
486  }
487  }
488 
489  *da = r;
490  *db = z;
491  }
492 
493  // Real-valued implementation of MagValue
494  template<typename ScalarType>
495  void
496  MagValue<ScalarType, false>::
497  blas_dabs1(const ScalarType* a, ScalarType* ret) const
498  {
500  }
501 
502  // Complex-valued implementation of MagValue
503  template<typename ScalarType>
504  void
505  MagValue<ScalarType, true>::
506  blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const
507  {
510  }
511 
512  } // namespace details
513 
514  template<typename OrdinalType, typename ScalarType>
515  void
517  ROTG (ScalarType* da,
518  ScalarType* db,
519  rotg_c_type* c,
520  ScalarType* s) const
521  {
522  details::GivensRotator<ScalarType> rotator;
523  rotator.ROTG (da, db, c, s);
524  }
525 
526  template<typename OrdinalType, typename ScalarType>
527  void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
528  {
529  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
530  ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s);
531  if (n <= 0) return;
532  if (incx==1 && incy==1) {
533  for(OrdinalType i=0; i<n; ++i) {
534  ScalarType temp = *c*dx[i] + sconj*dy[i];
535  dy[i] = *c*dy[i] - sconj*dx[i];
536  dx[i] = temp;
537  }
538  }
539  else {
540  OrdinalType ix = 0, iy = 0;
541  if (incx < izero) ix = (-n+1)*incx;
542  if (incy < izero) iy = (-n+1)*incy;
543  for(OrdinalType i=0; i<n; ++i) {
544  ScalarType temp = *c*dx[ix] + sconj*dy[iy];
545  dy[iy] = *c*dy[iy] - sconj*dx[ix];
546  dx[ix] = temp;
547  ix += incx;
548  iy += incy;
549  }
550  }
551  }
552 
553  template<typename OrdinalType, typename ScalarType>
554  void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
555  {
556  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
557  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
558  OrdinalType i, ix = izero;
559 
560  if ( n < ione || incx < ione )
561  return;
562 
563  // Scale the vector.
564  for(i = izero; i < n; i++)
565  {
566  x[ix] *= alpha;
567  ix += incx;
568  }
569  } /* end SCAL */
570 
571  template<typename OrdinalType, typename ScalarType>
572  void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
573  {
574  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
575  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
576  OrdinalType i, ix = izero, iy = izero;
577  if ( n > izero ) {
578  // Set the initial indices (ix, iy).
579  if (incx < izero) { ix = (-n+ione)*incx; }
580  if (incy < izero) { iy = (-n+ione)*incy; }
581 
582  for(i = izero; i < n; i++)
583  {
584  y[iy] = x[ix];
585  ix += incx;
586  iy += incy;
587  }
588  }
589  } /* end COPY */
590 
591  template<typename OrdinalType, typename ScalarType>
592  template <typename alpha_type, typename x_type>
593  void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
594  {
595  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
596  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
597  OrdinalType i, ix = izero, iy = izero;
598  if( n > izero && alpha != ScalarTraits<alpha_type>::zero())
599  {
600  // Set the initial indices (ix, iy).
601  if (incx < izero) { ix = (-n+ione)*incx; }
602  if (incy < izero) { iy = (-n+ione)*incy; }
603 
604  for(i = izero; i < n; i++)
605  {
606  y[iy] += alpha * x[ix];
607  ix += incx;
608  iy += incy;
609  }
610  }
611  } /* end AXPY */
612 
613  template<typename OrdinalType, typename ScalarType>
614  typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
615  {
616  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
617  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
618  typename ScalarTraits<ScalarType>::magnitudeType temp, result =
620  OrdinalType i, ix = izero;
621 
622  if ( n < ione || incx < ione )
623  return result;
624 
625  details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
626  for (i = izero; i < n; i++)
627  {
628  mval.blas_dabs1( &x[ix], &temp );
629  result += temp;
630  ix += incx;
631  }
632 
633  return result;
634  } /* end ASUM */
635 
636  template<typename OrdinalType, typename ScalarType>
637  template <typename x_type, typename y_type>
638  ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const
639  {
640  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
641  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
642  ScalarType result = ScalarTraits<ScalarType>::zero();
643  OrdinalType i, ix = izero, iy = izero;
644  if( n > izero )
645  {
646  // Set the initial indices (ix,iy).
647  if (incx < izero) { ix = (-n+ione)*incx; }
648  if (incy < izero) { iy = (-n+ione)*incy; }
649 
650  for(i = izero; i < n; i++)
651  {
652  result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy];
653  ix += incx;
654  iy += incy;
655  }
656  }
657  return result;
658  } /* end DOT */
659 
660  template<typename OrdinalType, typename ScalarType>
661  typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
662  {
663  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
664  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
667  OrdinalType i, ix = izero;
668 
669  if ( n < ione || incx < ione )
670  return result;
671 
672  for(i = izero; i < n; i++)
673  {
675  ix += incx;
676  }
678  return result;
679  } /* end NRM2 */
680 
681  template<typename OrdinalType, typename ScalarType>
682  OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
683  {
684  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
685  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
686  OrdinalType result = izero, ix = izero, i;
691 
692  if ( n < ione || incx < ione )
693  return result;
694 
695  details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
696 
697  mval.blas_dabs1( &x[ix], &maxval );
698  ix += incx;
699  for(i = ione; i < n; i++)
700  {
701  mval.blas_dabs1( &x[ix], &curval );
702  if(curval > maxval)
703  {
704  result = i;
705  maxval = curval;
706  }
707  ix += incx;
708  }
709 
710  return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
711  } /* end IAMAX */
712 
713 //------------------------------------------------------------------------------------------
714 // LEVEL 2 BLAS ROUTINES
715 //------------------------------------------------------------------------------------------
716  template<typename OrdinalType, typename ScalarType>
717  template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
718  void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const
719  {
720  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
721  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
722  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
723  beta_type beta_zero = ScalarTraits<beta_type>::zero();
724  x_type x_zero = ScalarTraits<x_type>::zero();
725  ScalarType y_zero = ScalarTraits<ScalarType>::zero();
726  beta_type beta_one = ScalarTraits<beta_type>::one();
727  bool noConj = true;
728  bool BadArgument = false;
729 
730  // Quick return if there is nothing to do!
731  if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
732 
733  // Otherwise, we need to check the argument list.
734  if( m < izero ) {
735  std::cout << "BLAS::GEMV Error: M == " << m << std::endl;
736  BadArgument = true;
737  }
738  if( n < izero ) {
739  std::cout << "BLAS::GEMV Error: N == " << n << std::endl;
740  BadArgument = true;
741  }
742  if( lda < m ) {
743  std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
744  BadArgument = true;
745  }
746  if( incx == izero ) {
747  std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
748  BadArgument = true;
749  }
750  if( incy == izero ) {
751  std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
752  BadArgument = true;
753  }
754 
755  if(!BadArgument) {
756  OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
757  OrdinalType kx = izero, ky = izero;
758  ScalarType temp;
759 
760  // Determine the lengths of the vectors x and y.
761  if(ETranspChar[trans] == 'N') {
762  lenx = n;
763  leny = m;
764  } else {
765  lenx = m;
766  leny = n;
767  }
768 
769  // Determine if this is a conjugate tranpose
770  noConj = (ETranspChar[trans] == 'T');
771 
772  // Set the starting pointers for the vectors x and y if incx/y < 0.
773  if (incx < izero ) { kx = (ione - lenx)*incx; }
774  if (incy < izero ) { ky = (ione - leny)*incy; }
775 
776  // Form y = beta*y
777  ix = kx; iy = ky;
778  if(beta != beta_one) {
779  if (incy == ione) {
780  if (beta == beta_zero) {
781  for(i = izero; i < leny; i++) { y[i] = y_zero; }
782  } else {
783  for(i = izero; i < leny; i++) { y[i] *= beta; }
784  }
785  } else {
786  if (beta == beta_zero) {
787  for(i = izero; i < leny; i++) {
788  y[iy] = y_zero;
789  iy += incy;
790  }
791  } else {
792  for(i = izero; i < leny; i++) {
793  y[iy] *= beta;
794  iy += incy;
795  }
796  }
797  }
798  }
799 
800  // Return if we don't have to do anything more.
801  if(alpha == alpha_zero) { return; }
802 
803  if( ETranspChar[trans] == 'N' ) {
804  // Form y = alpha*A*y
805  jx = kx;
806  if (incy == ione) {
807  for(j = izero; j < n; j++) {
808  if (x[jx] != x_zero) {
809  temp = alpha*x[jx];
810  for(i = izero; i < m; i++) {
811  y[i] += temp*A[j*lda + i];
812  }
813  }
814  jx += incx;
815  }
816  } else {
817  for(j = izero; j < n; j++) {
818  if (x[jx] != x_zero) {
819  temp = alpha*x[jx];
820  iy = ky;
821  for(i = izero; i < m; i++) {
822  y[iy] += temp*A[j*lda + i];
823  iy += incy;
824  }
825  }
826  jx += incx;
827  }
828  }
829  } else {
830  jy = ky;
831  if (incx == ione) {
832  for(j = izero; j < n; j++) {
833  temp = y_zero;
834  if ( noConj ) {
835  for(i = izero; i < m; i++) {
836  temp += A[j*lda + i]*x[i];
837  }
838  } else {
839  for(i = izero; i < m; i++) {
840  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
841  }
842  }
843  y[jy] += alpha*temp;
844  jy += incy;
845  }
846  } else {
847  for(j = izero; j < n; j++) {
848  temp = y_zero;
849  ix = kx;
850  if ( noConj ) {
851  for (i = izero; i < m; i++) {
852  temp += A[j*lda + i]*x[ix];
853  ix += incx;
854  }
855  } else {
856  for (i = izero; i < m; i++) {
857  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
858  ix += incx;
859  }
860  }
861  y[jy] += alpha*temp;
862  jy += incy;
863  }
864  }
865  }
866  } /* if (!BadArgument) */
867  } /* end GEMV */
868 
869  template<typename OrdinalType, typename ScalarType>
870  template <typename A_type>
871  void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
872  {
873  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
874  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
875  ScalarType zero = ScalarTraits<ScalarType>::zero();
876  bool BadArgument = false;
877  bool noConj = true;
878 
879  // Quick return if there is nothing to do!
880  if( n == izero ){ return; }
881 
882  // Otherwise, we need to check the argument list.
883  if( n < izero ) {
884  std::cout << "BLAS::TRMV Error: N == " << n << std::endl;
885  BadArgument = true;
886  }
887  if( lda < n ) {
888  std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
889  BadArgument = true;
890  }
891  if( incx == izero ) {
892  std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
893  BadArgument = true;
894  }
895 
896  if(!BadArgument) {
897  OrdinalType i, j, ix, jx, kx = izero;
898  ScalarType temp;
899  bool noUnit = (EDiagChar[diag] == 'N');
900 
901  // Determine if this is a conjugate tranpose
902  noConj = (ETranspChar[trans] == 'T');
903 
904  // Set the starting pointer for the vector x if incx < 0.
905  if (incx < izero) { kx = (-n+ione)*incx; }
906 
907  // Start the operations for a nontransposed triangular matrix
908  if (ETranspChar[trans] == 'N') {
909  /* Compute x = A*x */
910  if (EUploChar[uplo] == 'U') {
911  /* A is an upper triangular matrix */
912  if (incx == ione) {
913  for (j=izero; j<n; j++) {
914  if (x[j] != zero) {
915  temp = x[j];
916  for (i=izero; i<j; i++) {
917  x[i] += temp*A[j*lda + i];
918  }
919  if ( noUnit )
920  x[j] *= A[j*lda + j];
921  }
922  }
923  } else {
924  jx = kx;
925  for (j=izero; j<n; j++) {
926  if (x[jx] != zero) {
927  temp = x[jx];
928  ix = kx;
929  for (i=izero; i<j; i++) {
930  x[ix] += temp*A[j*lda + i];
931  ix += incx;
932  }
933  if ( noUnit )
934  x[jx] *= A[j*lda + j];
935  }
936  jx += incx;
937  }
938  } /* if (incx == ione) */
939  } else { /* A is a lower triangular matrix */
940  if (incx == ione) {
941  for (j=n-ione; j>-ione; j--) {
942  if (x[j] != zero) {
943  temp = x[j];
944  for (i=n-ione; i>j; i--) {
945  x[i] += temp*A[j*lda + i];
946  }
947  if ( noUnit )
948  x[j] *= A[j*lda + j];
949  }
950  }
951  } else {
952  kx += (n-ione)*incx;
953  jx = kx;
954  for (j=n-ione; j>-ione; j--) {
955  if (x[jx] != zero) {
956  temp = x[jx];
957  ix = kx;
958  for (i=n-ione; i>j; i--) {
959  x[ix] += temp*A[j*lda + i];
960  ix -= incx;
961  }
962  if ( noUnit )
963  x[jx] *= A[j*lda + j];
964  }
965  jx -= incx;
966  }
967  }
968  } /* if (EUploChar[uplo]=='U') */
969  } else { /* A is transposed/conjugated */
970  /* Compute x = A'*x */
971  if (EUploChar[uplo]=='U') {
972  /* A is an upper triangular matrix */
973  if (incx == ione) {
974  for (j=n-ione; j>-ione; j--) {
975  temp = x[j];
976  if ( noConj ) {
977  if ( noUnit )
978  temp *= A[j*lda + j];
979  for (i=j-ione; i>-ione; i--) {
980  temp += A[j*lda + i]*x[i];
981  }
982  } else {
983  if ( noUnit )
984  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
985  for (i=j-ione; i>-ione; i--) {
986  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
987  }
988  }
989  x[j] = temp;
990  }
991  } else {
992  jx = kx + (n-ione)*incx;
993  for (j=n-ione; j>-ione; j--) {
994  temp = x[jx];
995  ix = jx;
996  if ( noConj ) {
997  if ( noUnit )
998  temp *= A[j*lda + j];
999  for (i=j-ione; i>-ione; i--) {
1000  ix -= incx;
1001  temp += A[j*lda + i]*x[ix];
1002  }
1003  } else {
1004  if ( noUnit )
1005  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
1006  for (i=j-ione; i>-ione; i--) {
1007  ix -= incx;
1008  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
1009  }
1010  }
1011  x[jx] = temp;
1012  jx -= incx;
1013  }
1014  }
1015  } else {
1016  /* A is a lower triangular matrix */
1017  if (incx == ione) {
1018  for (j=izero; j<n; j++) {
1019  temp = x[j];
1020  if ( noConj ) {
1021  if ( noUnit )
1022  temp *= A[j*lda + j];
1023  for (i=j+ione; i<n; i++) {
1024  temp += A[j*lda + i]*x[i];
1025  }
1026  } else {
1027  if ( noUnit )
1028  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
1029  for (i=j+ione; i<n; i++) {
1030  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
1031  }
1032  }
1033  x[j] = temp;
1034  }
1035  } else {
1036  jx = kx;
1037  for (j=izero; j<n; j++) {
1038  temp = x[jx];
1039  ix = jx;
1040  if ( noConj ) {
1041  if ( noUnit )
1042  temp *= A[j*lda + j];
1043  for (i=j+ione; i<n; i++) {
1044  ix += incx;
1045  temp += A[j*lda + i]*x[ix];
1046  }
1047  } else {
1048  if ( noUnit )
1049  temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
1050  for (i=j+ione; i<n; i++) {
1051  ix += incx;
1052  temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
1053  }
1054  }
1055  x[jx] = temp;
1056  jx += incx;
1057  }
1058  }
1059  } /* if (EUploChar[uplo]=='U') */
1060  } /* if (ETranspChar[trans]=='N') */
1061  } /* if (!BadArgument) */
1062  } /* end TRMV */
1063 
1064  template<typename OrdinalType, typename ScalarType>
1065  template <typename alpha_type, typename x_type, typename y_type>
1066  void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
1067  {
1068  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1069  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1070  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1071  y_type y_zero = ScalarTraits<y_type>::zero();
1072  bool BadArgument = false;
1073 
1074  // Quick return if there is nothing to do!
1075  if( m == izero || n == izero || alpha == alpha_zero ){ return; }
1076 
1077  // Otherwise, we need to check the argument list.
1078  if( m < izero ) {
1079  std::cout << "BLAS::GER Error: M == " << m << std::endl;
1080  BadArgument = true;
1081  }
1082  if( n < izero ) {
1083  std::cout << "BLAS::GER Error: N == " << n << std::endl;
1084  BadArgument = true;
1085  }
1086  if( lda < m ) {
1087  std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
1088  BadArgument = true;
1089  }
1090  if( incx == 0 ) {
1091  std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
1092  BadArgument = true;
1093  }
1094  if( incy == 0 ) {
1095  std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
1096  BadArgument = true;
1097  }
1098 
1099  if(!BadArgument) {
1100  OrdinalType i, j, ix, jy = izero, kx = izero;
1101  ScalarType temp;
1102 
1103  // Set the starting pointers for the vectors x and y if incx/y < 0.
1104  if (incx < izero) { kx = (-m+ione)*incx; }
1105  if (incy < izero) { jy = (-n+ione)*incy; }
1106 
1107  // Start the operations for incx == 1
1108  if( incx == ione ) {
1109  for( j=izero; j<n; j++ ) {
1110  if ( y[jy] != y_zero ) {
1111  temp = alpha*y[jy];
1112  for ( i=izero; i<m; i++ ) {
1113  A[j*lda + i] += x[i]*temp;
1114  }
1115  }
1116  jy += incy;
1117  }
1118  }
1119  else { // Start the operations for incx != 1
1120  for( j=izero; j<n; j++ ) {
1121  if ( y[jy] != y_zero ) {
1122  temp = alpha*y[jy];
1123  ix = kx;
1124  for( i=izero; i<m; i++ ) {
1125  A[j*lda + i] += x[ix]*temp;
1126  ix += incx;
1127  }
1128  }
1129  jy += incy;
1130  }
1131  }
1132  } /* if(!BadArgument) */
1133  } /* end GER */
1134 
1135 //------------------------------------------------------------------------------------------
1136 // LEVEL 3 BLAS ROUTINES
1137 //------------------------------------------------------------------------------------------
1138 
1139  template<typename OrdinalType, typename ScalarType>
1140  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
1141  void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
1142  {
1143 
1144  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1145  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1146  beta_type beta_zero = ScalarTraits<beta_type>::zero();
1147  B_type B_zero = ScalarTraits<B_type>::zero();
1148  ScalarType C_zero = ScalarTraits<ScalarType>::zero();
1149  beta_type beta_one = ScalarTraits<beta_type>::one();
1150  OrdinalType i, j, p;
1151  OrdinalType NRowA = m, NRowB = k;
1152  ScalarType temp;
1153  bool BadArgument = false;
1154  bool conjA = false, conjB = false;
1155 
1156  // Change dimensions of matrix if either matrix is transposed
1157  if( !(ETranspChar[transa]=='N') ) {
1158  NRowA = k;
1159  }
1160  if( !(ETranspChar[transb]=='N') ) {
1161  NRowB = n;
1162  }
1163 
1164  // Quick return if there is nothing to do!
1165  if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
1166  if( m < izero ) {
1167  std::cout << "BLAS::GEMM Error: M == " << m << std::endl;
1168  BadArgument = true;
1169  }
1170  if( n < izero ) {
1171  std::cout << "BLAS::GEMM Error: N == " << n << std::endl;
1172  BadArgument = true;
1173  }
1174  if( k < izero ) {
1175  std::cout << "BLAS::GEMM Error: K == " << k << std::endl;
1176  BadArgument = true;
1177  }
1178  if( lda < NRowA ) {
1179  std::cout << "BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
1180  BadArgument = true;
1181  }
1182  if( ldb < NRowB ) {
1183  std::cout << "BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
1184  BadArgument = true;
1185  }
1186  if( ldc < m ) {
1187  std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
1188  BadArgument = true;
1189  }
1190 
1191  if(!BadArgument) {
1192 
1193  // Determine if this is a conjugate tranpose
1194  conjA = (ETranspChar[transa] == 'C');
1195  conjB = (ETranspChar[transb] == 'C');
1196 
1197  // Only need to scale the resulting matrix C.
1198  if( alpha == alpha_zero ) {
1199  if( beta == beta_zero ) {
1200  for (j=izero; j<n; j++) {
1201  for (i=izero; i<m; i++) {
1202  C[j*ldc + i] = C_zero;
1203  }
1204  }
1205  } else {
1206  for (j=izero; j<n; j++) {
1207  for (i=izero; i<m; i++) {
1208  C[j*ldc + i] *= beta;
1209  }
1210  }
1211  }
1212  return;
1213  }
1214  //
1215  // Now start the operations.
1216  //
1217  if ( ETranspChar[transb]=='N' ) {
1218  if ( ETranspChar[transa]=='N' ) {
1219  // Compute C = alpha*A*B + beta*C
1220  for (j=izero; j<n; j++) {
1221  if( beta == beta_zero ) {
1222  for (i=izero; i<m; i++){
1223  C[j*ldc + i] = C_zero;
1224  }
1225  } else if( beta != beta_one ) {
1226  for (i=izero; i<m; i++){
1227  C[j*ldc + i] *= beta;
1228  }
1229  }
1230  for (p=izero; p<k; p++){
1231  if (B[j*ldb + p] != B_zero ){
1232  temp = alpha*B[j*ldb + p];
1233  for (i=izero; i<m; i++) {
1234  C[j*ldc + i] += temp*A[p*lda + i];
1235  }
1236  }
1237  }
1238  }
1239  } else if ( conjA ) {
1240  // Compute C = alpha*conj(A')*B + beta*C
1241  for (j=izero; j<n; j++) {
1242  for (i=izero; i<m; i++) {
1243  temp = C_zero;
1244  for (p=izero; p<k; p++) {
1245  temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])*B[j*ldb + p];
1246  }
1247  if (beta == beta_zero) {
1248  C[j*ldc + i] = alpha*temp;
1249  } else {
1250  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1251  }
1252  }
1253  }
1254  } else {
1255  // Compute C = alpha*A'*B + beta*C
1256  for (j=izero; j<n; j++) {
1257  for (i=izero; i<m; i++) {
1258  temp = C_zero;
1259  for (p=izero; p<k; p++) {
1260  temp += A[i*lda + p]*B[j*ldb + p];
1261  }
1262  if (beta == beta_zero) {
1263  C[j*ldc + i] = alpha*temp;
1264  } else {
1265  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1266  }
1267  }
1268  }
1269  }
1270  } else if ( ETranspChar[transa]=='N' ) {
1271  if ( conjB ) {
1272  // Compute C = alpha*A*conj(B') + beta*C
1273  for (j=izero; j<n; j++) {
1274  if (beta == beta_zero) {
1275  for (i=izero; i<m; i++) {
1276  C[j*ldc + i] = C_zero;
1277  }
1278  } else if ( beta != beta_one ) {
1279  for (i=izero; i<m; i++) {
1280  C[j*ldc + i] *= beta;
1281  }
1282  }
1283  for (p=izero; p<k; p++) {
1284  if (B[p*ldb + j] != B_zero) {
1285  temp = alpha*ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
1286  for (i=izero; i<m; i++) {
1287  C[j*ldc + i] += temp*A[p*lda + i];
1288  }
1289  }
1290  }
1291  }
1292  } else {
1293  // Compute C = alpha*A*B' + beta*C
1294  for (j=izero; j<n; j++) {
1295  if (beta == beta_zero) {
1296  for (i=izero; i<m; i++) {
1297  C[j*ldc + i] = C_zero;
1298  }
1299  } else if ( beta != beta_one ) {
1300  for (i=izero; i<m; i++) {
1301  C[j*ldc + i] *= beta;
1302  }
1303  }
1304  for (p=izero; p<k; p++) {
1305  if (B[p*ldb + j] != B_zero) {
1306  temp = alpha*B[p*ldb + j];
1307  for (i=izero; i<m; i++) {
1308  C[j*ldc + i] += temp*A[p*lda + i];
1309  }
1310  }
1311  }
1312  }
1313  }
1314  } else if ( conjA ) {
1315  if ( conjB ) {
1316  // Compute C = alpha*conj(A')*conj(B') + beta*C
1317  for (j=izero; j<n; j++) {
1318  for (i=izero; i<m; i++) {
1319  temp = C_zero;
1320  for (p=izero; p<k; p++) {
1321  temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
1322  * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
1323  }
1324  if (beta == beta_zero) {
1325  C[j*ldc + i] = alpha*temp;
1326  } else {
1327  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1328  }
1329  }
1330  }
1331  } else {
1332  // Compute C = alpha*conj(A')*B' + beta*C
1333  for (j=izero; j<n; j++) {
1334  for (i=izero; i<m; i++) {
1335  temp = C_zero;
1336  for (p=izero; p<k; p++) {
1337  temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
1338  * B[p*ldb + j];
1339  }
1340  if (beta == beta_zero) {
1341  C[j*ldc + i] = alpha*temp;
1342  } else {
1343  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1344  }
1345  }
1346  }
1347  }
1348  } else {
1349  if ( conjB ) {
1350  // Compute C = alpha*A'*conj(B') + beta*C
1351  for (j=izero; j<n; j++) {
1352  for (i=izero; i<m; i++) {
1353  temp = C_zero;
1354  for (p=izero; p<k; p++) {
1355  temp += A[i*lda + p]
1356  * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
1357  }
1358  if (beta == beta_zero) {
1359  C[j*ldc + i] = alpha*temp;
1360  } else {
1361  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1362  }
1363  }
1364  }
1365  } else {
1366  // Compute C = alpha*A'*B' + beta*C
1367  for (j=izero; j<n; j++) {
1368  for (i=izero; i<m; i++) {
1369  temp = C_zero;
1370  for (p=izero; p<k; p++) {
1371  temp += A[i*lda + p]*B[p*ldb + j];
1372  }
1373  if (beta == beta_zero) {
1374  C[j*ldc + i] = alpha*temp;
1375  } else {
1376  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1377  }
1378  }
1379  }
1380  } // end if (ETranspChar[transa]=='N') ...
1381  } // end if (ETranspChar[transb]=='N') ...
1382  } // end if (!BadArgument) ...
1383  } // end of GEMM
1384 
1385 
1386  template<typename OrdinalType, typename ScalarType>
1388  SWAP (const OrdinalType n, ScalarType* const x, const OrdinalType incx,
1389  ScalarType* const y, const OrdinalType incy) const
1390  {
1391  if (n <= 0) {
1392  return;
1393  }
1394 
1395  if (incx == 1 && incy == 1) {
1396  for (int i = 0; i < n; ++i) {
1397  ScalarType tmp = x[i];
1398  x[i] = y[i];
1399  y[i] = tmp;
1400  }
1401  return;
1402  }
1403 
1404  int ix = 1;
1405  int iy = 1;
1406  if (incx < 0) {
1407  ix = (1-n) * incx + 1;
1408  }
1409  if (incy < 0) {
1410  iy = (1-n) * incy + 1;
1411  }
1412 
1413  for (int i = 1; i <= n; ++i) {
1414  ScalarType tmp = x[ix - 1];
1415  x[ix - 1] = y[iy - 1];
1416  y[iy - 1] = tmp;
1417  ix += incx;
1418  iy += incy;
1419  }
1420  }
1421 
1422 
1423  template<typename OrdinalType, typename ScalarType>
1424  template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
1425  void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
1426  {
1427  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1428  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1429  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1430  beta_type beta_zero = ScalarTraits<beta_type>::zero();
1431  ScalarType C_zero = ScalarTraits<ScalarType>::zero();
1432  beta_type beta_one = ScalarTraits<beta_type>::one();
1433  OrdinalType i, j, k, NRowA = m;
1434  ScalarType temp1, temp2;
1435  bool BadArgument = false;
1436  bool Upper = (EUploChar[uplo] == 'U');
1437  if (ESideChar[side] == 'R') { NRowA = n; }
1438 
1439  // Quick return.
1440  if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
1441  if( m < izero ) {
1442  std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
1443  BadArgument = true; }
1444  if( n < izero ) {
1445  std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
1446  BadArgument = true; }
1447  if( lda < NRowA ) {
1448  std::cout << "BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
1449  BadArgument = true; }
1450  if( ldb < m ) {
1451  std::cout << "BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
1452  BadArgument = true; }
1453  if( ldc < m ) {
1454  std::cout << "BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
1455  BadArgument = true; }
1456 
1457  if(!BadArgument) {
1458 
1459  // Only need to scale C and return.
1460  if (alpha == alpha_zero) {
1461  if (beta == beta_zero ) {
1462  for (j=izero; j<n; j++) {
1463  for (i=izero; i<m; i++) {
1464  C[j*ldc + i] = C_zero;
1465  }
1466  }
1467  } else {
1468  for (j=izero; j<n; j++) {
1469  for (i=izero; i<m; i++) {
1470  C[j*ldc + i] *= beta;
1471  }
1472  }
1473  }
1474  return;
1475  }
1476 
1477  if ( ESideChar[side] == 'L') {
1478  // Compute C = alpha*A*B + beta*C
1479 
1480  if (Upper) {
1481  // The symmetric part of A is stored in the upper triangular part of the matrix.
1482  for (j=izero; j<n; j++) {
1483  for (i=izero; i<m; i++) {
1484  temp1 = alpha*B[j*ldb + i];
1485  temp2 = C_zero;
1486  for (k=izero; k<i; k++) {
1487  C[j*ldc + k] += temp1*A[i*lda + k];
1488  temp2 += B[j*ldb + k]*A[i*lda + k];
1489  }
1490  if (beta == beta_zero) {
1491  C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1492  } else {
1493  C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1494  }
1495  }
1496  }
1497  } else {
1498  // The symmetric part of A is stored in the lower triangular part of the matrix.
1499  for (j=izero; j<n; j++) {
1500  for (i=m-ione; i>-ione; i--) {
1501  temp1 = alpha*B[j*ldb + i];
1502  temp2 = C_zero;
1503  for (k=i+ione; k<m; k++) {
1504  C[j*ldc + k] += temp1*A[i*lda + k];
1505  temp2 += B[j*ldb + k]*A[i*lda + k];
1506  }
1507  if (beta == beta_zero) {
1508  C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
1509  } else {
1510  C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
1511  }
1512  }
1513  }
1514  }
1515  } else {
1516  // Compute C = alpha*B*A + beta*C.
1517  for (j=izero; j<n; j++) {
1518  temp1 = alpha*A[j*lda + j];
1519  if (beta == beta_zero) {
1520  for (i=izero; i<m; i++) {
1521  C[j*ldc + i] = temp1*B[j*ldb + i];
1522  }
1523  } else {
1524  for (i=izero; i<m; i++) {
1525  C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
1526  }
1527  }
1528  for (k=izero; k<j; k++) {
1529  if (Upper) {
1530  temp1 = alpha*A[j*lda + k];
1531  } else {
1532  temp1 = alpha*A[k*lda + j];
1533  }
1534  for (i=izero; i<m; i++) {
1535  C[j*ldc + i] += temp1*B[k*ldb + i];
1536  }
1537  }
1538  for (k=j+ione; k<n; k++) {
1539  if (Upper) {
1540  temp1 = alpha*A[k*lda + j];
1541  } else {
1542  temp1 = alpha*A[j*lda + k];
1543  }
1544  for (i=izero; i<m; i++) {
1545  C[j*ldc + i] += temp1*B[k*ldb + i];
1546  }
1547  }
1548  }
1549  } // end if (ESideChar[side]=='L') ...
1550  } // end if(!BadArgument) ...
1551 } // end SYMM
1552 
1553  template<typename OrdinalType, typename ScalarType>
1554  template <typename alpha_type, typename A_type, typename beta_type>
1555  void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
1556  {
1557  typedef TypeNameTraits<OrdinalType> OTNT;
1558  typedef TypeNameTraits<ScalarType> STNT;
1559 
1560  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1561  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1562  beta_type beta_zero = ScalarTraits<beta_type>::zero();
1563  beta_type beta_one = ScalarTraits<beta_type>::one();
1564  A_type temp, A_zero = ScalarTraits<A_type>::zero();
1565  ScalarType C_zero = ScalarTraits<ScalarType>::zero();
1566  OrdinalType i, j, l, NRowA = n;
1567  bool BadArgument = false;
1568  bool Upper = (EUploChar[uplo] == 'U');
1569 
1572  && (trans == CONJ_TRANS),
1573  std::logic_error,
1574  "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::SYRK()"
1575  " does not support CONJ_TRANS for complex data types."
1576  );
1577 
1578  // Change dimensions of A matrix is transposed
1579  if( !(ETranspChar[trans]=='N') ) {
1580  NRowA = k;
1581  }
1582 
1583  // Quick return.
1584  if ( n==izero ) { return; }
1585  if ( ( (alpha==alpha_zero) || (k==izero) ) && (beta==beta_one) ) { return; }
1586  if( n < izero ) {
1587  std::cout << "BLAS::SYRK Error: N == "<< n <<std::endl;
1588  BadArgument = true; }
1589  if( k < izero ) {
1590  std::cout << "BLAS::SYRK Error: K == "<< k <<std::endl;
1591  BadArgument = true; }
1592  if( lda < NRowA ) {
1593  std::cout << "BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
1594  BadArgument = true; }
1595  if( ldc < n ) {
1596  std::cout << "BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
1597  BadArgument = true; }
1598 
1599  if(!BadArgument) {
1600 
1601  // Scale C when alpha is zero
1602  if (alpha == alpha_zero) {
1603  if (Upper) {
1604  if (beta==beta_zero) {
1605  for (j=izero; j<n; j++) {
1606  for (i=izero; i<=j; i++) {
1607  C[j*ldc + i] = C_zero;
1608  }
1609  }
1610  }
1611  else {
1612  for (j=izero; j<n; j++) {
1613  for (i=izero; i<=j; i++) {
1614  C[j*ldc + i] *= beta;
1615  }
1616  }
1617  }
1618  }
1619  else {
1620  if (beta==beta_zero) {
1621  for (j=izero; j<n; j++) {
1622  for (i=j; i<n; i++) {
1623  C[j*ldc + i] = C_zero;
1624  }
1625  }
1626  }
1627  else {
1628  for (j=izero; j<n; j++) {
1629  for (i=j; i<n; i++) {
1630  C[j*ldc + i] *= beta;
1631  }
1632  }
1633  }
1634  }
1635  return;
1636  }
1637 
1638  // Now we can start the computation
1639 
1640  if ( ETranspChar[trans]=='N' ) {
1641 
1642  // Form C <- alpha*A*A' + beta*C
1643  if (Upper) {
1644  for (j=izero; j<n; j++) {
1645  if (beta==beta_zero) {
1646  for (i=izero; i<=j; i++) {
1647  C[j*ldc + i] = C_zero;
1648  }
1649  }
1650  else if (beta!=beta_one) {
1651  for (i=izero; i<=j; i++) {
1652  C[j*ldc + i] *= beta;
1653  }
1654  }
1655  for (l=izero; l<k; l++) {
1656  if (A[l*lda + j] != A_zero) {
1657  temp = alpha*A[l*lda + j];
1658  for (i = izero; i <=j; i++) {
1659  C[j*ldc + i] += temp*A[l*lda + i];
1660  }
1661  }
1662  }
1663  }
1664  }
1665  else {
1666  for (j=izero; j<n; j++) {
1667  if (beta==beta_zero) {
1668  for (i=j; i<n; i++) {
1669  C[j*ldc + i] = C_zero;
1670  }
1671  }
1672  else if (beta!=beta_one) {
1673  for (i=j; i<n; i++) {
1674  C[j*ldc + i] *= beta;
1675  }
1676  }
1677  for (l=izero; l<k; l++) {
1678  if (A[l*lda + j] != A_zero) {
1679  temp = alpha*A[l*lda + j];
1680  for (i=j; i<n; i++) {
1681  C[j*ldc + i] += temp*A[l*lda + i];
1682  }
1683  }
1684  }
1685  }
1686  }
1687  }
1688  else {
1689 
1690  // Form C <- alpha*A'*A + beta*C
1691  if (Upper) {
1692  for (j=izero; j<n; j++) {
1693  for (i=izero; i<=j; i++) {
1694  temp = A_zero;
1695  for (l=izero; l<k; l++) {
1696  temp += A[i*lda + l]*A[j*lda + l];
1697  }
1698  if (beta==beta_zero) {
1699  C[j*ldc + i] = alpha*temp;
1700  }
1701  else {
1702  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1703  }
1704  }
1705  }
1706  }
1707  else {
1708  for (j=izero; j<n; j++) {
1709  for (i=j; i<n; i++) {
1710  temp = A_zero;
1711  for (l=izero; l<k; ++l) {
1712  temp += A[i*lda + l]*A[j*lda + l];
1713  }
1714  if (beta==beta_zero) {
1715  C[j*ldc + i] = alpha*temp;
1716  }
1717  else {
1718  C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
1719  }
1720  }
1721  }
1722  }
1723  }
1724  } /* if (!BadArgument) */
1725  } /* END SYRK */
1726 
1727  template<typename OrdinalType, typename ScalarType>
1728  template <typename alpha_type, typename A_type>
1729  void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
1730  {
1731  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1732  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1733  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1734  A_type A_zero = ScalarTraits<A_type>::zero();
1735  ScalarType B_zero = ScalarTraits<ScalarType>::zero();
1736  ScalarType one = ScalarTraits<ScalarType>::one();
1737  OrdinalType i, j, k, NRowA = m;
1738  ScalarType temp;
1739  bool BadArgument = false;
1740  bool LSide = (ESideChar[side] == 'L');
1741  bool noUnit = (EDiagChar[diag] == 'N');
1742  bool Upper = (EUploChar[uplo] == 'U');
1743  bool noConj = (ETranspChar[transa] == 'T');
1744 
1745  if(!LSide) { NRowA = n; }
1746 
1747  // Quick return.
1748  if (n==izero || m==izero) { return; }
1749  if( m < izero ) {
1750  std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
1751  BadArgument = true; }
1752  if( n < izero ) {
1753  std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
1754  BadArgument = true; }
1755  if( lda < NRowA ) {
1756  std::cout << "BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
1757  BadArgument = true; }
1758  if( ldb < m ) {
1759  std::cout << "BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
1760  BadArgument = true; }
1761 
1762  if(!BadArgument) {
1763 
1764  // B only needs to be zeroed out.
1765  if( alpha == alpha_zero ) {
1766  for( j=izero; j<n; j++ ) {
1767  for( i=izero; i<m; i++ ) {
1768  B[j*ldb + i] = B_zero;
1769  }
1770  }
1771  return;
1772  }
1773 
1774  // Start the computations.
1775  if ( LSide ) {
1776  // A is on the left side of B.
1777 
1778  if ( ETranspChar[transa]=='N' ) {
1779  // Compute B = alpha*A*B
1780 
1781  if ( Upper ) {
1782  // A is upper triangular
1783  for( j=izero; j<n; j++ ) {
1784  for( k=izero; k<m; k++) {
1785  if ( B[j*ldb + k] != B_zero ) {
1786  temp = alpha*B[j*ldb + k];
1787  for( i=izero; i<k; i++ ) {
1788  B[j*ldb + i] += temp*A[k*lda + i];
1789  }
1790  if ( noUnit )
1791  temp *=A[k*lda + k];
1792  B[j*ldb + k] = temp;
1793  }
1794  }
1795  }
1796  } else {
1797  // A is lower triangular
1798  for( j=izero; j<n; j++ ) {
1799  for( k=m-ione; k>-ione; k-- ) {
1800  if( B[j*ldb + k] != B_zero ) {
1801  temp = alpha*B[j*ldb + k];
1802  B[j*ldb + k] = temp;
1803  if ( noUnit )
1804  B[j*ldb + k] *= A[k*lda + k];
1805  for( i=k+ione; i<m; i++ ) {
1806  B[j*ldb + i] += temp*A[k*lda + i];
1807  }
1808  }
1809  }
1810  }
1811  }
1812  } else {
1813  // Compute B = alpha*A'*B or B = alpha*conj(A')*B
1814  if( Upper ) {
1815  for( j=izero; j<n; j++ ) {
1816  for( i=m-ione; i>-ione; i-- ) {
1817  temp = B[j*ldb + i];
1818  if ( noConj ) {
1819  if( noUnit )
1820  temp *= A[i*lda + i];
1821  for( k=izero; k<i; k++ ) {
1822  temp += A[i*lda + k]*B[j*ldb + k];
1823  }
1824  } else {
1825  if( noUnit )
1826  temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
1827  for( k=izero; k<i; k++ ) {
1828  temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
1829  }
1830  }
1831  B[j*ldb + i] = alpha*temp;
1832  }
1833  }
1834  } else {
1835  for( j=izero; j<n; j++ ) {
1836  for( i=izero; i<m; i++ ) {
1837  temp = B[j*ldb + i];
1838  if ( noConj ) {
1839  if( noUnit )
1840  temp *= A[i*lda + i];
1841  for( k=i+ione; k<m; k++ ) {
1842  temp += A[i*lda + k]*B[j*ldb + k];
1843  }
1844  } else {
1845  if( noUnit )
1846  temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
1847  for( k=i+ione; k<m; k++ ) {
1848  temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
1849  }
1850  }
1851  B[j*ldb + i] = alpha*temp;
1852  }
1853  }
1854  }
1855  }
1856  } else {
1857  // A is on the right hand side of B.
1858 
1859  if( ETranspChar[transa] == 'N' ) {
1860  // Compute B = alpha*B*A
1861 
1862  if( Upper ) {
1863  // A is upper triangular.
1864  for( j=n-ione; j>-ione; j-- ) {
1865  temp = alpha;
1866  if( noUnit )
1867  temp *= A[j*lda + j];
1868  for( i=izero; i<m; i++ ) {
1869  B[j*ldb + i] *= temp;
1870  }
1871  for( k=izero; k<j; k++ ) {
1872  if( A[j*lda + k] != A_zero ) {
1873  temp = alpha*A[j*lda + k];
1874  for( i=izero; i<m; i++ ) {
1875  B[j*ldb + i] += temp*B[k*ldb + i];
1876  }
1877  }
1878  }
1879  }
1880  } else {
1881  // A is lower triangular.
1882  for( j=izero; j<n; j++ ) {
1883  temp = alpha;
1884  if( noUnit )
1885  temp *= A[j*lda + j];
1886  for( i=izero; i<m; i++ ) {
1887  B[j*ldb + i] *= temp;
1888  }
1889  for( k=j+ione; k<n; k++ ) {
1890  if( A[j*lda + k] != A_zero ) {
1891  temp = alpha*A[j*lda + k];
1892  for( i=izero; i<m; i++ ) {
1893  B[j*ldb + i] += temp*B[k*ldb + i];
1894  }
1895  }
1896  }
1897  }
1898  }
1899  } else {
1900  // Compute B = alpha*B*A' or B = alpha*B*conj(A')
1901 
1902  if( Upper ) {
1903  for( k=izero; k<n; k++ ) {
1904  for( j=izero; j<k; j++ ) {
1905  if( A[k*lda + j] != A_zero ) {
1906  if ( noConj )
1907  temp = alpha*A[k*lda + j];
1908  else
1909  temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
1910  for( i=izero; i<m; i++ ) {
1911  B[j*ldb + i] += temp*B[k*ldb + i];
1912  }
1913  }
1914  }
1915  temp = alpha;
1916  if( noUnit ) {
1917  if ( noConj )
1918  temp *= A[k*lda + k];
1919  else
1920  temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
1921  }
1922  if( temp != one ) {
1923  for( i=izero; i<m; i++ ) {
1924  B[k*ldb + i] *= temp;
1925  }
1926  }
1927  }
1928  } else {
1929  for( k=n-ione; k>-ione; k-- ) {
1930  for( j=k+ione; j<n; j++ ) {
1931  if( A[k*lda + j] != A_zero ) {
1932  if ( noConj )
1933  temp = alpha*A[k*lda + j];
1934  else
1935  temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
1936  for( i=izero; i<m; i++ ) {
1937  B[j*ldb + i] += temp*B[k*ldb + i];
1938  }
1939  }
1940  }
1941  temp = alpha;
1942  if( noUnit ) {
1943  if ( noConj )
1944  temp *= A[k*lda + k];
1945  else
1946  temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
1947  }
1948  if( temp != one ) {
1949  for( i=izero; i<m; i++ ) {
1950  B[k*ldb + i] *= temp;
1951  }
1952  }
1953  }
1954  }
1955  } // end if( ETranspChar[transa] == 'N' ) ...
1956  } // end if ( LSide ) ...
1957  } // end if (!BadArgument)
1958  } // end TRMM
1959 
1960  template<typename OrdinalType, typename ScalarType>
1961  template <typename alpha_type, typename A_type>
1962  void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
1963  {
1964  OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
1965  OrdinalType ione = OrdinalTraits<OrdinalType>::one();
1966  alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
1967  A_type A_zero = ScalarTraits<A_type>::zero();
1968  ScalarType B_zero = ScalarTraits<ScalarType>::zero();
1969  alpha_type alpha_one = ScalarTraits<alpha_type>::one();
1970  ScalarType B_one = ScalarTraits<ScalarType>::one();
1971  ScalarType temp;
1972  OrdinalType NRowA = m;
1973  bool BadArgument = false;
1974  bool noUnit = (EDiagChar[diag]=='N');
1975  bool noConj = (ETranspChar[transa] == 'T');
1976 
1977  if (!(ESideChar[side] == 'L')) { NRowA = n; }
1978 
1979  // Quick return.
1980  if (n == izero || m == izero) { return; }
1981  if( m < izero ) {
1982  std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
1983  BadArgument = true; }
1984  if( n < izero ) {
1985  std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
1986  BadArgument = true; }
1987  if( lda < NRowA ) {
1988  std::cout << "BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
1989  BadArgument = true; }
1990  if( ldb < m ) {
1991  std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
1992  BadArgument = true; }
1993 
1994  if(!BadArgument)
1995  {
1996  int i, j, k;
1997  // Set the solution to the zero vector.
1998  if(alpha == alpha_zero) {
1999  for(j = izero; j < n; j++) {
2000  for( i = izero; i < m; i++) {
2001  B[j*ldb+i] = B_zero;
2002  }
2003  }
2004  }
2005  else
2006  { // Start the operations.
2007  if(ESideChar[side] == 'L') {
2008  //
2009  // Perform computations for OP(A)*X = alpha*B
2010  //
2011  if(ETranspChar[transa] == 'N') {
2012  //
2013  // Compute B = alpha*inv( A )*B
2014  //
2015  if(EUploChar[uplo] == 'U') {
2016  // A is upper triangular.
2017  for(j = izero; j < n; j++) {
2018  // Perform alpha*B if alpha is not 1.
2019  if(alpha != alpha_one) {
2020  for( i = izero; i < m; i++) {
2021  B[j*ldb+i] *= alpha;
2022  }
2023  }
2024  // Perform a backsolve for column j of B.
2025  for(k = (m - ione); k > -ione; k--) {
2026  // If this entry is zero, we don't have to do anything.
2027  if (B[j*ldb + k] != B_zero) {
2028  if ( noUnit ) {
2029  B[j*ldb + k] /= A[k*lda + k];
2030  }
2031  for(i = izero; i < k; i++) {
2032  B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2033  }
2034  }
2035  }
2036  }
2037  }
2038  else
2039  { // A is lower triangular.
2040  for(j = izero; j < n; j++) {
2041  // Perform alpha*B if alpha is not 1.
2042  if(alpha != alpha_one) {
2043  for( i = izero; i < m; i++) {
2044  B[j*ldb+i] *= alpha;
2045  }
2046  }
2047  // Perform a forward solve for column j of B.
2048  for(k = izero; k < m; k++) {
2049  // If this entry is zero, we don't have to do anything.
2050  if (B[j*ldb + k] != B_zero) {
2051  if ( noUnit ) {
2052  B[j*ldb + k] /= A[k*lda + k];
2053  }
2054  for(i = k+ione; i < m; i++) {
2055  B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
2056  }
2057  }
2058  }
2059  }
2060  } // end if (uplo == 'U')
2061  } // if (transa =='N')
2062  else {
2063  //
2064  // Compute B = alpha*inv( A' )*B
2065  // or B = alpha*inv( conj(A') )*B
2066  //
2067  if(EUploChar[uplo] == 'U') {
2068  // A is upper triangular.
2069  for(j = izero; j < n; j++) {
2070  for( i = izero; i < m; i++) {
2071  temp = alpha*B[j*ldb+i];
2072  if ( noConj ) {
2073  for(k = izero; k < i; k++) {
2074  temp -= A[i*lda + k] * B[j*ldb + k];
2075  }
2076  if ( noUnit ) {
2077  temp /= A[i*lda + i];
2078  }
2079  } else {
2080  for(k = izero; k < i; k++) {
2081  temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
2082  * B[j*ldb + k];
2083  }
2084  if ( noUnit ) {
2085  temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
2086  }
2087  }
2088  B[j*ldb + i] = temp;
2089  }
2090  }
2091  }
2092  else
2093  { // A is lower triangular.
2094  for(j = izero; j < n; j++) {
2095  for(i = (m - ione) ; i > -ione; i--) {
2096  temp = alpha*B[j*ldb+i];
2097  if ( noConj ) {
2098  for(k = i+ione; k < m; k++) {
2099  temp -= A[i*lda + k] * B[j*ldb + k];
2100  }
2101  if ( noUnit ) {
2102  temp /= A[i*lda + i];
2103  }
2104  } else {
2105  for(k = i+ione; k < m; k++) {
2106  temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
2107  * B[j*ldb + k];
2108  }
2109  if ( noUnit ) {
2110  temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
2111  }
2112  }
2113  B[j*ldb + i] = temp;
2114  }
2115  }
2116  }
2117  }
2118  } // if (side == 'L')
2119  else {
2120  // side == 'R'
2121  //
2122  // Perform computations for X*OP(A) = alpha*B
2123  //
2124  if (ETranspChar[transa] == 'N') {
2125  //
2126  // Compute B = alpha*B*inv( A )
2127  //
2128  if(EUploChar[uplo] == 'U') {
2129  // A is upper triangular.
2130  // Perform a backsolve for column j of B.
2131  for(j = izero; j < n; j++) {
2132  // Perform alpha*B if alpha is not 1.
2133  if(alpha != alpha_one) {
2134  for( i = izero; i < m; i++) {
2135  B[j*ldb+i] *= alpha;
2136  }
2137  }
2138  for(k = izero; k < j; k++) {
2139  // If this entry is zero, we don't have to do anything.
2140  if (A[j*lda + k] != A_zero) {
2141  for(i = izero; i < m; i++) {
2142  B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2143  }
2144  }
2145  }
2146  if ( noUnit ) {
2147  temp = B_one/A[j*lda + j];
2148  for(i = izero; i < m; i++) {
2149  B[j*ldb + i] *= temp;
2150  }
2151  }
2152  }
2153  }
2154  else
2155  { // A is lower triangular.
2156  for(j = (n - ione); j > -ione; j--) {
2157  // Perform alpha*B if alpha is not 1.
2158  if(alpha != alpha_one) {
2159  for( i = izero; i < m; i++) {
2160  B[j*ldb+i] *= alpha;
2161  }
2162  }
2163  // Perform a forward solve for column j of B.
2164  for(k = j+ione; k < n; k++) {
2165  // If this entry is zero, we don't have to do anything.
2166  if (A[j*lda + k] != A_zero) {
2167  for(i = izero; i < m; i++) {
2168  B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
2169  }
2170  }
2171  }
2172  if ( noUnit ) {
2173  temp = B_one/A[j*lda + j];
2174  for(i = izero; i < m; i++) {
2175  B[j*ldb + i] *= temp;
2176  }
2177  }
2178  }
2179  } // end if (uplo == 'U')
2180  } // if (transa =='N')
2181  else {
2182  //
2183  // Compute B = alpha*B*inv( A' )
2184  // or B = alpha*B*inv( conj(A') )
2185  //
2186  if(EUploChar[uplo] == 'U') {
2187  // A is upper triangular.
2188  for(k = (n - ione); k > -ione; k--) {
2189  if ( noUnit ) {
2190  if ( noConj )
2191  temp = B_one/A[k*lda + k];
2192  else
2193  temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
2194  for(i = izero; i < m; i++) {
2195  B[k*ldb + i] *= temp;
2196  }
2197  }
2198  for(j = izero; j < k; j++) {
2199  if (A[k*lda + j] != A_zero) {
2200  if ( noConj )
2201  temp = A[k*lda + j];
2202  else
2203  temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
2204  for(i = izero; i < m; i++) {
2205  B[j*ldb + i] -= temp*B[k*ldb + i];
2206  }
2207  }
2208  }
2209  if (alpha != alpha_one) {
2210  for (i = izero; i < m; i++) {
2211  B[k*ldb + i] *= alpha;
2212  }
2213  }
2214  }
2215  }
2216  else
2217  { // A is lower triangular.
2218  for(k = izero; k < n; k++) {
2219  if ( noUnit ) {
2220  if ( noConj )
2221  temp = B_one/A[k*lda + k];
2222  else
2223  temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
2224  for (i = izero; i < m; i++) {
2225  B[k*ldb + i] *= temp;
2226  }
2227  }
2228  for(j = k+ione; j < n; j++) {
2229  if(A[k*lda + j] != A_zero) {
2230  if ( noConj )
2231  temp = A[k*lda + j];
2232  else
2233  temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
2234  for(i = izero; i < m; i++) {
2235  B[j*ldb + i] -= temp*B[k*ldb + i];
2236  }
2237  }
2238  }
2239  if (alpha != alpha_one) {
2240  for (i = izero; i < m; i++) {
2241  B[k*ldb + i] *= alpha;
2242  }
2243  }
2244  }
2245  }
2246  }
2247  }
2248  }
2249  }
2250  }
2251 
2252  // Explicit instantiation for template<int,float>
2253 
2254  template <>
2255  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, float>
2256  {
2257  public:
2258  inline BLAS(void) {}
2259  inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {}
2260  inline virtual ~BLAS(void) {}
2261  void ROTG(float* da, float* db, float* c, float* s) const;
2262  void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const;
2263  float ASUM(const int n, const float* x, const int incx) const;
2264  void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const;
2265  void COPY(const int n, const float* x, const int incx, float* y, const int incy) const;
2266  float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const;
2267  float NRM2(const int n, const float* x, const int incx) const;
2268  void SCAL(const int n, const float alpha, float* x, const int incx) const;
2269  int IAMAX(const int n, const float* x, const int incx) const;
2270  void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const;
2271  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const;
2272  void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const;
2273  void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const;
2274  void SWAP(const int n, float* const x, const int incx, float* const y, const int incy) const;
2275  void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const;
2276  void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const;
2277  void HERK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const;
2278  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
2279  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
2280  };
2281 
2282  // Explicit instantiation for template<int,double>
2283 
2284  template<>
2285  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, double>
2286  {
2287  public:
2288  inline BLAS(void) {}
2289  inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {}
2290  inline virtual ~BLAS(void) {}
2291  void ROTG(double* da, double* db, double* c, double* s) const;
2292  void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const;
2293  double ASUM(const int n, const double* x, const int incx) const;
2294  void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const;
2295  void COPY(const int n, const double* x, const int incx, double* y, const int incy) const;
2296  double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const;
2297  double NRM2(const int n, const double* x, const int incx) const;
2298  void SCAL(const int n, const double alpha, double* x, const int incx) const;
2299  int IAMAX(const int n, const double* x, const int incx) const;
2300  void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const;
2301  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const;
2302  void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const;
2303  void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const;
2304  void SWAP(const int n, double* const x, const int incx, double* const y, const int incy) const;
2305  void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const;
2306  void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const;
2307  void HERK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const;
2308  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
2309  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
2310  };
2311 
2312  // Explicit instantiation for template<int,complex<float> >
2313 
2314  template<>
2315  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
2316  {
2317  public:
2318  inline BLAS(void) {}
2319  inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {}
2320  inline virtual ~BLAS(void) {}
2321  void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
2322  void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const;
2323  float ASUM(const int n, const std::complex<float>* x, const int incx) const;
2324  void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
2325  void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
2326  std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const;
2327  float NRM2(const int n, const std::complex<float>* x, const int incx) const;
2328  void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const;
2329  int IAMAX(const int n, const std::complex<float>* x, const int incx) const;
2330  void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const;
2331  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const;
2332  void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const;
2333  void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
2334  void SWAP(const int n, std::complex<float>* const x, const int incx, std::complex<float>* const y, const int incy) const;
2335  void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const;
2336  void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
2337  void HERK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
2338  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
2339  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
2340  };
2341 
2342  // Explicit instantiation for template<int,complex<double> >
2343 
2344  template<>
2345  class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
2346  {
2347  public:
2348  inline BLAS(void) {}
2349  inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {}
2350  inline virtual ~BLAS(void) {}
2351  void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
2352  void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const;
2353  double ASUM(const int n, const std::complex<double>* x, const int incx) const;
2354  void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
2355  void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
2356  std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const;
2357  double NRM2(const int n, const std::complex<double>* x, const int incx) const;
2358  void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const;
2359  int IAMAX(const int n, const std::complex<double>* x, const int incx) const;
2360  void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const;
2361  void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const;
2362  void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const;
2363  void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
2364  void SWAP(const int n, std::complex<double>* const x, const int incx, std::complex<double>* const y, const int incy) const;
2365  void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const;
2366  void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
2367  void HERK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
2368  void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
2369  void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
2370  };
2371 
2372 } // namespace Teuchos
2373 
2374 #endif // _TEUCHOS_BLAS_HPP_
OrdinalType IAMAX(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Return the index of the element of x with the maximum magnitude.
static T one()
Returns representation of one for this ordinal type.
BLAS(const BLAS< OrdinalType, ScalarType > &)
Copy constructor.
Enumerated types for BLAS input characters.
virtual ~DefaultBLASImpl(void)
Destructor.
T magnitudeType
Mandatory typedef for result of magnitude.
virtual ~BLAS(void)
Destructor.
static T zero()
Returns representation of zero for this ordinal type.
void COPY(const OrdinalType n, const ScalarType *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
Copy the vector x to the vector y.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
BLAS(void)
Default constructor.
DefaultBLASImpl(void)
Default constructor.
Teuchos header file which uses auto-configuration information to include necessary C++ headers...
Templated BLAS wrapper.
details::GivensRotator< ScalarType >::c_type rotg_c_type
The type used for c in ROTG.
void SWAP(const OrdinalType n, ScalarType *const x, const OrdinalType incx, ScalarType *const y, const OrdinalType incy) const
Swap the entries of x and y.
This structure defines some basic traits for a scalar field type.
void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType *x, const OrdinalType incx) const
Scale the vector x by the constant alpha.
DefaultBLASImpl(const DefaultBLASImpl< OrdinalType, ScalarType > &)
Copy constructor.
static T conjugate(T a)
Returns the conjugate of the scalar type a.
void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const x_type *x, const OrdinalType incx, const beta_type beta, ScalarType *y, const OrdinalType incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A&#39;*x+beta*y where A is a ge...
Teuchos implementation details.
void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type *A, const OrdinalType lda, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A&#39;+beta*C or C <- alpha*A&#39;*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
General matrix-matrix multiply.
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
void AXPY(const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
Perform the operation: y <- y+alpha*x.
void ROT(const OrdinalType n, ScalarType *dx, const OrdinalType incx, ScalarType *dy, const OrdinalType incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
static magnitudeType magnitude(T a)
Returns the magnitudeType of the scalar type a.
Defines basic traits for the ordinal field type.
Default traits class that just returns typeid(T).name().
Default implementation for BLAS routines.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type *A, const OrdinalType lda, ScalarType *x, const OrdinalType incx) const
Performs the matrix-vector operation: x <- A*x or x <- A&#39;*x where A is a unit/non-unit n by n upper/l...
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy, ScalarType *A, const OrdinalType lda) const
Performs the rank 1 operation: A <- alpha*x*y&#39;+A.
Defines basic traits for the scalar field type.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Sum the absolute values of the entries of x.
static T zero()
Returns representation of zero for this scalar type.
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
ScalarType DOT(const OrdinalType n, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy) const
Form the dot product of the vectors x and y.
static T one()
Returns representation of one for this scalar type.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Compute the 2-norm of the vector x.