Go to the documentation of this file.
18 #ifndef __PASO_BLOCKOPS_H__
19 #define __PASO_BLOCKOPS_H__
26 #ifdef ESYS_HAVE_LAPACK
27 #ifdef ESYS_MKL_LAPACK
28 #include <mkl_lapack.h>
29 #include <mkl_cblas.h>
42 memcpy((
void*)R, (
void*)
V,
N*
sizeof(
double));
46 inline void BlockOps_SMV_2(
double* R,
const double* mat,
const double*
V)
48 const double S1 =
V[0];
49 const double S2 =
V[1];
50 const double A11 = mat[0];
51 const double A12 = mat[2];
52 const double A21 = mat[1];
53 const double A22 = mat[3];
54 R[0] -= A11 * S1 + A12 * S2;
55 R[1] -= A21 * S1 + A22 * S2;
59 inline void BlockOps_SMV_3(
double* R,
const double* mat,
const double*
V)
61 const double S1 =
V[0];
62 const double S2 =
V[1];
63 const double S3 =
V[2];
64 const double A11 = mat[0];
65 const double A21 = mat[1];
66 const double A31 = mat[2];
67 const double A12 = mat[3];
68 const double A22 = mat[4];
69 const double A32 = mat[5];
70 const double A13 = mat[6];
71 const double A23 = mat[7];
72 const double A33 = mat[8];
73 R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
74 R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
75 R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
78 #define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.")
83 #ifdef ESYS_HAVE_LAPACK
84 cblas_dgemv(CblasColMajor,CblasNoTrans,
N,
N, -1., mat,
N,
V, 1, 1., R, 1);
92 #ifdef ESYS_HAVE_LAPACK
93 cblas_dgemv(CblasColMajor,CblasNoTrans,
N,
N, 1., mat,
N,
V, 1, 0., R, 1);
101 const double A11 = A[0];
102 const double A12 = A[2];
103 const double A21 = A[1];
104 const double A22 = A[3];
105 double D = A11*A22-A12*A21;
106 if (std::abs(D) > 0) {
117 inline void BlockOps_invM_3(
double* invA,
const double* A,
int* failed)
119 const double A11 = A[0];
120 const double A21 = A[1];
121 const double A31 = A[2];
122 const double A12 = A[3];
123 const double A22 = A[4];
124 const double A32 = A[5];
125 const double A13 = A[6];
126 const double A23 = A[7];
127 const double A33 = A[8];
128 double D = A11*(A22*A33-A23*A32) +
129 A12*(A31*A23-A21*A33) +
130 A13*(A21*A32-A31*A22);
131 if (std::abs(D) > 0) {
133 invA[0] = (A22*A33-A23*A32)*D;
134 invA[1] = (A31*A23-A21*A33)*D;
135 invA[2] = (A21*A32-A31*A22)*D;
136 invA[3] = (A13*A32-A12*A33)*D;
137 invA[4] = (A11*A33-A31*A13)*D;
138 invA[5] = (A12*A31-A11*A32)*D;
139 invA[6] = (A12*A23-A13*A22)*D;
140 invA[7] = (A13*A21-A11*A23)*D;
141 invA[8] = (A11*A22-A12*A21)*D;
150 #ifdef ESYS_HAVE_LAPACK
151 #ifdef ESYS_MKL_LAPACK
153 dgetrf(&
N, &
N, mat, &
N, pivot, &res);
157 int res = clapack_dgetrf(CblasColMajor,
N,
N, mat,
N, pivot);
160 #endif // ESYS_MKL_LAPACK
169 #ifdef ESYS_HAVE_LAPACK
170 #ifdef ESYS_MKL_LAPACK
173 dgetrs(
"N", &
N, &ONE, mat, &
N, pivot, X, &
N, &res);
177 int res = clapack_dgetrs(CblasColMajor, CblasNoTrans,
N, 1, mat,
N, pivot, X,
N);
180 #endif // ESYS_MKL_LAPACK
189 const double S1 =
V[0];
190 const double S2 =
V[1];
191 const double A11 = mat[0];
192 const double A12 = mat[2];
193 const double A21 = mat[1];
194 const double A22 = mat[3];
195 V[0] = A11 * S1 + A12 * S2;
196 V[1] = A21 * S1 + A22 * S2;
202 const double S1 =
V[0];
203 const double S2 =
V[1];
204 const double S3 =
V[2];
205 const double A11 = mat[0];
206 const double A21 = mat[1];
207 const double A31 = mat[2];
208 const double A12 = mat[3];
209 const double A22 = mat[4];
210 const double A32 = mat[5];
211 const double A13 = mat[6];
212 const double A23 = mat[7];
213 const double A33 = mat[8];
214 V[0] = A11 * S1 + A12 * S2 + A13 * S3;
215 V[1] = A21 * S1 + A22 * S2 + A23 * S3;
216 V[2] = A31 * S1 + A32 * S2 + A33 * S3;
223 #pragma omp parallel for
224 for (
dim_t i=0; i<n; ++i)
226 }
else if (n_block == 2) {
227 #pragma omp parallel for
228 for (
dim_t i=0; i<n; ++i)
230 }
else if (n_block == 3) {
231 #pragma omp parallel for
232 for (
dim_t i=0; i<n; ++i)
236 #pragma omp parallel for
237 for (
dim_t i=0; i<n; ++i) {
238 const dim_t block_size = n_block*n_block;
239 BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
242 throw PasoException(
"BlockOps_solveAll: solution failed.");
249 #endif // __PASO_BLOCKOPS_H__
static dim_t N
Definition: SparseMatrix_saveHB.cpp:50
void BlockOps_MViP_3(const double *mat, double *V)
inplace matrix vector product - order 3
Definition: BlockOps.h:212
void BlockOps_invM_2(double *invA, const double *A, int *failed)
Definition: BlockOps.h:111
void BlockOps_invM_N(dim_t N, double *mat, index_t *pivot, int *failed)
LU factorization of NxN matrix mat with partial pivoting.
Definition: BlockOps.h:160
void BlockOps_solveAll(dim_t n_block, dim_t n, double *D, index_t *pivot, double *x)
Definition: BlockOps.h:231
index_t dim_t
Definition: DataTypes.h:90
void BlockOps_SMV_2(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 2x2
Definition: BlockOps.h:58
void BlockOps_invM_3(double *invA, const double *A, int *failed)
Definition: BlockOps.h:129
void BlockOps_solve_N(dim_t N, double *X, double *mat, index_t *pivot, int *failed)
solves system of linear equations A*X=B
Definition: BlockOps.h:179
#define PASO_MISSING_CLAPACK
Definition: BlockOps.h:90
#define V(_K_, _I_)
Definition: ShapeFunctions.cpp:135
void BlockOps_Cpy_N(dim_t N, double *R, const double *V)
Definition: BlockOps.h:52
void BlockOps_MV_N(dim_t N, double *R, const double *mat, const double *V)
Definition: BlockOps.h:102
int index_t
type for array/matrix indices used both globally and on each rank
Definition: DataTypes.h:85
void BlockOps_SMV_3(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 3x3
Definition: BlockOps.h:71
void BlockOps_MViP_2(const double *mat, double *V)
inplace matrix vector product - order 2
Definition: BlockOps.h:199
Definition: BiCGStab.cpp:26
void BlockOps_SMV_N(dim_t N, double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - NxN
Definition: BlockOps.h:93