Actual source code: bvec1.c

  2: /*
  3:    Defines the BLAS based vector operations. Code shared by parallel
  4:   and sequential vectors.
  5: */

  7: #include <private/vecimpl.h> 
  8: #include <../src/vec/vec/impls/dvecimpl.h> 
  9: #include <petscblaslapack.h>

 13: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
 14: {
 15:   const PetscScalar *ya,*xa;
 16:   PetscErrorCode    ierr;
 17: #if !defined(PETSC_USE_COMPLEX)
 18:   PetscBLASInt      one = 1,bn = PetscBLASIntCast(xin->map->n);
 19: #endif

 22:   VecGetArrayRead(xin,&xa);
 23:   VecGetArrayRead(yin,&ya);
 24: #if defined(PETSC_USE_COMPLEX)
 25:   /* cannot use BLAS dot for complex because compiler/linker is 
 26:      not happy about returning a double complex */
 27:   {
 28:     PetscInt    i;
 29:     PetscScalar sum = 0.0;
 30:     for (i=0; i<xin->map->n; i++) {
 31:       sum += xa[i]*PetscConj(ya[i]);
 32:     }
 33:     *z = sum;
 34:   }
 35: #else
 36:   *z = BLASdot_(&bn,xa,&one,ya,&one);
 37: #endif
 38:   VecRestoreArrayRead(xin,&xa);
 39:   VecRestoreArrayRead(yin,&ya);
 40:   if (xin->map->n > 0) {
 41:     PetscLogFlops(2.0*xin->map->n-1);
 42:   }
 43:   return(0);
 44: }

 48: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
 49: {
 50:   const PetscScalar *ya,*xa;
 51:   PetscErrorCode    ierr;
 52: #if !defined(PETSC_USE_COMPLEX)
 53:   PetscBLASInt      one = 1, bn = PetscBLASIntCast(xin->map->n);
 54: #endif

 57:   VecGetArrayRead(xin,&xa);
 58:   VecGetArrayRead(yin,&ya);
 59: #if defined(PETSC_USE_COMPLEX)
 60:   /* cannot use BLAS dot for complex because compiler/linker is 
 61:      not happy about returning a double complex */
 62:  {
 63:    PetscInt    i;
 64:    PetscScalar sum = 0.0;
 65:    for (i=0; i<xin->map->n; i++) {
 66:      sum += xa[i]*ya[i];
 67:    }
 68:    *z = sum;
 69:  }
 70: #else
 71:   *z = BLASdot_(&bn,xa,&one,ya,&one);
 72: #endif
 73:   VecRestoreArrayRead(xin,&xa);
 74:   VecRestoreArrayRead(yin,&ya);
 75:   if (xin->map->n > 0) {
 76:     PetscLogFlops(2.0*xin->map->n-1);
 77:   }
 78:   return(0);
 79: }

 83: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
 84: {
 86:   PetscBLASInt   one = 1,bn = PetscBLASIntCast(xin->map->n);

 89:   if (alpha == (PetscScalar)0.0) {
 90:     VecSet_Seq(xin,alpha);
 91:   } else if (alpha != (PetscScalar)1.0) {
 92:     PetscScalar a = alpha,*xarray;
 93:     VecGetArray(xin,&xarray);
 94:     BLASscal_(&bn,&a,xarray,&one);
 95:     VecRestoreArray(xin,&xarray);
 96:   }
 97:   PetscLogFlops(xin->map->n);
 98:   return(0);
 99: }


104: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
105: {
106:   PetscErrorCode    ierr;
107:   const PetscScalar *xarray;
108:   PetscScalar       *yarray;
109:   PetscBLASInt      one = 1,bn = PetscBLASIntCast(yin->map->n);

112:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
113:   if (alpha != (PetscScalar)0.0) {
114:     VecGetArrayRead(xin,&xarray);
115:     VecGetArray(yin,&yarray);
116:     BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one);
117:     VecRestoreArrayRead(xin,&xarray);
118:     VecRestoreArray(yin,&yarray);
119:     PetscLogFlops(2.0*yin->map->n);
120:   }
121:   return(0);
122: }

126: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
127: {
128:   PetscErrorCode    ierr;
129:   PetscInt          n = yin->map->n,i;
130:   const PetscScalar *xx;
131:   PetscScalar       *yy,a = alpha,b = beta;

134:   if (a == (PetscScalar)0.0) {
135:     VecScale_Seq(yin,beta);
136:   } else if (b == (PetscScalar)1.0) {
137:     VecAXPY_Seq(yin,alpha,xin);
138:   } else if (a == (PetscScalar)1.0) {
139:     VecAYPX_Seq(yin,beta,xin);
140:   } else if (b == (PetscScalar)0.0) {
141:     VecGetArrayRead(xin,&xx);
142:     VecGetArray(yin,(PetscScalar**)&yy);
143:     for (i=0; i<n; i++) {
144:       yy[i] = a*xx[i];
145:     }
146:     VecRestoreArrayRead(xin,&xx);
147:     VecRestoreArray(yin,(PetscScalar**)&yy);
148:     PetscLogFlops(xin->map->n);
149:   } else {
150:     VecGetArrayRead(xin,&xx);
151:     VecGetArray(yin,(PetscScalar**)&yy);
152:     for (i=0; i<n; i++) {
153:       yy[i] = a*xx[i] + b*yy[i];
154:     }
155:     VecRestoreArrayRead(xin,&xx);
156:     VecRestoreArray(yin,(PetscScalar**)&yy);
157:     PetscLogFlops(3.0*xin->map->n);
158:   }
159:   return(0);
160: }

164: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
165: {
166:   PetscErrorCode     ierr;
167:   PetscInt           n = zin->map->n,i;
168:   const PetscScalar  *yy,*xx;
169:   PetscScalar        *zz;

172:   VecGetArrayRead(xin,&xx);
173:   VecGetArrayRead(yin,&yy);
174:   VecGetArray(zin,&zz);
175:   if (alpha == (PetscScalar)1.0) {
176:     for (i=0; i<n; i++) {
177:       zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
178:     }
179:     PetscLogFlops(4.0*n);
180:   } else if (gamma == (PetscScalar)1.0) {
181:     for (i=0; i<n; i++) {
182:       zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
183:     }
184:     PetscLogFlops(4.0*n);
185:   } else if (gamma == (PetscScalar)0.0) {
186:     for (i=0; i<n; i++) {
187:       zz[i] = alpha*xx[i] + beta*yy[i];
188:     }
189:     PetscLogFlops(3.0*n);
190:   } else {
191:     for (i=0; i<n; i++) {
192:       zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
193:     }
194:     PetscLogFlops(5.0*n);
195:   }
196:   VecRestoreArrayRead(xin,&xx);
197:   VecRestoreArrayRead(yin,&yy);
198:   VecRestoreArray(zin,&zz);
199:   return(0);
200: }