Actual source code: dvec2.c

  2: /* 
  3:    Defines some vector operation functions that are shared by 
  4:   sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>   
  7: #include <private/petscaxpy.h>

  9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 13: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 14: {
 15:   PetscErrorCode    ierr;
 16:   PetscInt          i,nv_rem,n = xin->map->n;
 17:   PetscScalar       sum0,sum1,sum2,sum3;
 18:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 19:   Vec               *yy;

 22:   sum0 = 0.0;
 23:   sum1 = 0.0;
 24:   sum2 = 0.0;

 26:   i      = nv;
 27:   nv_rem = nv&0x3;
 28:   yy     = (Vec*)yin;
 29:   VecGetArrayRead(xin,&x);

 31:   switch (nv_rem) {
 32:   case 3:
 33:     VecGetArrayRead(yy[0],&yy0);
 34:     VecGetArrayRead(yy[1],&yy1);
 35:     VecGetArrayRead(yy[2],&yy2);
 36:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 37:     VecRestoreArrayRead(yy[0],&yy0);
 38:     VecRestoreArrayRead(yy[1],&yy1);
 39:     VecRestoreArrayRead(yy[2],&yy2);
 40:     z[0] = sum0;
 41:     z[1] = sum1;
 42:     z[2] = sum2;
 43:     break;
 44:   case 2:
 45:     VecGetArrayRead(yy[0],&yy0);
 46:     VecGetArrayRead(yy[1],&yy1);
 47:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 48:     VecRestoreArrayRead(yy[0],&yy0);
 49:     VecRestoreArrayRead(yy[1],&yy1);
 50:     z[0] = sum0;
 51:     z[1] = sum1;
 52:     break;
 53:   case 1:
 54:     VecGetArrayRead(yy[0],&yy0);
 55:     fortranmdot1_(x,yy0,&n,&sum0);
 56:     VecRestoreArrayRead(yy[0],&yy0);
 57:     z[0] = sum0;
 58:     break;
 59:   case 0:
 60:     break;
 61:   }
 62:   z  += nv_rem;
 63:   i  -= nv_rem;
 64:   yy += nv_rem;

 66:   while (i >0) {
 67:     sum0 = 0.;
 68:     sum1 = 0.;
 69:     sum2 = 0.;
 70:     sum3 = 0.;
 71:     VecGetArrayRead(yy[0],&yy0);
 72:     VecGetArrayRead(yy[1],&yy1);
 73:     VecGetArrayRead(yy[2],&yy2);
 74:     VecGetArrayRead(yy[3],&yy3);
 75:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 76:     VecRestoreArrayRead(yy[0],&yy0);
 77:     VecRestoreArrayRead(yy[1],&yy1);
 78:     VecRestoreArrayRead(yy[2],&yy2);
 79:     VecRestoreArrayRead(yy[3],&yy3);
 80:     yy  += 4;
 81:     z[0] = sum0;
 82:     z[1] = sum1;
 83:     z[2] = sum2;
 84:     z[3] = sum3;
 85:     z   += 4;
 86:     i   -= 4;
 87:   }
 88:   VecRestoreArrayRead(xin,&x);
 89:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 90:   return(0);
 91: }

 93: #else
 96: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 97: {
 98:   PetscErrorCode    ierr;
 99:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
100:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
101:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
102:   Vec               *yy;

105:   sum0 = 0.;
106:   sum1 = 0.;
107:   sum2 = 0.;

109:   i      = nv;
110:   nv_rem = nv&0x3;
111:   yy     = (Vec *)yin;
112:   j      = n;
113:   VecGetArrayRead(xin,&xbase);
114:   x      = xbase;

116:   switch (nv_rem) {
117:   case 3:
118:     VecGetArrayRead(yy[0],&yy0);
119:     VecGetArrayRead(yy[1],&yy1);
120:     VecGetArrayRead(yy[2],&yy2);
121:     switch (j_rem=j&0x3) {
122:     case 3:
123:       x2 = x[2];
124:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
125:       sum2 += x2*PetscConj(yy2[2]);
126:     case 2:
127:       x1 = x[1];
128:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
129:       sum2 += x1*PetscConj(yy2[1]);
130:     case 1:
131:       x0 = x[0];
132:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
133:       sum2 += x0*PetscConj(yy2[0]);
134:     case 0:
135:       x   += j_rem;
136:       yy0 += j_rem;
137:       yy1 += j_rem;
138:       yy2 += j_rem;
139:       j   -= j_rem;
140:       break;
141:     }
142:     while (j>0) {
143:       x0 = x[0];
144:       x1 = x[1];
145:       x2 = x[2];
146:       x3 = x[3];
147:       x += 4;
148: 
149:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
150:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
151:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
152:       j -= 4;
153:     }
154:     z[0] = sum0;
155:     z[1] = sum1;
156:     z[2] = sum2;
157:     VecRestoreArrayRead(yy[0],&yy0);
158:     VecRestoreArrayRead(yy[1],&yy1);
159:     VecRestoreArrayRead(yy[2],&yy2);
160:     break;
161:   case 2:
162:     VecGetArrayRead(yy[0],&yy0);
163:     VecGetArrayRead(yy[1],&yy1);
164:     switch (j_rem=j&0x3) {
165:     case 3:
166:       x2 = x[2];
167:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
168:     case 2:
169:       x1 = x[1];
170:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
171:     case 1:
172:       x0 = x[0];
173:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
174:     case 0:
175:       x   += j_rem;
176:       yy0 += j_rem;
177:       yy1 += j_rem;
178:       j   -= j_rem;
179:       break;
180:     }
181:     while (j>0) {
182:       x0 = x[0];
183:       x1 = x[1];
184:       x2 = x[2];
185:       x3 = x[3];
186:       x += 4;
187: 
188:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
189:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
190:       j -= 4;
191:     }
192:     z[0] = sum0;
193:     z[1] = sum1;
194: 
195:     VecRestoreArrayRead(yy[0],&yy0);
196:     VecRestoreArrayRead(yy[1],&yy1);
197:     break;
198:   case 1:
199:     VecGetArrayRead(yy[0],&yy0);
200:     switch (j_rem=j&0x3) {
201:     case 3:
202:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
203:     case 2:
204:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
205:     case 1:
206:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
207:     case 0:
208:       x   += j_rem;
209:       yy0 += j_rem;
210:       j   -= j_rem;
211:       break;
212:     }
213:     while (j>0) {
214:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
215:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
216:       yy0+=4;
217:       j -= 4; x+=4;
218:     }
219:     z[0] = sum0;

221:     VecRestoreArrayRead(yy[0],&yy0);
222:     break;
223:   case 0:
224:     break;
225:   }
226:   z  += nv_rem;
227:   i  -= nv_rem;
228:   yy += nv_rem;

230:   while (i >0) {
231:     sum0 = 0.;
232:     sum1 = 0.;
233:     sum2 = 0.;
234:     sum3 = 0.;
235:     VecGetArrayRead(yy[0],&yy0);
236:     VecGetArrayRead(yy[1],&yy1);
237:     VecGetArrayRead(yy[2],&yy2);
238:     VecGetArrayRead(yy[3],&yy3);

240:     j = n;
241:     x = xbase;
242:     switch (j_rem=j&0x3) {
243:     case 3:
244:       x2 = x[2];
245:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
246:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
247:     case 2:
248:       x1 = x[1];
249:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
250:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
251:     case 1:
252:       x0 = x[0];
253:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
254:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
255:     case 0:
256:       x   += j_rem;
257:       yy0 += j_rem;
258:       yy1 += j_rem;
259:       yy2 += j_rem;
260:       yy3 += j_rem;
261:       j   -= j_rem;
262:       break;
263:     }
264:     while (j>0) {
265:       x0 = x[0];
266:       x1 = x[1];
267:       x2 = x[2];
268:       x3 = x[3];
269:       x += 4;
270: 
271:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
272:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
273:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
274:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
275:       j -= 4;
276:     }
277:     z[0] = sum0;
278:     z[1] = sum1;
279:     z[2] = sum2;
280:     z[3] = sum3;
281:     z   += 4;
282:     i   -= 4;
283:     VecRestoreArrayRead(yy[0],&yy0);
284:     VecRestoreArrayRead(yy[1],&yy1);
285:     VecRestoreArrayRead(yy[2],&yy2);
286:     VecRestoreArrayRead(yy[3],&yy3);
287:     yy  += 4;
288:   }
289:   VecRestoreArrayRead(xin,&xbase);
290:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
291:   return(0);
292: }
293: #endif

295: /* ----------------------------------------------------------------------------*/
298: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
299: {
300:   PetscErrorCode    ierr;
301:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
302:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
303:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
304:   Vec               *yy;
305: 

308:   sum0 = 0.;
309:   sum1 = 0.;
310:   sum2 = 0.;

312:   i      = nv;
313:   nv_rem = nv&0x3;
314:   yy     = (Vec*)yin;
315:   j      = n;
316:   VecGetArrayRead(xin,&x);

318:   switch (nv_rem) {
319:   case 3:
320:     VecGetArrayRead(yy[0],&yy0);
321:     VecGetArrayRead(yy[1],&yy1);
322:     VecGetArrayRead(yy[2],&yy2);
323:     switch (j_rem=j&0x3) {
324:     case 3:
325:       x2 = x[2];
326:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
327:       sum2 += x2*yy2[2];
328:     case 2:
329:       x1 = x[1];
330:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
331:       sum2 += x1*yy2[1];
332:     case 1:
333:       x0 = x[0];
334:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
335:       sum2 += x0*yy2[0];
336:     case 0:
337:       x  += j_rem;
338:       yy0 += j_rem;
339:       yy1 += j_rem;
340:       yy2 += j_rem;
341:       j  -= j_rem;
342:       break;
343:     }
344:     while (j>0) {
345:       x0 = x[0];
346:       x1 = x[1];
347:       x2 = x[2];
348:       x3 = x[3];
349:       x += 4;
350: 
351:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
352:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
353:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
354:       j -= 4;
355:     }
356:     z[0] = sum0;
357:     z[1] = sum1;
358:     z[2] = sum2;
359:     VecRestoreArrayRead(yy[0],&yy0);
360:     VecRestoreArrayRead(yy[1],&yy1);
361:     VecRestoreArrayRead(yy[2],&yy2);
362:     break;
363:   case 2:
364:     VecGetArrayRead(yy[0],&yy0);
365:     VecGetArrayRead(yy[1],&yy1);
366:     switch (j_rem=j&0x3) {
367:     case 3:
368:       x2 = x[2];
369:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
370:     case 2:
371:       x1 = x[1];
372:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
373:     case 1:
374:       x0 = x[0];
375:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
376:     case 0:
377:       x  += j_rem;
378:       yy0 += j_rem;
379:       yy1 += j_rem;
380:       j  -= j_rem;
381:       break;
382:     }
383:     while (j>0) {
384:       x0 = x[0];
385:       x1 = x[1];
386:       x2 = x[2];
387:       x3 = x[3];
388:       x += 4;
389: 
390:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
391:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
392:       j -= 4;
393:     }
394:     z[0] = sum0;
395:     z[1] = sum1;
396: 
397:     VecRestoreArrayRead(yy[0],&yy0);
398:     VecRestoreArrayRead(yy[1],&yy1);
399:     break;
400:   case 1:
401:     VecGetArrayRead(yy[0],&yy0);
402:     switch (j_rem=j&0x3) {
403:     case 3:
404:       x2 = x[2]; sum0 += x2*yy0[2];
405:     case 2:
406:       x1 = x[1]; sum0 += x1*yy0[1];
407:     case 1:
408:       x0 = x[0]; sum0 += x0*yy0[0];
409:     case 0:
410:       x  += j_rem;
411:       yy0 += j_rem;
412:       j  -= j_rem;
413:       break;
414:     }
415:     while (j>0) {
416:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
417:       j -= 4; x+=4;
418:     }
419:     z[0] = sum0;

421:     VecRestoreArrayRead(yy[0],&yy0);
422:     break;
423:   case 0:
424:     break;
425:   }
426:   z  += nv_rem;
427:   i  -= nv_rem;
428:   yy += nv_rem;

430:   while (i >0) {
431:     sum0 = 0.;
432:     sum1 = 0.;
433:     sum2 = 0.;
434:     sum3 = 0.;
435:     VecGetArrayRead(yy[0],&yy0);
436:     VecGetArrayRead(yy[1],&yy1);
437:     VecGetArrayRead(yy[2],&yy2);
438:     VecGetArrayRead(yy[3],&yy3);

440:     j = n;
441:     switch (j_rem=j&0x3) {
442:     case 3:
443:       x2 = x[2];
444:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
445:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
446:     case 2:
447:       x1 = x[1];
448:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
449:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
450:     case 1:
451:       x0 = x[0];
452:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
453:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
454:     case 0:
455:       x  += j_rem;
456:       yy0 += j_rem;
457:       yy1 += j_rem;
458:       yy2 += j_rem;
459:       yy3 += j_rem;
460:       j  -= j_rem;
461:       break;
462:     }
463:     while (j>0) {
464:       x0 = x[0];
465:       x1 = x[1];
466:       x2 = x[2];
467:       x3 = x[3];
468:       x += 4;
469: 
470:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
471:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
472:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
473:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
474:       j -= 4;
475:     }
476:     z[0] = sum0;
477:     z[1] = sum1;
478:     z[2] = sum2;
479:     z[3] = sum3;
480:     z   += 4;
481:     i   -= 4;
482:     VecRestoreArrayRead(yy[0],&yy0);
483:     VecRestoreArrayRead(yy[1],&yy1);
484:     VecRestoreArrayRead(yy[2],&yy2);
485:     VecRestoreArrayRead(yy[3],&yy3);
486:     yy  += 4;
487:   }
488:   VecRestoreArrayRead(xin,&x);
489:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
490:   return(0);
491: }
492: 

496: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
497: {
498:   PetscInt          i,j=0,n = xin->map->n;
499:   PetscReal         max,tmp;
500:   const PetscScalar *xx;
501:   PetscErrorCode    ierr;

504:   VecGetArrayRead(xin,&xx);
505:   if (!n) {
506:     max = PETSC_MIN_REAL;
507:     j   = -1;
508:   } else {
509: #if defined(PETSC_USE_COMPLEX)
510:       max = PetscRealPart(*xx++); j = 0;
511: #else
512:       max = *xx++; j = 0;
513: #endif
514:     for (i=1; i<n; i++) {
515: #if defined(PETSC_USE_COMPLEX)
516:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
517: #else
518:       if ((tmp = *xx++) > max) { j = i; max = tmp; }
519: #endif
520:     }
521:   }
522:   *z   = max;
523:   if (idx) *idx = j;
524:   VecRestoreArrayRead(xin,&xx);
525:   return(0);
526: }

530: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
531: {
532:   PetscInt          i,j=0,n = xin->map->n;
533:   PetscReal         min,tmp;
534:   const PetscScalar *xx;
535:   PetscErrorCode    ierr;

538:   VecGetArrayRead(xin,&xx);
539:   if (!n) {
540:     min = PETSC_MAX_REAL;
541:     j   = -1;
542:   } else {
543: #if defined(PETSC_USE_COMPLEX)
544:     min = PetscRealPart(*xx++); j = 0;
545: #else
546:     min = *xx++; j = 0;
547: #endif
548:     for (i=1; i<n; i++) {
549: #if defined(PETSC_USE_COMPLEX)
550:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
551: #else
552:       if ((tmp = *xx++) < min) { j = i; min = tmp; }
553: #endif
554:     }
555:   }
556:   *z   = min;
557:   if (idx) *idx = j;
558:   VecGetArrayRead(xin,&xx);
559:   return(0);
560: }

564: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
565: {
566:   PetscInt       i,n = xin->map->n;
567:   PetscScalar    *xx;

571:   VecGetArray(xin,&xx);
572:   if (alpha == (PetscScalar)0.0) {
573:     PetscMemzero(xx,n*sizeof(PetscScalar));
574:   } else {
575:     for (i=0; i<n; i++) xx[i] = alpha;
576:   }
577:   VecRestoreArray(xin,&xx);
578:   return(0);
579: }


584: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
585: {
586:   PetscErrorCode    ierr;
587:   PetscInt          n = xin->map->n,j,j_rem;
588:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
589:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

591: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
592: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
593: #endif

596:   PetscLogFlops(nv*2.0*n);
597:   VecGetArray(xin,&xx);
598:   switch (j_rem=nv&0x3) {
599:   case 3:
600:     VecGetArrayRead(y[0],&yy0);
601:     VecGetArrayRead(y[1],&yy1);
602:     VecGetArrayRead(y[2],&yy2);
603:     alpha0 = alpha[0];
604:     alpha1 = alpha[1];
605:     alpha2 = alpha[2];
606:     alpha += 3;
607:     PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
608:     VecRestoreArrayRead(y[0],&yy0);
609:     VecRestoreArrayRead(y[1],&yy1);
610:     VecRestoreArrayRead(y[2],&yy2);
611:     y     += 3;
612:     break;
613:   case 2:
614:     VecGetArrayRead(y[0],&yy0);
615:     VecGetArrayRead(y[1],&yy1);
616:     alpha0 = alpha[0];
617:     alpha1 = alpha[1];
618:     alpha +=2;
619:     PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
620:     VecRestoreArrayRead(y[0],&yy0);
621:     VecRestoreArrayRead(y[1],&yy1);
622:     y     +=2;
623:     break;
624:   case 1:
625:     VecGetArrayRead(y[0],&yy0);
626:     alpha0 = *alpha++;
627:     PetscAXPY(xx,alpha0,yy0,n);
628:     VecRestoreArrayRead(y[0],&yy0);
629:     y     +=1;
630:     break;
631:   }
632:   for (j=j_rem; j<nv; j+=4) {
633:     VecGetArrayRead(y[0],&yy0);
634:     VecGetArrayRead(y[1],&yy1);
635:     VecGetArrayRead(y[2],&yy2);
636:     VecGetArrayRead(y[3],&yy3);
637:     alpha0 = alpha[0];
638:     alpha1 = alpha[1];
639:     alpha2 = alpha[2];
640:     alpha3 = alpha[3];
641:     alpha  += 4;

643:     PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
644:     VecRestoreArrayRead(y[0],&yy0);
645:     VecRestoreArrayRead(y[1],&yy1);
646:     VecRestoreArrayRead(y[2],&yy2);
647:     VecRestoreArrayRead(y[3],&yy3);
648:     y      += 4;
649:   }
650:   VecRestoreArray(xin,&xx);
651:   return(0);
652: }

654: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
657: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
658: {
659:   PetscErrorCode    ierr;
660:   PetscInt          n = yin->map->n;
661:   PetscScalar       *yy;
662:   const PetscScalar *xx;

665:   if (alpha == (PetscScalar)0.0) {
666:     VecCopy(xin,yin);
667:   } else if (alpha == (PetscScalar)1.0) {
668:     VecAXPY_Seq(yin,alpha,xin);
669:   } else if (alpha == (PetscScalar)-1.0) {
670:     PetscInt i;
671:     VecGetArrayRead(xin,&xx);
672:     VecGetArray(yin,&yy);
673:     for (i=0; i<n; i++) {
674:       yy[i] = xx[i] - yy[i];
675:     }
676:     VecRestoreArrayRead(xin,&xx);
677:     VecRestoreArray(yin,&yy);
678:     PetscLogFlops(1.0*n);
679:   } else {
680:     VecGetArrayRead(xin,&xx);
681:     VecGetArray(yin,&yy);
682: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
683:     {
684:       PetscScalar oalpha = alpha;
685:       fortranaypx_(&n,&oalpha,xx,yy);
686:     }
687: #else
688:     {
689:       PetscInt i;
690:       for (i=0; i<n; i++) {
691:         yy[i] = xx[i] + alpha*yy[i];
692:       }
693:     }
694: #endif
695:     VecRestoreArrayRead(xin,&xx);
696:     VecRestoreArray(yin,&yy);
697:     PetscLogFlops(2.0*n);
698:   }
699:   return(0);
700: }

702: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
/*
   IBM ESSL contains a routine dzaxpy() that is our WAXPY(), but it appears
   to be slower than a regular C loop.  Hence, we do not use it.
   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
*/

711: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
712: {
713:   PetscErrorCode     ierr;
714:   PetscInt           i,n = win->map->n;
715:   PetscScalar        *ww;
716:   const PetscScalar  *yy,*xx;

719:   VecGetArrayRead(xin,&xx);
720:   VecGetArrayRead(yin,&yy);
721:   VecGetArray(win,&ww);
722:   if (alpha == (PetscScalar)1.0) {
723:     PetscLogFlops(n);
724:     /* could call BLAS axpy after call to memcopy, but may be slower */
725:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
726:   } else if (alpha == (PetscScalar)-1.0) {
727:     PetscLogFlops(n);
728:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
729:   } else if (alpha == (PetscScalar)0.0) {
730:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
731:   } else {
732:     PetscScalar oalpha = alpha;
733: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
734:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
735: #else
736:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
737: #endif
738:     PetscLogFlops(2.0*n);
739:   }
740:   VecRestoreArrayRead(xin,&xx);
741:   VecRestoreArrayRead(yin,&yy);
742:   VecRestoreArray(win,&ww);
743:   return(0);
744: }


749: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
750: {
751:   PetscErrorCode    ierr;
752:   PetscInt          n = xin->map->n,i;
753:   const PetscScalar *xx,*yy;
754:   PetscReal         m = 0.0;

757:   VecGetArrayRead(xin,&xx);
758:   VecGetArrayRead(yin,&yy);
759:   for(i = 0; i < n; i++) {
760:     if (yy[i] != (PetscScalar)0.0) {
761:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
762:     } else {
763:       m = PetscMax(PetscAbsScalar(xx[i]), m);
764:     }
765:   }
766:   VecRestoreArrayRead(xin,&xx);
767:   VecRestoreArrayRead(yin,&yy);
768:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
769:   PetscLogFlops(n);
770:   return(0);
771: }

775: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
776: {
777:   Vec_Seq *v = (Vec_Seq *)vin->data;

780:   if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
781:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
782:   v->array = (PetscScalar *)a;
783:   return(0);
784: }

788: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
789: {
790:   Vec_Seq        *v = (Vec_Seq *)vin->data;

794:   PetscFree(v->array_allocated);
795:   v->array_allocated = v->array = (PetscScalar *)a;
796:   return(0);
797: }

799: