Actual source code: dvec2.c
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <private/petscaxpy.h>
9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
13: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
14: {
15: PetscErrorCode ierr;
16: PetscInt i,nv_rem,n = xin->map->n;
17: PetscScalar sum0,sum1,sum2,sum3;
18: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
19: Vec *yy;
22: sum0 = 0.0;
23: sum1 = 0.0;
24: sum2 = 0.0;
26: i = nv;
27: nv_rem = nv&0x3;
28: yy = (Vec*)yin;
29: VecGetArrayRead(xin,&x);
31: switch (nv_rem) {
32: case 3:
33: VecGetArrayRead(yy[0],&yy0);
34: VecGetArrayRead(yy[1],&yy1);
35: VecGetArrayRead(yy[2],&yy2);
36: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
37: VecRestoreArrayRead(yy[0],&yy0);
38: VecRestoreArrayRead(yy[1],&yy1);
39: VecRestoreArrayRead(yy[2],&yy2);
40: z[0] = sum0;
41: z[1] = sum1;
42: z[2] = sum2;
43: break;
44: case 2:
45: VecGetArrayRead(yy[0],&yy0);
46: VecGetArrayRead(yy[1],&yy1);
47: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
48: VecRestoreArrayRead(yy[0],&yy0);
49: VecRestoreArrayRead(yy[1],&yy1);
50: z[0] = sum0;
51: z[1] = sum1;
52: break;
53: case 1:
54: VecGetArrayRead(yy[0],&yy0);
55: fortranmdot1_(x,yy0,&n,&sum0);
56: VecRestoreArrayRead(yy[0],&yy0);
57: z[0] = sum0;
58: break;
59: case 0:
60: break;
61: }
62: z += nv_rem;
63: i -= nv_rem;
64: yy += nv_rem;
66: while (i >0) {
67: sum0 = 0.;
68: sum1 = 0.;
69: sum2 = 0.;
70: sum3 = 0.;
71: VecGetArrayRead(yy[0],&yy0);
72: VecGetArrayRead(yy[1],&yy1);
73: VecGetArrayRead(yy[2],&yy2);
74: VecGetArrayRead(yy[3],&yy3);
75: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
76: VecRestoreArrayRead(yy[0],&yy0);
77: VecRestoreArrayRead(yy[1],&yy1);
78: VecRestoreArrayRead(yy[2],&yy2);
79: VecRestoreArrayRead(yy[3],&yy3);
80: yy += 4;
81: z[0] = sum0;
82: z[1] = sum1;
83: z[2] = sum2;
84: z[3] = sum3;
85: z += 4;
86: i -= 4;
87: }
88: VecRestoreArrayRead(xin,&x);
89: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
90: return(0);
91: }
93: #else
96: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
97: {
98: PetscErrorCode ierr;
99: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
100: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
101: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
102: Vec *yy;
105: sum0 = 0.;
106: sum1 = 0.;
107: sum2 = 0.;
109: i = nv;
110: nv_rem = nv&0x3;
111: yy = (Vec *)yin;
112: j = n;
113: VecGetArrayRead(xin,&xbase);
114: x = xbase;
116: switch (nv_rem) {
117: case 3:
118: VecGetArrayRead(yy[0],&yy0);
119: VecGetArrayRead(yy[1],&yy1);
120: VecGetArrayRead(yy[2],&yy2);
121: switch (j_rem=j&0x3) {
122: case 3:
123: x2 = x[2];
124: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
125: sum2 += x2*PetscConj(yy2[2]);
126: case 2:
127: x1 = x[1];
128: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
129: sum2 += x1*PetscConj(yy2[1]);
130: case 1:
131: x0 = x[0];
132: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
133: sum2 += x0*PetscConj(yy2[0]);
134: case 0:
135: x += j_rem;
136: yy0 += j_rem;
137: yy1 += j_rem;
138: yy2 += j_rem;
139: j -= j_rem;
140: break;
141: }
142: while (j>0) {
143: x0 = x[0];
144: x1 = x[1];
145: x2 = x[2];
146: x3 = x[3];
147: x += 4;
148:
149: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
150: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
151: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
152: j -= 4;
153: }
154: z[0] = sum0;
155: z[1] = sum1;
156: z[2] = sum2;
157: VecRestoreArrayRead(yy[0],&yy0);
158: VecRestoreArrayRead(yy[1],&yy1);
159: VecRestoreArrayRead(yy[2],&yy2);
160: break;
161: case 2:
162: VecGetArrayRead(yy[0],&yy0);
163: VecGetArrayRead(yy[1],&yy1);
164: switch (j_rem=j&0x3) {
165: case 3:
166: x2 = x[2];
167: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
168: case 2:
169: x1 = x[1];
170: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
171: case 1:
172: x0 = x[0];
173: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
174: case 0:
175: x += j_rem;
176: yy0 += j_rem;
177: yy1 += j_rem;
178: j -= j_rem;
179: break;
180: }
181: while (j>0) {
182: x0 = x[0];
183: x1 = x[1];
184: x2 = x[2];
185: x3 = x[3];
186: x += 4;
187:
188: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
189: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
190: j -= 4;
191: }
192: z[0] = sum0;
193: z[1] = sum1;
194:
195: VecRestoreArrayRead(yy[0],&yy0);
196: VecRestoreArrayRead(yy[1],&yy1);
197: break;
198: case 1:
199: VecGetArrayRead(yy[0],&yy0);
200: switch (j_rem=j&0x3) {
201: case 3:
202: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
203: case 2:
204: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
205: case 1:
206: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
207: case 0:
208: x += j_rem;
209: yy0 += j_rem;
210: j -= j_rem;
211: break;
212: }
213: while (j>0) {
214: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
215: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
216: yy0+=4;
217: j -= 4; x+=4;
218: }
219: z[0] = sum0;
221: VecRestoreArrayRead(yy[0],&yy0);
222: break;
223: case 0:
224: break;
225: }
226: z += nv_rem;
227: i -= nv_rem;
228: yy += nv_rem;
230: while (i >0) {
231: sum0 = 0.;
232: sum1 = 0.;
233: sum2 = 0.;
234: sum3 = 0.;
235: VecGetArrayRead(yy[0],&yy0);
236: VecGetArrayRead(yy[1],&yy1);
237: VecGetArrayRead(yy[2],&yy2);
238: VecGetArrayRead(yy[3],&yy3);
240: j = n;
241: x = xbase;
242: switch (j_rem=j&0x3) {
243: case 3:
244: x2 = x[2];
245: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
246: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
247: case 2:
248: x1 = x[1];
249: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
250: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
251: case 1:
252: x0 = x[0];
253: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
254: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
255: case 0:
256: x += j_rem;
257: yy0 += j_rem;
258: yy1 += j_rem;
259: yy2 += j_rem;
260: yy3 += j_rem;
261: j -= j_rem;
262: break;
263: }
264: while (j>0) {
265: x0 = x[0];
266: x1 = x[1];
267: x2 = x[2];
268: x3 = x[3];
269: x += 4;
270:
271: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
272: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
273: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
274: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
275: j -= 4;
276: }
277: z[0] = sum0;
278: z[1] = sum1;
279: z[2] = sum2;
280: z[3] = sum3;
281: z += 4;
282: i -= 4;
283: VecRestoreArrayRead(yy[0],&yy0);
284: VecRestoreArrayRead(yy[1],&yy1);
285: VecRestoreArrayRead(yy[2],&yy2);
286: VecRestoreArrayRead(yy[3],&yy3);
287: yy += 4;
288: }
289: VecRestoreArrayRead(xin,&xbase);
290: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
291: return(0);
292: }
293: #endif
295: /* ----------------------------------------------------------------------------*/
298: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
299: {
300: PetscErrorCode ierr;
301: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
302: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
303: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
304: Vec *yy;
305:
308: sum0 = 0.;
309: sum1 = 0.;
310: sum2 = 0.;
312: i = nv;
313: nv_rem = nv&0x3;
314: yy = (Vec*)yin;
315: j = n;
316: VecGetArrayRead(xin,&x);
318: switch (nv_rem) {
319: case 3:
320: VecGetArrayRead(yy[0],&yy0);
321: VecGetArrayRead(yy[1],&yy1);
322: VecGetArrayRead(yy[2],&yy2);
323: switch (j_rem=j&0x3) {
324: case 3:
325: x2 = x[2];
326: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
327: sum2 += x2*yy2[2];
328: case 2:
329: x1 = x[1];
330: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
331: sum2 += x1*yy2[1];
332: case 1:
333: x0 = x[0];
334: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
335: sum2 += x0*yy2[0];
336: case 0:
337: x += j_rem;
338: yy0 += j_rem;
339: yy1 += j_rem;
340: yy2 += j_rem;
341: j -= j_rem;
342: break;
343: }
344: while (j>0) {
345: x0 = x[0];
346: x1 = x[1];
347: x2 = x[2];
348: x3 = x[3];
349: x += 4;
350:
351: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
352: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
353: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
354: j -= 4;
355: }
356: z[0] = sum0;
357: z[1] = sum1;
358: z[2] = sum2;
359: VecRestoreArrayRead(yy[0],&yy0);
360: VecRestoreArrayRead(yy[1],&yy1);
361: VecRestoreArrayRead(yy[2],&yy2);
362: break;
363: case 2:
364: VecGetArrayRead(yy[0],&yy0);
365: VecGetArrayRead(yy[1],&yy1);
366: switch (j_rem=j&0x3) {
367: case 3:
368: x2 = x[2];
369: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
370: case 2:
371: x1 = x[1];
372: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
373: case 1:
374: x0 = x[0];
375: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
376: case 0:
377: x += j_rem;
378: yy0 += j_rem;
379: yy1 += j_rem;
380: j -= j_rem;
381: break;
382: }
383: while (j>0) {
384: x0 = x[0];
385: x1 = x[1];
386: x2 = x[2];
387: x3 = x[3];
388: x += 4;
389:
390: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
391: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
392: j -= 4;
393: }
394: z[0] = sum0;
395: z[1] = sum1;
396:
397: VecRestoreArrayRead(yy[0],&yy0);
398: VecRestoreArrayRead(yy[1],&yy1);
399: break;
400: case 1:
401: VecGetArrayRead(yy[0],&yy0);
402: switch (j_rem=j&0x3) {
403: case 3:
404: x2 = x[2]; sum0 += x2*yy0[2];
405: case 2:
406: x1 = x[1]; sum0 += x1*yy0[1];
407: case 1:
408: x0 = x[0]; sum0 += x0*yy0[0];
409: case 0:
410: x += j_rem;
411: yy0 += j_rem;
412: j -= j_rem;
413: break;
414: }
415: while (j>0) {
416: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
417: j -= 4; x+=4;
418: }
419: z[0] = sum0;
421: VecRestoreArrayRead(yy[0],&yy0);
422: break;
423: case 0:
424: break;
425: }
426: z += nv_rem;
427: i -= nv_rem;
428: yy += nv_rem;
430: while (i >0) {
431: sum0 = 0.;
432: sum1 = 0.;
433: sum2 = 0.;
434: sum3 = 0.;
435: VecGetArrayRead(yy[0],&yy0);
436: VecGetArrayRead(yy[1],&yy1);
437: VecGetArrayRead(yy[2],&yy2);
438: VecGetArrayRead(yy[3],&yy3);
440: j = n;
441: switch (j_rem=j&0x3) {
442: case 3:
443: x2 = x[2];
444: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
445: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
446: case 2:
447: x1 = x[1];
448: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
449: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
450: case 1:
451: x0 = x[0];
452: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
453: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
454: case 0:
455: x += j_rem;
456: yy0 += j_rem;
457: yy1 += j_rem;
458: yy2 += j_rem;
459: yy3 += j_rem;
460: j -= j_rem;
461: break;
462: }
463: while (j>0) {
464: x0 = x[0];
465: x1 = x[1];
466: x2 = x[2];
467: x3 = x[3];
468: x += 4;
469:
470: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
471: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
472: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
473: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
474: j -= 4;
475: }
476: z[0] = sum0;
477: z[1] = sum1;
478: z[2] = sum2;
479: z[3] = sum3;
480: z += 4;
481: i -= 4;
482: VecRestoreArrayRead(yy[0],&yy0);
483: VecRestoreArrayRead(yy[1],&yy1);
484: VecRestoreArrayRead(yy[2],&yy2);
485: VecRestoreArrayRead(yy[3],&yy3);
486: yy += 4;
487: }
488: VecRestoreArrayRead(xin,&x);
489: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
490: return(0);
491: }
492:
496: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
497: {
498: PetscInt i,j=0,n = xin->map->n;
499: PetscReal max,tmp;
500: const PetscScalar *xx;
501: PetscErrorCode ierr;
504: VecGetArrayRead(xin,&xx);
505: if (!n) {
506: max = PETSC_MIN_REAL;
507: j = -1;
508: } else {
509: #if defined(PETSC_USE_COMPLEX)
510: max = PetscRealPart(*xx++); j = 0;
511: #else
512: max = *xx++; j = 0;
513: #endif
514: for (i=1; i<n; i++) {
515: #if defined(PETSC_USE_COMPLEX)
516: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
517: #else
518: if ((tmp = *xx++) > max) { j = i; max = tmp; }
519: #endif
520: }
521: }
522: *z = max;
523: if (idx) *idx = j;
524: VecRestoreArrayRead(xin,&xx);
525: return(0);
526: }
530: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
531: {
532: PetscInt i,j=0,n = xin->map->n;
533: PetscReal min,tmp;
534: const PetscScalar *xx;
535: PetscErrorCode ierr;
538: VecGetArrayRead(xin,&xx);
539: if (!n) {
540: min = PETSC_MAX_REAL;
541: j = -1;
542: } else {
543: #if defined(PETSC_USE_COMPLEX)
544: min = PetscRealPart(*xx++); j = 0;
545: #else
546: min = *xx++; j = 0;
547: #endif
548: for (i=1; i<n; i++) {
549: #if defined(PETSC_USE_COMPLEX)
550: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
551: #else
552: if ((tmp = *xx++) < min) { j = i; min = tmp; }
553: #endif
554: }
555: }
556: *z = min;
557: if (idx) *idx = j;
558: VecGetArrayRead(xin,&xx);
559: return(0);
560: }
564: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
565: {
566: PetscInt i,n = xin->map->n;
567: PetscScalar *xx;
571: VecGetArray(xin,&xx);
572: if (alpha == (PetscScalar)0.0) {
573: PetscMemzero(xx,n*sizeof(PetscScalar));
574: } else {
575: for (i=0; i<n; i++) xx[i] = alpha;
576: }
577: VecRestoreArray(xin,&xx);
578: return(0);
579: }
584: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
585: {
586: PetscErrorCode ierr;
587: PetscInt n = xin->map->n,j,j_rem;
588: const PetscScalar *yy0,*yy1,*yy2,*yy3;
589: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
591: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
592: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
593: #endif
596: PetscLogFlops(nv*2.0*n);
597: VecGetArray(xin,&xx);
598: switch (j_rem=nv&0x3) {
599: case 3:
600: VecGetArrayRead(y[0],&yy0);
601: VecGetArrayRead(y[1],&yy1);
602: VecGetArrayRead(y[2],&yy2);
603: alpha0 = alpha[0];
604: alpha1 = alpha[1];
605: alpha2 = alpha[2];
606: alpha += 3;
607: PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
608: VecRestoreArrayRead(y[0],&yy0);
609: VecRestoreArrayRead(y[1],&yy1);
610: VecRestoreArrayRead(y[2],&yy2);
611: y += 3;
612: break;
613: case 2:
614: VecGetArrayRead(y[0],&yy0);
615: VecGetArrayRead(y[1],&yy1);
616: alpha0 = alpha[0];
617: alpha1 = alpha[1];
618: alpha +=2;
619: PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
620: VecRestoreArrayRead(y[0],&yy0);
621: VecRestoreArrayRead(y[1],&yy1);
622: y +=2;
623: break;
624: case 1:
625: VecGetArrayRead(y[0],&yy0);
626: alpha0 = *alpha++;
627: PetscAXPY(xx,alpha0,yy0,n);
628: VecRestoreArrayRead(y[0],&yy0);
629: y +=1;
630: break;
631: }
632: for (j=j_rem; j<nv; j+=4) {
633: VecGetArrayRead(y[0],&yy0);
634: VecGetArrayRead(y[1],&yy1);
635: VecGetArrayRead(y[2],&yy2);
636: VecGetArrayRead(y[3],&yy3);
637: alpha0 = alpha[0];
638: alpha1 = alpha[1];
639: alpha2 = alpha[2];
640: alpha3 = alpha[3];
641: alpha += 4;
643: PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
644: VecRestoreArrayRead(y[0],&yy0);
645: VecRestoreArrayRead(y[1],&yy1);
646: VecRestoreArrayRead(y[2],&yy2);
647: VecRestoreArrayRead(y[3],&yy3);
648: y += 4;
649: }
650: VecRestoreArray(xin,&xx);
651: return(0);
652: }
654: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
657: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
658: {
659: PetscErrorCode ierr;
660: PetscInt n = yin->map->n;
661: PetscScalar *yy;
662: const PetscScalar *xx;
665: if (alpha == (PetscScalar)0.0) {
666: VecCopy(xin,yin);
667: } else if (alpha == (PetscScalar)1.0) {
668: VecAXPY_Seq(yin,alpha,xin);
669: } else if (alpha == (PetscScalar)-1.0) {
670: PetscInt i;
671: VecGetArrayRead(xin,&xx);
672: VecGetArray(yin,&yy);
673: for (i=0; i<n; i++) {
674: yy[i] = xx[i] - yy[i];
675: }
676: VecRestoreArrayRead(xin,&xx);
677: VecRestoreArray(yin,&yy);
678: PetscLogFlops(1.0*n);
679: } else {
680: VecGetArrayRead(xin,&xx);
681: VecGetArray(yin,&yy);
682: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
683: {
684: PetscScalar oalpha = alpha;
685: fortranaypx_(&n,&oalpha,xx,yy);
686: }
687: #else
688: {
689: PetscInt i;
690: for (i=0; i<n; i++) {
691: yy[i] = xx[i] + alpha*yy[i];
692: }
693: }
694: #endif
695: VecRestoreArrayRead(xin,&xx);
696: VecRestoreArray(yin,&yy);
697: PetscLogFlops(2.0*n);
698: }
699: return(0);
700: }
702: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
/*
   IBM ESSL contains a routine dzaxpy() that is our WAXPY(), but it appears
   to be slower than a regular C loop. Hence, we do not include it.
   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
*/
711: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
712: {
713: PetscErrorCode ierr;
714: PetscInt i,n = win->map->n;
715: PetscScalar *ww;
716: const PetscScalar *yy,*xx;
719: VecGetArrayRead(xin,&xx);
720: VecGetArrayRead(yin,&yy);
721: VecGetArray(win,&ww);
722: if (alpha == (PetscScalar)1.0) {
723: PetscLogFlops(n);
724: /* could call BLAS axpy after call to memcopy, but may be slower */
725: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
726: } else if (alpha == (PetscScalar)-1.0) {
727: PetscLogFlops(n);
728: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
729: } else if (alpha == (PetscScalar)0.0) {
730: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
731: } else {
732: PetscScalar oalpha = alpha;
733: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
734: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
735: #else
736: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
737: #endif
738: PetscLogFlops(2.0*n);
739: }
740: VecRestoreArrayRead(xin,&xx);
741: VecRestoreArrayRead(yin,&yy);
742: VecRestoreArray(win,&ww);
743: return(0);
744: }
749: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
750: {
751: PetscErrorCode ierr;
752: PetscInt n = xin->map->n,i;
753: const PetscScalar *xx,*yy;
754: PetscReal m = 0.0;
757: VecGetArrayRead(xin,&xx);
758: VecGetArrayRead(yin,&yy);
759: for(i = 0; i < n; i++) {
760: if (yy[i] != (PetscScalar)0.0) {
761: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
762: } else {
763: m = PetscMax(PetscAbsScalar(xx[i]), m);
764: }
765: }
766: VecRestoreArrayRead(xin,&xx);
767: VecRestoreArrayRead(yin,&yy);
768: MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
769: PetscLogFlops(n);
770: return(0);
771: }
775: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
776: {
777: Vec_Seq *v = (Vec_Seq *)vin->data;
780: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
781: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
782: v->array = (PetscScalar *)a;
783: return(0);
784: }
788: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
789: {
790: Vec_Seq *v = (Vec_Seq *)vin->data;
794: PetscFree(v->array_allocated);
795: v->array_allocated = v->array = (PetscScalar *)a;
796: return(0);
797: }
799: