Actual source code: matstash.c
#include <private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000
/*
  MatStashCreate_Private - Creates a stash, currently used by all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash: entries are simply appended to the end.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscInt  max,*opt,nopt,i;
  PetscBool flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);
  PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  PetscMalloc(nopt*sizeof(PetscInt),&opt);
  PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;

  stash->reproduce = PETSC_FALSE;
  PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);
  return(0);
}
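/*
   Example usage (a hedged sketch, not part of this file): a parallel matrix
   constructor such as the MPIAIJ creation routine would set up its stash
   roughly as

     MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);

   while the blocked (BAIJ) formats pass bs > 1 so the stash holds bs*bs blocks.
*/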
/*
  MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscMatStashSpaceDestroy(&stash->space_head);
  stash->space = 0;
  PetscFree(stash->flg_v);
  return(0);
}
/*
  MatStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscInt   nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status *send_status;

  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wasted space is reduced the next time this stash is used.
     Update oldnmax only if it increases */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;
  PetscMatStashSpaceDestroy(&stash->space_head);
  stash->space = 0;
  PetscFree(stash->send_waits);
  PetscFree(stash->recv_waits);
  PetscFree2(stash->svalues,stash->sindices);
  PetscFree(stash->rvalues[0]);
  PetscFree(stash->rvalues);
  PetscFree(stash->rindices[0]);
  PetscFree(stash->rindices);
  return(0);
}
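/*
   Illustrative arithmetic for the retained size above: if n = 1000 entries
   were stashed with bs = 2 (so bs2 = 4), then
   oldnmax = ((int)(1000*1.1) + 5)*4 = 4420 scalar slots are kept for the
   next assembly cycle.
*/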
/*
  MatStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash - the stash

  Output Parameters:
  nstash   - the size of the stash, i.e. the number of values stored.
  reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}
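/*
   Example usage (a hedged sketch): an assembly routine can report stash
   traffic, e.g.

     PetscInt nstash,reallocs;
     MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
     PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
*/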
/*
  MatStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value to use as the maximum size of the stash;
          this value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}
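/*
   Example usage (a hedged sketch; size and bsize are hypothetical
   user-supplied hints): callers typically forward a preallocation hint to
   both the regular and the blocked stash, roughly as

     MatStashSetInitialSize_Private(&mat->stash,size);
     MatStashSetInitialSize_Private(&mat->bstash,bsize);

   which is the programmatic analogue of the -matstash_initial_size option
   read in MatStashCreate_Private().
*/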
/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscInt newnmax,bs2=stash->bs*stash->bs;

  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  return(0);
}
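/*
   Worked example of the sizing logic (illustrative only): for a brand new
   stash with bs = 1 and no -matstash_initial_size option, the first call
   yields newnmax = DEFAULT_STASH_SIZE/1 = 10000 entries; if that fills up,
   the next call doubles nmax to 20000, and so on. The final check above
   guarantees newnmax always covers nmax + incr.
*/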
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash            - the stash
  row              - the global row corresponding to the values
  n                - the number of elements inserted. All elements belong to the above row.
  idxn             - the global column indices corresponding to each of the values.
  values           - the values inserted
  ignorezeroentries - if true, zero values are not stashed
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}
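/*
   Example usage (a hedged sketch): a MatSetValues() implementation stashes
   rows that fall outside the local ownership range [rstart,rend) instead of
   inserting them, roughly as

     if (row < rstart || row >= rend) {
       MatStashValuesRow_Private(&mat->stash,row,n,cols,vals,PETSC_FALSE);
     }
*/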
/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash            - the stash
  row              - the global row corresponding to the values
  n                - the number of elements inserted. All elements belong to the above row.
  idxn             - the global column indices corresponding to each of the values.
  values           - the values inserted
  stepval          - consecutive values are separated by a distance of stepval;
                     this happens because the input is column-oriented.
  ignorezeroentries - if true, zero values are not stashed
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}
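/*
   Note on stepval (illustrative): for an m-by-n column-oriented (Fortran
   style) input array, the entries of one row are m scalars apart, so a
   caller would pass values+row together with stepval = m to walk along that
   row.
*/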
/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns
  belonging to the same block-row can be inserted with a single call to this
  function. This function extracts the sub-block of values based on the
  dimensions of the original input block, and the row,col values corresponding
  to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}
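/*
   Indexing example (illustrative): with bs = 2 and a row-oriented input of
   cmax block-columns, the block at block-column i starts at
   vals = values + idx*bs2*n + bs*i, and successive scalar rows of that block
   are cmax*bs = 2*cmax scalars apart; the copy loop above converts each
   block from row-oriented input to column-oriented storage in space->val.
*/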
/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns
  belonging to the same block-row can be inserted with a single call to this
  function. This function extracts the sub-block of values based on the
  dimensions of the original input block, and the row,col values corresponding
  to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}
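/*
   Note (illustrative): the only difference from the row-oriented variant is
   the input layout. Here each block is already column-oriented, so a block
   column of bs entries is copied contiguously (array += bs), and successive
   scalar columns are rmax*bs scalars apart in the input.
*/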
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the owner of
  each stashed value, and sends the values off to the owning processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: In the case of the blocked stash the 'owners' array has the ranges
  specified in blocked global indices, and for the regular stash in regular
  global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *nprocs,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);
  PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
  PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);
  PetscMemzero(nlengths,size*sizeof(PetscMPIInt));
  PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space != PETSC_NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start the search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check which procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) { nprocs[i] = 1; nsends++; }
  }

  {PetscMPIInt *onodes,*olengths;
  /* Determine the number of messages to expect, their lengths, and from-ids */
  PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);
  PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
  /* since row and column indices are sent together, lengths are multiplied by 2 */
  for (i=0; i<nreceives; i++) olengths[i] *= 2;
  PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
  /* values are of size 'bs2' per entry (and remove the earlier factor of 2) */
  for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
  PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
  PetscFree(onodes);
  PetscFree(olengths);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);
  PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);
  PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);
  /* use 2 sends: the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space != PETSC_NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (nprocs[i]) {
      MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
      MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
    }
  }
#if defined(PETSC_USE_INFO)
  PetscInfo1(mat,"No of messages: %d \n",nsends);
  for (i=0; i<size; i++) {
    if (nprocs[i]) {
      PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));
    }
  }
#endif
  PetscFree(nlengths);
  PetscFree(owner);
  PetscFree2(startv,starti);
  PetscFree(nprocs);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;
  PetscFree(recv_waits1);
  PetscFree(recv_waits2);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  return(0);
}
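/*
   Example usage (a hedged sketch): MatAssemblyBegin() implementations start
   the stash communication with the matrix's row ownership ranges, roughly as

     MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);

   and later drain the messages with MatStashScatterGetMesg_Private() during
   MatAssemblyEnd().
*/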
/*
  MatStashScatterGetMesg_Private - This function waits on the receives posted
  in the function MatStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0, else it sets flg = 1.

  Input Parameter:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message.
  rows  - an array of row indices (or blocked indices) corresponding to the values
  cols  - an array of column indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates no more messages left, and the current call has no values associated.
          1 indicates that the current call successfully received a message, and the
          other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt i,*flg_v = stash->flg_v,i1,i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) return(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data
     to the calling function. Until then keep receiving messages */
  while (!match_found) {
    CHKMEMQ;
    if (stash->reproduce) {
      i = stash->reproduce_count++;
      MPI_Wait(stash->recv_waits+i,&recv_status);
    } else {
      MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    }
    CHKMEMQ;
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs2;
    } else {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
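/*
   Example usage (a hedged sketch): the receive side of assembly drains the
   stash one message at a time, roughly as

     PetscMPIInt n;
     PetscInt    i,*row,*col,flg;
     PetscScalar *val;

     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
     while (flg) {
       for (i=0; i<n; i++) {
         MatSetValues(mat,1,row+i,1,col+i,val+i,mat->insertmode);
       }
       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
     }
     MatStashScatterEnd_Private(&mat->stash);
*/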