Actual source code: vpscat.h

/*
     Defines the methods VecScatterBegin/End_1,2,...
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
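/*
   A sketch of how the two-level expansion works; the value 2 below is
   illustrative only, the including file supplies the real BS:

     #define BS 2
     PETSCMAP1(VecScatterBegin)
       -> PETSCMAP1_b(VecScatterBegin,BS)
       -> PETSCMAP1_a(VecScatterBegin,2)     BS expands before the paste
       -> VecScatterBegin_2

   The extra PETSCMAP1_b level exists so that BS is replaced by its value
   before the ## paste; pasting directly would yield VecScatterBegin_BS.
*/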

PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
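  /*
     For a reverse scatter the send and receive sides trade places: the
     context's fromdata then describes the outgoing messages and its todata
     the incoming ones, and the persistent MPI requests prebuilt for the
     reverse direction (rev_requests) are started instead of the forward ones.
  */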

  if (mode & SCATTER_REVERSE) {
    to       = (VecScatter_MPI_General*)ctx->fromdata;
    from     = (VecScatter_MPI_General*)ctx->todata;
    rwaits   = from->rev_requests;
    swaits   = to->rev_requests;
  } else {
    to       = (VecScatter_MPI_General*)ctx->todata;
    from     = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits   = from->requests;
    swaits   = to->requests;
  }
  bs       = to->bs;
  svalues  = to->values;
  nrecvs   = from->n;
  nsends   = to->n;
  indices  = to->indices;
  sstarts  = to->starts;
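  /*
     CUSP fast path: when the vector is large, its up-to-date copy lives on
     the GPU, the scatter has no purely local part, and the outgoing messages
     touch fewer than about 5% of the entries, only the needed entries are
     copied back from the GPU.  The block-expanded, duplicate-free index list
     used for that copy is built once and cached in ctx->spptr.
  */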
#if defined(PETSC_HAVE_CUSP)
  if ((xin->map->n > 10000) && (sstarts[nsends]*bs < 0.05*xin->map->n) && (xin->valid_GPU_array == PETSC_CUSP_GPU) && !(to->local.n)) {
    if (!ctx->spptr) {
      PetscInt k,*tindices,n = sstarts[nsends],*sindices;
      PetscMalloc(n*sizeof(PetscInt),&tindices);
      PetscMemcpy(tindices,to->indices,n*sizeof(PetscInt));
      PetscSortRemoveDupsInt(&n,tindices);
      PetscMalloc(bs*n*sizeof(PetscInt),&sindices);
      for (i=0; i<n; i++) {
        for (k=0; k<bs; k++) {
          sindices[i*bs+k] = tindices[i]+k;
        }
      }
      PetscFree(tindices);
      PetscCUSPIndicesCreate(n*bs,sindices,(PetscCUSPIndices*)&ctx->spptr);
      PetscFree(sindices);
    }
    VecCUSPCopyFromGPUSome_Public(xin,(PetscCUSPIndices)ctx->spptr);
    xv   = *(PetscScalar**)xin->data;
  } else {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }
#else
  VecGetArrayRead(xin,(const PetscScalar**)&xv);
#endif
  if (xin != yin) {VecGetArray(yin,&yv);} else {yv = xv;}

  if (!(mode & SCATTER_LOCAL)) {

    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
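    /*
       Several ways of moving the data follow: MPI_Alltoallw can ship it with
       no packing at all (INSERT_VALUES only); the pack-together path packs
       every outgoing message into to->values and then uses MPI_Alltoallv,
       one-sided MPI_Put calls into the remote window, or a single
       MPI_Startall on the persistent sends; the default path packs and
       starts one message at a time.
    */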

#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,((PetscObject)ctx)->comm);
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together and sends them; used when -vecscatter_packtogether is given */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,((PetscObject)ctx)->comm);
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt  = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
#endif
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n],nsends,swaits);
      }
    } else {
      /* this version packs and sends one message at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i]);
        MPI_Start_isend(sstarts[i+1]-sstarts[i],swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }
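  /*
     The purely local (on-process) part of the scatter is handled next, after
     the sends have been started but before any receives are waited on, so it
     overlaps with the communication; a contiguous local part with
     INSERT_VALUES reduces to a single memcpy.
  */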

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
    } else {
      PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv);
    }
  }
#if defined(PETSC_HAVE_CUSP)
  if (xin->valid_GPU_array != PETSC_CUSP_GPU) {
    VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  } else {
    VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  }
#else
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
#endif
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to       = (VecScatter_MPI_General*)ctx->todata;
  from     = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits   = from->requests;
  swaits   = to->requests;
  sstatus  = to->sstatus;   /* sstatus and rstatus are always stored in to */
  rstatus  = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to       = (VecScatter_MPI_General*)ctx->fromdata;
    from     = (VecScatter_MPI_General*)ctx->todata;
    rwaits   = from->rev_requests;
    swaits   = to->rev_requests;
  }
  bs       = from->bs;
  rvalues  = from->values;
  nrecvs   = from->n;
  nsends   = to->n;
  indices  = from->indices;
  rstarts  = from->starts;
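  /*
     Unpacking mirrors the send side: when everything was packed together (or
     moved by alltoall or one-sided puts) all receives are completed first and
     the data is unpacked in one sweep; otherwise each message is unpacked as
     it arrives.  When ctx->reproduce is set the receives are processed in a
     fixed order so that ADD_VALUES results do not depend on message arrival
     order.  With MPI_Alltoallw and INSERT_VALUES there is nothing to unpack,
     since VecScatterBegin delivered the data directly into yv.
  */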

  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else
#endif
    if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv);
  } else if (!to->use_alltoallw) {
    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv);
      count--;
    }
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(((PetscObject)ctx)->comm);
  }

  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
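/*
   A sketch of how this header is meant to be consumed; the include path and
   the block size 2 are illustrative, vpscat.c supplies the real values:

     #define BS 2
     #include "vpscat.h"

   Each such inclusion instantiates a VecScatterBegin_2()/VecScatterEnd_2()
   pair specialized for that block size, and the #undef lines above leave BS
   and the helper macros free to be redefined for the next inclusion.
*/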