Actual source code: cuspvecimpl.h

  1: #ifndef __CUSPVECIMPL

  4: #include <private/vecimpl.h>
  5: #include <cublas.h>
  6: #include <cusp/blas.h>
  7: #include <thrust/device_vector.h>
  8: #include <thrust/iterator/constant_iterator.h>
  9: #include <thrust/transform.h>
 10: #include <thrust/iterator/permutation_iterator.h>

 12: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
 13: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
 14: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>



 49: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err)

 51: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0])

 53: #define WaitForGPU() synchronizeCUSP ? cudaThreadSynchronize() : 0

 55: struct Vec_CUSP {
 56:   /* eventually we should probably move the GPU flag into here */
 57:   CUSPARRAY*       GPUarray;  /* this always holds the GPU data */
 58: };

 62: PETSC_STATIC_INLINE PetscErrorCode VecCUSPAllocateCheck(Vec v)
 63: {
 65:   Vec_Seq        *s = (Vec_Seq*)v->data;;

 68:   if (v->valid_GPU_array == PETSC_CUSP_UNALLOCATED){
 69:     try {
 70:       v->spptr = new Vec_CUSP;
 71:       ((Vec_CUSP*)v->spptr)->GPUarray = new CUSPARRAY;
 72:       ((Vec_CUSP*)v->spptr)->GPUarray->resize((PetscBLASInt)v->map->n);
 73:       WaitForGPU();
 74:     } catch(char* ex) {
 75:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
 76:     }
 77:     if (s->array == 0){
 78:       v->valid_GPU_array = PETSC_CUSP_GPU;
 79:     } else{
 80:       v->valid_GPU_array = PETSC_CUSP_CPU;
 81:     }
 82:   }
 83:   return(0);
 84: }


 89: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
 90: PETSC_STATIC_INLINE PetscErrorCode VecCUSPCopyToGPU(Vec v)
 91: {
 92:   PetscBLASInt   cn = v->map->n;

 96:   VecCUSPAllocateCheck(v);
 97:   if (v->valid_GPU_array == PETSC_CUSP_CPU){
 98:     PetscLogEventBegin(VEC_CUSPCopyToGPU,v,0,0,0);
 99:     try{
100:       ((Vec_CUSP*)v->spptr)->GPUarray->assign(*(PetscScalar**)v->data,*(PetscScalar**)v->data + cn);
101:       WaitForGPU();
102:     } catch(char* ex) {
103:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
104:     }
105:     PetscLogEventEnd(VEC_CUSPCopyToGPU,v,0,0,0);
106:     v->valid_GPU_array = PETSC_CUSP_BOTH;
107:   }
108:   return(0);
109: }

113: PETSC_STATIC_INLINE PetscErrorCode VecCUSPCopyToGPUSome(Vec v,CUSPINTARRAYCPU *indicesCPU,CUSPINTARRAYGPU *indicesGPU)
114: {
115:   Vec_Seq        *s = (Vec_Seq *)v->data;

119:   VecCUSPAllocateCheck(v);
120:   if (v->valid_GPU_array == PETSC_CUSP_CPU) {
121:     PetscLogEventBegin(VEC_CUSPCopyToGPUSome,v,0,0,0);
122:     thrust::copy(thrust::make_permutation_iterator(s->array,indicesCPU->begin()),
123:                  thrust::make_permutation_iterator(s->array,indicesCPU->end()),
124:                  thrust::make_permutation_iterator(((Vec_CUSP *)v->spptr)->GPUarray->begin(),indicesGPU->begin()));
125:     PetscLogEventEnd(VEC_CUSPCopyToGPUSome,v,0,0,0);
126:   }
127:   v->valid_GPU_array = PETSC_CUSP_GPU;
128:   return(0);
129: }

133: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, CUSPARRAY** a)
134: {

138:   *a   = 0;
139:   VecCUSPCopyToGPU(v);
140:   *a   = ((Vec_CUSP *)v->spptr)->GPUarray;
141:   return(0);
142: }

146: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, CUSPARRAY** a)
147: {

151:   if (v->valid_GPU_array != PETSC_CUSP_UNALLOCATED){
152:     v->valid_GPU_array = PETSC_CUSP_GPU;
153:   }
154:   PetscObjectStateIncrease((PetscObject)v);
155:   return(0);
156: }

160: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayRead(Vec v, CUSPARRAY** a)
161: {

165:   *a   = 0;
166:   VecCUSPCopyToGPU(v);
167:   *a   = ((Vec_CUSP *)v->spptr)->GPUarray;
168:   return(0);
169: }

173: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayRead(Vec v, CUSPARRAY** a)
174: {
176:   return(0);
177: }

181: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayWrite(Vec v, CUSPARRAY** a)
182: {

186:   *a   = 0;
187:   VecCUSPAllocateCheck(v);
188:   *a   = ((Vec_CUSP *)v->spptr)->GPUarray;
189:   return(0);
190: }

194: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, CUSPARRAY** a)
195: {

199:   if (v->valid_GPU_array != PETSC_CUSP_UNALLOCATED){
200:     v->valid_GPU_array = PETSC_CUSP_GPU;
201:   }
202:   PetscObjectStateIncrease((PetscObject)v);
203:   return(0);
204: }
205: #endif