Actual source code: cuspvecimpl.h
1: #ifndef __CUSPVECIMPL
4: #include <private/vecimpl.h>
5: #include <cublas.h>
6: #include <cusp/blas.h>
7: #include <thrust/device_vector.h>
8: #include <thrust/iterator/constant_iterator.h>
9: #include <thrust/transform.h>
10: #include <thrust/iterator/permutation_iterator.h>
12: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
13: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
14: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>
49: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err)
51: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0])
53: #define WaitForGPU() synchronizeCUSP ? cudaThreadSynchronize() : 0
55: struct Vec_CUSP {
56: /* eventually we should probably move the GPU flag into here */
57: CUSPARRAY* GPUarray; /* this always holds the GPU data */
58: };
62: PETSC_STATIC_INLINE PetscErrorCode VecCUSPAllocateCheck(Vec v)
63: {
65: Vec_Seq *s = (Vec_Seq*)v->data;;
68: if (v->valid_GPU_array == PETSC_CUSP_UNALLOCATED){
69: try {
70: v->spptr = new Vec_CUSP;
71: ((Vec_CUSP*)v->spptr)->GPUarray = new CUSPARRAY;
72: ((Vec_CUSP*)v->spptr)->GPUarray->resize((PetscBLASInt)v->map->n);
73: WaitForGPU();
74: } catch(char* ex) {
75: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
76: }
77: if (s->array == 0){
78: v->valid_GPU_array = PETSC_CUSP_GPU;
79: } else{
80: v->valid_GPU_array = PETSC_CUSP_CPU;
81: }
82: }
83: return(0);
84: }
89: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
90: PETSC_STATIC_INLINE PetscErrorCode VecCUSPCopyToGPU(Vec v)
91: {
92: PetscBLASInt cn = v->map->n;
96: VecCUSPAllocateCheck(v);
97: if (v->valid_GPU_array == PETSC_CUSP_CPU){
98: PetscLogEventBegin(VEC_CUSPCopyToGPU,v,0,0,0);
99: try{
100: ((Vec_CUSP*)v->spptr)->GPUarray->assign(*(PetscScalar**)v->data,*(PetscScalar**)v->data + cn);
101: WaitForGPU();
102: } catch(char* ex) {
103: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
104: }
105: PetscLogEventEnd(VEC_CUSPCopyToGPU,v,0,0,0);
106: v->valid_GPU_array = PETSC_CUSP_BOTH;
107: }
108: return(0);
109: }
113: PETSC_STATIC_INLINE PetscErrorCode VecCUSPCopyToGPUSome(Vec v,CUSPINTARRAYCPU *indicesCPU,CUSPINTARRAYGPU *indicesGPU)
114: {
115: Vec_Seq *s = (Vec_Seq *)v->data;
119: VecCUSPAllocateCheck(v);
120: if (v->valid_GPU_array == PETSC_CUSP_CPU) {
121: PetscLogEventBegin(VEC_CUSPCopyToGPUSome,v,0,0,0);
122: thrust::copy(thrust::make_permutation_iterator(s->array,indicesCPU->begin()),
123: thrust::make_permutation_iterator(s->array,indicesCPU->end()),
124: thrust::make_permutation_iterator(((Vec_CUSP *)v->spptr)->GPUarray->begin(),indicesGPU->begin()));
125: PetscLogEventEnd(VEC_CUSPCopyToGPUSome,v,0,0,0);
126: }
127: v->valid_GPU_array = PETSC_CUSP_GPU;
128: return(0);
129: }
133: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, CUSPARRAY** a)
134: {
138: *a = 0;
139: VecCUSPCopyToGPU(v);
140: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
141: return(0);
142: }
146: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, CUSPARRAY** a)
147: {
151: if (v->valid_GPU_array != PETSC_CUSP_UNALLOCATED){
152: v->valid_GPU_array = PETSC_CUSP_GPU;
153: }
154: PetscObjectStateIncrease((PetscObject)v);
155: return(0);
156: }
160: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayRead(Vec v, CUSPARRAY** a)
161: {
165: *a = 0;
166: VecCUSPCopyToGPU(v);
167: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
168: return(0);
169: }
173: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayRead(Vec v, CUSPARRAY** a)
174: {
176: return(0);
177: }
181: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayWrite(Vec v, CUSPARRAY** a)
182: {
186: *a = 0;
187: VecCUSPAllocateCheck(v);
188: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
189: return(0);
190: }
194: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, CUSPARRAY** a)
195: {
199: if (v->valid_GPU_array != PETSC_CUSP_UNALLOCATED){
200: v->valid_GPU_array = PETSC_CUSP_GPU;
201: }
202: PetscObjectStateIncrease((PetscObject)v);
203: return(0);
204: }
205: #endif