Actual source code: mpikok.kokkos.cxx
  2: /*
  3:    This file contains routines for Parallel vector operations.
  4:  */
  6: #include <petscvec_kokkos.hpp>
  7: #include <petsc/private/deviceimpl.h>
  8: #include <petsc/private/vecimpl.h>
  9: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
 10: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
 11: #include <petscsf.h>
 13: static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
 14: {
 15:   PetscFunctionBegin;
 16:   delete static_cast<Vec_Kokkos *>(v->spptr);
 17:   PetscCall(VecDestroy_MPI(v));
 18:   PetscFunctionReturn(PETSC_SUCCESS);
 19: }
 21: static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
 22: {
 23:   PetscFunctionBegin;
 24:   PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
 25:   PetscFunctionReturn(PETSC_SUCCESS);
 26: }
 28: /* z = y^H x */
 29: static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 30: {
 31:   PetscFunctionBegin;
 32:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
 33:   PetscFunctionReturn(PETSC_SUCCESS);
 34: }
 36: /* z = y^T x */
 37: static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
 38: {
 39:   PetscFunctionBegin;
 40:   PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }
 44: static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 45: {
 46:   PetscFunctionBegin;
 47:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }
 51: static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
 52: {
 53:   PetscFunctionBegin;
 54:   PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
 55:   PetscFunctionReturn(PETSC_SUCCESS);
 56: }
 58: PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 59: {
 60:   const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};
 62:   PetscFunctionBegin;
 63:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
 64:   PetscFunctionReturn(PETSC_SUCCESS);
 65: }
 67: PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
 68: {
 69:   const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};
 71:   PetscFunctionBegin;
 72:   PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
 73:   PetscFunctionReturn(PETSC_SUCCESS);
 74: }
 76: PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
 77: {
 78:   Vec         v;
 79:   Vec_MPI    *vecmpi;
 80:   Vec_Kokkos *veckok;
 82:   PetscFunctionBegin;
 83:   /* Reuse VecDuplicate_MPI, which contains a lot of stuff */
 84:   PetscCall(VecDuplicate_MPI(win, &v)); /* after the call, v is a VECMPI, with data zero'ed */
 85:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
 86:   PetscCall(PetscMemcpy(v->ops, win->ops, sizeof(*win->ops)));
 88:   /* Build the Vec_Kokkos struct */
 89:   vecmpi = static_cast<Vec_MPI *>(v->data);
 90:   veckok = new Vec_Kokkos(v->map->n, vecmpi->array);
 91:   Kokkos::deep_copy(veckok->v_dual.view_device(), 0.0);
 92:   v->spptr       = veckok;
 93:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
 94:   *vv            = v;
 95:   PetscFunctionReturn(PETSC_SUCCESS);
 96: }
 98: PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
 99: {
100:   PetscFunctionBegin;
101:   PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
102:   PetscFunctionReturn(PETSC_SUCCESS);
103: }
105: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
106: {
107:   PetscFunctionBegin;
108:   PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
109:   PetscFunctionReturn(PETSC_SUCCESS);
110: }
112: static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
113: {
114:   const auto vecmpi = static_cast<Vec_MPI *>(x->data);
115:   const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
116:   PetscInt   m;
118:   PetscFunctionBegin;
119:   PetscCall(VecGetLocalSize(x, &m));
120:   PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
121:   PetscCallCXX(veckok->SetUpCOO(vecmpi, m));
122:   PetscFunctionReturn(PETSC_SUCCESS);
123: }
125: static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
126: {
127:   const auto                  vecmpi  = static_cast<Vec_MPI *>(x->data);
128:   const auto                  veckok  = static_cast<Vec_Kokkos *>(x->spptr);
129:   const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
130:   const PetscCountKokkosView &perm1   = veckok->perm1_d;
131:   const PetscCountKokkosView &imap2   = veckok->imap2_d;
132:   const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
133:   const PetscCountKokkosView &perm2   = veckok->perm2_d;
134:   const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
135:   PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
136:   PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
137:   PetscScalarKokkosView       xv;
138:   ConstPetscScalarKokkosView  vv;
139:   PetscMemType                memtype;
140:   PetscInt                    m;
142:   PetscFunctionBegin;
143:   PetscCall(VecGetLocalSize(x, &m));
144:   PetscCall(PetscGetMemType(v, &memtype));
145:   if (PetscMemTypeHost(memtype)) { /* If user gave v[] in host, we might need to copy it to device if any */
146:     vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecmpi->coo_n));
147:   } else {
148:     vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
149:   }
151:   /* Pack entries to be sent to remote */
152:   Kokkos::parallel_for(
153:     vecmpi->sendlen, KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
154:   PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));
156:   if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write vector */
157:   else PetscCall(VecGetKokkosView(x, &xv));                             /* read & write vector */
159:   Kokkos::parallel_for(
160:     m, KOKKOS_LAMBDA(const PetscCount i) {
161:       PetscScalar sum = 0.0;
162:       for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
163:       xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
164:     });
166:   PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));
168:   /* Add received remote entries */
169:   Kokkos::parallel_for(
170:     vecmpi->nnz2, KOKKOS_LAMBDA(PetscCount i) {
171:       for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
172:     });
174:   if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
175:   else PetscCall(VecRestoreKokkosView(x, &xv));
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }
179: static PetscErrorCode VecSetOps_MPIKokkos(Vec v)
180: {
181:   PetscFunctionBegin;
182:   v->ops->abs             = VecAbs_SeqKokkos;
183:   v->ops->reciprocal      = VecReciprocal_SeqKokkos;
184:   v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
185:   v->ops->setrandom       = VecSetRandom_SeqKokkos;
186:   v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
187:   v->ops->waxpy           = VecWAXPY_SeqKokkos;
188:   v->ops->norm            = VecNorm_MPIKokkos;
189:   v->ops->min             = VecMin_MPIKokkos;
190:   v->ops->max             = VecMax_MPIKokkos;
191:   v->ops->sum             = VecSum_SeqKokkos;
192:   v->ops->shift           = VecShift_SeqKokkos;
193:   v->ops->scale           = VecScale_SeqKokkos;
194:   v->ops->copy            = VecCopy_SeqKokkos;
195:   v->ops->set             = VecSet_SeqKokkos;
196:   v->ops->swap            = VecSwap_SeqKokkos;
197:   v->ops->axpy            = VecAXPY_SeqKokkos;
198:   v->ops->axpby           = VecAXPBY_SeqKokkos;
199:   v->ops->maxpy           = VecMAXPY_SeqKokkos;
200:   v->ops->aypx            = VecAYPX_SeqKokkos;
201:   v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
202:   v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
203:   v->ops->placearray      = VecPlaceArray_SeqKokkos;
204:   v->ops->replacearray    = VecReplaceArray_SeqKokkos;
205:   v->ops->resetarray      = VecResetArray_SeqKokkos;
207:   v->ops->dot   = VecDot_MPIKokkos;
208:   v->ops->tdot  = VecTDot_MPIKokkos;
209:   v->ops->mdot  = VecMDot_MPIKokkos;
210:   v->ops->mtdot = VecMTDot_MPIKokkos;
212:   v->ops->dot_local   = VecDot_SeqKokkos;
213:   v->ops->tdot_local  = VecTDot_SeqKokkos;
214:   v->ops->mdot_local  = VecMDot_SeqKokkos;
215:   v->ops->mtdot_local = VecMTDot_SeqKokkos;
217:   v->ops->norm_local              = VecNorm_SeqKokkos;
218:   v->ops->duplicate               = VecDuplicate_MPIKokkos;
219:   v->ops->destroy                 = VecDestroy_MPIKokkos;
220:   v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
221:   v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
222:   v->ops->getlocalvectorread      = VecGetLocalVector_SeqKokkos;
223:   v->ops->restorelocalvectorread  = VecRestoreLocalVector_SeqKokkos;
224:   v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
225:   v->ops->getarray                = VecGetArray_SeqKokkos;
226:   v->ops->restorearray            = VecRestoreArray_SeqKokkos;
227:   v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
228:   v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
229:   v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
230:   v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
231:   v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;
233:   v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
234:   v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;
235:   PetscFunctionReturn(PETSC_SUCCESS);
236: }
238: /*MC
239:    VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos
241:    Options Database Keys:
242: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()
244:   Level: beginner
246: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
247: M*/
248: PetscErrorCode VecCreate_MPIKokkos(Vec v)
249: {
250:   Vec_MPI    *vecmpi;
251:   Vec_Kokkos *veckok;
253:   PetscFunctionBegin;
254:   PetscCall(PetscKokkosInitializeCheck());
255:   PetscCall(PetscLayoutSetUp(v->map));
256:   PetscCall(VecCreate_MPI(v)); /* Calloc host array */
258:   vecmpi = static_cast<Vec_MPI *>(v->data);
259:   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
260:   PetscCall(VecSetOps_MPIKokkos(v));
261:   veckok         = new Vec_Kokkos(v->map->n, vecmpi->array, NULL); /* Alloc device array but do not init it */
262:   v->spptr       = static_cast<void *>(veckok);
263:   v->offloadmask = PETSC_OFFLOAD_KOKKOS;
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }
267: /*@C
268:    VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
269:    where the user provides the GPU array space to store the vector values.
271:    Collective
273:    Input Parameters:
274: +  comm  - the MPI communicator to use
275: .  bs    - block size, same meaning as VecSetBlockSize()
276: .  n     - local vector length, cannot be PETSC_DECIDE
277: .  N     - global vector length (or PETSC_DECIDE to have calculated)
278: -  array - the user provided GPU array to store the vector values
280:    Output Parameter:
281: .  vv - the vector
283:    Notes:
284:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
285:    same type as an existing vector.
287:    If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
288:    at a later stage to SET the array for storing the vector values.
290:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
291:    The user should not free the array until the vector is destroyed.
293:    Level: intermediate
295: .seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
296:           `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
297:           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
299: @*/
300: PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
301: {
302:   Vec          w;
303:   Vec_Kokkos  *veckok;
304:   Vec_MPI     *vecmpi;
305:   PetscScalar *harray;
307:   PetscFunctionBegin;
308:   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
309:   PetscCall(PetscKokkosInitializeCheck());
310:   PetscCall(PetscSplitOwnership(comm, &n, &N));
311:   PetscCall(VecCreate(comm, &w));
312:   PetscCall(VecSetSizes(w, n, N));
313:   PetscCall(VecSetBlockSize(w, bs));
314:   PetscCall(PetscLayoutSetUp(w->map));
316:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
317:     harray = const_cast<PetscScalar *>(darray);
318:   } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If device is not the same as host, allocate the host array ourselves */
320:   PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build a sequential vector with provided data */
321:   vecmpi = static_cast<Vec_MPI *>(w->data);
323:   if (!std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by petsc */
325:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
326:   PetscCall(VecSetOps_MPIKokkos(w));
327:   veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
328:   veckok->v_dual.modify_device(); /* Mark the device is modified */
329:   w->spptr       = static_cast<void *>(veckok);
330:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
331:   *v             = w;
332:   PetscFunctionReturn(PETSC_SUCCESS);
333: }
335: /*
336:    VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
337:    with user-provided arrays on host and device.
339:    Collective
341:    Input Parameter:
342: +  comm - the communicator
343: .  bs - the block size
344: .  n - the local vector length
345: .  N - the global vector length
346: -  harray - host memory where the vector elements are to be stored.
347: -  darray - device memory where the vector elements are to be stored.
349:    Output Parameter:
350: .  v - the vector
352:    Notes:
353:    If there is no device, then harray and darray must be the same.
354:    If n is not zero, then harray and darray must be allocated.
355:    After the call, the created vector is supposed to be in a synchronized state, i.e.,
356:    we suppose harray and darray have the same data.
358:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
359:    The user should not free the array until the vector is destroyed.
360: */
361: PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
362: {
363:   Vec w;
365:   PetscFunctionBegin;
366:   PetscCall(PetscKokkosInitializeCheck());
367:   if (n) {
369:     PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
370:   }
371:   if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
372:   PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
373:   PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change it to Kokkos */
374:   PetscCall(VecSetOps_MPIKokkos(w));
375:   PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
376:   w->offloadmask = PETSC_OFFLOAD_KOKKOS;
377:   *v             = w;
378:   PetscFunctionReturn(PETSC_SUCCESS);
379: }
381: /*MC
382:    VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos
384:    Options Database Keys:
385: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()
387:   Level: beginner
389: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
390: M*/
391: PetscErrorCode VecCreate_Kokkos(Vec v)
392: {
393:   PetscMPIInt size;
395:   PetscFunctionBegin;
396:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
397:   if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
398:   else PetscCall(VecSetType(v, VECMPIKOKKOS));
399:   PetscFunctionReturn(PETSC_SUCCESS);
400: }