This source file includes the following definitions.
- mca_coll_cuda_reduce
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 #include "ompi_config.h"
  14 #include "coll_cuda.h"
  15 
  16 #include <stdio.h>
  17 
  18 #include "ompi/op/op.h"
  19 #include "opal/datatype/opal_convertor.h"
  20 #include "opal/datatype/opal_datatype_cuda.h"
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 
  29 int
  30 mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
  31                      struct ompi_datatype_t *dtype,
  32                      struct ompi_op_t *op,
  33                      int root, struct ompi_communicator_t *comm,
  34                      mca_coll_base_module_t *module)
  35 {
  36     mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
  37     ptrdiff_t gap;
  38     char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
  39     const char *sbuf2;
  40     size_t bufsize;
  41     int rc;
  42 
  43     bufsize = opal_datatype_span(&dtype->super, count, &gap);
  44 
  45 
  46     if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
  47         sbuf1 = (char*)malloc(bufsize);
  48         if (NULL == sbuf1) {
  49             return OMPI_ERR_OUT_OF_RESOURCE;
  50         }
  51         opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
  52         sbuf2 = sbuf; 
  53         sbuf = sbuf1 - gap;
  54     }
  55 
  56     if (opal_cuda_check_bufs(rbuf, NULL)) {
  57         rbuf1 = (char*)malloc(bufsize);
  58         if (NULL == rbuf1) {
  59             if (NULL != sbuf1) free(sbuf1);
  60             return OMPI_ERR_OUT_OF_RESOURCE;
  61         }
  62         opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
  63         rbuf2 = rbuf; 
  64         rbuf = rbuf1 - gap;
  65     }
  66     rc = s->c_coll.coll_reduce((void *) sbuf, rbuf, count,
  67                                dtype, op, root, comm,
  68                                s->c_coll.coll_reduce_module);
  69 
  70     if (NULL != sbuf1) {
  71         free(sbuf1);
  72     }
  73     if (NULL != rbuf1) {
  74         rbuf = rbuf2;
  75         opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
  76         free(rbuf1);
  77     }
  78     return rc;
  79 }