root/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_coll_cuda_reduce_scatter_block

   1 /*
   2  * Copyright (c) 2014-2017 The University of Tennessee and The University
   3  *                         of Tennessee Research Foundation.  All rights
   4  *                         reserved.
   5  * Copyright (c) 2014-2015 NVIDIA Corporation.  All rights reserved.
   6  * $COPYRIGHT$
   7  *
   8  * Additional copyrights may follow
   9  *
  10  * $HEADER$
  11  */
  12 
  13 #include "ompi_config.h"
  14 #include "coll_cuda.h"
  15 
  16 #include <stdio.h>
  17 
  18 #include "ompi/op/op.h"
  19 #include "opal/datatype/opal_convertor.h"
  20 #include "opal/datatype/opal_datatype_cuda.h"
  21 
  22 /*
  23  *      reduce_scatter_block
  24  *
  25  *      Function:       - reduce then scatter
  26  *      Accepts:        - same as MPI_Reduce_scatter_block()
  27  *      Returns:        - MPI_SUCCESS or error code
  28  *
  29  * Algorithm:
  30  *     reduce and scatter (needs to be cleaned
  31  *     up at some point)
  32  */
  33 int
  34 mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount,
  35                                    struct ompi_datatype_t *dtype,
  36                                    struct ompi_op_t *op,
  37                                    struct ompi_communicator_t *comm,
  38                                    mca_coll_base_module_t *module)
  39 {
  40     mca_coll_cuda_module_t *s = (mca_coll_cuda_module_t*) module;
  41     ptrdiff_t gap;
  42     char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
  43     size_t sbufsize, rbufsize;
  44     int rc;
  45 
  46     rbufsize = opal_datatype_span(&dtype->super, rcount, &gap);
  47 
  48     sbufsize = rbufsize * ompi_comm_size(comm);
  49 
  50     if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
  51         sbuf1 = (char*)malloc(sbufsize);
  52         if (NULL == sbuf1) {
  53             return OMPI_ERR_OUT_OF_RESOURCE;
  54         }
  55         opal_cuda_memcpy_sync(sbuf1, sbuf, sbufsize);
  56         sbuf = sbuf1 - gap;
  57     }
  58 
  59     if (opal_cuda_check_bufs(rbuf, NULL)) {
  60         rbuf1 = (char*)malloc(rbufsize);
  61         if (NULL == rbuf1) {
  62             if (NULL != sbuf1) free(sbuf1);
  63             return OMPI_ERR_OUT_OF_RESOURCE;
  64         }
  65         opal_cuda_memcpy_sync(rbuf1, rbuf, rbufsize);
  66         rbuf2 = rbuf; /* save away original buffer */
  67         rbuf = rbuf1 - gap;
  68     }
  69     rc = s->c_coll.coll_reduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm,
  70                                              s->c_coll.coll_reduce_scatter_block_module);
  71     if (NULL != sbuf1) {
  72         free(sbuf1);
  73     }
  74     if (NULL != rbuf1) {
  75         rbuf = rbuf2;
  76         opal_cuda_memcpy_sync(rbuf, rbuf1, rbufsize);
  77         free(rbuf1);
  78     }
  79     return rc;
  80 }
  81 

/* [<][>][^][v][top][bottom][index][help] */