root/ompi/mca/coll/base/coll_base_scatter.c

DEFINITIONS

This source file includes the following definitions:
  1. ompi_coll_base_scatter_intra_binomial
  2. ompi_coll_base_scatter_intra_basic_linear

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2017 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2015-2016 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"

/*
 * ompi_coll_base_scatter_intra_binomial
 *
 * Function:  Binomial tree algorithm for scatter
 * Accepts:   Same as MPI_Scatter
 * Returns:   MPI_SUCCESS or error code
 *
 * Time complexity: \alpha\log(p) + \beta*m((p-1)/p),
 *                  where m = scount * comm_size, p = comm_size
 *
 * Memory requirements (per process):
 *   root process (root > 0): scount * comm_size * sdtype_size
 *   non-root, non-leaf process: rcount * comm_size * rdtype_size / 2
 *
 * Examples:
 *   comm_size=8          comm_size=10          comm_size=12
 *         0                    0                     0
 *       / | \             /  / | \               /  / \  \
 *      4  2  1           8  4  2  1            8   4   2  1
 *    / |  |            /  / |  |             / |  / |  |
 *   6  5  3           9  6  5  3            10 9 6  5  3
 *   |                    |                  |    |
 *   7                    7                  11   7
 */
int
ompi_coll_base_scatter_intra_binomial(
    const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
    void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
    int root, struct ompi_communicator_t *comm,
    mca_coll_base_module_t *module)
{
    int line = -1, rank, vrank, size, err;
    char *ptmp, *tempbuf = NULL;
    MPI_Status status;
    mca_coll_base_module_t *base_module = (mca_coll_base_module_t*)module;
    mca_coll_base_comm_t *data = base_module->base_data;
    ptrdiff_t sextent, rextent, ssize, rsize, sgap = 0, rgap = 0;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:base:scatter_intra_binomial rank %d/%d", rank, size));

    /* Create the binomial tree */
    COLL_BASE_UPDATE_IN_ORDER_BMTREE(comm, base_module, root);
    if (NULL == data->cached_in_order_bmtree) {
        err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
    }
    ompi_coll_tree_t *bmtree = data->cached_in_order_bmtree;

    vrank = (rank - root + size) % size;
    ptmp = (char *)rbuf;  /* by default assume a leaf node and just use rbuf */

    if (rank == root) {
        ompi_datatype_type_extent(sdtype, &sextent);
        ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap);
        if (0 == root) {
            /* root is rank 0, just use the send buffer */
            ptmp = (char *)sbuf;
            if (rbuf != MPI_IN_PLACE) {
                /* local copy to rbuf */
                err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
                                           rbuf, rcount, rdtype);
                if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
            }
        } else {
            /* root is not rank 0, allocate a temp buffer for the send data */
            tempbuf = (char *)malloc(ssize);
            if (NULL == tempbuf) {
                err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
            }
            ptmp = tempbuf - sgap;

            /* and rotate the data so it will eventually end up in the right place */
            err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
                                                      ptmp, (char *) sbuf + sextent * (ptrdiff_t)root * (ptrdiff_t)scount);
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

            err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)root,
                                                      ptmp + sextent * (ptrdiff_t)scount * (ptrdiff_t)(size - root), (char *)sbuf);
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
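
            /* Worked example (illustrative, not from the original source): with
             * size = 4 and root = 1, the two copies above leave the blocks of
             * ranks 1, 2, 3, 0 in tempbuf in that order, i.e. sorted by vrank,
             * so every subtree's data is contiguous. */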

            if (rbuf != MPI_IN_PLACE) {
                /* local copy to rbuf */
                err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
                                           rbuf, rcount, rdtype);
                if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
            }
        }
    } else if (!(vrank % 2)) {
        /* non-root, non-leaf nodes, allocate temp buffer for recv
         * the most we need is rcount*size/2 */
        ompi_datatype_type_extent(rdtype, &rextent);
        rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap);
        tempbuf = (char *)malloc(rsize / 2);
        if (NULL == tempbuf) {
            err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
        }
        ptmp = tempbuf - rgap;
        sdtype = rdtype;
        scount = rcount;
        sextent = rextent;
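        /* Note (illustrative): from here on the "send" variables describe the
         * data this rank forwards to its children, which it receives with the
         * receive count/type, hence the reassignments above. */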
    }

    int curr_count = (rank == root) ? scount * size : 0;
    if (!(vrank % 2)) {
        if (rank != root) {
            /* recv from parent on non-root */
            err = MCA_PML_CALL(recv(ptmp, (ptrdiff_t)rcount * (ptrdiff_t)size, rdtype, bmtree->tree_prev,
                                    MCA_COLL_BASE_TAG_SCATTER, comm, &status));
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

            /* Get received count */
            size_t rdtype_size;
            ompi_datatype_type_size(rdtype, &rdtype_size);
            curr_count = (int)(status._ucount / rdtype_size);

            /* local copy to rbuf */
            err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
                                       rbuf, rcount, rdtype);
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        }
        /* send to children on all non-leaf */
        for (int i = bmtree->tree_nextsize - 1; i >= 0; i--) {
            /* figure out how much data I have to send to this child */
            int vchild = (bmtree->tree_next[i] - root + size) % size;
            int send_count = vchild - vrank;
            if (send_count > size - vchild)
                send_count = size - vchild;
            send_count *= scount;
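            /* Worked example (illustrative): for comm_size = 8 and vrank = 0 the
             * children are vranks 4, 2 and 1; their subtrees hold
             * min(vchild - vrank, size - vchild) = 4, 2 and 1 ranks respectively,
             * so that many scount-sized blocks are sent to each. */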
            err = MCA_PML_CALL(send(ptmp + (ptrdiff_t)(curr_count - send_count) * sextent,
                                    send_count, sdtype, bmtree->tree_next[i],
                                    MCA_COLL_BASE_TAG_SCATTER,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
            curr_count -= send_count;
        }
        if (NULL != tempbuf)
            free(tempbuf);
    } else {
        /* recv from parent on leaf nodes */
        err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, bmtree->tree_prev,
                                MCA_COLL_BASE_TAG_SCATTER, comm, &status));
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
    }

    return MPI_SUCCESS;

 err_hndl:
    if (NULL != tempbuf)
        free(tempbuf);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,  "%s:%4d\tError occurred %d, rank %2d",
                 __FILE__, line, err, rank));
    (void)line;  // silence compiler warning
    return err;
}
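
/*
 * Illustrative usage sketch (not part of the original file): a minimal example
 * of how a scatter of one MPI_INT per rank, rooted at rank 0, maps onto the
 * arguments of ompi_coll_base_scatter_intra_binomial().  The communicator and
 * coll module are assumed to have been set up by the coll framework; the
 * function name example_binomial_scatter_usage is hypothetical and exists only
 * for this sketch.
 */
static int example_binomial_scatter_usage(struct ompi_communicator_t *comm,
                                          mca_coll_base_module_t *module)
{
    int size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);
    int *sendbuf = NULL, recvval = -1, err;

    if (0 == rank) {
        /* the root holds one element per rank, stored in rank order */
        sendbuf = (int *)malloc((size_t)size * sizeof(int));
        if (NULL == sendbuf) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        for (int i = 0; i < size; ++i) {
            sendbuf[i] = i;
        }
    }

    /* every rank, including the root, receives exactly one MPI_INT */
    err = ompi_coll_base_scatter_intra_binomial(sendbuf, 1, MPI_INT,
                                                &recvval, 1, MPI_INT,
                                                0 /* root */, comm, module);
    free(sendbuf);   /* free(NULL) is a no-op on non-root ranks */
    return err;
}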

/*
 * Linear functions are copied from the BASIC coll module.
 * They do not segment the message and are simple implementations,
 * but for some small number of nodes and/or small data sizes they
 * are just as fast as base/tree based segmenting operations,
 * and as such may be selected by the decision functions.
 * These are copied into this module due to the way we select modules
 * in V1, i.e. in V2 we will handle this differently and so will not
 * have to duplicate code.
 * JPG, following the examples from other coll_base implementations. Dec06.
 */

/* copied function (with appropriate renaming) starts here */
/*
 *      scatter_intra
 *
 *      Function:       - basic scatter operation
 *      Accepts:        - same arguments as MPI_Scatter()
 *      Returns:        - MPI_SUCCESS or error code
 */
int
ompi_coll_base_scatter_intra_basic_linear(const void *sbuf, int scount,
                                          struct ompi_datatype_t *sdtype,
                                          void *rbuf, int rcount,
                                          struct ompi_datatype_t *rdtype,
                                          int root,
                                          struct ompi_communicator_t *comm,
                                          mca_coll_base_module_t *module)
{
    int i, rank, size, err;
    ptrdiff_t incr;
    char *ptmp;

    /* Initialize */

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* If not root, receive data. */

    if (rank != root) {
        err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
                                MCA_COLL_BASE_TAG_SCATTER,
                                comm, MPI_STATUS_IGNORE));
        return err;
    }

    /* I am the root, loop sending data. */

    err = ompi_datatype_type_extent(sdtype, &incr);
    if (OMPI_SUCCESS != err) {
        return OMPI_ERROR;
    }

    incr *= scount;
    for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {

        /* simple optimization */

        if (i == rank) {
            if (MPI_IN_PLACE != rbuf) {
                err =
                    ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
                                         rdtype);
            }
        } else {
            err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
                                    MCA_COLL_BASE_TAG_SCATTER,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
        }
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* All done */

    return MPI_SUCCESS;
}
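
/*
 * Cost note (added for comparison, not in the original file): using the same
 * notation as the binomial header above, the root in this linear algorithm
 * posts p-1 individual sends, for a cost of roughly
 *   \alpha*(p-1) + \beta*m*(p-1)/p,  where m = scount * comm_size, p = comm_size.
 * The binomial variant replaces the \alpha*(p-1) latency term with \alpha*log(p),
 * which is why it tends to pay off for larger communicators and small messages.
 */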


/* copied function (with appropriate renaming) ends here */
