root/ompi/mca/coll/base/coll_base_allgatherv.c


DEFINITIONS

This source file includes the following definitions.
  1. ompi_coll_base_allgatherv_intra_bruck
  2. ompi_coll_base_allgatherv_intra_ring
  3. ompi_coll_base_allgatherv_intra_neighborexchange
  4. ompi_coll_base_allgatherv_intra_two_procs
  5. ompi_coll_base_allgatherv_intra_basic_default

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2017 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2009      University of Houston. All rights reserved.
 * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All Rights
 *                         reserved.
 * Copyright (c) 2015-2016 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * Copyright (c) 2017      IBM Corporation.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"

/*
 * ompi_coll_base_allgatherv_intra_bruck
 *
 * Function:     allgatherv using O(log(N)) steps.
 * Accepts:      Same arguments as MPI_Allgatherv
 * Returns:      MPI_SUCCESS or error code
 *
 * Description:  Variation of the all-to-all algorithm described by Bruck et al. in
 *               "Efficient Algorithms for All-to-all Communications
 *                in Multiport Message-Passing Systems"
 * Note:         Unlike the allgather implementation, we rely on an
 *               indexed datatype to select buffers appropriately.
 *               The only additional memory requirement is for the creation of
 *               temporary datatypes.
 * Example on 7 nodes (memory layout need not be in order)
 *   Initial set up:
 *    #     0      1      2      3      4      5      6
 *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [ ]    [ ]    [6]
 *   Step 0: send message to (rank - 2^0), receive message from (rank + 2^0)
 *    #     0      1      2      3      4      5      6
 *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]    [0]
 *         [1]    [1]    [ ]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [2]    [2]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [3]    [3]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [4]    [4]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [5]    [5]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [ ]    [6]    [6]
 *   Step 1: send message to (rank - 2^1), receive message from (rank + 2^1).
 *           message contains all blocks from (rank) .. (rank + 2^1 - 1) with
 *           wrap around.
 *    #     0      1      2      3      4      5      6
 *         [0]    [ ]    [ ]    [ ]    [0]    [0]    [0]
 *         [1]    [1]    [ ]    [ ]    [ ]    [1]    [1]
 *         [2]    [2]    [2]    [ ]    [ ]    [ ]    [2]
 *         [3]    [3]    [3]    [3]    [ ]    [ ]    [ ]
 *         [ ]    [4]    [4]    [4]    [4]    [ ]    [ ]
 *         [ ]    [ ]    [5]    [5]    [5]    [5]    [ ]
 *         [ ]    [ ]    [ ]    [6]    [6]    [6]    [6]
 *   Step 2: send message to (rank - 2^2), receive message from (rank + 2^2).
 *           message size is "all remaining blocks"
 *    #     0      1      2      3      4      5      6
 *         [0]    [0]    [0]    [0]    [0]    [0]    [0]
 *         [1]    [1]    [1]    [1]    [1]    [1]    [1]
 *         [2]    [2]    [2]    [2]    [2]    [2]    [2]
 *         [3]    [3]    [3]    [3]    [3]    [3]    [3]
 *         [4]    [4]    [4]    [4]    [4]    [4]    [4]
 *         [5]    [5]    [5]    [5]    [5]    [5]    [5]
 *         [6]    [6]    [6]    [6]    [6]    [6]    [6]
 */
int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, int scount,
                                           struct ompi_datatype_t *sdtype,
                                           void *rbuf, const int *rcounts,
                                           const int *rdispls,
                                           struct ompi_datatype_t *rdtype,
                                           struct ompi_communicator_t *comm,
                                           mca_coll_base_module_t *module)
{
    int line = -1, err = 0, rank, size, sendto, recvfrom, distance, blockcount, i;
    int *new_rcounts = NULL, *new_rdispls = NULL, *new_scounts = NULL, *new_sdispls = NULL;
    ptrdiff_t rlb, rext;
    char *tmpsend = NULL, *tmprecv = NULL;
    struct ompi_datatype_t *new_rdtype, *new_sdtype;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:base:allgatherv_intra_bruck rank %d", rank));

    err = ompi_datatype_get_extent (rdtype, &rlb, &rext);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    /* Initialization step:
       - if send buffer is not MPI_IN_PLACE, copy send buffer to block rank of
       the receive buffer.
    */
    tmprecv = (char*) rbuf + (ptrdiff_t)rdispls[rank] * rext;
    if (MPI_IN_PLACE != sbuf) {
        tmpsend = (char*) sbuf;
        err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
                                   tmprecv, rcounts[rank], rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    }

    /* Communication step:
       At every step i, rank r:
       - doubles the distance
       - sends a message of blockcount blocks,
       (rbuf[rank] .. rbuf[rank + blockcount - 1]), to rank (r - distance)
       - receives a message of blockcount blocks,
       (rbuf[r + distance] ... rbuf[(r + distance) + blockcount - 1]) from
       rank (r + distance)
       - blockcount doubles until the last step, when only the remaining data is
       exchanged.
    */
    blockcount = 1;
    tmpsend = (char*) rbuf;

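    /* One scratch allocation holds the four arrays (receive counts/displacements
       and send counts/displacements, each of length 'size') used to describe the
       per-step indexed datatypes. */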
    new_rcounts = (int*) calloc(4*size, sizeof(int));
    if (NULL == new_rcounts) { err = -1; line = __LINE__; goto err_hndl; }
    new_rdispls = new_rcounts + size;
    new_scounts = new_rdispls + size;
    new_sdispls = new_scounts + size;

    for (distance = 1; distance < size; distance<<=1) {

        recvfrom = (rank + distance) % size;
        sendto = (rank - distance + size) % size;

        if (distance <= (size >> 1)) {
            blockcount = distance;
        } else {
            blockcount = size - distance;
        }

        /* create send and receive datatypes */
        for (i = 0; i < blockcount; i++) {
            const int tmp_srank = (rank + i) % size;
            const int tmp_rrank = (recvfrom + i) % size;
            new_scounts[i] = rcounts[tmp_srank];
            new_sdispls[i] = rdispls[tmp_srank];
            new_rcounts[i] = rcounts[tmp_rrank];
            new_rdispls[i] = rdispls[tmp_rrank];
        }
        err = ompi_datatype_create_indexed(blockcount, new_scounts, new_sdispls,
                                           rdtype, &new_sdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        err = ompi_datatype_create_indexed(blockcount, new_rcounts, new_rdispls,
                                           rdtype, &new_rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

        err = ompi_datatype_commit(&new_sdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        err = ompi_datatype_commit(&new_rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

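        /* The indexed datatypes carry the absolute block displacements (rdispls)
           relative to the start of rbuf, so both sides of the exchange use rbuf
           as the base pointer with a count of 1. */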
        /* Sendreceive */
        err = ompi_coll_base_sendrecv(rbuf, 1, new_sdtype, sendto,
                                       MCA_COLL_BASE_TAG_ALLGATHERV,
                                       rbuf, 1, new_rdtype, recvfrom,
                                       MCA_COLL_BASE_TAG_ALLGATHERV,
                                       comm, MPI_STATUS_IGNORE, rank);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

        ompi_datatype_destroy(&new_sdtype);
        ompi_datatype_destroy(&new_rdtype);
    }

    free(new_rcounts);

    return OMPI_SUCCESS;

 err_hndl:
    if( NULL != new_rcounts ) free(new_rcounts);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,  "%s:%4d\tError occurred %d, rank %2d",
                 __FILE__, line, err, rank));
    (void)line;  // silence compiler warning
    return err;
}


/*
 * ompi_coll_base_allgatherv_intra_ring
 *
 * Function:     allgatherv using O(N) steps.
 * Accepts:      Same arguments as MPI_Allgatherv
 * Returns:      MPI_SUCCESS or error code
 *
 * Description:  Ring algorithm for allgatherv.
 *               At every step i, rank r receives a message from rank (r - 1)
 *               containing data from rank (r - i - 1) and sends a message to
 *               rank (r + 1) containing data from rank (r - i), with wrap-around.
 * Memory requirements:
 *               No additional memory requirements.
 *
 */
int ompi_coll_base_allgatherv_intra_ring(const void *sbuf, int scount,
                                          struct ompi_datatype_t *sdtype,
                                          void* rbuf, const int *rcounts, const int *rdisps,
                                          struct ompi_datatype_t *rdtype,
                                          struct ompi_communicator_t *comm,
                                          mca_coll_base_module_t *module)
{
    int line = -1, rank, size, sendto, recvfrom, i, recvdatafrom, senddatafrom, err = 0;
    ptrdiff_t rlb, rext;
    char *tmpsend = NULL, *tmprecv = NULL;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:base:allgatherv_intra_ring rank %d", rank));

    err = ompi_datatype_get_extent (rdtype, &rlb, &rext);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    /* Initialization step:
       - if send buffer is not MPI_IN_PLACE, copy send buffer to
       the appropriate block of receive buffer
    */
    tmprecv = (char*) rbuf + (ptrdiff_t)rdisps[rank] * rext;
    if (MPI_IN_PLACE != sbuf) {
        tmpsend = (char*) sbuf;
        err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
                                   tmprecv, rcounts[rank], rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
    }

    /* Communication step:
       At every step i: 0 .. (P-2), rank r:
       - receives a message from rank [(r - 1 + size) % size] containing data
       from rank [(r - i - 1 + size) % size]
       - sends a message to rank [(r + 1) % size] containing data from rank
       [(r - i + size) % size]
       - each message starts at rbuf + rdisps[block] and contains rcounts[block]
       elements of rdtype
    */
    sendto = (rank + 1) % size;
    recvfrom = (rank - 1 + size) % size;

    for (i = 0; i < size - 1; i++) {
        recvdatafrom = (rank - i - 1 + size) % size;
        senddatafrom = (rank - i + size) % size;

        tmprecv = (char*)rbuf + (ptrdiff_t)rdisps[recvdatafrom] * rext;
        tmpsend = (char*)rbuf + (ptrdiff_t)rdisps[senddatafrom] * rext;

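        /* The block forwarded now is the one received in the previous iteration,
           so every block travels exactly one hop around the ring per step. */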
        /* Sendreceive */
        err = ompi_coll_base_sendrecv(tmpsend, rcounts[senddatafrom], rdtype,
                                       sendto, MCA_COLL_BASE_TAG_ALLGATHERV,
                                       tmprecv, rcounts[recvdatafrom], rdtype,
                                       recvfrom, MCA_COLL_BASE_TAG_ALLGATHERV,
                                       comm, MPI_STATUS_IGNORE, rank);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
    }

    return OMPI_SUCCESS;

 err_hndl:
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,  "%s:%4d\tError occurred %d, rank %2d",
                 __FILE__, line, err, rank));
    (void)line;  // silence compiler warning
    return err;
}

/*
 * ompi_coll_base_allgatherv_intra_neighborexchange
 *
 * Function:     allgatherv using N/2 steps (O(N))
 * Accepts:      Same arguments as MPI_Allgatherv
 * Returns:      MPI_SUCCESS or error code
 *
 * Description:  Neighbor Exchange algorithm for allgather, adapted for
 *               allgatherv.
 *               Described by Chen et al. in
 *               "Performance Evaluation of Allgather Algorithms on
 *                Terascale Linux Cluster with Fast Ethernet",
 *               Proceedings of the Eighth International Conference on
 *               High-Performance Computing in the Asia-Pacific Region
 *               (HPCASIA'05), 2005
 *
 *               Rank r exchanges a message with one of its neighbors and
 *               forwards the data further in the next step.
 *
 *               No additional memory requirements.
 *
 * Limitations:  The algorithm works only for an even number of processes.
 *               For an odd number of processes we switch to the ring algorithm.
 *
 * Example on 6 nodes:
 *  Initial state
 *    #     0      1      2      3      4      5
 *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]
 *   Step 0:
 *    #     0      1      2      3      4      5
 *         [0]    [0]    [ ]    [ ]    [ ]    [ ]
 *         [1]    [1]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [2]    [2]    [ ]    [ ]
 *         [ ]    [ ]    [3]    [3]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [4]    [4]
 *         [ ]    [ ]    [ ]    [ ]    [5]    [5]
 *   Step 1:
 *    #     0      1      2      3      4      5
 *         [0]    [0]    [0]    [ ]    [ ]    [0]
 *         [1]    [1]    [1]    [ ]    [ ]    [1]
 *         [ ]    [2]    [2]    [2]    [2]    [ ]
 *         [ ]    [3]    [3]    [3]    [3]    [ ]
 *         [4]    [ ]    [ ]    [4]    [4]    [4]
 *         [5]    [ ]    [ ]    [5]    [5]    [5]
 *   Step 2:
 *    #     0      1      2      3      4      5
 *         [0]    [0]    [0]    [0]    [0]    [0]
 *         [1]    [1]    [1]    [1]    [1]    [1]
 *         [2]    [2]    [2]    [2]    [2]    [2]
 *         [3]    [3]    [3]    [3]    [3]    [3]
 *         [4]    [4]    [4]    [4]    [4]    [4]
 *         [5]    [5]    [5]    [5]    [5]    [5]
 */
int
ompi_coll_base_allgatherv_intra_neighborexchange(const void *sbuf, int scount,
                                                  struct ompi_datatype_t *sdtype,
                                                  void* rbuf, const int *rcounts, const int *rdispls,
                                                  struct ompi_datatype_t *rdtype,
                                                  struct ompi_communicator_t *comm,
                                                  mca_coll_base_module_t *module)
{
    int line = -1, rank, size, i, even_rank, err = 0;
    int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
    int new_scounts[2], new_sdispls[2], new_rcounts[2], new_rdispls[2];
    ptrdiff_t rlb, rext;
    char *tmpsend = NULL, *tmprecv = NULL;
    struct ompi_datatype_t *new_rdtype, *new_sdtype;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    if (size % 2) {
        OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                     "coll:base:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
                     size));
        return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
                                                     rbuf, rcounts,
                                                     rdispls, rdtype,
                                                     comm, module);
    }

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:base:allgatherv_intra_neighborexchange rank %d", rank));

    err = ompi_datatype_get_extent (rdtype, &rlb, &rext);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    /* Initialization step:
       - if send buffer is not MPI_IN_PLACE, copy send buffer to
       the appropriate block of receive buffer
    */
    tmprecv = (char*) rbuf + (ptrdiff_t)rdispls[rank] * rext;
    if (MPI_IN_PLACE != sbuf) {
        tmpsend = (char*) sbuf;
        err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
                                   tmprecv, rcounts[rank], rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
    }

    /* Determine neighbors, order in which blocks will arrive, etc. */
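    /* Even ranks exchange first with their higher-ranked neighbor, odd ranks
       with their lower-ranked one; offset_at_step records the direction in
       which the received block indices advance on alternating steps. */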
    even_rank = !(rank % 2);
    if (even_rank) {
        neighbor[0] = (rank + 1) % size;
        neighbor[1] = (rank - 1 + size) % size;
        recv_data_from[0] = rank;
        recv_data_from[1] = rank;
        offset_at_step[0] = (+2);
        offset_at_step[1] = (-2);
    } else {
        neighbor[0] = (rank - 1 + size) % size;
        neighbor[1] = (rank + 1) % size;
        recv_data_from[0] = neighbor[0];
        recv_data_from[1] = neighbor[0];
        offset_at_step[0] = (-2);
        offset_at_step[1] = (+2);
    }

    /* Communication loop:
       - The first step is special: exchange a single block with neighbor[0].
       - Remaining steps:
       update recv_data_from according to the offset, and
       exchange two blocks with the appropriate neighbor;
       the send location becomes the previous receive location.
       Note: we need to create indexed datatypes to send and receive these
       blocks properly.
    */
    tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[neighbor[0]] * rext;
    tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
    err = ompi_coll_base_sendrecv(tmpsend, rcounts[rank], rdtype,
                                   neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
                                   tmprecv, rcounts[neighbor[0]], rdtype,
                                   neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
                                   comm, MPI_STATUS_IGNORE, rank);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    /* Determine the initial block to send */
    if (even_rank) {
        send_data_from = rank;
    } else {
        send_data_from = recv_data_from[0];
    }

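    /* Steady state: on step i each rank exchanges a pair of consecutive blocks
       with the neighbor selected by the step parity, forwarding the pair it
       obtained in the previous step. */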
    for (i = 1; i < (size / 2); i++) {
        const int i_parity = i % 2;
        recv_data_from[i_parity] =
            (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;

        /* Create new indexed types for sending and receiving.
           We are sending data from ranks (send_data_from) and (send_data_from + 1).
           We are receiving data from ranks (recv_data_from[i_parity]) and
           (recv_data_from[i_parity] + 1).
        */
        new_scounts[0] = rcounts[send_data_from];
        new_scounts[1] = rcounts[(send_data_from + 1)];
        new_sdispls[0] = rdispls[send_data_from];
        new_sdispls[1] = rdispls[(send_data_from + 1)];
        err = ompi_datatype_create_indexed(2, new_scounts, new_sdispls, rdtype,
                                           &new_sdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        err = ompi_datatype_commit(&new_sdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

        new_rcounts[0] = rcounts[recv_data_from[i_parity]];
        new_rcounts[1] = rcounts[(recv_data_from[i_parity] + 1)];
        new_rdispls[0] = rdispls[recv_data_from[i_parity]];
        new_rdispls[1] = rdispls[(recv_data_from[i_parity] + 1)];
        err = ompi_datatype_create_indexed(2, new_rcounts, new_rdispls, rdtype,
                                           &new_rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        err = ompi_datatype_commit(&new_rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

        tmprecv = (char*)rbuf;
        tmpsend = (char*)rbuf;

        /* Sendreceive */
        err = ompi_coll_base_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
                                       MCA_COLL_BASE_TAG_ALLGATHERV,
                                       tmprecv, 1, new_rdtype, neighbor[i_parity],
                                       MCA_COLL_BASE_TAG_ALLGATHERV,
                                       comm, MPI_STATUS_IGNORE, rank);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

        send_data_from = recv_data_from[i_parity];

        ompi_datatype_destroy(&new_sdtype);
        ompi_datatype_destroy(&new_rdtype);
    }

    return OMPI_SUCCESS;

 err_hndl:
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,  "%s:%4d\tError occurred %d, rank %2d",
                 __FILE__, line, err, rank));
    (void)line;  // silence compiler warning
    return err;
}


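/*
 * ompi_coll_base_allgatherv_intra_two_procs
 *
 * Function:     allgatherv for exactly two processes.
 * Accepts:      Same arguments as MPI_Allgatherv
 * Returns:      MPI_SUCCESS or error code (MPI_ERR_UNSUPPORTED_OPERATION
 *               if the communicator size is not 2)
 *
 * Description:  The two ranks exchange their contributions with a single
 *               send/receive pair; each rank then copies its own data into
 *               place unless MPI_IN_PLACE was used.
 */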
int ompi_coll_base_allgatherv_intra_two_procs(const void *sbuf, int scount,
                                               struct ompi_datatype_t *sdtype,
                                               void* rbuf, const int *rcounts,
                                               const int *rdispls,
                                               struct ompi_datatype_t *rdtype,
                                               struct ompi_communicator_t *comm,
                                               mca_coll_base_module_t *module)
{
    int line = -1, err = 0, rank, remote;
    char *tmpsend = NULL, *tmprecv = NULL;
    ptrdiff_t rext, lb;

    rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "ompi_coll_base_allgatherv_intra_two_procs rank %d", rank));

    if (2 != ompi_comm_size(comm)) {
        return MPI_ERR_UNSUPPORTED_OPERATION;
    }

    err = ompi_datatype_get_extent (rdtype, &lb, &rext);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    /* Exchange data:
       - compute source and destination
       - send/receive data
    */
    remote = rank ^ 0x1;

    tmpsend = (char*)sbuf;
    if (MPI_IN_PLACE == sbuf) {
        tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
        scount = rcounts[rank];
        sdtype = rdtype;
    }
    tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[remote] * rext;

    err = ompi_coll_base_sendrecv(tmpsend, scount, sdtype, remote,
                                   MCA_COLL_BASE_TAG_ALLGATHERV,
                                   tmprecv, rcounts[remote], rdtype, remote,
                                   MCA_COLL_BASE_TAG_ALLGATHERV,
                                   comm, MPI_STATUS_IGNORE, rank);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    /* Place our own data in the correct location if necessary */
    if (MPI_IN_PLACE != sbuf) {
        err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype,
                                   (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext,
                                   rcounts[rank], rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
    }

    return MPI_SUCCESS;

 err_hndl:
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
                 __FILE__, line, err, rank));
    (void)line;  // silence compiler warning
    return err;
}


/*
 * Linear functions are copied from the BASIC coll module.
 * They do not segment the message and are simple implementations,
 * but for some small numbers of nodes and/or small data sizes they
 * are just as fast as base/tree-based segmenting operations,
 * and as such may be selected by the decision functions.
 * These are copied into this module due to the way we select modules
 * in V1, i.e. in V2 we will handle this differently and so will not
 * have to duplicate code.
 * JPG following the examples from other coll_base implementations. Dec06.
 */

/* copied function (with appropriate renaming) starts here */

/*
 *      allgatherv_intra_basic
 *
 *      Function:       - allgatherv using other MPI collectives:
 *                        gatherv + bcast (from the basic module).
 *      Accepts:        - same as MPI_Allgatherv()
 *      Returns:        - MPI_SUCCESS or error code
 */
int
ompi_coll_base_allgatherv_intra_basic_default(const void *sbuf, int scount,
                                              struct ompi_datatype_t *sdtype,
                                              void *rbuf, const int *rcounts,
                                              const int *disps,
                                              struct ompi_datatype_t *rdtype,
                                              struct ompi_communicator_t *comm,
                                              mca_coll_base_module_t *module)
{
    int size, rank, err;
    MPI_Aint extent, lb;
    char *send_buf = NULL;
    struct ompi_datatype_t *newtype, *send_type;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);
    /*
     * We don't have a root process defined. Arbitrarily assign root
     * to the process with rank 0 (OMPI convention).
     */

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "ompi_coll_base_allgatherv_intra_basic_default rank %d",
                 rank));

    if (MPI_IN_PLACE == sbuf) {
        ompi_datatype_get_extent(rdtype, &lb, &extent);
        send_type = rdtype;
        send_buf = (char*)rbuf;
        send_buf += ((ptrdiff_t)disps[rank] * extent);
        scount = rcounts[rank];
    } else {
        send_buf = (char*)sbuf;
        send_type = sdtype;
    }

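    /* Gather every rank's contribution into the root's rbuf at its final
       displacement; the subsequent bcast then replicates the assembled buffer. */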
    err = comm->c_coll->coll_gatherv(send_buf,
                                     scount, send_type, rbuf,
                                     rcounts, disps, rdtype, 0,
                                     comm, comm->c_coll->coll_gatherv_module);
    if (MPI_SUCCESS != err) {
        return err;
    }
    /*
     * We now have all the data in the root's rbuf. Need to
     * broadcast the data out to the other processes.
     *
     * Need to define a datatype that captures the different vectors
     * from each process. MPI_TYPE_INDEXED with params
     *                    size, rcounts, disps, rdtype, newtype
     * should do the trick.
     * Use underlying ddt functions to create and commit the
     * new datatype on each process, then broadcast and destroy the
     * datatype.
     */

    err = ompi_datatype_create_indexed(size, rcounts, disps, rdtype, &newtype);
    if (MPI_SUCCESS != err) {
        return err;
    }

    err = ompi_datatype_commit(&newtype);
    if (MPI_SUCCESS != err) {
        return err;
    }

    err = comm->c_coll->coll_bcast(rbuf, 1, newtype, 0, comm,
                                   comm->c_coll->coll_bcast_module);

    ompi_datatype_destroy (&newtype);

    return err;
}

/* copied function (with appropriate renaming) ends here */
