This source file includes the following definitions.
- ompi_coll_base_gather_intra_binomial
- ompi_coll_base_gather_intra_linear_sync
- ompi_coll_base_gather_intra_basic_linear
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 #include "ompi_config.h"
  26 
  27 #include "mpi.h"
  28 #include "ompi/constants.h"
  29 #include "ompi/datatype/ompi_datatype.h"
  30 #include "ompi/communicator/communicator.h"
  31 #include "ompi/mca/coll/coll.h"
  32 #include "ompi/mca/coll/base/coll_tags.h"
  33 #include "ompi/mca/pml/pml.h"
  34 #include "ompi/mca/coll/base/coll_base_functions.h"
  35 #include "coll_base_topo.h"
  36 #include "coll_base_util.h"
  37 
  38 
  39 
  40 int
  41 ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount,
  42                                       struct ompi_datatype_t *sdtype,
  43                                       void *rbuf, int rcount,
  44                                       struct ompi_datatype_t *rdtype,
  45                                       int root,
  46                                       struct ompi_communicator_t *comm,
  47                                       mca_coll_base_module_t *module)
  48 {
  49     int line = -1, i, rank, vrank, size, total_recv = 0, err;
  50     char *ptmp     = NULL, *tempbuf  = NULL;
  51     ompi_coll_tree_t* bmtree;
  52     MPI_Status status;
  53     MPI_Aint sextent, sgap = 0, ssize;
  54     MPI_Aint rextent, rgap = 0, rsize;
  55     mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
  56     mca_coll_base_comm_t *data = base_module->base_data;
  57 
  58     size = ompi_comm_size(comm);
  59     rank = ompi_comm_rank(comm);
  60 
  61     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
  62                  "ompi_coll_base_gather_intra_binomial rank %d", rank));
  63 
  64     
  65     COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
  66     bmtree = data->cached_in_order_bmtree;
  67 
  68     vrank = (rank - root + size) % size;
  69 
  70     if (rank == root) {
  71         ompi_datatype_type_extent(rdtype, &rextent);
  72         rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * size, &rgap);
  73         if (0 == root){
  74             
  75             ptmp = (char *) rbuf;
  76             if (sbuf != MPI_IN_PLACE) {
  77                 err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype,
  78                                            ptmp, rcount, rdtype);
  79                 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
  80             }
  81         } else {
  82             
  83 
  84             tempbuf = (char *) malloc(rsize);
  85             if (NULL == tempbuf) {
  86                 err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
  87             }
  88 
  89             ptmp = tempbuf - rgap;
  90             if (sbuf != MPI_IN_PLACE) {
  91                 
  92                 err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype,
  93                                            ptmp, rcount, rdtype);
  94                 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
  95             } else {
  96                 
  97                 err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, ptmp,
  98                                                           (char *)rbuf + (ptrdiff_t)rank * rextent * (ptrdiff_t)rcount);
  99                 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
 100             }
 101         }
 102         total_recv = rcount;
 103     } else if (!(vrank % 2)) {
 104         
 105 
 106 
 107         ompi_datatype_type_extent(sdtype, &sextent);
 108         ssize = opal_datatype_span(&sdtype->super, (int64_t)scount * size, &sgap);
 109         tempbuf = (char *) malloc(ssize);
 110         if (NULL == tempbuf) {
 111             err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
 112         }
 113 
 114         ptmp = tempbuf - sgap;
 115         
 116         err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype,
 117                                    ptmp, scount, sdtype);
 118         if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
 119 
 120         
 121 
 122         rdtype = sdtype;
 123         rcount = scount;
 124         rextent = sextent;
 125         total_recv = rcount;
 126     } else {
 127         
 128 
 129         ptmp = (char *) sbuf;
 130         total_recv = scount;
 131     }
 132 
 133     if (!(vrank % 2)) {
 134         
 135         for (i = 0; i < bmtree->tree_nextsize; i++) {
 136             int mycount = 0, vkid;
 137             
 138             vkid = (bmtree->tree_next[i] - root + size) % size;
 139             mycount = vkid - vrank;
 140             if (mycount > (size - vkid))
 141                 mycount = size - vkid;
 142             mycount *= rcount;
 143 
 144             OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 145                          "ompi_coll_base_gather_intra_binomial rank %d recv %d mycount = %d",
 146                          rank, bmtree->tree_next[i], mycount));
 147 
 148             err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, (ptrdiff_t)rcount * size - total_recv, rdtype,
 149                                     bmtree->tree_next[i], MCA_COLL_BASE_TAG_GATHER,
 150                                     comm, &status));
 151             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
 152 
 153             total_recv += mycount;
 154         }
 155     }
 156 
 157     if (rank != root) {
 158         
 159         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 160                      "ompi_coll_base_gather_intra_binomial rank %d send %d count %d\n",
 161                      rank, bmtree->tree_prev, total_recv));
 162 
 163         err = MCA_PML_CALL(send(ptmp, total_recv, sdtype,
 164                                 bmtree->tree_prev,
 165                                 MCA_COLL_BASE_TAG_GATHER,
 166                                 MCA_PML_BASE_SEND_STANDARD, comm));
 167         if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
 168     }
 169 
 170     if (rank == root) {
 171         if (root != 0) {
 172             
 173             err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rcount * (ptrdiff_t)(size - root),
 174                                                       (char *)rbuf + rextent * (ptrdiff_t)root * (ptrdiff_t)rcount, ptmp);
 175             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
 176 
 177 
 178             err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rcount * (ptrdiff_t)root,
 179                                                       (char *) rbuf, ptmp + rextent * (ptrdiff_t)rcount * (ptrdiff_t)(size-root));
 180             if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
 181 
 182             free(tempbuf);
 183         }
 184     } else if (!(vrank % 2)) {
 185         
 186         free(tempbuf);
 187     }
 188     return MPI_SUCCESS;
 189 
 190  err_hndl:
 191     if (NULL != tempbuf)
 192         free(tempbuf);
 193 
 194     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,  "%s:%4d\tError occurred %d, rank %2d",
 195                  __FILE__, line, err, rank));
 196     (void)line;  
 197     return err;
 198 }
 199 
 200 
 201 
 202 
 203 
 204 
 205 
 206 
 207 int
 208 ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
 209                                          struct ompi_datatype_t *sdtype,
 210                                          void *rbuf, int rcount,
 211                                          struct ompi_datatype_t *rdtype,
 212                                          int root,
 213                                          struct ompi_communicator_t *comm,
 214                                          mca_coll_base_module_t *module,
 215                                          int first_segment_size)
 216 {
 217     int i, ret, line, rank, size, first_segment_count;
 218     ompi_request_t **reqs = NULL;
 219     MPI_Aint extent, lb;
 220     size_t typelng;
 221 
 222     size = ompi_comm_size(comm);
 223     rank = ompi_comm_rank(comm);
 224 
 225     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 226                  "ompi_coll_base_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size));
 227 
 228     if (rank != root) {
 229         
 230 
 231 
 232 
 233 
 234 
 235         ompi_datatype_type_size(sdtype, &typelng);
 236         ompi_datatype_get_extent(sdtype, &lb, &extent);
 237         first_segment_count = scount;
 238         COLL_BASE_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng,
 239                                       first_segment_count );
 240 
 241         ret = MCA_PML_CALL(recv(rbuf, 0, MPI_BYTE, root,
 242                                 MCA_COLL_BASE_TAG_GATHER,
 243                                 comm, MPI_STATUS_IGNORE));
 244         if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 245 
 246         ret = MCA_PML_CALL(send(sbuf, first_segment_count, sdtype, root,
 247                                 MCA_COLL_BASE_TAG_GATHER,
 248                                 MCA_PML_BASE_SEND_STANDARD, comm));
 249         if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 250 
 251         ret = MCA_PML_CALL(send((char*)sbuf + extent * first_segment_count,
 252                                 (scount - first_segment_count), sdtype,
 253                                 root, MCA_COLL_BASE_TAG_GATHER,
 254                                 MCA_PML_BASE_SEND_STANDARD, comm));
 255         if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 256 
 257     } else {
 258 
 259         
 260 
 261 
 262 
 263 
 264 
 265 
 266 
 267 
 268         char *ptmp;
 269         ompi_request_t *first_segment_req;
 270         reqs = ompi_coll_base_comm_get_reqs(module->base_data, size);
 271         if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; }
 272 
 273         ompi_datatype_type_size(rdtype, &typelng);
 274         ompi_datatype_get_extent(rdtype, &lb, &extent);
 275         first_segment_count = rcount;
 276         COLL_BASE_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
 277                                       first_segment_count );
 278 
 279         ptmp = (char *) rbuf;
 280         for (i = 0; i < size; ++i) {
 281             if (i == rank) {
 282                 
 283                 reqs[i] = MPI_REQUEST_NULL;
 284                 continue;
 285             }
 286 
 287             
 288             ptmp = (char*)rbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * extent;
 289             ret = MCA_PML_CALL(irecv(ptmp, first_segment_count, rdtype, i,
 290                                      MCA_COLL_BASE_TAG_GATHER, comm,
 291                                      &first_segment_req));
 292             if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 293 
 294             
 295             ret = MCA_PML_CALL(send(rbuf, 0, MPI_BYTE, i,
 296                                     MCA_COLL_BASE_TAG_GATHER,
 297                                     MCA_PML_BASE_SEND_STANDARD, comm));
 298             if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 299 
 300             
 301             ptmp = (char*)rbuf + ((ptrdiff_t)i * (ptrdiff_t)rcount + first_segment_count) * extent;
 302             ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count),
 303                                      rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm,
 304                                      &reqs[i]));
 305             if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 306 
 307             
 308             ret = ompi_request_wait(&first_segment_req, MPI_STATUS_IGNORE);
 309             if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 310         }
 311 
 312         
 313         if (MPI_IN_PLACE != sbuf) {
 314             ret = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype,
 315                                        (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent,
 316                                        rcount, rdtype);
 317             if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 318         }
 319 
 320         
 321         ret = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
 322         if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
 323     }
 324 
 325     
 326     return MPI_SUCCESS;
 327  error_hndl:
 328     if (NULL != reqs) {
 329         
 330         if (MPI_ERR_IN_STATUS == ret) {
 331             for( i = 0; i < size; i++ ) {
 332                 if (MPI_REQUEST_NULL == reqs[i]) continue;
 333                 if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
 334                 ret = reqs[i]->req_status.MPI_ERROR;
 335                 break;
 336             }
 337         }
 338         ompi_coll_base_free_reqs(reqs, size);
 339     }
 340     OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
 341                    "ERROR_HNDL: node %d file %s line %d error %d\n",
 342                    rank, __FILE__, line, ret ));
 343     (void)line;  
 344     return ret;
 345 }
 346 
 347 
 348 
 349 
 350 
 351 
 352 
 353 
 354 
 355 
 356 
 357 
 358 
 359 
 360 
 361 
 362 
 363 
 364 
 365 
 366 
 367 int
 368 ompi_coll_base_gather_intra_basic_linear(const void *sbuf, int scount,
 369                                           struct ompi_datatype_t *sdtype,
 370                                           void *rbuf, int rcount,
 371                                           struct ompi_datatype_t *rdtype,
 372                                           int root,
 373                                           struct ompi_communicator_t *comm,
 374                                           mca_coll_base_module_t *module)
 375 {
 376     int i, err, rank, size;
 377     char *ptmp;
 378     MPI_Aint incr, extent, lb;
 379 
 380     size = ompi_comm_size(comm);
 381     rank = ompi_comm_rank(comm);
 382 
 383     
 384     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 385                  "ompi_coll_base_gather_intra_basic_linear rank %d", rank));
 386 
 387     if (rank != root) {
 388         return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
 389                                  MCA_COLL_BASE_TAG_GATHER,
 390                                  MCA_PML_BASE_SEND_STANDARD, comm));
 391     }
 392 
 393     
 394 
 395     ompi_datatype_get_extent(rdtype, &lb, &extent);
 396     incr = extent * (ptrdiff_t)rcount;
 397     for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
 398         if (i == rank) {
 399             if (MPI_IN_PLACE != sbuf) {
 400                 err = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype,
 401                                            ptmp, rcount, rdtype);
 402             } else {
 403                 err = MPI_SUCCESS;
 404             }
 405         } else {
 406             err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i,
 407                                     MCA_COLL_BASE_TAG_GATHER,
 408                                     comm, MPI_STATUS_IGNORE));
 409         }
 410         if (MPI_SUCCESS != err) {
 411             return err;
 412         }
 413     }
 414 
 415     
 416     return MPI_SUCCESS;
 417 }
 418 
 419 
 420