This source file includes the following definitions.
- NBC_Allreduce_args_compare
- nbc_allreduce_init
- ompi_coll_libnbc_iallreduce
- nbc_allreduce_inter_init
- ompi_coll_libnbc_iallreduce_inter
- allred_sched_diss
- allred_sched_ring
- allred_sched_linear
- allred_sched_redscat_allgather
- ompi_coll_libnbc_allreduce_init
- ompi_coll_libnbc_allreduce_inter_init
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 #include "nbc_internal.h"
  22 #include "ompi/communicator/communicator.h"
  23 #include "ompi/datatype/ompi_datatype.h"
  24 #include "ompi/op/op.h"
  25 #include "opal/util/bit_ops.h"
  26 
  27 #include <assert.h>
  28 
  29 static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, ptrdiff_t gap, const void *sendbuf,
  30                                     void *recvbuf, MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf);
  31 static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, const void *sendbuf,
  32                                     void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule,
  33                                     void *tmpbuf);
  34 static inline int allred_sched_linear(int rank, int p, const void *sendbuf, void *recvbuf, int count,
  35                                       MPI_Datatype datatype, ptrdiff_t gap, MPI_Op op, int ext, int size,
  36                                       NBC_Schedule *schedule, void *tmpbuf);
  37 static inline int allred_sched_redscat_allgather(
  38     int rank, int comm_size, int count, MPI_Datatype datatype, ptrdiff_t gap,
  39     const void *sbuf, void *rbuf, MPI_Op op, char inplace,
  40     NBC_Schedule *schedule, void *tmpbuf, struct ompi_communicator_t *comm);
  41 
  42 #ifdef NBC_CACHE_SCHEDULE
  43 
  44 int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param) {
  45   if ((a->sendbuf == b->sendbuf) &&
  46       (a->recvbuf == b->recvbuf) &&
  47       (a->count == b->count) &&
  48       (a->datatype == b->datatype) &&
  49       (a->op == b->op)) {
  50     return 0;
  51   }
  52 
  53   if( a->sendbuf < b->sendbuf ) {
  54     return -1;
  55   }
  56 
  57   return 1;
  58 }
  59 #endif
  60 
  61 static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
  62                               struct ompi_communicator_t *comm, ompi_request_t ** request,
  63                               struct mca_coll_base_module_2_3_0_t *module, bool persistent)
  64 {
  65   int rank, p, res;
  66   ptrdiff_t ext, lb;
  67   NBC_Schedule *schedule;
  68   size_t size;
  69 #ifdef NBC_CACHE_SCHEDULE
  70   NBC_Allreduce_args *args, *found, search;
  71 #endif
  72   enum { NBC_ARED_BINOMIAL, NBC_ARED_RING, NBC_ARED_REDSCAT_ALLGATHER } alg;
  73   char inplace;
  74   void *tmpbuf = NULL;
  75   ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
  76   ptrdiff_t span, gap;
  77 
  78   NBC_IN_PLACE(sendbuf, recvbuf, inplace);
  79 
  80   rank = ompi_comm_rank (comm);
  81   p = ompi_comm_size (comm);
  82 
  83   res = ompi_datatype_get_extent(datatype, &lb, &ext);
  84   if (OMPI_SUCCESS != res) {
  85     NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res);
  86     return res;
  87   }
  88 
  89   res = ompi_datatype_type_size (datatype, &size);
  90   if (OMPI_SUCCESS != res) {
  91     NBC_Error ("MPI Error in ompi_datatype_type_size() (%i)", res);
  92     return res;
  93   }
  94 
  95   if (1 == p && (!persistent || inplace)) {
  96     if (!inplace) {
  97       
  98       res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm);
  99       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 100         return res;
 101       }
 102     }
 103     return nbc_get_noop_request(persistent, request);
 104   }
 105 
 106   span = opal_datatype_span(&datatype->super, count, &gap);
 107   tmpbuf = malloc (span);
 108   if (OPAL_UNLIKELY(NULL == tmpbuf)) {
 109     return OMPI_ERR_OUT_OF_RESOURCE;
 110   }
 111 
 112   
 113   int nprocs_pof2 = opal_next_poweroftwo(p) >> 1;
 114   if (libnbc_iallreduce_algorithm == 0) {
 115     if(p < 4 || size*count < 65536 || !ompi_op_is_commute(op) || inplace) {
 116       alg = NBC_ARED_BINOMIAL;
 117     } else if (count >= nprocs_pof2 && ompi_op_is_commute(op)) {
 118       alg = NBC_ARED_REDSCAT_ALLGATHER;
 119     } else {
 120       alg = NBC_ARED_RING;
 121     }
 122   } else {
 123     if (libnbc_iallreduce_algorithm == 1)
 124       alg = NBC_ARED_RING;
 125     else if (libnbc_iallreduce_algorithm == 2)
 126       alg = NBC_ARED_BINOMIAL;
 127     else if (libnbc_iallreduce_algorithm == 3 && count >= nprocs_pof2 && ompi_op_is_commute(op)) {
 128       alg = NBC_ARED_REDSCAT_ALLGATHER;
 129     } else
 130       alg = NBC_ARED_RING;
 131   }
 132 #ifdef NBC_CACHE_SCHEDULE
 133   
 134   search.sendbuf = sendbuf;
 135   search.recvbuf = recvbuf;
 136   search.count = count;
 137   search.datatype = datatype;
 138   search.op = op;
 139   found = (NBC_Allreduce_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], &search);
 140   if (NULL == found) {
 141 #endif
 142     schedule = OBJ_NEW(NBC_Schedule);
 143     if (NULL == schedule) {
 144       free(tmpbuf);
 145       return OMPI_ERR_OUT_OF_RESOURCE;
 146     }
 147 
 148     if (p == 1) {
 149       res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
 150                            recvbuf, false, count, datatype, schedule, false);
 151     } else {
 152       switch(alg) {
 153         case NBC_ARED_BINOMIAL:
 154           res = allred_sched_diss(rank, p, count, datatype, gap, sendbuf, recvbuf, op, inplace, schedule, tmpbuf);
 155           break;
 156         case NBC_ARED_REDSCAT_ALLGATHER:
 157           res = allred_sched_redscat_allgather(rank, p, count, datatype, gap, sendbuf, recvbuf, op, inplace, schedule, tmpbuf, comm);
 158           break;
 159         case NBC_ARED_RING:
 160           res = allred_sched_ring(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, tmpbuf);
 161           break;
 162       }
 163     }
 164 
 165     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 166       OBJ_RELEASE(schedule);
 167       free(tmpbuf);
 168       return res;
 169     }
 170 
 171     res = NBC_Sched_commit(schedule);
 172     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 173       OBJ_RELEASE(schedule);
 174       free(tmpbuf);
 175       return res;
 176     }
 177 
 178 #ifdef NBC_CACHE_SCHEDULE
 179     
 180     args = (NBC_Allreduce_args *) malloc (sizeof(args));
 181     if (NULL != args) {
 182       args->sendbuf = sendbuf;
 183       args->recvbuf = recvbuf;
 184       args->count = count;
 185       args->datatype = datatype;
 186       args->op = op;
 187       args->schedule = schedule;
 188       res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], args, args, 0);
 189       if (0 == res) {
 190         OBJ_RETAIN(schedule);
 191 
 192         
 193         if (++libnbc_module->NBC_Dict_size[NBC_ALLREDUCE] > NBC_SCHED_DICT_UPPER) {
 194           NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE],
 195                                    &libnbc_module->NBC_Dict_size[NBC_ALLREDUCE]);
 196         }
 197       } else {
 198         NBC_Error("error in dict_insert() (%i)", res);
 199         free (args);
 200       }
 201     }
 202   } else {
 203     
 204     schedule = found->schedule;
 205     OBJ_RETAIN(schedule);
 206   }
 207 #endif
 208 
 209   res = NBC_Schedule_request (schedule, comm, libnbc_module, persistent, request, tmpbuf);
 210   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 211     OBJ_RELEASE(schedule);
 212     free(tmpbuf);
 213     return res;
 214   }
 215 
 216   return OMPI_SUCCESS;
 217 }
 218 
 219 int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
 220                                 struct ompi_communicator_t *comm, ompi_request_t ** request,
 221                                 struct mca_coll_base_module_2_3_0_t *module) {
 222     int res = nbc_allreduce_init(sendbuf, recvbuf, count, datatype, op,
 223                                  comm, request, module, false);
 224     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 225         return res;
 226     }
 227   
 228     res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
 229     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 230         NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
 231         *request = &ompi_request_null.request;
 232         return res;
 233     }
 234 
 235     return OMPI_SUCCESS;
 236 }
 237 
 238 static int nbc_allreduce_inter_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
 239                                     struct ompi_communicator_t *comm, ompi_request_t ** request,
 240                                     struct mca_coll_base_module_2_3_0_t *module, bool persistent)
 241 {
 242   int rank, res, rsize;
 243   size_t size;
 244   MPI_Aint ext;
 245   NBC_Schedule *schedule;
 246   void *tmpbuf = NULL;
 247   ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
 248   ptrdiff_t span, gap;
 249 
 250   rank = ompi_comm_rank (comm);
 251   rsize = ompi_comm_remote_size (comm);
 252 
 253   res = ompi_datatype_type_extent(datatype, &ext);
 254   if (MPI_SUCCESS != res) {
 255     NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res);
 256     return res;
 257   }
 258 
 259   res = ompi_datatype_type_size(datatype, &size);
 260   if (MPI_SUCCESS != res) {
 261     NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res);
 262     return res;
 263   }
 264 
 265   span = opal_datatype_span(&datatype->super, count, &gap);
 266   tmpbuf = malloc (span);
 267   if (OPAL_UNLIKELY(NULL == tmpbuf)) {
 268     return OMPI_ERR_OUT_OF_RESOURCE;
 269   }
 270 
 271   schedule = OBJ_NEW(NBC_Schedule);
 272   if (OPAL_UNLIKELY(NULL == schedule)) {
 273     free(tmpbuf);
 274     return OMPI_ERR_OUT_OF_RESOURCE;
 275   }
 276 
 277   res = allred_sched_linear (rank, rsize, sendbuf, recvbuf, count, datatype, gap, op,
 278                              ext, size, schedule, tmpbuf);
 279   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 280     OBJ_RELEASE(schedule);
 281     free(tmpbuf);
 282     return res;
 283   }
 284 
 285   res = NBC_Sched_commit(schedule);
 286   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 287     OBJ_RELEASE(schedule);
 288     free(tmpbuf);
 289     return res;
 290   }
 291 
 292   res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
 293   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 294     OBJ_RELEASE(schedule);
 295     free(tmpbuf);
 296     return res;
 297   }
 298 
 299   return OMPI_SUCCESS;
 300 }
 301 
 302 int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
 303                                       struct ompi_communicator_t *comm, ompi_request_t ** request,
 304                                       struct mca_coll_base_module_2_3_0_t *module) {
 305     int res = nbc_allreduce_inter_init(sendbuf, recvbuf, count, datatype, op,
 306                                        comm, request, module, false);
 307     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 308         return res;
 309     }
 310   
 311     res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
 312     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 313         NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
 314         *request = &ompi_request_null.request;
 315         return res;
 316     }
 317 
 318     return OMPI_SUCCESS;
 319 }
 320 
 321 
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332 
 333 
 334 
 335 
 336 
 337 
 338 
 339 
 340 
 341 
 342 
 343 
 344 #define RANK2VRANK(rank, vrank, root) \
 345 { \
 346   vrank = rank; \
 347   if (rank == 0) vrank = root; \
 348   if (rank == root) vrank = 0; \
 349 }
 350 #define VRANK2RANK(rank, vrank, root) \
 351 { \
 352   rank = vrank; \
 353   if (vrank == 0) rank = root; \
 354   if (vrank == root) rank = 0; \
 355 }
 356 static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, ptrdiff_t gap, const void *sendbuf, void *recvbuf,
 357                                     MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf) {
 358   int root, vrank, maxr, vpeer, peer, res;
 359   char *rbuf, *lbuf, *buf;
 360   int tmprbuf, tmplbuf;
 361 
 362   root = 0; 
 363   RANK2VRANK(rank, vrank, root);
 364   maxr = (int)ceil((log((double)p)/LOG2));
 365   
 366   if (0 == (maxr%2)) {
 367     rbuf = (void *)(-gap);
 368     tmprbuf = true;
 369     lbuf = recvbuf;
 370     tmplbuf = false;
 371   } else {
 372     lbuf = (void *)(-gap);
 373     tmplbuf = true;
 374     rbuf = recvbuf;
 375     tmprbuf = false;
 376     if (inplace) {
 377         res = NBC_Sched_copy(rbuf, false, count, datatype,
 378                              ((char *)tmpbuf) - gap, false, count, datatype,
 379                              schedule, true);
 380         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 381           return res;
 382         }
 383     }
 384   }
 385 
 386   for (int r = 1, firstred = 1 ; r <= maxr ; ++r) {
 387     if ((vrank % (1 << r)) == 0) {
 388       
 389       vpeer = vrank + (1 << (r - 1));
 390       VRANK2RANK(peer, vpeer, root)
 391       if (peer < p) {
 392         
 393         res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true);
 394         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 395           return res;
 396         }
 397 
 398         
 399         if (firstred && !inplace) {
 400           
 401           res = NBC_Sched_op (sendbuf, false, rbuf, tmprbuf, count, datatype, op, schedule, true);
 402           firstred = 0;
 403         } else {
 404           
 405           res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true);
 406         }
 407         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 408           return res;
 409         }
 410         
 411         buf = rbuf; rbuf = lbuf ; lbuf = buf;
 412         tmprbuf ^= 1; tmplbuf ^= 1;
 413       }
 414     } else {
 415       
 416       vpeer = vrank - (1 << (r - 1));
 417       VRANK2RANK(peer, vpeer, root)
 418       if (firstred && !inplace) {
 419         
 420         res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false);
 421       } else {
 422         
 423         res = NBC_Sched_send (lbuf, tmplbuf, count, datatype, peer, schedule, false);
 424       }
 425 
 426       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 427         return res;
 428       }
 429 
 430       
 431       break;
 432     }
 433   }
 434 
 435   
 436 
 437   RANK2VRANK(rank, vrank, root);
 438 
 439   
 440   if (vrank != 0) {
 441     for (int r = 0; r < maxr ; ++r) {
 442       if ((vrank >= (1 << r)) && (vrank < (1 << (r + 1)))) {
 443         VRANK2RANK(peer, vrank - (1 << r), root);
 444         res = NBC_Sched_recv (recvbuf, false, count, datatype, peer, schedule, false);
 445         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 446           return res;
 447         }
 448       }
 449     }
 450 
 451     res = NBC_Sched_barrier (schedule);
 452     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 453       return res;
 454     }
 455   }
 456 
 457   if (0 == vrank) assert(lbuf == recvbuf);
 458   
 459   for (int r = 0; r < maxr; ++r) {
 460     if (((vrank + (1 << r) < p) && (vrank < (1 << r))) || (vrank == 0)) {
 461       VRANK2RANK(peer, vrank + (1 << r), root);
 462       res = NBC_Sched_send (recvbuf, false, count, datatype, peer, schedule, false);
 463       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 464         return res;
 465       }
 466     }
 467   }
 468 
 469   
 470   return OMPI_SUCCESS;
 471 }
 472 
 473 static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, MPI_Op op,
 474                                      int size, int ext, NBC_Schedule *schedule, void *tmpbuf) {
 475   int segsize, *segsizes, *segoffsets; 
 476   int speer, rpeer; 
 477   int res = OMPI_SUCCESS;
 478 
 479   if (count == 0) {
 480     return OMPI_SUCCESS;
 481   }
 482 
 483   segsizes = (int *) malloc (sizeof (int) * p);
 484   segoffsets = (int *) malloc (sizeof (int) * p);
 485   if (NULL == segsizes || NULL == segoffsets) {
 486     free (segsizes);
 487     free (segoffsets);
 488     return OMPI_ERR_OUT_OF_RESOURCE;
 489   }
 490 
 491   segsize = (count + p - 1) / p; 
 492 
 493   segoffsets[0] = 0;
 494   for (int i = 0, mycount = count ; i < p ; ++i) {
 495     mycount -= segsize;
 496     segsizes[i] = segsize;
 497     if (mycount < 0) {
 498       segsizes[i] = segsize + mycount;
 499       mycount = 0;
 500     }
 501 
 502     if (i) {
 503       segoffsets[i] = segoffsets[i-1] + segsizes[i-1];
 504     }
 505   }
 506 
 507   
 508   speer = (r + 1) % p;
 509   rpeer = (r - 1 + p) % p;
 510 
 511   
 512 
 513 
 514 
 515 
 516 
 517 
 518 
 519 
 520 
 521 
 522 
 523 
 524 
 525 
 526 
 527 
 528 
 529 
 530 
 531 
 532 
 533 
 534 
 535 
 536 
 537 
 538 
 539 
 540 
 541 
 542 
 543 
 544 
 545 
 546 
 547 
 548 
 549 
 550 
 551 
 552 
 553 
 554 
 555 
 556 
 557 
 558 
 559 
 560 
 561 
 562 
 563 
 564 
 565 
 566 
 567 
 568 
 569 
 570 
 571 
 572 
 573 
 574 
 575 
 576 
 577 
 578 
 579 
 580 
 581 
 582 
 583 
 584 
 585 
 586 
 587 
 588 
 589 
 590 
 591 
 592 
 593 
 594 
 595 
 596 
 597 
 598 
 599 
 600 
 601 
 602 
 603 
 604 
 605 
 606 
 607 
 608 
 609   
 610   for (int round = 0 ; round < p - 1 ; ++round) {
 611     int selement = (r+1-round + 2*p )%p; 
 612     int soffset = segoffsets[selement]*ext;
 613     int relement = (r-round + 2*p )%p; 
 614     int roffset = segoffsets[relement]*ext;
 615 
 616     
 617     if (round == 0) {
 618       res = NBC_Sched_send ((char *) sendbuf + soffset, false, segsizes[selement], datatype, speer,
 619                             schedule, false);
 620     } else {
 621       res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer,
 622                             schedule, false);
 623     }
 624 
 625     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 626       break;
 627     }
 628 
 629     res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer,
 630                           schedule, true);
 631     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 632       break;
 633     }
 634 
 635     res = NBC_Sched_op ((char *) sendbuf + roffset, false, (char *) recvbuf + roffset, false,
 636                          segsizes[relement], datatype, op, schedule, true);
 637     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 638       break;
 639     }
 640   }
 641 
 642   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 643     free (segsizes);
 644     free (segoffsets);
 645     return res;
 646   }
 647 
 648   for (int round = p - 1 ; round < 2 * p - 2 ; ++round) {
 649     int selement = (r+1-round + 2*p )%p; 
 650     int soffset = segoffsets[selement]*ext;
 651     int relement = (r-round + 2*p )%p; 
 652     int roffset = segoffsets[relement]*ext;
 653 
 654     res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer,
 655                           schedule, false);
 656     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 657       break;
 658     }
 659 
 660     res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer,
 661                           schedule, true);
 662     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 663       break;
 664     }
 665   }
 666 
 667   free (segsizes);
 668   free (segoffsets);
 669 
 670   return res;
 671 }
 672 
 673 static inline int allred_sched_linear(int rank, int rsize, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
 674                                       ptrdiff_t gap, MPI_Op op, int ext, int size, NBC_Schedule *schedule, void *tmpbuf) {
 675   int res;
 676 
 677   if (0 == count) {
 678     return OMPI_SUCCESS;
 679   }
 680 
 681   
 682   res = NBC_Sched_send (sendbuf, false, count, datatype, 0, schedule, false);
 683   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 684     return res;
 685   }
 686 
 687   
 688   if (0 != rank || 1 ==(rsize%2)) {
 689     res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule, false);
 690   } else {
 691     res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, 0, schedule, false);
 692   }
 693   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 694     return res;
 695   }
 696 
 697   if (0 == rank) {
 698     char *rbuf, *lbuf, *buf;
 699     int tmprbuf, tmplbuf;
 700 
 701     res = NBC_Sched_barrier (schedule);
 702     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 703       return res;
 704     }
 705 
 706     
 707     if (0 == (rsize%2)) {
 708       lbuf = (void *)(-gap);
 709       tmplbuf = true;
 710       rbuf = recvbuf;
 711       tmprbuf = false;
 712     } else {
 713       rbuf = (void *)(-gap);
 714       tmprbuf = true;
 715       lbuf = recvbuf;
 716       tmplbuf = false;
 717     }
 718 
 719     
 720     for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) {
 721       res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, rpeer, schedule, true);
 722       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 723         return res;
 724       }
 725 
 726       res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true);
 727       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 728         return res;
 729       }
 730       
 731       buf = rbuf; rbuf = lbuf ; lbuf = buf;
 732       tmprbuf ^= 1; tmplbuf ^= 1;
 733     }
 734 
 735     
 736     res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, 0, schedule, false);
 737     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 738       return res;
 739     }
 740 
 741     
 742     res = NBC_Sched_send (recvbuf, false, count, datatype, 0, schedule, true);
 743     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 744       return res;
 745     }
 746 
 747     
 748     for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) {
 749       res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rpeer, schedule, false);
 750       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 751         return res;
 752       }
 753     }
 754   }
 755 
 756   return OMPI_SUCCESS;
 757 }
 758 
 759 
 760 
 761 
 762 
 763 
 764 
 765 
 766 
 767 
 768 
 769 
 770 
 771 
 772 
 773 
 774 
 775 
 776 
 777 
 778 
 779 
 780 
 781 
 782 
 783 
 784 
 785 
 786 
 787 
 788 
 789 
 790 
 791 
 792 
 793 
 794 
 795 
 796 
 797 
 798 
 799 
 800 
 801 
 802 
 803 
 804 
 805 
 806 
 807 
 808 
 809 
 810 
 811 
 812 
 813 static inline int allred_sched_redscat_allgather(
 814     int rank, int comm_size, int count, MPI_Datatype datatype, ptrdiff_t gap,
 815     const void *sbuf, void *rbuf, MPI_Op op, char inplace,
 816     NBC_Schedule *schedule, void *tmpbuf, struct ompi_communicator_t *comm)
 817 {
 818     int res = OMPI_SUCCESS;
 819     int *rindex = NULL, *rcount = NULL, *sindex = NULL, *scount = NULL;
 820      
 821     int nsteps = opal_hibit(comm_size, comm->c_cube_dim + 1);   
 822     int nprocs_pof2 = 1 << nsteps;                              
 823      if (!inplace) {
 824         res = NBC_Sched_copy((char *)sbuf, false, count, datatype,
 825                              rbuf, false, count, datatype, schedule, true);
 826         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 827     }
 828     char *tmp_buf = (char *)tmpbuf - gap;
 829     ptrdiff_t lb, extent;
 830     ompi_datatype_get_extent(datatype, &lb, &extent);
 831      
 832 
 833 
 834 
 835 
 836 
 837 
 838 
 839 
 840 
 841 
 842 
 843 
 844 
 845 
 846 
 847     int vrank, step, wsize;
 848     int nprocs_rem = comm_size - nprocs_pof2;
 849      if (rank < 2 * nprocs_rem) {
 850         int count_lhalf = count / 2;
 851         int count_rhalf = count - count_lhalf;
 852          if (rank % 2 != 0) {
 853             
 854 
 855 
 856 
 857 
 858             res = NBC_Sched_send(rbuf, false, count_lhalf, datatype, rank - 1,
 859                                  schedule, false);
 860             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 861              res = NBC_Sched_recv(tmp_buf + (ptrdiff_t)count_lhalf * extent,
 862                                  false, count_rhalf, datatype, rank - 1, schedule, true);
 863             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 864              res = NBC_Sched_op(tmp_buf + (ptrdiff_t)count_lhalf * extent,
 865                                false, (char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 866                                false, count_rhalf, datatype, op, schedule, true);
 867             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 868              
 869             res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 870                                  false, count_rhalf, datatype, rank - 1, schedule, true);
 871             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 872              
 873             vrank = -1;
 874          } else {
 875             
 876 
 877 
 878 
 879 
 880             res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 881                                  false, count_rhalf, datatype, rank + 1, schedule, false);
 882             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 883              res = NBC_Sched_recv(tmp_buf, false, count_lhalf, datatype, rank + 1,
 884                                  schedule, true);
 885             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 886              res = NBC_Sched_op(tmp_buf, false, rbuf, false, count_lhalf,
 887                                datatype, op, schedule, true);
 888             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 889              
 890             res = NBC_Sched_recv((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 891                                  false, count_rhalf, datatype, rank + 1, schedule, true);
 892             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 893              vrank = rank / 2;
 894         }
 895     } else { 
 896         vrank = rank - nprocs_rem;
 897     }
 898      
 899 
 900 
 901 
 902 
 903 
 904 
 905 
 906 
 907 
 908 
 909 
 910     rindex = malloc(sizeof(*rindex) * nsteps);
 911     sindex = malloc(sizeof(*sindex) * nsteps);
 912     rcount = malloc(sizeof(*rcount) * nsteps);
 913     scount = malloc(sizeof(*scount) * nsteps);
 914     if (NULL == rindex || NULL == sindex || NULL == rcount || NULL == scount) {
 915         res = OMPI_ERR_OUT_OF_RESOURCE;
 916         goto cleanup_and_return;
 917     }
 918      if (vrank != -1) {
 919         step = 0;
 920         wsize = count;
 921         sindex[0] = rindex[0] = 0;
 922          for (int mask = 1; mask < nprocs_pof2; mask <<= 1) {
 923             
 924 
 925 
 926 
 927             int vdest = vrank ^ mask;
 928             
 929             int dest = (vdest < nprocs_rem) ? vdest * 2 : vdest + nprocs_rem;
 930              if (rank < dest) {
 931                 
 932 
 933 
 934 
 935 
 936                 rcount[step] = wsize / 2;
 937                 scount[step] = wsize - rcount[step];
 938                 sindex[step] = rindex[step] + rcount[step];
 939             } else {
 940                 
 941 
 942 
 943 
 944 
 945                 scount[step] = wsize / 2;
 946                 rcount[step] = wsize - scount[step];
 947                 rindex[step] = sindex[step] + scount[step];
 948             }
 949              
 950             res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)sindex[step] * extent,
 951                                  false, scount[step], datatype, dest, schedule, false);
 952             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 953             res = NBC_Sched_recv((char *)tmp_buf + (ptrdiff_t)rindex[step] * extent,
 954                                  false, rcount[step], datatype, dest, schedule, true);
 955             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 956              
 957             res = NBC_Sched_op((char *)tmp_buf + (ptrdiff_t)rindex[step] * extent,
 958                                false, (char *)rbuf + (ptrdiff_t)rindex[step] * extent,
 959                                false, rcount[step], datatype, op, schedule, true);
 960             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 961              
 962             if (step + 1 < nsteps) {
 963                 rindex[step + 1] = rindex[step];
 964                 sindex[step + 1] = rindex[step];
 965                 wsize = rcount[step];
 966                 step++;
 967             }
 968         }
 969         
 970 
 971 
 972 
 973          
 974 
 975 
 976 
 977 
 978 
 979 
 980         step = nsteps - 1;
 981          for (int mask = nprocs_pof2 >> 1; mask > 0; mask >>= 1) {
 982             int vdest = vrank ^ mask;
 983             
 984             int dest = (vdest < nprocs_rem) ? vdest * 2 : vdest + nprocs_rem;
 985              
 986 
 987 
 988 
 989             res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)rindex[step] * extent,
 990                                  false, rcount[step], datatype, dest, schedule, false);
 991             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 992             res = NBC_Sched_recv((char *)rbuf + (ptrdiff_t)sindex[step] * extent,
 993                                  false, scount[step], datatype, dest, schedule, true);
 994             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 995             step--;
 996         }
 997     }
 998      
 999 
1000 
1001     if (rank < 2 * nprocs_rem) {
1002         if (rank % 2 != 0) {
1003             
1004             res = NBC_Sched_recv(rbuf, false, count, datatype, rank - 1, schedule, false);
1005             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
1006         } else {
1007             
1008             res = NBC_Sched_send(rbuf, false, count, datatype, rank + 1, schedule, false);
1009             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
1010         }
1011     }
1012    cleanup_and_return:
1013     if (NULL != rindex)
1014         free(rindex);
1015     if (NULL != sindex)
1016         free(sindex);
1017     if (NULL != rcount)
1018         free(rcount);
1019     if (NULL != scount)
1020         free(scount);
1021     return res;
1022 }
1023 
1024 int ompi_coll_libnbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
1025                                     struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
1026                                     struct mca_coll_base_module_2_3_0_t *module) {
1027     int res = nbc_allreduce_init(sendbuf, recvbuf, count, datatype, op,
1028                                  comm, request, module, true);
1029     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
1030         return res;
1031     }
1032 
1033     return OMPI_SUCCESS;
1034 }
1035 
1036 int ompi_coll_libnbc_allreduce_inter_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
1037                                           struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
1038                                           struct mca_coll_base_module_2_3_0_t *module) {
1039     int res = nbc_allreduce_inter_init(sendbuf, recvbuf, count, datatype, op,
1040                                        comm, request, module, true);
1041     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
1042         return res;
1043     }
1044 
1045     return OMPI_SUCCESS;
1046 }
1047