This source file includes the following definitions.
- NBC_Reduce_args_compare
- nbc_reduce_init
- ompi_coll_libnbc_ireduce
- nbc_reduce_inter_init
- ompi_coll_libnbc_ireduce_inter
- red_sched_binomial
- red_sched_chain
- red_sched_linear
- red_sched_redscat_gather
- ompi_coll_libnbc_reduce_init
- ompi_coll_libnbc_reduce_inter_init
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 #include "ompi_config.h"
  23 #include "opal/align.h"
  24 #include "opal/util/bit_ops.h"
  25 #include "ompi/op/op.h"
  26 
  27 #include "nbc_internal.h"
  28 
  29 static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype,
  30                                       MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf);
  31 static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
  32                                    MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, void *tmpbuf, int fragsize);
  33 
  34 static inline int red_sched_linear (int rank, int rsize, int root, const void *sendbuf, void *recvbuf, void *tmpbuf, int count, MPI_Datatype datatype,
  35                                     MPI_Op op, NBC_Schedule *schedule);
  36 static inline int red_sched_redscat_gather(
  37     int rank, int comm_size, int root, const void *sbuf, void *rbuf,
  38     char tmpredbuf, int count, MPI_Datatype datatype, MPI_Op op, char inplace,
  39     NBC_Schedule *schedule, void *tmp_buf, struct ompi_communicator_t *comm);
  40 
  41 #ifdef NBC_CACHE_SCHEDULE
  42 
  43 int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param) {
  44   if ((a->sendbuf == b->sendbuf) &&
  45       (a->recvbuf == b->recvbuf) &&
  46       (a->count == b->count) &&
  47       (a->datatype == b->datatype) &&
  48       (a->op == b->op) &&
  49       (a->root == b->root)) {
  50     return 0;
  51   }
  52 
  53   if (a->sendbuf < b->sendbuf) {
  54     return -1;
  55   }
  56 
  57   return 1;
  58 }
  59 #endif
  60 
  61 
  62 static int nbc_reduce_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
  63                            MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
  64                            struct mca_coll_base_module_2_3_0_t *module, bool persistent) {
  65   int rank, p, res, segsize;
  66   size_t size;
  67   MPI_Aint ext;
  68   NBC_Schedule *schedule;
  69   char *redbuf=NULL, inplace;
  70   void *tmpbuf;
  71   char tmpredbuf = 0;
  72   enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN, NBC_RED_REDSCAT_GATHER} alg;
  73   ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
  74   ptrdiff_t span, gap;
  75 
  76   NBC_IN_PLACE(sendbuf, recvbuf, inplace);
  77 
  78   rank = ompi_comm_rank (comm);
  79   p = ompi_comm_size (comm);
  80 
  81   res = ompi_datatype_type_extent(datatype, &ext);
  82   if (MPI_SUCCESS != res) {
  83     NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res);
  84     return res;
  85   }
  86 
  87   res = ompi_datatype_type_size(datatype, &size);
  88   if (MPI_SUCCESS != res) {
  89     NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res);
  90     return res;
  91   }
  92 
  93   
  94   if (1 == p && (!persistent || inplace)) {
  95     if (!inplace) {
  96       res = NBC_Copy (sendbuf, count, datatype, recvbuf, count, datatype, comm);
  97       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
  98         return res;
  99       }
 100     }
 101     return nbc_get_noop_request(persistent, request);
 102   }
 103 
 104   span = opal_datatype_span(&datatype->super, count, &gap);
 105 
 106   
 107   int nprocs_pof2 = opal_next_poweroftwo(p) >> 1;
 108   if (libnbc_ireduce_algorithm == 0) {
 109     if (ompi_op_is_commute(op) && p > 2 && count >= nprocs_pof2) {
 110       alg = NBC_RED_REDSCAT_GATHER;
 111     } else if (p > 4 || size * count < 65536 || !ompi_op_is_commute(op)) {
 112       alg = NBC_RED_BINOMIAL;
 113     } else {
 114       alg = NBC_RED_CHAIN;
 115     }
 116   } else {
 117     if (libnbc_ireduce_algorithm == 1) {
 118       alg = NBC_RED_CHAIN;
 119     } else if (libnbc_ireduce_algorithm == 2) {
 120       alg = NBC_RED_BINOMIAL;
 121     } else if (libnbc_ireduce_algorithm == 3 && ompi_op_is_commute(op) && p > 2 && count >= nprocs_pof2) {
 122       alg = NBC_RED_REDSCAT_GATHER;
 123     } else {
 124       alg = NBC_RED_CHAIN;
 125     }
 126   }
 127 
 128   
 129   if (alg == NBC_RED_REDSCAT_GATHER || alg == NBC_RED_BINOMIAL) {
 130     if (rank == root) {
 131       
 132       tmpbuf = malloc(span);
 133       redbuf = recvbuf;
 134     } else {
 135       
 136       ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
 137       tmpbuf = malloc(span_align + span);
 138       redbuf = (char *)span_align - gap;
 139       tmpredbuf = 1;
 140     }
 141   } else {
 142     tmpbuf = malloc (span);
 143     segsize = 16384/2;
 144   }
 145 
 146   if (OPAL_UNLIKELY(NULL == tmpbuf)) {
 147     return OMPI_ERR_OUT_OF_RESOURCE;
 148   }
 149 
 150 #ifdef NBC_CACHE_SCHEDULE
 151   NBC_Reduce_args *args, *found, search;
 152 
 153   
 154   search.sendbuf = sendbuf;
 155   search.recvbuf = recvbuf;
 156   search.count = count;
 157   search.datatype = datatype;
 158   search.op = op;
 159   search.root = root;
 160   found = (NBC_Reduce_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], &search);
 161   if (NULL == found) {
 162 #endif
 163     schedule = OBJ_NEW(NBC_Schedule);
 164     if (OPAL_UNLIKELY(NULL == schedule)) {
 165       free(tmpbuf);
 166       return OMPI_ERR_OUT_OF_RESOURCE;
 167     }
 168 
 169     if (p == 1) {
 170       res = NBC_Sched_copy ((void *)sendbuf, false, count, datatype,
 171                             recvbuf, false, count, datatype, schedule, false);
 172     } else {
 173       switch(alg) {
 174         case NBC_RED_BINOMIAL:
 175           res = red_sched_binomial(rank, p, root, sendbuf, redbuf, tmpredbuf, count, datatype, op, inplace, schedule, tmpbuf);
 176           break;
 177         case NBC_RED_CHAIN:
 178           res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, tmpbuf, segsize);
 179           break;
 180         case NBC_RED_REDSCAT_GATHER:
 181           res = red_sched_redscat_gather(rank, p, root, sendbuf, redbuf, tmpredbuf, count, datatype, op, inplace, schedule, tmpbuf, comm);
 182           break;
 183       }
 184     }
 185 
 186     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 187       OBJ_RELEASE(schedule);
 188       free(tmpbuf);
 189       return res;
 190     }
 191 
 192     res = NBC_Sched_commit(schedule);
 193     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 194       OBJ_RELEASE(schedule);
 195       free(tmpbuf);
 196       return res;
 197     }
 198 #ifdef NBC_CACHE_SCHEDULE
 199     
 200     args = (NBC_Reduce_args *) malloc (sizeof (args));
 201     if (NULL != args) {
 202       args->sendbuf = sendbuf;
 203       args->recvbuf = recvbuf;
 204       args->count = count;
 205       args->datatype = datatype;
 206       args->op = op;
 207       args->root = root;
 208       args->schedule = schedule;
 209       res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE], args, args, 0);
 210       if (0 == res) {
 211         OBJ_RETAIN(schedule);
 212 
 213         
 214         if (++libnbc_module->NBC_Dict_size[NBC_REDUCE] > NBC_SCHED_DICT_UPPER) {
 215           NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_REDUCE],
 216                                    &libnbc_module->NBC_Dict_size[NBC_REDUCE]);
 217         }
 218       } else {
 219         NBC_Error("error in dict_insert() (%i)", res);
 220         free (args);
 221       }
 222     }
 223   } else {
 224     
 225     schedule = found->schedule;
 226     OBJ_RETAIN(schedule);
 227   }
 228 #endif
 229 
 230   res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
 231   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 232     OBJ_RELEASE(schedule);
 233     free(tmpbuf);
 234     return res;
 235   }
 236 
 237   return OMPI_SUCCESS;
 238 }
 239 
 240 int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
 241                              MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
 242                              struct mca_coll_base_module_2_3_0_t *module) {
 243     int res = nbc_reduce_init(sendbuf, recvbuf, count, datatype, op, root,
 244                               comm, request, module, false);
 245     if (OPAL_LIKELY(OMPI_SUCCESS != res)) {
 246         return res;
 247     }
 248     res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
 249     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 250         NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
 251         *request = &ompi_request_null.request;
 252         return res;
 253     }
 254 
 255     return OMPI_SUCCESS;
 256 }
 257 
 258 static int nbc_reduce_inter_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
 259                                  MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
 260                                  struct mca_coll_base_module_2_3_0_t *module, bool persistent) {
 261   int rank, res, rsize;
 262   NBC_Schedule *schedule;
 263   ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
 264   ptrdiff_t span, gap;
 265   void *tmpbuf;
 266 
 267   rank = ompi_comm_rank (comm);
 268   rsize = ompi_comm_remote_size (comm);
 269 
 270   span = opal_datatype_span(&datatype->super, count, &gap);
 271   tmpbuf = malloc (span);
 272   if (OPAL_UNLIKELY(NULL == tmpbuf)) {
 273     return OMPI_ERR_OUT_OF_RESOURCE;
 274   }
 275 
 276   schedule = OBJ_NEW(NBC_Schedule);
 277   if (OPAL_UNLIKELY(NULL == schedule)) {
 278     free(tmpbuf);
 279     return OMPI_ERR_OUT_OF_RESOURCE;
 280   }
 281 
 282   res = red_sched_linear (rank, rsize, root, sendbuf, recvbuf, (void *)(-gap), count, datatype, op, schedule);
 283   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 284     OBJ_RELEASE(schedule);
 285     free(tmpbuf);
 286     return OMPI_ERR_OUT_OF_RESOURCE;
 287   }
 288 
 289   res = NBC_Sched_commit(schedule);
 290   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 291     OBJ_RELEASE(schedule);
 292     free(tmpbuf);
 293     return res;
 294   }
 295 
 296   res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
 297   if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 298     OBJ_RELEASE(schedule);
 299     free(tmpbuf);
 300     return OMPI_ERR_OUT_OF_RESOURCE;
 301   }
 302 
 303   return OMPI_SUCCESS;
 304 }
 305 
 306 int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
 307                                    MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
 308                                    struct mca_coll_base_module_2_3_0_t *module) {
 309     int res = nbc_reduce_inter_init(sendbuf, recvbuf, count, datatype, op, root,
 310                                     comm, request, module, false);
 311     if (OPAL_LIKELY(OMPI_SUCCESS != res)) {
 312         return res;
 313     }
 314     res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
 315     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 316         NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
 317         *request = &ompi_request_null.request;
 318         return res;
 319     }
 320 
 321     return OMPI_SUCCESS;
 322 }
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332 
 333 
 334 
 335 
 336 
 337 
 338 
 339 
 340 
 341 
 342 
 343 
 344 #define RANK2VRANK(rank, vrank, root) \
 345 { \
 346   vrank = rank; \
 347   if (rank == 0) vrank = root; \
 348   if (rank == root) vrank = 0; \
 349 }
 350 #define VRANK2RANK(rank, vrank, root) \
 351 { \
 352   rank = vrank; \
 353   if (vrank == 0) rank = root; \
 354   if (vrank == root) rank = 0; \
 355 }
 356 static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype,
 357                                       MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf) {
 358   int vroot, vrank, vpeer, peer, res, maxr;
 359   char *rbuf, *lbuf, *buf;
 360   int tmprbuf, tmplbuf;
 361   ptrdiff_t gap;
 362   (void)opal_datatype_span(&datatype->super, count, &gap);
 363 
 364   if (ompi_op_is_commute(op)) {
 365     vroot = root;
 366   } else {
 367     vroot = 0;
 368   }
 369   RANK2VRANK(rank, vrank, vroot);
 370   maxr = (int)ceil((log((double)p)/LOG2));
 371 
 372   if (rank != root) {
 373     inplace = 0;
 374   }
 375 
 376   
 377   if (0 == (maxr%2)) {
 378     rbuf = (void *)(-gap);
 379     tmprbuf = true;
 380     lbuf = redbuf;
 381     tmplbuf = tmpredbuf;
 382   } else {
 383     lbuf = (void *)(-gap);
 384     tmplbuf = true;
 385     rbuf = redbuf;
 386     tmprbuf = tmpredbuf;
 387     if (inplace) {
 388         res = NBC_Sched_copy(rbuf, false, count, datatype,
 389                              ((char *)tmpbuf)-gap, false, count, datatype,
 390                              schedule, true);
 391         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 392           return res;
 393         }
 394     }
 395   }
 396 
 397   for (int r = 1, firstred = 1 ; r <= maxr ; ++r) {
 398     if ((vrank % (1 << r)) == 0) {
 399       
 400       vpeer = vrank + (1 << (r - 1));
 401       VRANK2RANK(peer, vpeer, vroot)
 402       if (peer < p) {
 403         int tbuf;
 404         
 405         res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true);
 406         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 407           return res;
 408         }
 409 
 410         
 411         
 412         if (firstred && !inplace) {
 413           
 414           res = NBC_Sched_op (sendbuf, false, rbuf, tmprbuf, count, datatype, op, schedule, true);
 415           firstred = 0;
 416         } else {
 417           
 418           res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true);
 419         }
 420 
 421         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 422           return res;
 423         }
 424         
 425         buf = rbuf; rbuf = lbuf ; lbuf = buf;
 426         tbuf = tmprbuf; tmprbuf = tmplbuf; tmplbuf = tbuf;
 427       }
 428     } else {
 429       
 430       vpeer = vrank - (1 << (r - 1));
 431       VRANK2RANK(peer, vpeer, vroot)
 432       if (firstred && !inplace) {
 433         
 434         res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false);
 435       } else {
 436         
 437         res = NBC_Sched_send (lbuf, tmplbuf, count, datatype, peer, schedule, false);
 438       }
 439 
 440       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 441         return res;
 442       }
 443 
 444       
 445       break;
 446     }
 447   }
 448   
 449   if (vroot != root) {
 450     if (0 == rank) {
 451       res = NBC_Sched_send (redbuf, tmpredbuf, count, datatype, root, schedule, false);
 452     } else if (root == rank) {
 453       res = NBC_Sched_recv (redbuf, tmpredbuf, count, datatype, vroot, schedule, false);
 454     }
 455   }
 456 
 457   return OMPI_SUCCESS;
 458 }
 459 
 460 
 461 static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
 462                                    MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, void *tmpbuf, int fragsize) {
 463   int res, vrank, rpeer, speer, numfrag, fragcount, thiscount;
 464   long offset;
 465 
 466   RANK2VRANK(rank, vrank, root);
 467   VRANK2RANK(rpeer, vrank+1, root);
 468   VRANK2RANK(speer, vrank-1, root);
 469 
 470   if (0 == count) {
 471     return OMPI_SUCCESS;
 472   }
 473 
 474   numfrag = count * size / fragsize;
 475   if ((count * size) % fragsize != 0) {
 476     numfrag++;
 477   }
 478 
 479   fragcount = count / numfrag;
 480 
 481   for (int fragnum = 0 ; fragnum < numfrag ; ++fragnum) {
 482     offset = fragnum * fragcount * ext;
 483     thiscount = fragcount;
 484     if(fragnum == numfrag - 1) {
 485       
 486       thiscount = count - fragcount * fragnum;
 487     }
 488 
 489     
 490     if (vrank != p-1) {
 491       if (vrank == 0 && sendbuf != recvbuf) {
 492           res = NBC_Sched_recv ((char *)recvbuf+offset, false, thiscount, datatype, rpeer, schedule, true);
 493         } else {
 494           res = NBC_Sched_recv ((char *)offset, true, thiscount, datatype, rpeer, schedule, true);
 495         }
 496       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 497         return res;
 498       }
 499 
 500       
 501       if(vrank == 0) {
 502         if (sendbuf != recvbuf) {
 503             res = NBC_Sched_op ((char *) sendbuf + offset, false, (char *) recvbuf + offset, false,
 504                                  thiscount, datatype, op, schedule, true);
 505         } else {
 506             res = NBC_Sched_op ((char *)offset, true, (char *) recvbuf + offset, false,
 507                                  thiscount, datatype, op, schedule, true);
 508         }
 509       } else {
 510         res = NBC_Sched_op ((char *) sendbuf + offset, false, (char *) offset, true, thiscount,
 511                              datatype, op, schedule, true);
 512       }
 513 
 514       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 515         return res;
 516       }
 517     }
 518 
 519     
 520     if (vrank != 0) {
 521       
 522       
 523       if (vrank == p-1) {
 524         res = NBC_Sched_send ((char *) sendbuf + offset, false, thiscount, datatype, speer, schedule, true);
 525       } else {
 526         res = NBC_Sched_send ((char *) offset, true, thiscount, datatype, speer, schedule, true);
 527       }
 528 
 529       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 530         return res;
 531       }
 532     }
 533   }
 534 
 535   return OMPI_SUCCESS;
 536 }
 537 
 538 
 539 static inline int red_sched_linear (int rank, int rsize, int root, const void *sendbuf, void *recvbuf, void *tmpbuf, int count, MPI_Datatype datatype,
 540                                     MPI_Op op, NBC_Schedule *schedule) {
 541   int res;
 542   char *rbuf, *lbuf, *buf;
 543   int tmprbuf, tmplbuf;
 544 
 545   if (0 == count) {
 546     return OMPI_SUCCESS;
 547   }
 548 
 549   if (MPI_ROOT == root) {
 550     
 551     if (0 == (rsize%2)) {
 552       lbuf = tmpbuf;
 553       tmplbuf = true;
 554       rbuf = recvbuf;
 555       tmprbuf = false;
 556     } else {
 557       rbuf = tmpbuf;
 558       tmprbuf = true;
 559       lbuf = recvbuf;
 560       tmplbuf = false;
 561     }
 562 
 563     res = NBC_Sched_recv (lbuf, tmplbuf, count, datatype, 0, schedule, false);
 564     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 565       return res;
 566     }
 567 
 568     for (int peer = 1 ; peer < rsize ; ++peer) {
 569       res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true);
 570       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 571         return res;
 572       }
 573 
 574       res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true);
 575       if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 576         return res;
 577       }
 578       
 579       buf = rbuf; rbuf = lbuf ; lbuf = buf;
 580       tmprbuf ^= 1; tmplbuf ^= 1;
 581     }
 582   } else if (MPI_PROC_NULL != root) {
 583     res = NBC_Sched_send (sendbuf, false, count, datatype, root, schedule, true);
 584     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 585       return res;
 586     }
 587   }
 588 
 589   return OMPI_SUCCESS;
 590 }
 591 
 592 
 593 
 594 
 595 
 596 
 597 
 598 
 599 
 600 
 601 
 602 
 603 
 604 
 605 
 606 
 607 
 608 
 609 
 610 
 611 
 612 
 613 
 614 
 615 
 616 
 617 
 618 
 619 
 620 
 621 
 622 
 623 
 624 
 625 
 626 
 627 
 628 
 629 
 630 
 631 
 632 
 633 
 634 
 635 
 636 
 637 
 638 
 639 
 640 
 641 
 642 
 643 
 644 
 645 
 646 
 647 
 648 
 649 static inline int red_sched_redscat_gather(
 650     int rank, int comm_size, int root, const void *sbuf, void *rbuf,
 651     char tmpredbuf, int count, MPI_Datatype datatype, MPI_Op op, char inplace,
 652     NBC_Schedule *schedule, void *tmp_buf, struct ompi_communicator_t *comm)
 653 {
 654     int res = OMPI_SUCCESS;
 655     int *rindex = NULL, *rcount = NULL, *sindex = NULL, *scount = NULL;
 656 
 657     
 658     int nsteps = opal_hibit(comm_size, comm->c_cube_dim + 1);   
 659     if (nsteps < 1) {
 660         
 661         return OMPI_ERR_NOT_SUPPORTED;
 662     }
 663     int nprocs_pof2 = 1 << nsteps;                              
 664 
 665     ptrdiff_t lb, extent;
 666     ompi_datatype_get_extent(datatype, &lb, &extent);
 667 
 668     if ((rank != root) || !inplace) {
 669         res = NBC_Sched_copy((char *)sbuf, false, count, datatype,
 670                              rbuf, tmpredbuf, count, datatype, schedule, true);
 671         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 672     }
 673 
 674     
 675 
 676 
 677 
 678 
 679 
 680 
 681 
 682 
 683 
 684 
 685 
 686 
 687 
 688 
 689 
 690 
 691 
 692     int vrank, step, wsize;
 693     int nprocs_rem = comm_size - nprocs_pof2;
 694 
 695     if (rank < 2 * nprocs_rem) {
 696         int count_lhalf = count / 2;
 697         int count_rhalf = count - count_lhalf;
 698 
 699         if (rank % 2 != 0) {
 700             
 701 
 702 
 703 
 704 
 705             res = NBC_Sched_send(rbuf, tmpredbuf, count_lhalf, datatype, rank - 1,
 706                                  schedule, false);
 707             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 708 
 709             res = NBC_Sched_recv((char *)tmp_buf + (ptrdiff_t)count_lhalf * extent,
 710                                  false, count_rhalf, datatype, rank - 1, schedule, true);
 711             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 712 
 713             res = NBC_Sched_op((char *)tmp_buf + (ptrdiff_t)count_lhalf * extent,
 714                                false, (char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 715                                tmpredbuf, count_rhalf, datatype, op, schedule, true);
 716             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 717 
 718             
 719             res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 720                                  tmpredbuf, count_rhalf, datatype, rank - 1, schedule, true);
 721             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 722 
 723             
 724             vrank = -1;
 725 
 726         } else {
 727             
 728 
 729 
 730 
 731 
 732             res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 733                                  tmpredbuf, count_rhalf, datatype, rank + 1, schedule, false);
 734             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 735 
 736             res = NBC_Sched_recv((char *)tmp_buf, false, count_lhalf, datatype, rank + 1,
 737                                  schedule, true);
 738             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 739 
 740             res = NBC_Sched_op(tmp_buf, false, rbuf, tmpredbuf, count_lhalf,
 741                                datatype, op, schedule, true);
 742             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 743 
 744             
 745             res = NBC_Sched_recv((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
 746                                  tmpredbuf, count_rhalf, datatype, rank + 1, schedule, true);
 747             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 748 
 749             vrank = rank / 2;
 750         }
 751     } else { 
 752         vrank = rank - nprocs_rem;
 753     }
 754 
 755     
 756 
 757 
 758 
 759 
 760 
 761 
 762 
 763 
 764 
 765 
 766 
 767 
 768     rindex = malloc(sizeof(*rindex) * nsteps);    
 769     sindex = malloc(sizeof(*sindex) * nsteps);
 770     rcount = malloc(sizeof(*rcount) * nsteps);
 771     scount = malloc(sizeof(*scount) * nsteps);
 772     if (NULL == rindex || NULL == sindex || NULL == rcount || NULL == scount) {
 773         res = OMPI_ERR_OUT_OF_RESOURCE;
 774         goto cleanup_and_return;
 775     }
 776 
 777     if (vrank != -1) {
 778         step = 0;
 779         wsize = count;
 780         sindex[0] = rindex[0] = 0;
 781 
 782         for (int mask = 1; mask < nprocs_pof2; mask <<= 1) {
 783             
 784 
 785 
 786 
 787             int vdest = vrank ^ mask;
 788             
 789             int dest = (vdest < nprocs_rem) ? vdest * 2 : vdest + nprocs_rem;
 790 
 791             if (rank < dest) {
 792                 
 793 
 794 
 795 
 796 
 797                 rcount[step] = wsize / 2;
 798                 scount[step] = wsize - rcount[step];
 799                 sindex[step] = rindex[step] + rcount[step];
 800             } else {
 801                 
 802 
 803 
 804 
 805 
 806                 scount[step] = wsize / 2;
 807                 rcount[step] = wsize - scount[step];
 808                 rindex[step] = sindex[step] + scount[step];
 809             }
 810 
 811             
 812             res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)sindex[step] * extent,
 813                                  tmpredbuf, scount[step], datatype, dest, schedule, false);
 814             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 815             res = NBC_Sched_recv((char *)tmp_buf + (ptrdiff_t)rindex[step] * extent,
 816                                  false, rcount[step], datatype, dest, schedule, true);
 817             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 818 
 819             
 820             res = NBC_Sched_op((char *)tmp_buf + (ptrdiff_t)rindex[step] * extent,
 821                                false, (char *)rbuf + (ptrdiff_t)rindex[step] * extent,
 822                                tmpredbuf, rcount[step], datatype, op, schedule, true);
 823             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 824 
 825             
 826             if (step + 1 < nsteps) {
 827                 rindex[step + 1] = rindex[step];
 828                 sindex[step + 1] = rindex[step];
 829                 wsize = rcount[step];
 830                 step++;
 831             }
 832         }
 833     }
 834     
 835 
 836 
 837 
 838 
 839     
 840 
 841 
 842 
 843 
 844 
 845 
 846     int vroot = 0;
 847     if (root < 2 * nprocs_rem) {
 848         if (root % 2 != 0) {
 849             vroot = 0;
 850             if (rank == root) {
 851                 
 852 
 853 
 854 
 855 
 856                 rindex[0] = 0;
 857                 step = 0, wsize = count;
 858                 for (int mask = 1; mask < nprocs_pof2; mask *= 2) {
 859                     rcount[step] = wsize / 2;
 860                     scount[step] = wsize - rcount[step];
 861                     rindex[step] = 0;
 862                     sindex[step] = rcount[step];
 863                     step++;
 864                     wsize /= 2;
 865                 }
 866 
 867                 res = NBC_Sched_recv(rbuf, tmpredbuf, rcount[nsteps - 1], datatype,
 868                                      0, schedule, true);
 869                 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 870                 vrank = 0;
 871 
 872             } else if (vrank == 0) {
 873                 
 874                 res = NBC_Sched_send(rbuf, tmpredbuf, rcount[nsteps - 1], datatype,
 875                                      root, schedule, true);
 876                 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 877                 vrank = -1;
 878             }
 879         } else {
 880             
 881             vroot = root / 2;
 882         }
 883     } else {
 884         
 885         vroot = root - nprocs_rem;
 886     }
 887 
 888     
 889 
 890 
 891 
 892 
 893 
 894 
 895 
 896     if (vrank != -1) {
 897         int vdest_tree, vroot_tree;
 898         step = nsteps - 1; 
 899 
 900         for (int mask = nprocs_pof2 >> 1; mask > 0; mask >>= 1) {
 901             int vdest = vrank ^ mask;
 902             
 903             int dest = (vdest < nprocs_rem) ? vdest * 2 : vdest + nprocs_rem;
 904             if ((vdest == 0) && (root < 2 * nprocs_rem) && (root % 2 != 0))
 905                 dest = root;
 906 
 907             vdest_tree = vdest >> step;
 908             vdest_tree <<= step;
 909             vroot_tree = vroot >> step;
 910             vroot_tree <<= step;
 911             if (vdest_tree == vroot_tree) {
 912                 
 913 
 914                 res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)rindex[step] * extent,
 915                                      tmpredbuf, rcount[step], datatype, dest, schedule, false);
 916                 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 917                 break;
 918             } else {
 919                 
 920                 res = NBC_Sched_recv((char *)rbuf + (ptrdiff_t)sindex[step] * extent,
 921                                      tmpredbuf, scount[step], datatype, dest, schedule, true);
 922                 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 923             }
 924             step--;
 925         }
 926     }
 927 
 928   cleanup_and_return:
 929     if (NULL != rindex)
 930         free(rindex);
 931     if (NULL != sindex)
 932         free(sindex);
 933     if (NULL != rcount)
 934         free(rcount);
 935     if (NULL != scount)
 936         free(scount);
 937     return res;
 938 }
 939 
 940 int ompi_coll_libnbc_reduce_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
 941                                  MPI_Op op, int root, struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
 942                                  struct mca_coll_base_module_2_3_0_t *module) {
 943     int res = nbc_reduce_init(sendbuf, recvbuf, count, datatype, op, root,
 944                               comm, request, module, true);
 945     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 946         return res;
 947     }
 948 
 949     return OMPI_SUCCESS;
 950 }
 951 
 952 int ompi_coll_libnbc_reduce_inter_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
 953                                        MPI_Op op, int root, struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
 954                                        struct mca_coll_base_module_2_3_0_t *module) {
 955     int res = nbc_reduce_inter_init(sendbuf, recvbuf, count, datatype, op, root,
 956                                     comm, request, module, true);
 957     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 958         return res;
 959     }
 960 
 961     return OMPI_SUCCESS;
 962 }