This source file includes following definitions.
- pown
- calculate_level
- calculate_num_nodes_up_to_level
- ompi_coll_base_topo_build_tree
- ompi_coll_base_topo_build_in_order_bintree
- ompi_coll_base_topo_destroy_tree
- ompi_coll_base_topo_build_bmtree
- ompi_coll_base_topo_build_in_order_bmtree
- ompi_coll_base_topo_build_kmtree
- ompi_coll_base_topo_build_chain
- ompi_coll_base_topo_dump_tree
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 #include "ompi_config.h"
  22 
  23 #include "mpi.h"
  24 #include "opal/util/bit_ops.h"
  25 #include "ompi/constants.h"
  26 #include "ompi/communicator/communicator.h"
  27 #include "ompi/mca/coll/base/coll_tags.h"
  28 #include "ompi/mca/coll/base/coll_base_functions.h"
  29 #include "coll_base_topo.h"
  30 
  31 
  32 
  33 
  34 static int pown( int fanout, int num )
  35 {
  36     int j, p = 1;
  37     if( num < 0 ) return 0;
  38     if (1==num) return fanout;
  39     if (2==fanout) {
  40         return p<<num;
  41     }
  42     else {
  43         for( j = 0; j < num; j++ ) { p*= fanout; }
  44     }
  45     return p;
  46 }
  47 
  48 static int calculate_level( int fanout, int rank )
  49 {
  50     int level, num;
  51     if( rank < 0 ) return -1;
  52     for( level = 0, num = 0; num <= rank; level++ ) {
  53         num += pown(fanout, level);
  54     }
  55     return level-1;
  56 }
  57 
  58 static int calculate_num_nodes_up_to_level( int fanout, int level )
  59 {
  60     
  61 
  62     return ((pown(fanout,level) - 1)/(fanout - 1));
  63 }
  64 
  65 
  66 
  67 
  68 
  69 
  70 
  71 
  72 
  73 
  74 
  75 
  76 
  77 ompi_coll_tree_t*
  78 ompi_coll_base_topo_build_tree( int fanout,
  79                                  struct ompi_communicator_t* comm,
  80                                  int root )
  81 {
  82     int rank, size, schild, sparent, shiftedrank, i;
  83     int level; 
  84     int delta; 
  85     int slimit; 
  86     ompi_coll_tree_t* tree;
  87 
  88     OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree Building fo %d rt %d", fanout, root));
  89 
  90     if (fanout<1) {
  91         OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree invalid fanout %d", fanout));
  92         return NULL;
  93     }
  94     if (fanout>MAXTREEFANOUT) {
  95         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
  96         return NULL;
  97     }
  98 
  99     
 100 
 101 
 102     size = ompi_comm_size(comm);
 103     rank = ompi_comm_rank(comm);
 104 
 105     tree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
 106     if (!tree) {
 107         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree PANIC::out of memory"));
 108         return NULL;
 109     }
 110 
 111     tree->tree_root     = MPI_UNDEFINED;
 112     tree->tree_nextsize = MPI_UNDEFINED;
 113 
 114     
 115 
 116 
 117     tree->tree_root = root;
 118 
 119     
 120 
 121 
 122     tree->tree_fanout   = fanout;
 123     tree->tree_bmtree   = 0;
 124     tree->tree_root     = root;
 125     tree->tree_prev     = -1;
 126     tree->tree_nextsize = 0;
 127     for( i = 0; i < fanout; i++ ) {
 128         tree->tree_next[i] = -1;
 129     }
 130 
 131     
 132     if( size < 2 ) {
 133         return tree;
 134     }
 135 
 136     
 137 
 138 
 139 
 140 
 141 
 142     shiftedrank = rank - root;
 143     if( shiftedrank < 0 ) {
 144         shiftedrank += size;
 145     }
 146 
 147     
 148     level = calculate_level( fanout, shiftedrank );
 149     delta = pown( fanout, level );
 150 
 151     
 152     for( i = 0; i < fanout; i++ ) {
 153         schild = shiftedrank + delta * (i+1);
 154         if( schild < size ) {
 155             tree->tree_next[i] = (schild+root)%size;
 156             tree->tree_nextsize = tree->tree_nextsize + 1;
 157         } else {
 158             break;
 159         }
 160     }
 161 
 162     
 163     slimit = calculate_num_nodes_up_to_level( fanout, level );
 164     sparent = shiftedrank;
 165     if( sparent < fanout ) {
 166         sparent = 0;
 167     } else {
 168         while( sparent >= slimit ) {
 169             sparent -= delta/fanout;
 170         }
 171     }
 172     tree->tree_prev = (sparent+root)%size;
 173 
 174     return tree;
 175 }
 176 
 177 
 178 
 179 
 180 
 181 
 182 
 183 
 184 
 185 
 186 
 187 
 188 
 189 
 190 
 191 ompi_coll_tree_t*
 192 ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
 193 {
 194     int rank, size, myrank, rightsize, delta, parent, lchild, rchild;
 195     ompi_coll_tree_t* tree;
 196 
 197     
 198 
 199 
 200     size = ompi_comm_size(comm);
 201     rank = ompi_comm_rank(comm);
 202 
 203     tree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
 204     if (!tree) {
 205         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 206                      "coll:base:topo_build_tree PANIC::out of memory"));
 207         return NULL;
 208     }
 209 
 210     tree->tree_root     = MPI_UNDEFINED;
 211     tree->tree_nextsize = MPI_UNDEFINED;
 212 
 213     
 214 
 215 
 216     tree->tree_fanout   = 2;
 217     tree->tree_bmtree   = 0;
 218     tree->tree_root     = size - 1;
 219     tree->tree_prev     = -1;
 220     tree->tree_nextsize = 0;
 221     tree->tree_next[0]  = -1;
 222     tree->tree_next[1]  = -1;
 223     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 224                  "coll:base:topo_build_in_order_tree Building fo %d rt %d",
 225                  tree->tree_fanout, tree->tree_root));
 226 
 227     
 228 
 229 
 230     myrank = rank;
 231     parent = size - 1;
 232     delta = 0;
 233 
 234     while ( 1 ) {
 235         
 236         rightsize = size >> 1;
 237 
 238         
 239         lchild = -1;
 240         rchild = -1;
 241         if (size - 1 > 0) {
 242             lchild = parent - 1;
 243             if (lchild > 0) {
 244                 rchild = rightsize - 1;
 245             }
 246         }
 247 
 248         
 249 
 250 
 251 
 252 
 253 
 254 
 255         if (myrank == parent) {
 256             
 257 
 258             if (lchild >= 0) tree->tree_next[0] = lchild + delta;
 259             if (rchild >= 0) tree->tree_next[1] = rchild + delta;
 260             break;
 261         }
 262         if (myrank > rchild) {
 263             
 264 
 265 
 266 
 267 
 268             if (myrank == lchild) {
 269                 tree->tree_prev = parent + delta;
 270             }
 271             size = size - rightsize - 1;
 272             delta = delta + rightsize;
 273             myrank = myrank - rightsize;
 274             parent = size - 1;
 275 
 276         } else {
 277             
 278 
 279 
 280 
 281 
 282 
 283             if (myrank == rchild) {
 284                 tree->tree_prev = parent + delta;
 285             }
 286             size = rightsize;
 287             parent = rchild;
 288         }
 289     }
 290 
 291     if (tree->tree_next[0] >= 0) { tree->tree_nextsize = 1; }
 292     if (tree->tree_next[1] >= 0) { tree->tree_nextsize += 1; }
 293 
 294     return tree;
 295 }
 296 
 297 int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree )
 298 {
 299     ompi_coll_tree_t *ptr;
 300 
 301     if ((!tree)||(!*tree)) {
 302         return OMPI_SUCCESS;
 303     }
 304 
 305     ptr = *tree;
 306 
 307     free (ptr);
 308     *tree = NULL;   
 309 
 310     return OMPI_SUCCESS;
 311 }
 312 
 313 
 314 
 315 
 316 
 317 
 318 
 319 
 320 
 321 
 322 
 323 
 324 
 325 ompi_coll_tree_t*
 326 ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
 327                                    int root )
 328 {
 329     int childs = 0, rank, size, mask = 1, index, remote, i;
 330     ompi_coll_tree_t *bmtree;
 331 
 332     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree rt %d", root));
 333 
 334     
 335 
 336 
 337     size = ompi_comm_size(comm);
 338     rank = ompi_comm_rank(comm);
 339 
 340     index = rank -root;
 341 
 342     bmtree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
 343     if (!bmtree) {
 344         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
 345         return NULL;
 346     }
 347 
 348     bmtree->tree_bmtree   = 1;
 349 
 350     bmtree->tree_root     = MPI_UNDEFINED;
 351     bmtree->tree_nextsize = MPI_UNDEFINED;
 352     for( i = 0;i < MAXTREEFANOUT; i++ ) {
 353         bmtree->tree_next[i] = -1;
 354     }
 355 
 356     if( index < 0 ) index += size;
 357 
 358     mask = opal_next_poweroftwo(index);
 359 
 360     
 361     if( root == rank ) {
 362         bmtree->tree_prev = root;
 363     } else {
 364         remote = (index ^ (mask >> 1)) + root;
 365         if( remote >= size ) remote -= size;
 366         bmtree->tree_prev = remote;
 367     }
 368     
 369     while( mask < size ) {
 370         remote = (index ^ mask);
 371         if( remote >= size ) break;
 372         remote += root;
 373         if( remote >= size ) remote -= size;
 374         if (childs==MAXTREEFANOUT) {
 375             OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
 376             free(bmtree);
 377             return NULL;
 378         }
 379         bmtree->tree_next[childs] = remote;
 380         mask <<= 1;
 381         childs++;
 382     }
 383     bmtree->tree_nextsize = childs;
 384     bmtree->tree_root     = root;
 385     return bmtree;
 386 }
 387 
 388 
 389 
 390 
 391 
 392 
 393 
 394 
 395 
 396 
 397 
 398 
 399 
 400 
 401 
 402 ompi_coll_tree_t*
 403 ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
 404                                             int root )
 405 {
 406     int childs = 0, rank, vrank, size, mask = 1, remote, i;
 407     ompi_coll_tree_t *bmtree;
 408 
 409     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_in_order_bmtree rt %d", root));
 410 
 411     
 412 
 413 
 414     size = ompi_comm_size(comm);
 415     rank = ompi_comm_rank(comm);
 416 
 417     vrank = (rank - root + size) % size;
 418 
 419     bmtree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
 420     if (!bmtree) {
 421         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
 422         return NULL;
 423     }
 424 
 425     bmtree->tree_bmtree   = 1;
 426     bmtree->tree_root     = MPI_UNDEFINED;
 427     bmtree->tree_nextsize = MPI_UNDEFINED;
 428     for(i=0;i<MAXTREEFANOUT;i++) {
 429         bmtree->tree_next[i] = -1;
 430     }
 431 
 432     if (root == rank) {
 433         bmtree->tree_prev = root;
 434     }
 435 
 436     while (mask < size) {
 437         remote = vrank ^ mask;
 438         if (remote < vrank) {
 439             bmtree->tree_prev = (remote + root) % size;
 440             break;
 441         } else if (remote < size) {
 442             bmtree->tree_next[childs] = (remote + root) % size;
 443             childs++;
 444             if (childs==MAXTREEFANOUT) {
 445                 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 446                              "coll:base:topo:build_bmtree max fanout incorrect %d needed %d",
 447                              MAXTREEFANOUT, childs));
 448                 free(bmtree);
 449                 return NULL;
 450             }
 451         }
 452         mask <<= 1;
 453     }
 454     bmtree->tree_nextsize = childs;
 455     bmtree->tree_root     = root;
 456 
 457     return bmtree;
 458 }
 459 
 460 
 461 
 462 
 463 
 464 
 465 
 466 
 467 
 468 
 469 
 470 
 471 
 472 
 473 ompi_coll_tree_t*
 474 ompi_coll_base_topo_build_kmtree(struct ompi_communicator_t* comm,
 475                                  int root, int radix)
 476 {
 477     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 478                  "coll:base:topo:build_kmtree root %d, radix %d", root, radix));
 479     int comm_size = ompi_comm_size(comm);
 480     int rank = ompi_comm_rank(comm);
 481 
 482     
 483     int log_radix = 0;
 484     for (int i = 1; i < comm_size; i *= radix)
 485         log_radix++;
 486     int nchilds_max = (radix - 1) * log_radix;
 487 
 488     int vrank = (rank - root + comm_size) % comm_size;
 489     ompi_coll_tree_t *kmtree = malloc(COLL_TREE_SIZE(nchilds_max));
 490     if (NULL == kmtree) {
 491         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
 492                      "coll:base:topo:build_kmtree PANIC out of memory"));
 493         return NULL;
 494     }
 495 
 496     kmtree->tree_bmtree = 0;
 497     kmtree->tree_root = root;
 498     kmtree->tree_prev = MPI_PROC_NULL;
 499     kmtree->tree_nextsize = 0;
 500 
 501     
 502     int mask = 0x1;
 503     while (mask < comm_size) {
 504         if (vrank % (radix * mask)) {
 505             kmtree->tree_prev = vrank / (radix * mask) * (radix * mask);
 506             kmtree->tree_prev = (kmtree->tree_prev + root) % comm_size;
 507             break;
 508         }
 509         mask *= radix;
 510     }
 511 
 512     
 513     mask /= radix;
 514     int nchilds = 0;
 515     while (mask > 0) {
 516         for (int r = 1; r < radix; r++) {
 517             int child = vrank + mask * r;
 518             if (child < comm_size) {
 519                 child = (child + root) % comm_size;
 520                 kmtree->tree_next[nchilds] = child;
 521                 nchilds++;
 522             }
 523         }
 524         mask /= radix;
 525     }
 526     kmtree->tree_nextsize = nchilds;
 527     return kmtree;
 528 }
 529 
 530 ompi_coll_tree_t*
 531 ompi_coll_base_topo_build_chain( int fanout,
 532                                   struct ompi_communicator_t* comm,
 533                                   int root )
 534 {
 535     int i, maxchainlen, mark, head, len, rank, size, srank ;
 536     ompi_coll_tree_t *chain;
 537 
 538     OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain fo %d rt %d", fanout, root));
 539 
 540     
 541 
 542 
 543     size = ompi_comm_size(comm);
 544     rank = ompi_comm_rank(comm);
 545 
 546     if( fanout < 1 ) {
 547         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!"));
 548         fanout = 1;
 549     }
 550     if (fanout>MAXTREEFANOUT) {
 551         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT));
 552         fanout = MAXTREEFANOUT;
 553     }
 554 
 555     
 556 
 557 
 558     chain = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
 559     if (!chain) {
 560         OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain PANIC out of memory"));
 561         fflush(stdout);
 562         return NULL;
 563     }
 564     chain->tree_root     = MPI_UNDEFINED;
 565     chain->tree_nextsize = -1;
 566     for(i=0;i<fanout;i++) chain->tree_next[i] = -1;
 567 
 568     
 569 
 570 
 571     chain->tree_root = root;
 572     if( (size - 1) < fanout ) {
 573         chain->tree_nextsize = size-1;
 574         fanout = size-1;
 575     } else {
 576         chain->tree_nextsize = fanout;
 577     }
 578 
 579     
 580 
 581 
 582     srank = rank - root;
 583     if (srank < 0) srank += size;
 584 
 585     
 586 
 587 
 588     if( fanout == 1 ) {
 589         if( srank == 0 ) chain->tree_prev = -1;
 590         else chain->tree_prev = (srank-1+root)%size;
 591 
 592         if( (srank + 1) >= size) {
 593             chain->tree_next[0] = -1;
 594             chain->tree_nextsize = 0;
 595         } else {
 596             chain->tree_next[0] = (srank+1+root)%size;
 597             chain->tree_nextsize = 1;
 598         }
 599         return chain;
 600     }
 601 
 602     
 603     if( size == 1 ) {
 604         chain->tree_next[0] = -1;
 605         chain->tree_nextsize = 0;
 606         chain->tree_prev = -1;
 607         return chain;
 608     }
 609     
 610 
 611 
 612     maxchainlen = (size-1) / fanout;
 613     if( (size-1) % fanout != 0 ) {
 614         maxchainlen++;
 615         mark = (size-1)%fanout;
 616     } else {
 617         mark = fanout+1;
 618     }
 619 
 620     
 621 
 622 
 623     if( srank != 0 ) {
 624         int column;
 625         if( srank-1 < (mark * maxchainlen) ) {
 626             column = (srank-1)/maxchainlen;
 627             head = 1+column*maxchainlen;
 628             len = maxchainlen;
 629         } else {
 630             column = mark + (srank-1-mark*maxchainlen)/(maxchainlen-1);
 631             head = mark*maxchainlen+1+(column-mark)*(maxchainlen-1);
 632             len = maxchainlen-1;
 633         }
 634 
 635         if( srank == head ) {
 636             chain->tree_prev = 0; 
 637         } else {
 638             chain->tree_prev = srank-1; 
 639         }
 640         if( srank == (head + len - 1) ) {
 641             chain->tree_next[0] = -1;
 642             chain->tree_nextsize = 0;
 643         } else {
 644             if( (srank + 1) < size ) {
 645                 chain->tree_next[0] = srank+1;
 646                 chain->tree_nextsize = 1;
 647             } else {
 648                 chain->tree_next[0] = -1;
 649                 chain->tree_nextsize = 0;
 650             }
 651         }
 652         chain->tree_prev = (chain->tree_prev+root)%size;
 653         if( chain->tree_next[0] != -1 ) {
 654             chain->tree_next[0] = (chain->tree_next[0]+root)%size;
 655         }
 656     } else {
 657         
 658 
 659 
 660         chain->tree_prev = -1;
 661         chain->tree_next[0] = (root+1)%size;
 662         for( i = 1; i < fanout; i++ ) {
 663             chain->tree_next[i] = chain->tree_next[i-1] + maxchainlen;
 664             if( i > mark ) {
 665                 chain->tree_next[i]--;
 666             }
 667             chain->tree_next[i] %= size;
 668         }
 669         chain->tree_nextsize = fanout;
 670     }
 671 
 672     return chain;
 673 }
 674 
 675 int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
 676 {
 677     int i;
 678 
 679     OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo:topo_dump_tree %1d tree root %d"
 680                  " fanout %d BM %1d nextsize %d prev %d",
 681                  rank, tree->tree_root, tree->tree_bmtree, tree->tree_fanout,
 682                  tree->tree_nextsize, tree->tree_prev));
 683     if( tree->tree_nextsize ) {
 684         for( i = 0; i < tree->tree_nextsize; i++ )
 685             OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"[%1d] %d", i, tree->tree_next[i]));
 686     }
 687     return (0);
 688 }
 689