This source file includes following definitions.
- pown
- calculate_level
- calculate_num_nodes_up_to_level
- ompi_coll_base_topo_build_tree
- ompi_coll_base_topo_build_in_order_bintree
- ompi_coll_base_topo_destroy_tree
- ompi_coll_base_topo_build_bmtree
- ompi_coll_base_topo_build_in_order_bmtree
- ompi_coll_base_topo_build_kmtree
- ompi_coll_base_topo_build_chain
- ompi_coll_base_topo_dump_tree
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 #include "ompi_config.h"
22
23 #include "mpi.h"
24 #include "opal/util/bit_ops.h"
25 #include "ompi/constants.h"
26 #include "ompi/communicator/communicator.h"
27 #include "ompi/mca/coll/base/coll_tags.h"
28 #include "ompi/mca/coll/base/coll_base_functions.h"
29 #include "coll_base_topo.h"
30
31
32
33
34 static int pown( int fanout, int num )
35 {
36 int j, p = 1;
37 if( num < 0 ) return 0;
38 if (1==num) return fanout;
39 if (2==fanout) {
40 return p<<num;
41 }
42 else {
43 for( j = 0; j < num; j++ ) { p*= fanout; }
44 }
45 return p;
46 }
47
48 static int calculate_level( int fanout, int rank )
49 {
50 int level, num;
51 if( rank < 0 ) return -1;
52 for( level = 0, num = 0; num <= rank; level++ ) {
53 num += pown(fanout, level);
54 }
55 return level-1;
56 }
57
58 static int calculate_num_nodes_up_to_level( int fanout, int level )
59 {
60
61
62 return ((pown(fanout,level) - 1)/(fanout - 1));
63 }
64
65
66
67
68
69
70
71
72
73
74
75
76
77 ompi_coll_tree_t*
78 ompi_coll_base_topo_build_tree( int fanout,
79 struct ompi_communicator_t* comm,
80 int root )
81 {
82 int rank, size, schild, sparent, shiftedrank, i;
83 int level;
84 int delta;
85 int slimit;
86 ompi_coll_tree_t* tree;
87
88 OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree Building fo %d rt %d", fanout, root));
89
90 if (fanout<1) {
91 OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree invalid fanout %d", fanout));
92 return NULL;
93 }
94 if (fanout>MAXTREEFANOUT) {
95 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
96 return NULL;
97 }
98
99
100
101
102 size = ompi_comm_size(comm);
103 rank = ompi_comm_rank(comm);
104
105 tree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
106 if (!tree) {
107 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree PANIC::out of memory"));
108 return NULL;
109 }
110
111 tree->tree_root = MPI_UNDEFINED;
112 tree->tree_nextsize = MPI_UNDEFINED;
113
114
115
116
117 tree->tree_root = root;
118
119
120
121
122 tree->tree_fanout = fanout;
123 tree->tree_bmtree = 0;
124 tree->tree_root = root;
125 tree->tree_prev = -1;
126 tree->tree_nextsize = 0;
127 for( i = 0; i < fanout; i++ ) {
128 tree->tree_next[i] = -1;
129 }
130
131
132 if( size < 2 ) {
133 return tree;
134 }
135
136
137
138
139
140
141
142 shiftedrank = rank - root;
143 if( shiftedrank < 0 ) {
144 shiftedrank += size;
145 }
146
147
148 level = calculate_level( fanout, shiftedrank );
149 delta = pown( fanout, level );
150
151
152 for( i = 0; i < fanout; i++ ) {
153 schild = shiftedrank + delta * (i+1);
154 if( schild < size ) {
155 tree->tree_next[i] = (schild+root)%size;
156 tree->tree_nextsize = tree->tree_nextsize + 1;
157 } else {
158 break;
159 }
160 }
161
162
163 slimit = calculate_num_nodes_up_to_level( fanout, level );
164 sparent = shiftedrank;
165 if( sparent < fanout ) {
166 sparent = 0;
167 } else {
168 while( sparent >= slimit ) {
169 sparent -= delta/fanout;
170 }
171 }
172 tree->tree_prev = (sparent+root)%size;
173
174 return tree;
175 }
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191 ompi_coll_tree_t*
192 ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
193 {
194 int rank, size, myrank, rightsize, delta, parent, lchild, rchild;
195 ompi_coll_tree_t* tree;
196
197
198
199
200 size = ompi_comm_size(comm);
201 rank = ompi_comm_rank(comm);
202
203 tree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
204 if (!tree) {
205 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
206 "coll:base:topo_build_tree PANIC::out of memory"));
207 return NULL;
208 }
209
210 tree->tree_root = MPI_UNDEFINED;
211 tree->tree_nextsize = MPI_UNDEFINED;
212
213
214
215
216 tree->tree_fanout = 2;
217 tree->tree_bmtree = 0;
218 tree->tree_root = size - 1;
219 tree->tree_prev = -1;
220 tree->tree_nextsize = 0;
221 tree->tree_next[0] = -1;
222 tree->tree_next[1] = -1;
223 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
224 "coll:base:topo_build_in_order_tree Building fo %d rt %d",
225 tree->tree_fanout, tree->tree_root));
226
227
228
229
230 myrank = rank;
231 parent = size - 1;
232 delta = 0;
233
234 while ( 1 ) {
235
236 rightsize = size >> 1;
237
238
239 lchild = -1;
240 rchild = -1;
241 if (size - 1 > 0) {
242 lchild = parent - 1;
243 if (lchild > 0) {
244 rchild = rightsize - 1;
245 }
246 }
247
248
249
250
251
252
253
254
255 if (myrank == parent) {
256
257
258 if (lchild >= 0) tree->tree_next[0] = lchild + delta;
259 if (rchild >= 0) tree->tree_next[1] = rchild + delta;
260 break;
261 }
262 if (myrank > rchild) {
263
264
265
266
267
268 if (myrank == lchild) {
269 tree->tree_prev = parent + delta;
270 }
271 size = size - rightsize - 1;
272 delta = delta + rightsize;
273 myrank = myrank - rightsize;
274 parent = size - 1;
275
276 } else {
277
278
279
280
281
282
283 if (myrank == rchild) {
284 tree->tree_prev = parent + delta;
285 }
286 size = rightsize;
287 parent = rchild;
288 }
289 }
290
291 if (tree->tree_next[0] >= 0) { tree->tree_nextsize = 1; }
292 if (tree->tree_next[1] >= 0) { tree->tree_nextsize += 1; }
293
294 return tree;
295 }
296
297 int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree )
298 {
299 ompi_coll_tree_t *ptr;
300
301 if ((!tree)||(!*tree)) {
302 return OMPI_SUCCESS;
303 }
304
305 ptr = *tree;
306
307 free (ptr);
308 *tree = NULL;
309
310 return OMPI_SUCCESS;
311 }
312
313
314
315
316
317
318
319
320
321
322
323
324
325 ompi_coll_tree_t*
326 ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
327 int root )
328 {
329 int childs = 0, rank, size, mask = 1, index, remote, i;
330 ompi_coll_tree_t *bmtree;
331
332 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree rt %d", root));
333
334
335
336
337 size = ompi_comm_size(comm);
338 rank = ompi_comm_rank(comm);
339
340 index = rank -root;
341
342 bmtree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
343 if (!bmtree) {
344 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
345 return NULL;
346 }
347
348 bmtree->tree_bmtree = 1;
349
350 bmtree->tree_root = MPI_UNDEFINED;
351 bmtree->tree_nextsize = MPI_UNDEFINED;
352 for( i = 0;i < MAXTREEFANOUT; i++ ) {
353 bmtree->tree_next[i] = -1;
354 }
355
356 if( index < 0 ) index += size;
357
358 mask = opal_next_poweroftwo(index);
359
360
361 if( root == rank ) {
362 bmtree->tree_prev = root;
363 } else {
364 remote = (index ^ (mask >> 1)) + root;
365 if( remote >= size ) remote -= size;
366 bmtree->tree_prev = remote;
367 }
368
369 while( mask < size ) {
370 remote = (index ^ mask);
371 if( remote >= size ) break;
372 remote += root;
373 if( remote >= size ) remote -= size;
374 if (childs==MAXTREEFANOUT) {
375 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
376 free(bmtree);
377 return NULL;
378 }
379 bmtree->tree_next[childs] = remote;
380 mask <<= 1;
381 childs++;
382 }
383 bmtree->tree_nextsize = childs;
384 bmtree->tree_root = root;
385 return bmtree;
386 }
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402 ompi_coll_tree_t*
403 ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
404 int root )
405 {
406 int childs = 0, rank, vrank, size, mask = 1, remote, i;
407 ompi_coll_tree_t *bmtree;
408
409 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_in_order_bmtree rt %d", root));
410
411
412
413
414 size = ompi_comm_size(comm);
415 rank = ompi_comm_rank(comm);
416
417 vrank = (rank - root + size) % size;
418
419 bmtree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
420 if (!bmtree) {
421 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
422 return NULL;
423 }
424
425 bmtree->tree_bmtree = 1;
426 bmtree->tree_root = MPI_UNDEFINED;
427 bmtree->tree_nextsize = MPI_UNDEFINED;
428 for(i=0;i<MAXTREEFANOUT;i++) {
429 bmtree->tree_next[i] = -1;
430 }
431
432 if (root == rank) {
433 bmtree->tree_prev = root;
434 }
435
436 while (mask < size) {
437 remote = vrank ^ mask;
438 if (remote < vrank) {
439 bmtree->tree_prev = (remote + root) % size;
440 break;
441 } else if (remote < size) {
442 bmtree->tree_next[childs] = (remote + root) % size;
443 childs++;
444 if (childs==MAXTREEFANOUT) {
445 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
446 "coll:base:topo:build_bmtree max fanout incorrect %d needed %d",
447 MAXTREEFANOUT, childs));
448 free(bmtree);
449 return NULL;
450 }
451 }
452 mask <<= 1;
453 }
454 bmtree->tree_nextsize = childs;
455 bmtree->tree_root = root;
456
457 return bmtree;
458 }
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473 ompi_coll_tree_t*
474 ompi_coll_base_topo_build_kmtree(struct ompi_communicator_t* comm,
475 int root, int radix)
476 {
477 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
478 "coll:base:topo:build_kmtree root %d, radix %d", root, radix));
479 int comm_size = ompi_comm_size(comm);
480 int rank = ompi_comm_rank(comm);
481
482
483 int log_radix = 0;
484 for (int i = 1; i < comm_size; i *= radix)
485 log_radix++;
486 int nchilds_max = (radix - 1) * log_radix;
487
488 int vrank = (rank - root + comm_size) % comm_size;
489 ompi_coll_tree_t *kmtree = malloc(COLL_TREE_SIZE(nchilds_max));
490 if (NULL == kmtree) {
491 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
492 "coll:base:topo:build_kmtree PANIC out of memory"));
493 return NULL;
494 }
495
496 kmtree->tree_bmtree = 0;
497 kmtree->tree_root = root;
498 kmtree->tree_prev = MPI_PROC_NULL;
499 kmtree->tree_nextsize = 0;
500
501
502 int mask = 0x1;
503 while (mask < comm_size) {
504 if (vrank % (radix * mask)) {
505 kmtree->tree_prev = vrank / (radix * mask) * (radix * mask);
506 kmtree->tree_prev = (kmtree->tree_prev + root) % comm_size;
507 break;
508 }
509 mask *= radix;
510 }
511
512
513 mask /= radix;
514 int nchilds = 0;
515 while (mask > 0) {
516 for (int r = 1; r < radix; r++) {
517 int child = vrank + mask * r;
518 if (child < comm_size) {
519 child = (child + root) % comm_size;
520 kmtree->tree_next[nchilds] = child;
521 nchilds++;
522 }
523 }
524 mask /= radix;
525 }
526 kmtree->tree_nextsize = nchilds;
527 return kmtree;
528 }
529
530 ompi_coll_tree_t*
531 ompi_coll_base_topo_build_chain( int fanout,
532 struct ompi_communicator_t* comm,
533 int root )
534 {
535 int i, maxchainlen, mark, head, len, rank, size, srank ;
536 ompi_coll_tree_t *chain;
537
538 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain fo %d rt %d", fanout, root));
539
540
541
542
543 size = ompi_comm_size(comm);
544 rank = ompi_comm_rank(comm);
545
546 if( fanout < 1 ) {
547 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!"));
548 fanout = 1;
549 }
550 if (fanout>MAXTREEFANOUT) {
551 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT));
552 fanout = MAXTREEFANOUT;
553 }
554
555
556
557
558 chain = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
559 if (!chain) {
560 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain PANIC out of memory"));
561 fflush(stdout);
562 return NULL;
563 }
564 chain->tree_root = MPI_UNDEFINED;
565 chain->tree_nextsize = -1;
566 for(i=0;i<fanout;i++) chain->tree_next[i] = -1;
567
568
569
570
571 chain->tree_root = root;
572 if( (size - 1) < fanout ) {
573 chain->tree_nextsize = size-1;
574 fanout = size-1;
575 } else {
576 chain->tree_nextsize = fanout;
577 }
578
579
580
581
582 srank = rank - root;
583 if (srank < 0) srank += size;
584
585
586
587
588 if( fanout == 1 ) {
589 if( srank == 0 ) chain->tree_prev = -1;
590 else chain->tree_prev = (srank-1+root)%size;
591
592 if( (srank + 1) >= size) {
593 chain->tree_next[0] = -1;
594 chain->tree_nextsize = 0;
595 } else {
596 chain->tree_next[0] = (srank+1+root)%size;
597 chain->tree_nextsize = 1;
598 }
599 return chain;
600 }
601
602
603 if( size == 1 ) {
604 chain->tree_next[0] = -1;
605 chain->tree_nextsize = 0;
606 chain->tree_prev = -1;
607 return chain;
608 }
609
610
611
612 maxchainlen = (size-1) / fanout;
613 if( (size-1) % fanout != 0 ) {
614 maxchainlen++;
615 mark = (size-1)%fanout;
616 } else {
617 mark = fanout+1;
618 }
619
620
621
622
623 if( srank != 0 ) {
624 int column;
625 if( srank-1 < (mark * maxchainlen) ) {
626 column = (srank-1)/maxchainlen;
627 head = 1+column*maxchainlen;
628 len = maxchainlen;
629 } else {
630 column = mark + (srank-1-mark*maxchainlen)/(maxchainlen-1);
631 head = mark*maxchainlen+1+(column-mark)*(maxchainlen-1);
632 len = maxchainlen-1;
633 }
634
635 if( srank == head ) {
636 chain->tree_prev = 0;
637 } else {
638 chain->tree_prev = srank-1;
639 }
640 if( srank == (head + len - 1) ) {
641 chain->tree_next[0] = -1;
642 chain->tree_nextsize = 0;
643 } else {
644 if( (srank + 1) < size ) {
645 chain->tree_next[0] = srank+1;
646 chain->tree_nextsize = 1;
647 } else {
648 chain->tree_next[0] = -1;
649 chain->tree_nextsize = 0;
650 }
651 }
652 chain->tree_prev = (chain->tree_prev+root)%size;
653 if( chain->tree_next[0] != -1 ) {
654 chain->tree_next[0] = (chain->tree_next[0]+root)%size;
655 }
656 } else {
657
658
659
660 chain->tree_prev = -1;
661 chain->tree_next[0] = (root+1)%size;
662 for( i = 1; i < fanout; i++ ) {
663 chain->tree_next[i] = chain->tree_next[i-1] + maxchainlen;
664 if( i > mark ) {
665 chain->tree_next[i]--;
666 }
667 chain->tree_next[i] %= size;
668 }
669 chain->tree_nextsize = fanout;
670 }
671
672 return chain;
673 }
674
675 int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
676 {
677 int i;
678
679 OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo:topo_dump_tree %1d tree root %d"
680 " fanout %d BM %1d nextsize %d prev %d",
681 rank, tree->tree_root, tree->tree_bmtree, tree->tree_fanout,
682 tree->tree_nextsize, tree->tree_prev));
683 if( tree->tree_nextsize ) {
684 for( i = 0; i < tree->tree_nextsize; i++ )
685 OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"[%1d] %d", i, tree->tree_next[i]));
686 }
687 return (0);
688 }
689