This source file includes the following definitions.
- ompi_coll_tuned_allreduce_intra_dec_fixed
- ompi_coll_tuned_alltoall_intra_dec_fixed
- ompi_coll_tuned_alltoallv_intra_dec_fixed
- ompi_coll_tuned_barrier_intra_dec_fixed
- ompi_coll_tuned_bcast_intra_dec_fixed
- ompi_coll_tuned_reduce_intra_dec_fixed
- ompi_coll_tuned_reduce_scatter_intra_dec_fixed
- ompi_coll_tuned_reduce_scatter_block_intra_dec_fixed
- ompi_coll_tuned_allgather_intra_dec_fixed
- ompi_coll_tuned_allgatherv_intra_dec_fixed
- ompi_coll_tuned_gather_intra_dec_fixed
- ompi_coll_tuned_scatter_intra_dec_fixed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "ompi_config.h"
26
27 #include "mpi.h"
28 #include "opal/util/bit_ops.h"
29 #include "ompi/datatype/ompi_datatype.h"
30 #include "ompi/communicator/communicator.h"
31 #include "ompi/mca/coll/coll.h"
32 #include "ompi/mca/coll/base/coll_tags.h"
33 #include "ompi/op/op.h"
34 #include "coll_tuned.h"
35
36
37
38
39
40
41
42
43 int
44 ompi_coll_tuned_allreduce_intra_dec_fixed(const void *sbuf, void *rbuf, int count,
45 struct ompi_datatype_t *dtype,
46 struct ompi_op_t *op,
47 struct ompi_communicator_t *comm,
48 mca_coll_base_module_t *module)
49 {
50 size_t dsize, block_dsize;
51 int comm_size = ompi_comm_size(comm);
52 const size_t intermediate_message = 10000;
53 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_fixed"));
54
55
56
57
58
59
60
61
62 ompi_datatype_type_size(dtype, &dsize);
63 block_dsize = dsize * (ptrdiff_t)count;
64
65 if (block_dsize < intermediate_message) {
66 return (ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf,
67 count, dtype,
68 op, comm, module));
69 }
70
71 if( ompi_op_is_commute(op) && (count > comm_size) ) {
72 const size_t segment_size = 1 << 20;
73 if (((size_t)comm_size * (size_t)segment_size >= block_dsize)) {
74 return (ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype,
75 op, comm, module));
76 } else {
77 return (ompi_coll_base_allreduce_intra_ring_segmented(sbuf, rbuf,
78 count, dtype,
79 op, comm, module,
80 segment_size));
81 }
82 }
83
84 return (ompi_coll_base_allreduce_intra_nonoverlapping(sbuf, rbuf, count,
85 dtype, op, comm, module));
86 }
87
88
89
90
91
92
93
94
95
96 int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount,
97 struct ompi_datatype_t *sdtype,
98 void* rbuf, int rcount,
99 struct ompi_datatype_t *rdtype,
100 struct ompi_communicator_t *comm,
101 mca_coll_base_module_t *module)
102 {
103 int communicator_size;
104 size_t dsize, block_dsize;
105 #if 0
106 size_t total_dsize;
107 #endif
108
109 communicator_size = ompi_comm_size(comm);
110
111
112 if (communicator_size==2) {
113 return ompi_coll_base_alltoall_intra_two_procs(sbuf, scount, sdtype,
114 rbuf, rcount, rdtype,
115 comm, module);
116 }
117
118
119
120
121
122 if (MPI_IN_PLACE != sbuf) {
123 ompi_datatype_type_size(sdtype, &dsize);
124 } else {
125 ompi_datatype_type_size(rdtype, &dsize);
126 }
127 block_dsize = dsize * (ptrdiff_t)scount;
128
129 if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_small_msg)
130 && (communicator_size > 12)) {
131 return ompi_coll_base_alltoall_intra_bruck(sbuf, scount, sdtype,
132 rbuf, rcount, rdtype,
133 comm, module);
134
135 } else if (block_dsize < (size_t) ompi_coll_tuned_alltoall_intermediate_msg) {
136 return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype,
137 rbuf, rcount, rdtype,
138 comm, module);
139 }
140
141 return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype,
142 rbuf, rcount, rdtype,
143 comm, module);
144
145 #if 0
146
147
148
149 ompi_datatype_type_size(sdtype, &dsize);
150 total_dsize = dsize * scount * communicator_size;
151
152 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_fixed rank %d com_size %d msg_length %ld",
153 ompi_comm_rank(comm), communicator_size, total_dsize));
154
155 if (communicator_size >= 12 && total_dsize <= 768) {
156 return ompi_coll_base_alltoall_intra_bruck(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
157 }
158 if (total_dsize <= 131072) {
159 return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
160 }
161 return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
162 #endif
163 }
164
165
166
167
168
169
170 int ompi_coll_tuned_alltoallv_intra_dec_fixed(const void *sbuf, const int *scounts, const int *sdisps,
171 struct ompi_datatype_t *sdtype,
172 void *rbuf, const int *rcounts, const int *rdisps,
173 struct ompi_datatype_t *rdtype,
174 struct ompi_communicator_t *comm,
175 mca_coll_base_module_t *module)
176 {
177
178 return ompi_coll_base_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
179 rbuf, rcounts, rdisps,rdtype,
180 comm, module);
181 }
182
183
184
185
186
187
188
189
190
191 int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm,
192 mca_coll_base_module_t *module)
193 {
194 int communicator_size = ompi_comm_size(comm);
195
196 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_barrier_intra_dec_fixed com_size %d",
197 communicator_size));
198
199 if( 2 == communicator_size )
200 return ompi_coll_base_barrier_intra_two_procs(comm, module);
201
202
203
204
205
206 {
207 bool has_one = false;
208 for( ; communicator_size > 0; communicator_size >>= 1 ) {
209 if( communicator_size & 0x1 ) {
210 if( has_one )
211 return ompi_coll_base_barrier_intra_bruck(comm, module);
212 has_one = true;
213 }
214 }
215 }
216 return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
217 }
218
219
220
221
222
223
224
225
226
227 int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
228 struct ompi_datatype_t *datatype, int root,
229 struct ompi_communicator_t *comm,
230 mca_coll_base_module_t *module)
231 {
232
233
234 const size_t small_message_size = 2048;
235 const size_t intermediate_message_size = 370728;
236 const double a_p16 = 3.2118e-6;
237 const double b_p16 = 8.7936;
238 const double a_p64 = 2.3679e-6;
239 const double b_p64 = 1.1787;
240 const double a_p128 = 1.6134e-6;
241 const double b_p128 = 2.1102;
242
243 int communicator_size;
244 int segsize = 0;
245 size_t message_size, dsize;
246
247 communicator_size = ompi_comm_size(comm);
248
249
250 ompi_datatype_type_size(datatype, &dsize);
251 message_size = dsize * (unsigned long)count;
252
253 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_bcast_intra_dec_fixed"
254 " root %d rank %d com_size %d msg_length %lu",
255 root, ompi_comm_rank(comm), communicator_size, (unsigned long)message_size));
256
257
258
259 if ((message_size < small_message_size) || (count <= 1)) {
260
261 segsize = 0;
262 return ompi_coll_base_bcast_intra_binomial(buff, count, datatype,
263 root, comm, module,
264 segsize);
265
266 } else if (message_size < intermediate_message_size) {
267
268 segsize = 1024;
269 return ompi_coll_base_bcast_intra_split_bintree(buff, count, datatype,
270 root, comm, module,
271 segsize);
272
273 }
274
275 else if (communicator_size < (a_p128 * message_size + b_p128)) {
276
277 segsize = 1024 << 7;
278 return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
279 root, comm, module,
280 segsize);
281
282 } else if (communicator_size < 13) {
283
284 segsize = 1024 << 3;
285 return ompi_coll_base_bcast_intra_split_bintree(buff, count, datatype,
286 root, comm, module,
287 segsize);
288
289 } else if (communicator_size < (a_p64 * message_size + b_p64)) {
290
291 segsize = 1024 << 6;
292 return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
293 root, comm, module,
294 segsize);
295
296 } else if (communicator_size < (a_p16 * message_size + b_p16)) {
297
298 segsize = 1024 << 4;
299 return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
300 root, comm, module,
301 segsize);
302
303 }
304
305
306 segsize = 1024 << 3;
307 return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
308 root, comm, module,
309 segsize);
310 #if 0
311
312
313 if (communicator_size < 4) {
314 return ompi_coll_base_bcast_intra_basic_linear(buff, count, datatype, root, comm, module);
315 }
316 if (communicator_size == 4) {
317 if (message_size < 524288) segsize = 0;
318 else segsize = 16384;
319 return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
320 }
321 if (communicator_size <= 8 && message_size < 4096) {
322 return ompi_coll_base_bcast_intra_basic_linear(buff, count, datatype, root, comm, module);
323 }
324 if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) {
325 segsize = 16384;
326 return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
327 }
328 if (message_size >= 524288) {
329 segsize = 16384;
330 return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype, root, comm, module, segsize);
331 }
332 segsize = 0;
333
334
335 return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
336 #endif
337 }
338
339
340
341
342
343
344
345
346
347 int ompi_coll_tuned_reduce_intra_dec_fixed( const void *sendbuf, void *recvbuf,
348 int count, struct ompi_datatype_t* datatype,
349 struct ompi_op_t* op, int root,
350 struct ompi_communicator_t* comm,
351 mca_coll_base_module_t *module)
352 {
353 int communicator_size, segsize = 0;
354 size_t message_size, dsize;
355 const double a1 = 0.6016 / 1024.0;
356 const double b1 = 1.3496;
357 const double a2 = 0.0410 / 1024.0;
358 const double b2 = 9.7128;
359 const double a3 = 0.0422 / 1024.0;
360 const double b3 = 1.1614;
361 const double a4 = 0.0033 / 1024.0;
362 const double b4 = 1.6761;
363
364 const int max_requests = 0;
365
366 communicator_size = ompi_comm_size(comm);
367
368
369 ompi_datatype_type_size(datatype, &dsize);
370 message_size = dsize * (ptrdiff_t)count;
371
372
373
374
375
376 if( !ompi_op_is_commute(op) ) {
377 if ((communicator_size < 12) && (message_size < 2048)) {
378 return ompi_coll_base_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
379 }
380 return ompi_coll_base_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module,
381 0, max_requests);
382 }
383
384 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed "
385 "root %d rank %d com_size %d msg_length %lu",
386 root, ompi_comm_rank(comm), communicator_size, (unsigned long)message_size));
387
388 if ((communicator_size < 8) && (message_size < 512)){
389
390 return ompi_coll_base_reduce_intra_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm, module);
391 } else if (((communicator_size < 8) && (message_size < 20480)) ||
392 (message_size < 2048) || (count <= 1)) {
393
394 segsize = 0;
395 return ompi_coll_base_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
396 segsize, max_requests);
397 } else if (communicator_size > (a1 * message_size + b1)) {
398
399 segsize = 1024;
400 return ompi_coll_base_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
401 segsize, max_requests);
402 } else if (communicator_size > (a2 * message_size + b2)) {
403
404 segsize = 1024;
405 return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
406 segsize, max_requests);
407 } else if (communicator_size > (a3 * message_size + b3)) {
408
409 segsize = 32*1024;
410 return ompi_coll_base_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root,
411 comm, module, segsize, max_requests);
412 }
413 if (communicator_size > (a4 * message_size + b4)) {
414
415 segsize = 32*1024;
416 } else {
417
418 segsize = 64*1024;
419 }
420 return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
421 segsize, max_requests);
422
423 #if 0
424
425 if (message_size <= 4096) {
426 segsize = 0;
427 fanout = communicator_size - 1;
428
429
430 return ompi_coll_base_reduce_intra_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm, module);
431 }
432 if (message_size < 524288) {
433 if (message_size <= 65536 ) {
434 segsize = 32768;
435 fanout = 8;
436 } else {
437 segsize = 1024;
438 fanout = communicator_size/2;
439 }
440
441
442 return ompi_coll_base_reduce_intra_chain(sendbuf, recvbuf, count, datatype, op, root, comm, module,
443 segsize, fanout, max_requests);
444 }
445 segsize = 1024;
446 return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
447 segsize, max_requests);
448 #endif
449 }
450
451
452
453
454
455
456
457
458
459 int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( const void *sbuf, void *rbuf,
460 const int *rcounts,
461 struct ompi_datatype_t *dtype,
462 struct ompi_op_t *op,
463 struct ompi_communicator_t *comm,
464 mca_coll_base_module_t *module)
465 {
466 int comm_size, i, pow2;
467 size_t total_message_size, dsize;
468 const double a = 0.0012;
469 const double b = 8.0;
470 const size_t small_message_size = 12 * 1024;
471 const size_t large_message_size = 256 * 1024;
472
473 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed"));
474
475 comm_size = ompi_comm_size(comm);
476
477 ompi_datatype_type_size(dtype, &dsize);
478 total_message_size = 0;
479 for (i = 0; i < comm_size; i++) {
480 total_message_size += rcounts[i];
481 }
482
483 if( !ompi_op_is_commute(op) ) {
484 return ompi_coll_base_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
485 dtype, op,
486 comm, module);
487 }
488
489 total_message_size *= dsize;
490
491
492 pow2 = opal_next_poweroftwo_inclusive (comm_size);
493
494 if ((total_message_size <= small_message_size) ||
495 ((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
496 (comm_size >= a * total_message_size + b)) {
497 return
498 ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
499 dtype, op,
500 comm, module);
501 }
502 return ompi_coll_base_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
503 dtype, op,
504 comm, module);
505 }
506
507
508
509
510
511
512
513
514
515 int ompi_coll_tuned_reduce_scatter_block_intra_dec_fixed(const void *sbuf, void *rbuf,
516 int rcount,
517 struct ompi_datatype_t *dtype,
518 struct ompi_op_t *op,
519 struct ompi_communicator_t *comm,
520 mca_coll_base_module_t *module)
521 {
522 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_block_intra_dec_fixed"));
523 return ompi_coll_base_reduce_scatter_block_basic_linear(sbuf, rbuf, rcount,
524 dtype, op, comm, module);
525 }
526
527
528
529
530
531
532
533
534
535
536 int ompi_coll_tuned_allgather_intra_dec_fixed(const void *sbuf, int scount,
537 struct ompi_datatype_t *sdtype,
538 void* rbuf, int rcount,
539 struct ompi_datatype_t *rdtype,
540 struct ompi_communicator_t *comm,
541 mca_coll_base_module_t *module)
542 {
543 int communicator_size, pow2_size;
544 size_t dsize, total_dsize;
545
546 communicator_size = ompi_comm_size(comm);
547
548
549 if (communicator_size == 2) {
550 return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
551 rbuf, rcount, rdtype,
552 comm, module);
553 }
554
555
556 if (MPI_IN_PLACE != sbuf) {
557 ompi_datatype_type_size(sdtype, &dsize);
558 } else {
559 ompi_datatype_type_size(rdtype, &dsize);
560 }
561 total_dsize = dsize * (ptrdiff_t)scount * (ptrdiff_t)communicator_size;
562
563 OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed"
564 " rank %d com_size %d msg_length %lu",
565 ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize));
566
567 pow2_size = opal_next_poweroftwo_inclusive (communicator_size);
568
569
570
571
572
573
574
575
576
577 if (total_dsize < 50000) {
578 if (pow2_size == communicator_size) {
579 return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
580 rbuf, rcount, rdtype,
581 comm, module);
582 } else {
583 return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
584 rbuf, rcount, rdtype,
585 comm, module);
586 }
587 } else {
588 if (communicator_size % 2) {
589 return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
590 rbuf, rcount, rdtype,
591 comm, module);
592 } else {
593 return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
594 rbuf, rcount, rdtype,
595 comm, module);
596 }
597 }
598
599 #if defined(USE_MPICH2_DECISION)
600
601
602
603
604
605
606
607
608
609 if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
610 return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
611 rbuf, rcount, rdtype,
612 comm, module);
613 } else if (total_dsize <= 81920) {
614 return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
615 rbuf, rcount, rdtype,
616 comm, module);
617 }
618 return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
619 rbuf, rcount, rdtype,
620 comm, module);
621 #endif
622 }
623
624
625
626
627
628
629
630
631
632
633 int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
634 struct ompi_datatype_t *sdtype,
635 void* rbuf, const int *rcounts,
636 const int *rdispls,
637 struct ompi_datatype_t *rdtype,
638 struct ompi_communicator_t *comm,
639 mca_coll_base_module_t *module)
640 {
641 int i;
642 int communicator_size;
643 size_t dsize, total_dsize;
644
645 communicator_size = ompi_comm_size(comm);
646
647
648 if (communicator_size == 2) {
649 return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype,
650 rbuf, rcounts, rdispls, rdtype,
651 comm, module);
652 }
653
654
655 if (MPI_IN_PLACE != sbuf) {
656 ompi_datatype_type_size(sdtype, &dsize);
657 } else {
658 ompi_datatype_type_size(rdtype, &dsize);
659 }
660
661 total_dsize = 0;
662 for (i = 0; i < communicator_size; i++) {
663 total_dsize += dsize * (ptrdiff_t)rcounts[i];
664 }
665
666 OPAL_OUTPUT((ompi_coll_tuned_stream,
667 "ompi_coll_tuned_allgatherv_intra_dec_fixed"
668 " rank %d com_size %d msg_length %lu",
669 ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize));
670
671
672 if (total_dsize < 50000) {
673 return ompi_coll_base_allgatherv_intra_bruck(sbuf, scount, sdtype,
674 rbuf, rcounts, rdispls, rdtype,
675 comm, module);
676 } else {
677 if (communicator_size % 2) {
678 return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
679 rbuf, rcounts, rdispls, rdtype,
680 comm, module);
681 } else {
682 return ompi_coll_base_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
683 rbuf, rcounts, rdispls, rdtype,
684 comm, module);
685 }
686 }
687 }
688
689
690
691
692
693
694
695
696
697
698 int ompi_coll_tuned_gather_intra_dec_fixed(const void *sbuf, int scount,
699 struct ompi_datatype_t *sdtype,
700 void* rbuf, int rcount,
701 struct ompi_datatype_t *rdtype,
702 int root,
703 struct ompi_communicator_t *comm,
704 mca_coll_base_module_t *module)
705 {
706 const int large_segment_size = 32768;
707 const int small_segment_size = 1024;
708
709 const size_t large_block_size = 92160;
710 const size_t intermediate_block_size = 6000;
711 const size_t small_block_size = 1024;
712
713 const int large_communicator_size = 60;
714 const int small_communicator_size = 10;
715
716 int communicator_size, rank;
717 size_t dsize, block_size;
718
719 OPAL_OUTPUT((ompi_coll_tuned_stream,
720 "ompi_coll_tuned_gather_intra_dec_fixed"));
721
722 communicator_size = ompi_comm_size(comm);
723 rank = ompi_comm_rank(comm);
724
725
726 if (rank == root) {
727 ompi_datatype_type_size(rdtype, &dsize);
728 block_size = dsize * (ptrdiff_t)rcount;
729 } else {
730 ompi_datatype_type_size(sdtype, &dsize);
731 block_size = dsize * (ptrdiff_t)scount;
732 }
733
734 if (block_size > large_block_size) {
735 return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
736 rbuf, rcount, rdtype,
737 root, comm, module,
738 large_segment_size);
739
740 } else if (block_size > intermediate_block_size) {
741 return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
742 rbuf, rcount, rdtype,
743 root, comm, module,
744 small_segment_size);
745
746 } else if ((communicator_size > large_communicator_size) ||
747 ((communicator_size > small_communicator_size) &&
748 (block_size < small_block_size))) {
749 return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
750 rbuf, rcount, rdtype,
751 root, comm, module);
752 }
753
754 return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
755 rbuf, rcount, rdtype,
756 root, comm, module);
757 }
758
759
760
761
762
763
764
765
766
767
768 int ompi_coll_tuned_scatter_intra_dec_fixed(const void *sbuf, int scount,
769 struct ompi_datatype_t *sdtype,
770 void* rbuf, int rcount,
771 struct ompi_datatype_t *rdtype,
772 int root, struct ompi_communicator_t *comm,
773 mca_coll_base_module_t *module)
774 {
775 const size_t small_block_size = 300;
776 const int small_comm_size = 10;
777 int communicator_size, rank;
778 size_t dsize, block_size;
779
780 OPAL_OUTPUT((ompi_coll_tuned_stream,
781 "ompi_coll_tuned_scatter_intra_dec_fixed"));
782
783 communicator_size = ompi_comm_size(comm);
784 rank = ompi_comm_rank(comm);
785
786 if (root == rank) {
787 ompi_datatype_type_size(sdtype, &dsize);
788 block_size = dsize * (ptrdiff_t)scount;
789 } else {
790 ompi_datatype_type_size(rdtype, &dsize);
791 block_size = dsize * (ptrdiff_t)rcount;
792 }
793
794 if ((communicator_size > small_comm_size) &&
795 (block_size < small_block_size)) {
796 return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
797 rbuf, rcount, rdtype,
798 root, comm, module);
799 }
800 return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
801 rbuf, rcount, rdtype,
802 root, comm, module);
803 }