This source file includes the following definitions.
- NBC_Allreduce_args_compare
- nbc_allreduce_init
- ompi_coll_libnbc_iallreduce
- nbc_allreduce_inter_init
- ompi_coll_libnbc_iallreduce_inter
- allred_sched_diss
- allred_sched_ring
- allred_sched_linear
- allred_sched_redscat_allgather
- ompi_coll_libnbc_allreduce_init
- ompi_coll_libnbc_allreduce_inter_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 #include "nbc_internal.h"
22 #include "ompi/communicator/communicator.h"
23 #include "ompi/datatype/ompi_datatype.h"
24 #include "ompi/op/op.h"
25 #include "opal/util/bit_ops.h"
26
27 #include <assert.h>
28
29 static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, ptrdiff_t gap, const void *sendbuf,
30 void *recvbuf, MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf);
31 static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, const void *sendbuf,
32 void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule,
33 void *tmpbuf);
34 static inline int allred_sched_linear(int rank, int p, const void *sendbuf, void *recvbuf, int count,
35 MPI_Datatype datatype, ptrdiff_t gap, MPI_Op op, int ext, int size,
36 NBC_Schedule *schedule, void *tmpbuf);
37 static inline int allred_sched_redscat_allgather(
38 int rank, int comm_size, int count, MPI_Datatype datatype, ptrdiff_t gap,
39 const void *sbuf, void *rbuf, MPI_Op op, char inplace,
40 NBC_Schedule *schedule, void *tmpbuf, struct ompi_communicator_t *comm);
41
42 #ifdef NBC_CACHE_SCHEDULE
43
44 int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param) {
45 if ((a->sendbuf == b->sendbuf) &&
46 (a->recvbuf == b->recvbuf) &&
47 (a->count == b->count) &&
48 (a->datatype == b->datatype) &&
49 (a->op == b->op)) {
50 return 0;
51 }
52
53 if( a->sendbuf < b->sendbuf ) {
54 return -1;
55 }
56
57 return 1;
58 }
59 #endif
60
61 static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
62 struct ompi_communicator_t *comm, ompi_request_t ** request,
63 struct mca_coll_base_module_2_3_0_t *module, bool persistent)
64 {
65 int rank, p, res;
66 ptrdiff_t ext, lb;
67 NBC_Schedule *schedule;
68 size_t size;
69 #ifdef NBC_CACHE_SCHEDULE
70 NBC_Allreduce_args *args, *found, search;
71 #endif
72 enum { NBC_ARED_BINOMIAL, NBC_ARED_RING, NBC_ARED_REDSCAT_ALLGATHER } alg;
73 char inplace;
74 void *tmpbuf = NULL;
75 ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
76 ptrdiff_t span, gap;
77
78 NBC_IN_PLACE(sendbuf, recvbuf, inplace);
79
80 rank = ompi_comm_rank (comm);
81 p = ompi_comm_size (comm);
82
83 res = ompi_datatype_get_extent(datatype, &lb, &ext);
84 if (OMPI_SUCCESS != res) {
85 NBC_Error ("MPI Error in ompi_datatype_type_extent() (%i)", res);
86 return res;
87 }
88
89 res = ompi_datatype_type_size (datatype, &size);
90 if (OMPI_SUCCESS != res) {
91 NBC_Error ("MPI Error in ompi_datatype_type_size() (%i)", res);
92 return res;
93 }
94
95 if (1 == p && (!persistent || inplace)) {
96 if (!inplace) {
97
98 res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm);
99 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
100 return res;
101 }
102 }
103 return nbc_get_noop_request(persistent, request);
104 }
105
106 span = opal_datatype_span(&datatype->super, count, &gap);
107 tmpbuf = malloc (span);
108 if (OPAL_UNLIKELY(NULL == tmpbuf)) {
109 return OMPI_ERR_OUT_OF_RESOURCE;
110 }
111
112
113 int nprocs_pof2 = opal_next_poweroftwo(p) >> 1;
114 if (libnbc_iallreduce_algorithm == 0) {
115 if(p < 4 || size*count < 65536 || !ompi_op_is_commute(op) || inplace) {
116 alg = NBC_ARED_BINOMIAL;
117 } else if (count >= nprocs_pof2 && ompi_op_is_commute(op)) {
118 alg = NBC_ARED_REDSCAT_ALLGATHER;
119 } else {
120 alg = NBC_ARED_RING;
121 }
122 } else {
123 if (libnbc_iallreduce_algorithm == 1)
124 alg = NBC_ARED_RING;
125 else if (libnbc_iallreduce_algorithm == 2)
126 alg = NBC_ARED_BINOMIAL;
127 else if (libnbc_iallreduce_algorithm == 3 && count >= nprocs_pof2 && ompi_op_is_commute(op)) {
128 alg = NBC_ARED_REDSCAT_ALLGATHER;
129 } else
130 alg = NBC_ARED_RING;
131 }
132 #ifdef NBC_CACHE_SCHEDULE
133
134 search.sendbuf = sendbuf;
135 search.recvbuf = recvbuf;
136 search.count = count;
137 search.datatype = datatype;
138 search.op = op;
139 found = (NBC_Allreduce_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], &search);
140 if (NULL == found) {
141 #endif
142 schedule = OBJ_NEW(NBC_Schedule);
143 if (NULL == schedule) {
144 free(tmpbuf);
145 return OMPI_ERR_OUT_OF_RESOURCE;
146 }
147
148 if (p == 1) {
149 res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
150 recvbuf, false, count, datatype, schedule, false);
151 } else {
152 switch(alg) {
153 case NBC_ARED_BINOMIAL:
154 res = allred_sched_diss(rank, p, count, datatype, gap, sendbuf, recvbuf, op, inplace, schedule, tmpbuf);
155 break;
156 case NBC_ARED_REDSCAT_ALLGATHER:
157 res = allred_sched_redscat_allgather(rank, p, count, datatype, gap, sendbuf, recvbuf, op, inplace, schedule, tmpbuf, comm);
158 break;
159 case NBC_ARED_RING:
160 res = allred_sched_ring(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, tmpbuf);
161 break;
162 }
163 }
164
165 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
166 OBJ_RELEASE(schedule);
167 free(tmpbuf);
168 return res;
169 }
170
171 res = NBC_Sched_commit(schedule);
172 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
173 OBJ_RELEASE(schedule);
174 free(tmpbuf);
175 return res;
176 }
177
178 #ifdef NBC_CACHE_SCHEDULE
179
180 args = (NBC_Allreduce_args *) malloc (sizeof(args));
181 if (NULL != args) {
182 args->sendbuf = sendbuf;
183 args->recvbuf = recvbuf;
184 args->count = count;
185 args->datatype = datatype;
186 args->op = op;
187 args->schedule = schedule;
188 res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE], args, args, 0);
189 if (0 == res) {
190 OBJ_RETAIN(schedule);
191
192
193 if (++libnbc_module->NBC_Dict_size[NBC_ALLREDUCE] > NBC_SCHED_DICT_UPPER) {
194 NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_ALLREDUCE],
195 &libnbc_module->NBC_Dict_size[NBC_ALLREDUCE]);
196 }
197 } else {
198 NBC_Error("error in dict_insert() (%i)", res);
199 free (args);
200 }
201 }
202 } else {
203
204 schedule = found->schedule;
205 OBJ_RETAIN(schedule);
206 }
207 #endif
208
209 res = NBC_Schedule_request (schedule, comm, libnbc_module, persistent, request, tmpbuf);
210 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
211 OBJ_RELEASE(schedule);
212 free(tmpbuf);
213 return res;
214 }
215
216 return OMPI_SUCCESS;
217 }
218
219 int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
220 struct ompi_communicator_t *comm, ompi_request_t ** request,
221 struct mca_coll_base_module_2_3_0_t *module) {
222 int res = nbc_allreduce_init(sendbuf, recvbuf, count, datatype, op,
223 comm, request, module, false);
224 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
225 return res;
226 }
227
228 res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
229 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
230 NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
231 *request = &ompi_request_null.request;
232 return res;
233 }
234
235 return OMPI_SUCCESS;
236 }
237
238 static int nbc_allreduce_inter_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
239 struct ompi_communicator_t *comm, ompi_request_t ** request,
240 struct mca_coll_base_module_2_3_0_t *module, bool persistent)
241 {
242 int rank, res, rsize;
243 size_t size;
244 MPI_Aint ext;
245 NBC_Schedule *schedule;
246 void *tmpbuf = NULL;
247 ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
248 ptrdiff_t span, gap;
249
250 rank = ompi_comm_rank (comm);
251 rsize = ompi_comm_remote_size (comm);
252
253 res = ompi_datatype_type_extent(datatype, &ext);
254 if (MPI_SUCCESS != res) {
255 NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res);
256 return res;
257 }
258
259 res = ompi_datatype_type_size(datatype, &size);
260 if (MPI_SUCCESS != res) {
261 NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res);
262 return res;
263 }
264
265 span = opal_datatype_span(&datatype->super, count, &gap);
266 tmpbuf = malloc (span);
267 if (OPAL_UNLIKELY(NULL == tmpbuf)) {
268 return OMPI_ERR_OUT_OF_RESOURCE;
269 }
270
271 schedule = OBJ_NEW(NBC_Schedule);
272 if (OPAL_UNLIKELY(NULL == schedule)) {
273 free(tmpbuf);
274 return OMPI_ERR_OUT_OF_RESOURCE;
275 }
276
277 res = allred_sched_linear (rank, rsize, sendbuf, recvbuf, count, datatype, gap, op,
278 ext, size, schedule, tmpbuf);
279 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
280 OBJ_RELEASE(schedule);
281 free(tmpbuf);
282 return res;
283 }
284
285 res = NBC_Sched_commit(schedule);
286 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
287 OBJ_RELEASE(schedule);
288 free(tmpbuf);
289 return res;
290 }
291
292 res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
293 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
294 OBJ_RELEASE(schedule);
295 free(tmpbuf);
296 return res;
297 }
298
299 return OMPI_SUCCESS;
300 }
301
302 int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
303 struct ompi_communicator_t *comm, ompi_request_t ** request,
304 struct mca_coll_base_module_2_3_0_t *module) {
305 int res = nbc_allreduce_inter_init(sendbuf, recvbuf, count, datatype, op,
306 comm, request, module, false);
307 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
308 return res;
309 }
310
311 res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
312 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
313 NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
314 *request = &ompi_request_null.request;
315 return res;
316 }
317
318 return OMPI_SUCCESS;
319 }
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
/*
 * Map a real rank to its virtual rank by swapping rank 0 with `root`:
 * rank 0 becomes `root`, `root` becomes 0, every other rank is unchanged.
 * `vrank` must be an lvalue; `rank` and `root` are evaluated more than once.
 * Arguments are parenthesized so that expressions such as `x & 1` are
 * compared as a whole.  The brace form (rather than do/while) is kept
 * because some call sites do not put a semicolon after the invocation.
 */
#define RANK2VRANK(rank, vrank, root) \
{ \
  (vrank) = (rank); \
  if ((rank) == 0) (vrank) = (root); \
  if ((rank) == (root)) (vrank) = 0; \
}
/*
 * Inverse of the rank-to-virtual-rank mapping: virtual rank 0 becomes
 * `root`, virtual rank `root` becomes 0, every other value is unchanged.
 * `rank` must be an lvalue; `vrank` and `root` are evaluated more than once.
 * Arguments are parenthesized so that expressions such as `v - (1 << r)` or
 * `x & 1` are compared as a whole.  The brace form (rather than do/while) is
 * kept because some call sites do not put a semicolon after the invocation.
 */
#define VRANK2RANK(rank, vrank, root) \
{ \
  (rank) = (vrank); \
  if ((vrank) == 0) (rank) = (root); \
  if ((vrank) == (root)) (rank) = 0; \
}
356 static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, ptrdiff_t gap, const void *sendbuf, void *recvbuf,
357 MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf) {
358 int root, vrank, maxr, vpeer, peer, res;
359 char *rbuf, *lbuf, *buf;
360 int tmprbuf, tmplbuf;
361
362 root = 0;
363 RANK2VRANK(rank, vrank, root);
364 maxr = (int)ceil((log((double)p)/LOG2));
365
366 if (0 == (maxr%2)) {
367 rbuf = (void *)(-gap);
368 tmprbuf = true;
369 lbuf = recvbuf;
370 tmplbuf = false;
371 } else {
372 lbuf = (void *)(-gap);
373 tmplbuf = true;
374 rbuf = recvbuf;
375 tmprbuf = false;
376 if (inplace) {
377 res = NBC_Sched_copy(rbuf, false, count, datatype,
378 ((char *)tmpbuf) - gap, false, count, datatype,
379 schedule, true);
380 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
381 return res;
382 }
383 }
384 }
385
386 for (int r = 1, firstred = 1 ; r <= maxr ; ++r) {
387 if ((vrank % (1 << r)) == 0) {
388
389 vpeer = vrank + (1 << (r - 1));
390 VRANK2RANK(peer, vpeer, root)
391 if (peer < p) {
392
393 res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true);
394 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
395 return res;
396 }
397
398
399 if (firstred && !inplace) {
400
401 res = NBC_Sched_op (sendbuf, false, rbuf, tmprbuf, count, datatype, op, schedule, true);
402 firstred = 0;
403 } else {
404
405 res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true);
406 }
407 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
408 return res;
409 }
410
411 buf = rbuf; rbuf = lbuf ; lbuf = buf;
412 tmprbuf ^= 1; tmplbuf ^= 1;
413 }
414 } else {
415
416 vpeer = vrank - (1 << (r - 1));
417 VRANK2RANK(peer, vpeer, root)
418 if (firstred && !inplace) {
419
420 res = NBC_Sched_send (sendbuf, false, count, datatype, peer, schedule, false);
421 } else {
422
423 res = NBC_Sched_send (lbuf, tmplbuf, count, datatype, peer, schedule, false);
424 }
425
426 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
427 return res;
428 }
429
430
431 break;
432 }
433 }
434
435
436
437 RANK2VRANK(rank, vrank, root);
438
439
440 if (vrank != 0) {
441 for (int r = 0; r < maxr ; ++r) {
442 if ((vrank >= (1 << r)) && (vrank < (1 << (r + 1)))) {
443 VRANK2RANK(peer, vrank - (1 << r), root);
444 res = NBC_Sched_recv (recvbuf, false, count, datatype, peer, schedule, false);
445 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
446 return res;
447 }
448 }
449 }
450
451 res = NBC_Sched_barrier (schedule);
452 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
453 return res;
454 }
455 }
456
457 if (0 == vrank) assert(lbuf == recvbuf);
458
459 for (int r = 0; r < maxr; ++r) {
460 if (((vrank + (1 << r) < p) && (vrank < (1 << r))) || (vrank == 0)) {
461 VRANK2RANK(peer, vrank + (1 << r), root);
462 res = NBC_Sched_send (recvbuf, false, count, datatype, peer, schedule, false);
463 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
464 return res;
465 }
466 }
467 }
468
469
470 return OMPI_SUCCESS;
471 }
472
473 static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, MPI_Op op,
474 int size, int ext, NBC_Schedule *schedule, void *tmpbuf) {
475 int segsize, *segsizes, *segoffsets;
476 int speer, rpeer;
477 int res = OMPI_SUCCESS;
478
479 if (count == 0) {
480 return OMPI_SUCCESS;
481 }
482
483 segsizes = (int *) malloc (sizeof (int) * p);
484 segoffsets = (int *) malloc (sizeof (int) * p);
485 if (NULL == segsizes || NULL == segoffsets) {
486 free (segsizes);
487 free (segoffsets);
488 return OMPI_ERR_OUT_OF_RESOURCE;
489 }
490
491 segsize = (count + p - 1) / p;
492
493 segoffsets[0] = 0;
494 for (int i = 0, mycount = count ; i < p ; ++i) {
495 mycount -= segsize;
496 segsizes[i] = segsize;
497 if (mycount < 0) {
498 segsizes[i] = segsize + mycount;
499 mycount = 0;
500 }
501
502 if (i) {
503 segoffsets[i] = segoffsets[i-1] + segsizes[i-1];
504 }
505 }
506
507
508 speer = (r + 1) % p;
509 rpeer = (r - 1 + p) % p;
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610 for (int round = 0 ; round < p - 1 ; ++round) {
611 int selement = (r+1-round + 2*p )%p;
612 int soffset = segoffsets[selement]*ext;
613 int relement = (r-round + 2*p )%p;
614 int roffset = segoffsets[relement]*ext;
615
616
617 if (round == 0) {
618 res = NBC_Sched_send ((char *) sendbuf + soffset, false, segsizes[selement], datatype, speer,
619 schedule, false);
620 } else {
621 res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer,
622 schedule, false);
623 }
624
625 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
626 break;
627 }
628
629 res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer,
630 schedule, true);
631 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
632 break;
633 }
634
635 res = NBC_Sched_op ((char *) sendbuf + roffset, false, (char *) recvbuf + roffset, false,
636 segsizes[relement], datatype, op, schedule, true);
637 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
638 break;
639 }
640 }
641
642 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
643 free (segsizes);
644 free (segoffsets);
645 return res;
646 }
647
648 for (int round = p - 1 ; round < 2 * p - 2 ; ++round) {
649 int selement = (r+1-round + 2*p )%p;
650 int soffset = segoffsets[selement]*ext;
651 int relement = (r-round + 2*p )%p;
652 int roffset = segoffsets[relement]*ext;
653
654 res = NBC_Sched_send ((char *) recvbuf + soffset, false, segsizes[selement], datatype, speer,
655 schedule, false);
656 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
657 break;
658 }
659
660 res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer,
661 schedule, true);
662 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
663 break;
664 }
665 }
666
667 free (segsizes);
668 free (segoffsets);
669
670 return res;
671 }
672
673 static inline int allred_sched_linear(int rank, int rsize, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
674 ptrdiff_t gap, MPI_Op op, int ext, int size, NBC_Schedule *schedule, void *tmpbuf) {
675 int res;
676
677 if (0 == count) {
678 return OMPI_SUCCESS;
679 }
680
681
682 res = NBC_Sched_send (sendbuf, false, count, datatype, 0, schedule, false);
683 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
684 return res;
685 }
686
687
688 if (0 != rank || 1 ==(rsize%2)) {
689 res = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule, false);
690 } else {
691 res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, 0, schedule, false);
692 }
693 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
694 return res;
695 }
696
697 if (0 == rank) {
698 char *rbuf, *lbuf, *buf;
699 int tmprbuf, tmplbuf;
700
701 res = NBC_Sched_barrier (schedule);
702 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
703 return res;
704 }
705
706
707 if (0 == (rsize%2)) {
708 lbuf = (void *)(-gap);
709 tmplbuf = true;
710 rbuf = recvbuf;
711 tmprbuf = false;
712 } else {
713 rbuf = (void *)(-gap);
714 tmprbuf = true;
715 lbuf = recvbuf;
716 tmplbuf = false;
717 }
718
719
720 for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) {
721 res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, rpeer, schedule, true);
722 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
723 return res;
724 }
725
726 res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule, true);
727 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
728 return res;
729 }
730
731 buf = rbuf; rbuf = lbuf ; lbuf = buf;
732 tmprbuf ^= 1; tmplbuf ^= 1;
733 }
734
735
736 res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, 0, schedule, false);
737 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
738 return res;
739 }
740
741
742 res = NBC_Sched_send (recvbuf, false, count, datatype, 0, schedule, true);
743 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
744 return res;
745 }
746
747
748 for (int rpeer = 1 ; rpeer < rsize ; ++rpeer) {
749 res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rpeer, schedule, false);
750 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
751 return res;
752 }
753 }
754 }
755
756 return OMPI_SUCCESS;
757 }
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813 static inline int allred_sched_redscat_allgather(
814 int rank, int comm_size, int count, MPI_Datatype datatype, ptrdiff_t gap,
815 const void *sbuf, void *rbuf, MPI_Op op, char inplace,
816 NBC_Schedule *schedule, void *tmpbuf, struct ompi_communicator_t *comm)
817 {
818 int res = OMPI_SUCCESS;
819 int *rindex = NULL, *rcount = NULL, *sindex = NULL, *scount = NULL;
820
821 int nsteps = opal_hibit(comm_size, comm->c_cube_dim + 1);
822 int nprocs_pof2 = 1 << nsteps;
823 if (!inplace) {
824 res = NBC_Sched_copy((char *)sbuf, false, count, datatype,
825 rbuf, false, count, datatype, schedule, true);
826 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
827 }
828 char *tmp_buf = (char *)tmpbuf - gap;
829 ptrdiff_t lb, extent;
830 ompi_datatype_get_extent(datatype, &lb, &extent);
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847 int vrank, step, wsize;
848 int nprocs_rem = comm_size - nprocs_pof2;
849 if (rank < 2 * nprocs_rem) {
850 int count_lhalf = count / 2;
851 int count_rhalf = count - count_lhalf;
852 if (rank % 2 != 0) {
853
854
855
856
857
858 res = NBC_Sched_send(rbuf, false, count_lhalf, datatype, rank - 1,
859 schedule, false);
860 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
861 res = NBC_Sched_recv(tmp_buf + (ptrdiff_t)count_lhalf * extent,
862 false, count_rhalf, datatype, rank - 1, schedule, true);
863 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
864 res = NBC_Sched_op(tmp_buf + (ptrdiff_t)count_lhalf * extent,
865 false, (char *)rbuf + (ptrdiff_t)count_lhalf * extent,
866 false, count_rhalf, datatype, op, schedule, true);
867 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
868
869 res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
870 false, count_rhalf, datatype, rank - 1, schedule, true);
871 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
872
873 vrank = -1;
874 } else {
875
876
877
878
879
880 res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
881 false, count_rhalf, datatype, rank + 1, schedule, false);
882 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
883 res = NBC_Sched_recv(tmp_buf, false, count_lhalf, datatype, rank + 1,
884 schedule, true);
885 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
886 res = NBC_Sched_op(tmp_buf, false, rbuf, false, count_lhalf,
887 datatype, op, schedule, true);
888 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
889
890 res = NBC_Sched_recv((char *)rbuf + (ptrdiff_t)count_lhalf * extent,
891 false, count_rhalf, datatype, rank + 1, schedule, true);
892 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
893 vrank = rank / 2;
894 }
895 } else {
896 vrank = rank - nprocs_rem;
897 }
898
899
900
901
902
903
904
905
906
907
908
909
910 rindex = malloc(sizeof(*rindex) * nsteps);
911 sindex = malloc(sizeof(*sindex) * nsteps);
912 rcount = malloc(sizeof(*rcount) * nsteps);
913 scount = malloc(sizeof(*scount) * nsteps);
914 if (NULL == rindex || NULL == sindex || NULL == rcount || NULL == scount) {
915 res = OMPI_ERR_OUT_OF_RESOURCE;
916 goto cleanup_and_return;
917 }
918 if (vrank != -1) {
919 step = 0;
920 wsize = count;
921 sindex[0] = rindex[0] = 0;
922 for (int mask = 1; mask < nprocs_pof2; mask <<= 1) {
923
924
925
926
927 int vdest = vrank ^ mask;
928
929 int dest = (vdest < nprocs_rem) ? vdest * 2 : vdest + nprocs_rem;
930 if (rank < dest) {
931
932
933
934
935
936 rcount[step] = wsize / 2;
937 scount[step] = wsize - rcount[step];
938 sindex[step] = rindex[step] + rcount[step];
939 } else {
940
941
942
943
944
945 scount[step] = wsize / 2;
946 rcount[step] = wsize - scount[step];
947 rindex[step] = sindex[step] + scount[step];
948 }
949
950 res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)sindex[step] * extent,
951 false, scount[step], datatype, dest, schedule, false);
952 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
953 res = NBC_Sched_recv((char *)tmp_buf + (ptrdiff_t)rindex[step] * extent,
954 false, rcount[step], datatype, dest, schedule, true);
955 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
956
957 res = NBC_Sched_op((char *)tmp_buf + (ptrdiff_t)rindex[step] * extent,
958 false, (char *)rbuf + (ptrdiff_t)rindex[step] * extent,
959 false, rcount[step], datatype, op, schedule, true);
960 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
961
962 if (step + 1 < nsteps) {
963 rindex[step + 1] = rindex[step];
964 sindex[step + 1] = rindex[step];
965 wsize = rcount[step];
966 step++;
967 }
968 }
969
970
971
972
973
974
975
976
977
978
979
980 step = nsteps - 1;
981 for (int mask = nprocs_pof2 >> 1; mask > 0; mask >>= 1) {
982 int vdest = vrank ^ mask;
983
984 int dest = (vdest < nprocs_rem) ? vdest * 2 : vdest + nprocs_rem;
985
986
987
988
989 res = NBC_Sched_send((char *)rbuf + (ptrdiff_t)rindex[step] * extent,
990 false, rcount[step], datatype, dest, schedule, false);
991 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
992 res = NBC_Sched_recv((char *)rbuf + (ptrdiff_t)sindex[step] * extent,
993 false, scount[step], datatype, dest, schedule, true);
994 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
995 step--;
996 }
997 }
998
999
1000
1001 if (rank < 2 * nprocs_rem) {
1002 if (rank % 2 != 0) {
1003
1004 res = NBC_Sched_recv(rbuf, false, count, datatype, rank - 1, schedule, false);
1005 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
1006 } else {
1007
1008 res = NBC_Sched_send(rbuf, false, count, datatype, rank + 1, schedule, false);
1009 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
1010 }
1011 }
1012 cleanup_and_return:
1013 if (NULL != rindex)
1014 free(rindex);
1015 if (NULL != sindex)
1016 free(sindex);
1017 if (NULL != rcount)
1018 free(rcount);
1019 if (NULL != scount)
1020 free(scount);
1021 return res;
1022 }
1023
1024 int ompi_coll_libnbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
1025 struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
1026 struct mca_coll_base_module_2_3_0_t *module) {
1027 int res = nbc_allreduce_init(sendbuf, recvbuf, count, datatype, op,
1028 comm, request, module, true);
1029 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
1030 return res;
1031 }
1032
1033 return OMPI_SUCCESS;
1034 }
1035
1036 int ompi_coll_libnbc_allreduce_inter_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
1037 struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
1038 struct mca_coll_base_module_2_3_0_t *module) {
1039 int res = nbc_allreduce_inter_init(sendbuf, recvbuf, count, datatype, op,
1040 comm, request, module, true);
1041 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
1042 return res;
1043 }
1044
1045 return OMPI_SUCCESS;
1046 }
1047