This source file includes following definitions.
- NBC_Bcast_args_compare
- nbc_bcast_init
- ompi_coll_libnbc_ibcast
- bcast_sched_binomial
- bcast_sched_linear
- bcast_sched_chain
- bcast_sched_knomial
- nbc_bcast_inter_init
- ompi_coll_libnbc_ibcast_inter
- ompi_coll_libnbc_bcast_init
- ompi_coll_libnbc_bcast_inter_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 #include "nbc_internal.h"
22
23 static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count,
24 MPI_Datatype datatype);
25 static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count,
26 MPI_Datatype datatype);
27 static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count,
28 MPI_Datatype datatype, int fragsize, size_t size);
29 static inline int bcast_sched_knomial(int rank, int comm_size, int root, NBC_Schedule *schedule, void *buf,
30 int count, MPI_Datatype datatype, int knomial_radix);
31
32 #ifdef NBC_CACHE_SCHEDULE
33
34 int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param) {
35 if ((a->buffer == b->buffer) &&
36 (a->count == b->count) &&
37 (a->datatype == b->datatype) &&
38 (a->root == b->root) ) {
39 return 0;
40 }
41
42 if( a->buffer < b->buffer ) {
43 return -1;
44 }
45
46 return 1;
47 }
48 #endif
49
50 static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int root,
51 struct ompi_communicator_t *comm, ompi_request_t ** request,
52 struct mca_coll_base_module_2_3_0_t *module, bool persistent)
53 {
54 int rank, p, res, segsize;
55 size_t size;
56 NBC_Schedule *schedule;
57 #ifdef NBC_CACHE_SCHEDULE
58 NBC_Bcast_args *args, *found, search;
59 #endif
60 enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN, NBC_BCAST_KNOMIAL } alg;
61 ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
62
63 rank = ompi_comm_rank (comm);
64 p = ompi_comm_size (comm);
65
66 if (1 == p) {
67 return nbc_get_noop_request(persistent, request);
68 }
69
70 res = ompi_datatype_type_size(datatype, &size);
71 if (MPI_SUCCESS != res) {
72 NBC_Error("MPI Error in ompi_datatype_type_size() (%i)", res);
73 return res;
74 }
75
76 segsize = 16384;
77
78 if (libnbc_ibcast_algorithm == 0) {
79 if( libnbc_ibcast_skip_dt_decision ) {
80 if (p <= 4) {
81 alg = NBC_BCAST_LINEAR;
82 }
83 else {
84 alg = NBC_BCAST_BINOMIAL;
85 }
86 }
87 else {
88 if (p <= 4) {
89 alg = NBC_BCAST_LINEAR;
90 } else if (size * count < 65536) {
91 alg = NBC_BCAST_BINOMIAL;
92 } else if (size * count < 524288) {
93 alg = NBC_BCAST_CHAIN;
94 segsize = 8192;
95 } else {
96 alg = NBC_BCAST_CHAIN;
97 segsize = 32768;
98 }
99 }
100 } else {
101
102 if (libnbc_ibcast_algorithm == 1) {
103 alg = NBC_BCAST_LINEAR;
104 } else if (libnbc_ibcast_algorithm == 2) {
105 alg = NBC_BCAST_BINOMIAL;
106 } else if (libnbc_ibcast_algorithm == 3) {
107 alg = NBC_BCAST_CHAIN;
108 } else if (libnbc_ibcast_algorithm == 4 && libnbc_ibcast_knomial_radix > 1) {
109 alg = NBC_BCAST_KNOMIAL;
110 } else {
111 alg = NBC_BCAST_LINEAR;
112 }
113 }
114
115 #ifdef NBC_CACHE_SCHEDULE
116
117 search.buffer = buffer;
118 search.count = count;
119 search.datatype = datatype;
120 search.root = root;
121 found = (NBC_Bcast_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], &search);
122 if (NULL == found) {
123 #endif
124 schedule = OBJ_NEW(NBC_Schedule);
125 if (OPAL_UNLIKELY(NULL == schedule)) {
126 return OMPI_ERR_OUT_OF_RESOURCE;
127 }
128
129 switch(alg) {
130 case NBC_BCAST_LINEAR:
131 res = bcast_sched_linear(rank, p, root, schedule, buffer, count, datatype);
132 break;
133 case NBC_BCAST_BINOMIAL:
134 res = bcast_sched_binomial(rank, p, root, schedule, buffer, count, datatype);
135 break;
136 case NBC_BCAST_CHAIN:
137 res = bcast_sched_chain(rank, p, root, schedule, buffer, count, datatype, segsize, size);
138 break;
139 case NBC_BCAST_KNOMIAL:
140 res = bcast_sched_knomial(rank, p, root, schedule, buffer, count, datatype, libnbc_ibcast_knomial_radix);
141 break;
142 }
143
144 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
145 OBJ_RELEASE(schedule);
146 return res;
147 }
148
149 res = NBC_Sched_commit (schedule);
150 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
151 OBJ_RELEASE(schedule);
152 return res;
153 }
154
155 #ifdef NBC_CACHE_SCHEDULE
156
157 args = (NBC_Bcast_args *) malloc (sizeof (args));
158 if (NULL != args) {
159 args->buffer = buffer;
160 args->count = count;
161 args->datatype = datatype;
162 args->root = root;
163 args->schedule = schedule;
164 res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST], args, args, 0);
165 if (0 == res) {
166 OBJ_RETAIN (schedule);
167
168
169 if (++libnbc_module->NBC_Dict_size[NBC_BCAST] > NBC_SCHED_DICT_UPPER) {
170 NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_BCAST],
171 &libnbc_module->NBC_Dict_size[NBC_BCAST]);
172 }
173 } else {
174 NBC_Error("error in dict_insert() (%i)", res);
175 free (args);
176 }
177 }
178 } else {
179
180 schedule = found->schedule;
181 OBJ_RETAIN(schedule);
182 }
183 #endif
184
185 res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, NULL);
186 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
187 OBJ_RELEASE(schedule);
188 return res;
189 }
190
191 return OMPI_SUCCESS;
192 }
193
194 int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int root,
195 struct ompi_communicator_t *comm, ompi_request_t ** request,
196 struct mca_coll_base_module_2_3_0_t *module)
197 {
198 int res = nbc_bcast_init(buffer, count, datatype, root,
199 comm, request, module, false);
200 if (OPAL_LIKELY(OMPI_SUCCESS != res)) {
201 return res;
202 }
203 res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
204 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
205 NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
206 *request = &ompi_request_null.request;
207 return res;
208 }
209
210 return OMPI_SUCCESS;
211 }
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226 #define RANK2VRANK(rank, vrank, root) \
227 { \
228 vrank = rank; \
229 if (rank == 0) vrank = root; \
230 if (rank == root) vrank = 0; \
231 }
232 #define VRANK2RANK(rank, vrank, root) \
233 { \
234 rank = vrank; \
235 if (vrank == 0) rank = root; \
236 if (vrank == root) rank = 0; \
237 }
238 static inline int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) {
239 int maxr, vrank, peer, res;
240
241 maxr = (int)ceil((log((double)p)/LOG2));
242
243 RANK2VRANK(rank, vrank, root);
244
245
246 if (vrank != 0) {
247 for (int r = 0 ; r < maxr ; ++r) {
248 if ((vrank >= (1 << r)) && (vrank < (1 << (r + 1)))) {
249 VRANK2RANK(peer, vrank - (1 << r), root);
250 res = NBC_Sched_recv (buffer, false, count, datatype, peer, schedule, false);
251 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
252 return res;
253 }
254 }
255 }
256
257 res = NBC_Sched_barrier (schedule);
258 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
259 return res;
260 }
261 }
262
263
264 for (int r = 0 ; r < maxr ; ++r) {
265 if (((vrank + (1 << r) < p) && (vrank < (1 << r))) || (vrank == 0)) {
266 VRANK2RANK(peer, vrank + (1 << r), root);
267 res = NBC_Sched_send (buffer, false, count, datatype, peer, schedule, false);
268 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
269 return res;
270 }
271 }
272 }
273
274 return OMPI_SUCCESS;
275 }
276
277
278 static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) {
279 int res;
280
281
282 if(rank == root) {
283 for (int peer = 0 ; peer < p ; ++peer) {
284 if (peer != root) {
285
286 res = NBC_Sched_send (buffer, false, count, datatype, peer, schedule, false);
287 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
288 return res;
289 }
290 }
291 }
292 } else {
293
294 res = NBC_Sched_recv (buffer, false, count, datatype, root, schedule, false);
295 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
296 return res;
297 }
298 }
299
300 return OMPI_SUCCESS;
301 }
302
303
304 static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, size_t size) {
305 int res, vrank, rpeer, speer, numfrag, fragcount, thiscount;
306 MPI_Aint ext;
307 char *buf;
308
309 RANK2VRANK(rank, vrank, root);
310 VRANK2RANK(rpeer, vrank-1, root);
311 VRANK2RANK(speer, vrank+1, root);
312 res = ompi_datatype_type_extent(datatype, &ext);
313 if (MPI_SUCCESS != res) {
314 NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res);
315 return res;
316 }
317
318 if (count == 0) {
319 return OMPI_SUCCESS;
320 }
321
322 numfrag = count * size/fragsize;
323 if ((count * size) % fragsize != 0) {
324 numfrag++;
325 }
326
327 fragcount = count/numfrag;
328
329 for (int fragnum = 0 ; fragnum < numfrag ; ++fragnum) {
330 buf = (char *) buffer + fragnum * fragcount * ext;
331 thiscount = fragcount;
332 if (fragnum == numfrag-1) {
333
334 thiscount = count - fragcount * fragnum;
335 }
336
337
338 if (vrank != 0) {
339 res = NBC_Sched_recv (buf, false, thiscount, datatype, rpeer, schedule, true);
340 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
341 return res;
342 }
343 }
344
345
346 if (vrank != p-1) {
347 res = NBC_Sched_send (buf, false, thiscount, datatype, speer, schedule, false);
348 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
349 return res;
350 }
351
352
353 if (vrank == 0) {
354 res = NBC_Sched_barrier (schedule);
355 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
356 return res;
357 }
358 }
359 }
360 }
361
362 return OMPI_SUCCESS;
363 }
364
365
366
367
368
369
370
371
372
373 static inline int bcast_sched_knomial(
374 int rank, int comm_size, int root, NBC_Schedule *schedule, void *buf,
375 int count, MPI_Datatype datatype, int knomial_radix)
376 {
377 int res = OMPI_SUCCESS;
378
379
380 int vrank = (rank - root + comm_size) % comm_size;
381 int mask = 0x1;
382 while (mask < comm_size) {
383 if (vrank % (knomial_radix * mask)) {
384 int parent = vrank / (knomial_radix * mask) * (knomial_radix * mask);
385 parent = (parent + root) % comm_size;
386 res = NBC_Sched_recv(buf, false, count, datatype, parent, schedule, true);
387 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
388 break;
389 }
390 mask *= knomial_radix;
391 }
392 mask /= knomial_radix;
393
394
395 while (mask > 0) {
396 for (int r = 1; r < knomial_radix; r++) {
397 int child = vrank + mask * r;
398 if (child < comm_size) {
399 child = (child + root) % comm_size;
400 res = NBC_Sched_send(buf, false, count, datatype, child, schedule, false);
401 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
402 }
403 }
404 mask /= knomial_radix;
405 }
406
407 cleanup_and_return:
408 return res;
409 }
410
411 static int nbc_bcast_inter_init(void *buffer, int count, MPI_Datatype datatype, int root,
412 struct ompi_communicator_t *comm, ompi_request_t ** request,
413 struct mca_coll_base_module_2_3_0_t *module, bool persistent) {
414 int res;
415 NBC_Schedule *schedule;
416 ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
417
418 schedule = OBJ_NEW(NBC_Schedule);
419 if (OPAL_UNLIKELY(NULL == schedule)) {
420 return OMPI_ERR_OUT_OF_RESOURCE;
421 }
422
423 if (root != MPI_PROC_NULL) {
424
425 if (root == MPI_ROOT) {
426 int remsize;
427
428 remsize = ompi_comm_remote_size (comm);
429
430 for (int peer = 0 ; peer < remsize ; ++peer) {
431
432 res = NBC_Sched_send (buffer, false, count, datatype, peer, schedule, false);
433 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
434 OBJ_RELEASE(schedule);
435 return res;
436 }
437 }
438 } else {
439
440 res = NBC_Sched_recv (buffer, false, count, datatype, root, schedule, false);
441 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
442 OBJ_RELEASE(schedule);
443 return res;
444 }
445 }
446 }
447
448 res = NBC_Sched_commit (schedule);
449 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
450 OBJ_RELEASE(schedule);
451 return res;
452 }
453
454 res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, NULL);
455 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
456 OBJ_RELEASE(schedule);
457 return res;
458 }
459
460 return OMPI_SUCCESS;
461 }
462
463 int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root,
464 struct ompi_communicator_t *comm, ompi_request_t ** request,
465 struct mca_coll_base_module_2_3_0_t *module) {
466 int res = nbc_bcast_inter_init(buffer, count, datatype, root,
467 comm, request, module, false);
468 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
469 return res;
470 }
471
472 res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
473 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
474 NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
475 *request = &ompi_request_null.request;
476 return res;
477 }
478
479 return OMPI_SUCCESS;
480 }
481
482 int ompi_coll_libnbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int root,
483 struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
484 struct mca_coll_base_module_2_3_0_t *module) {
485 int res = nbc_bcast_init(buffer, count, datatype, root,
486 comm, request, module, true);
487 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
488 return res;
489 }
490
491 return OMPI_SUCCESS;
492 }
493
494 int ompi_coll_libnbc_bcast_inter_init(void *buffer, int count, MPI_Datatype datatype, int root,
495 struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
496 struct mca_coll_base_module_2_3_0_t *module) {
497 int res = nbc_bcast_inter_init(buffer, count, datatype, root,
498 comm, request, module, true);
499 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
500 return res;
501 }
502
503 return OMPI_SUCCESS;
504 }